Hive Fresco Play Hands-on Solutions - Hacker Rank

Hive Fresco Play hands-on solutions: learn advanced data processing concepts in Hive, including the Hive data model, DDL, DML, SQL, JOINs, partitioning, bucketing, and user-defined functions.

Lab 1: Welcome to Hive Hands-on - Creating Hive tables.

Solution 1: Creating Hive tables.




-- Open a Terminal and type 'hive'.
user1@kq9ca59d26w:/projects/challenge$ hive

-- Task 1: Create database named 'hive_tables'.
hive > create database hive_tables;

hive > use hive_tables;

-- Task 2: Under the hive_tables database, create a managed table named bank_internal. Get the column names for the table from the CSV file. Make sure to skip the header while loading the data into the table.

hive > create table bank_internal(age INT, job STRING, marital STRING, balance INT, loan STRING, contact STRING, month STRING, duration INT) row format delimited fields terminated by ',' lines terminated by '\n' tblproperties('skip.header.line.count'='1');
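
-- Optional check (not required by the lab): 'describe formatted' should report "Table Type: MANAGED_TABLE",
-- confirming bank_internal was created as a managed (internal) table.
hive > describe formatted bank_internal;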

-- Task 3: Load the CSV data into the bank_internal table.
hive > load data local inpath '/projects/challenge/bank.csv' into table bank_internal;
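
-- Sanity check (optional): the header row should be skipped, so only data rows appear.
hive > select * from bank_internal limit 5;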

-- Task 4: Create a directory named '/bank_external' in HDFS. (Hint: open a new Terminal and run this command.)
user1@kq9ca59d26w:/projects/challenge$ hdfs dfs -mkdir -p /bank_external

-- Task 5: Create an external table named bank_external and set the location as '/bank_external'. Get the column names for the table from the CSV file. Make sure to skip the header while loading the data into the table.

hive > create external table bank_external(age INT, job STRING, marital STRING, balance INT, loan STRING, contact STRING, month STRING, duration INT) row format delimited fields terminated by ',' lines terminated by '\n' location '/bank_external' tblproperties('skip.header.line.count'='1');

-- Task 6: Copy the bank.csv file into the bank_external table's HDFS location.
hive > load data local inpath '/projects/challenge/bank.csv' into table bank_external;
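
-- Optional check: loading into an external table copies the file into its LOCATION,
-- so bank.csv should now be listed under /bank_external (run in the second Terminal).
user1@kq9ca59d26w:/projects/challenge$ hdfs dfs -ls /bank_external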

Lab 2: Welcome to Hive Hands-on - Insert the data.

Solution 2: Insert the data.




-- Open a new Terminal and type 'hive'.

user@x916p8e7134d:/projects/challenge$ hive

-- Task 1: Create a database named hive_insert.

hive > create database hive_insert;
hive > use hive_insert;

-- Task 2: Under the 'hive_insert' database, create a table named 'bank'. Get the column names for the table from the CSV file.
-- Make sure to skip the header while loading the data into the table.

hive > create table bank(age INT, job STRING, marital STRING, balance DOUBLE, loan STRING, contact STRING, month STRING, duration INT) row format delimited fields terminated by ',' lines terminated by '\n' tblproperties('skip.header.line.count'='1');

-- Task 3: Load the bank.csv file into the bank table.

hive > load data local inpath '/projects/challenge/bank.csv' into table bank;

-- Task 4: Create a new table named 'bank1' with the following columns: "age", "job", "balance", "month", "duration".

hive > create table bank1(age INT, job STRING, balance DOUBLE, month STRING, duration INT);


-- Task 5: From the bank table, fetch the records having age greater than 30 and duration greater than 300, and load them into the bank1 table.

hive > insert into bank1(age, job, balance, month, duration) select age, job, balance, month, duration from bank where age>30 and duration>300;

-- Task 6: Load the below records into the bank1 table:
--(32,'technician', 900, 'mar', 400), 
--(33,'admin',900, 'may', 500 ), 
--(35,'services', 400, "jul",400 ),
--(36,'blue-collar', 600, "aug",600 ),
--(38,'technician', 800, "sep",700 )

hive > insert into bank1 values (32,'technician',900,'mar',400), (33,'admin',900,'may',500), (35,'services',400,'jul',400), (36,'blue-collar',600,'aug',600), (38,'technician',800,'sep',700);
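
-- Optional check: the row count of bank1 should have grown by exactly 5 after the insert above.
hive > select count(*) from bank1;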

Lab 3: Welcome to Hive Hands-on - Query the data.

Solution 3: Query the data.




-- Open a new Terminal and type 'hive'.
user@z9kv8w2762x:/projects/challenge$  hive

-- Task 1: Create a database named hive_query.
hive > create database hive_query;
hive > use hive_query;

-- Task 2: Under the hive_query database, create a table named 'bank'. Get the column names for the table from the CSV file. Make sure to skip the header while loading the data into the table.

hive > create table bank(age INT, job STRING, marital STRING, balance DOUBLE, loan STRING, contact STRING, month STRING, duration INT) row format delimited fields terminated by ',' lines terminated by '\n' tblproperties('skip.header.line.count'='1');

-- Task 3: Load the bank.csv file into the bank table.
hive > load data local inpath '/projects/challenge/bank.csv' into table bank;

-- Task 4: Open a new Terminal and create an HDFS directory '/output1'.
user@z9kv8w2762x:/projects/challenge$  hadoop fs -mkdir -p /output1

-- Task 5: Fetch the records which have job = 'technician', age less than 60, and month in ('jan', 'oct', 'jul'). Save the output in the HDFS directory '/output1'.

hive > insert overwrite directory '/output1' row format delimited fields terminated by ','  select * from bank where job ='technician' and age<60 and month in("jan", "oct","jul");
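
-- Optional check: the hive shell's built-in dfs command can display the exported files;
-- the same check works for /output2 and /output3 below.
hive > dfs -cat /output1/*;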

-- Task 6: Open a new Terminal and create an HDFS directory '/output2'.
user@z9kv8w2762x:/projects/challenge$  hadoop fs -mkdir -p /output2

-- Task 7: Get the count of the records grouped by the job column. Save the output in the HDFS directory '/output2'.
hive > insert overwrite directory '/output2' row format delimited fields terminated by ','  select count(*) from bank group by job;


-- Task 8: Open a new Terminal and create an HDFS directory '/output3'.
user@z9kv8w2762x:/projects/challenge$  hadoop fs -mkdir -p /output3

-- Task 9: Get the maximum duration for each job and save the output in the HDFS directory '/output3'.
hive > insert overwrite directory '/output3' row format delimited fields terminated by ',' select max(duration), job from bank group by job;

Lab 4: Welcome to Hive Hands-on - Partition table.

Solution 4: Partition table.





-- Open a new Terminal and type 'hive'.
user@t4n19po6f891k:/projects/challenge$  hive

-- Task 1: Create a database named 'hive_part' and switch to it, so the tables below land under hive_part.
hive > create database hive_part;
hive > use hive_part;

-- Task 2: Create a temporary table and load the Transactions.csv records into the table. (Use the STRING datatype for the tran_date column.)

hive > CREATE TEMPORARY TABLE tmp1(transaction_id STRING, cust_id INT, tran_date STRING, prod_subcat_code INT, prod_cat_code INT, Qty INT, Rate INT, Tax DOUBLE, total_amt DOUBLE, Store_type STRING) row format delimited fields terminated by ',' lines terminated by '\n';

hive > load data local inpath '/projects/challenge/Transactions.csv' into table tmp1;

-- Enable dynamic partitioning so the partition values in Task 4 can come from the SELECT itself.
hive > set hive.exec.dynamic.partition=true;
hive > set hive.exec.dynamic.partition.mode=nonstrict;

-- Task 3: Under the hive_part database, create a table named transaction_part with the following columns:
-- (transaction_id, cust_id, tran_date, Qty, Rate, Tax, total_amt, Store_type). The partition column should be Store_type.

hive > create table transaction_part(transaction_id STRING, cust_id INT, tran_date STRING, Qty INT, Rate INT, Tax DOUBLE, total_amt DOUBLE) partitioned by (Store_type STRING) row format delimited fields terminated by ',';


-- Task 4: Fetch the records from the temporary table where Qty is greater than 2 and do a dynamic insert into the transaction_part table.

hive > INSERT OVERWRITE TABLE transaction_part PARTITION (Store_type) SELECT transaction_id, cust_id, tran_date, Qty, Rate, Tax, total_amt, Store_type FROM tmp1 where Qty>2;
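
-- Optional check: list the Store_type partitions the dynamic insert created.
hive > show partitions transaction_part;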

-- Task 5: Open a new Terminal and create a new HDFS directory '/store'.
user@t4n19po6f891k:/projects/challenge$  hdfs dfs -mkdir -p /store

-- Task 6: Fetch the records from the transaction_part table where the store type is 'e-Shop' and store the result in the HDFS directory '/store'. The fields should be terminated by ','.
 
hive > insert overwrite directory '/store' row format delimited fields terminated by ','  select * from transaction_part where Store_type  = 'e-Shop';
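
-- Optional check: view the exported 'e-Shop' records.
hive > dfs -cat /store/*;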

Lab 5: Welcome to Hive Hands-on - Bucketing table.

Solution 5: Bucketing table.




-- Open a Terminal and type "hive".
user1@kq9ca59d26w:/projects/challenge$ hive

-- Task 1: Create a database named 'hive_bucket' and switch to it.
hive > create database hive_bucket;
hive > use hive_bucket;

-- Task 2: Create a temporary table and load the Transactions.csv records into the table. (Use the STRING datatype for the tran_date column.)

hive > CREATE TEMPORARY TABLE tmp1(transaction_id STRING, cust_id INT, tran_date STRING, prod_subcat_code INT, prod_cat_code INT, Qty INT, Rate INT, Tax DOUBLE, total_amt DOUBLE, Store_type STRING) row format delimited fields terminated by ',' lines terminated by '\n';

hive > load data local inpath '/projects/challenge/Transactions.csv' into table tmp1;

-- Enable dynamic partitioning for the insert in Task 4.
hive > set hive.exec.dynamic.partition=true;
hive > set hive.exec.dynamic.partition.mode=nonstrict;

-- Task 3: Under the 'hive_bucket' database, create a table named transaction_bucket with the following columns:
-- (transaction_id, cust_id, tran_date, Qty, Rate, Tax, total_amt, Store_type). Partition by Store_type and cluster by the cust_id column into 3 buckets.

hive > create table transaction_bucket(transaction_id STRING, cust_id INT, tran_date STRING, Qty INT, Rate INT, Tax DOUBLE, total_amt DOUBLE) partitioned by (Store_type STRING) clustered by (cust_id) into 3 buckets row format delimited fields terminated by ',';

-- Task 4: Fetch the records from the temporary table where Qty is greater than 0 and do a dynamic insert into the transaction_bucket table.

hive > INSERT OVERWRITE TABLE transaction_bucket PARTITION (Store_type) SELECT transaction_id, cust_id, tran_date, Qty, Rate, Tax, total_amt, Store_type FROM tmp1 where Qty>0;
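
-- Optional check: TABLESAMPLE can read one bucket at a time, confirming the rows
-- were hashed on cust_id into 3 buckets.
hive > select * from transaction_bucket tablesample(bucket 1 out of 3 on cust_id) limit 5;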


-- Task 5: Run the following commands in the hive shell (they enable ACID transactions, which the UPDATE in Task 8 requires):

hive > SET hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
hive > SET hive.support.concurrency=true;
hive > SET hive.exec.dynamic.partition.mode=nonstrict;
hive > SET hive.compactor.initiator.on=true;
hive > SET hive.compactor.worker.threads=1;

-- Task 6: Create a new table named bucket1 with the following columns:
-- (transaction_id, cust_id, tran_date, Qty, Rate, Tax, total_amt, Store_type). Cluster by cust_id into 3 buckets, store the table as ORC, and set 'transactional' to 'true' in the table properties.

hive > create table bucket1(transaction_id STRING, cust_id INT, tran_date STRING, Qty INT, Rate INT, Tax DOUBLE, total_amt DOUBLE, Store_type STRING) clustered by (cust_id) into 3 buckets row format delimited fields terminated by ',' stored as ORC tblproperties('transactional'='true');



-- Task 7: Insert the below values into the bucket1 table:

hive > insert into bucket1 values ('80712190438',270351,'28-02-2014',-5,-772,405.3,-4265.3,'e-Shop'),
('29258453508',270384,'27-02-2014',-5,-1497,785.925,-8270.925,'e-Shop'),
('93274880719',271509,'24-02-2014',-3,-1363,429.345,-4518.345,'e-Shop'),
('97439039119',272357,'23-02-2014',-2,-824,173.04,-1821.04,'TeleShop'),
('45649838090',273667,'22-02-2014',-1,-1450,152.25,-1602.25,'e-Shop'),
('22643667930',271489,'22-02-2014',-1,-1225,128.625,-1353.625,'TeleShop'),
('79792372943',275108,'22-02-2014',-3,-908,286.02,-3010.02,'MBR'),
('50076728598',269014,'21-02-2014',-4,-581,244.02,-2568.02,'e-Shop');

-- Task 8: Set 'tran_date' to '25-02-2014' for the 'e-Shop' store type.

hive > update bucket1 set tran_date='25-02-2014' where Store_type='e-Shop';
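
-- Optional check: every 'e-Shop' row should now have tran_date '25-02-2014'.
hive > select distinct tran_date from bucket1 where Store_type='e-Shop';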
