Saturday, December 23, 2017

Load mass data into Hive

  • Method 1: load local data (text)
  • Method 2: Insert as select

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> drop table monthly_taxi_fleet3;
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> create table monthly_taxi_fleet3 (
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> month char(7), company varchar(50),fleet smallint)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> with serdeproperties (
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>   "separatorchar" = ",",
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>    "quotechar"     = "",
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>   "escapechar"    = "\\"
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> )
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> stored as textfile
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> tblproperties ("skip.header.line.count"="1");


0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> load data local inpath '/tmp/monthly_taxi_fleet.csv' overwrite into table monthly_taxi_fleet3;
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select * from monthly_taxi_fleet3 limit 10;
+----------------------------+------------------------------+----------------------------+--+
| monthly_taxi_fleet3.month  | monthly_taxi_fleet3.company  | monthly_taxi_fleet3.fleet  |
+----------------------------+------------------------------+----------------------------+--+
| 2005-01                    | Comfort                      | 9952                       |
| 2005-01                    | CityCab                      | 4965                       |
| 2005-01                    | SMRT                         | 2441                       |
| 2005-01                    | YTC                          | 1223                       |
| 2005-01                    | Individual Yellow- Top       | 696                        |
| 2005-01                    | Smart                        | 320                        |
| 2005-01                    | TransCab                     | 560                        |
| 2005-01                    | Premier                      | 370                        |
| 2005-02                    | Comfort                      | 10046                      |
| 2005-02                    | CityCab                      | 4968                       |
+----------------------------+------------------------------+----------------------------+--+
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> create table monthly_taxi_fleet4 (
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> month char(7), company varchar(50),fleet smallint)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> stored as avro;

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert overwrite table monthly_taxi_fleet4
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select * from monthly_taxi_fleet3 limit 10;


0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select count(*) from monthly_taxi_fleet4;
+------+--+
| _c0  |
+------+--+
| 10   |
+------+--+

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into table monthly_taxi_fleet4
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select * from monthly_taxi_fleet3 limit 10;


0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select count(*) from monthly_taxi_fleet4;
+------+--+
| _c0  |
+------+--+
| 20   |
+------+--+