Saturday, December 23, 2017

Use Hive external table to access CSV format data

[donghua@cdh-vm ~]$ hdfs dfs -cat /user/donghua/monthly_taxi_fleet/* |head -n 11
month,company,taxi_fleet
2005-01,Comfort,9952
2005-01,CityCab,4965
2005-01,SMRT,2441
2005-01,YTC,1223
2005-01,Individual Yellow- Top,696
2005-01,Smart,320
2005-01,TransCab,560
2005-01,Premier,370
2005-02,Comfort,10046
2005-02,CityCab,4968

[donghua@cdh-vm ~]$  beeline -u jdbc:hive2://cdh-vm.dbaglobe.com:10000/default -n donghua --silent=true
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> drop table monthly_taxi_fleet;

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> create external table monthly_taxi_fleet (
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> month char(7), company varchar(50),fleet smallint)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> row format delimited
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> fields terminated by ','
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> location '/user/donghua/monthly_taxi_fleet'
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> tblproperties ("skip.header.line.count"="1");

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select * from monthly_taxi_fleet limit 10;
+---------------------------+-----------------------------+---------------------------+--+
| monthly_taxi_fleet.month  | monthly_taxi_fleet.company  | monthly_taxi_fleet.fleet  |
+---------------------------+-----------------------------+---------------------------+--+
| 2005-01                   | Comfort                     | 9952                      |
| 2005-01                   | CityCab                     | 4965                      |
| 2005-01                   | SMRT                        | 2441                      |
| 2005-01                   | YTC                         | 1223                      |
| 2005-01                   | Individual Yellow- Top      | 696                       |
| 2005-01                   | Smart                       | 320                       |
| 2005-01                   | TransCab                    | 560                       |
| 2005-01                   | Premier                     | 370                       |
| 2005-02                   | Comfort                     | 10046                     |
| 2005-02                   | CityCab                     | 4968                      |
+---------------------------+-----------------------------+---------------------------+--+

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> drop table monthly_taxi_fleet2;

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> create external table monthly_taxi_fleet2 (
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> month char(7), company varchar(50),fleet smallint)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> with serdeproperties (
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>    "separatorchar" = ",",
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>    "quotechar"     = "",
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>    "escapechar"    = "\\"
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> )
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> stored as textfile
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> location '/user/donghua/monthly_taxi_fleet'
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> tblproperties ("skip.header.line.count"="1");

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select * from monthly_taxi_fleet2 limit 10;
+----------------------------+------------------------------+----------------------------+--+
| monthly_taxi_fleet2.month  | monthly_taxi_fleet2.company  | monthly_taxi_fleet2.fleet  |
+----------------------------+------------------------------+----------------------------+--+
| 2005-01                    | Comfort                      | 9952                       |
| 2005-01                    | CityCab                      | 4965                       |
| 2005-01                    | SMRT                         | 2441                       |
| 2005-01                    | YTC                          | 1223                       |
| 2005-01                    | Individual Yellow- Top       | 696                        |
| 2005-01                    | Smart                        | 320                        |
| 2005-01                    | TransCab                     | 560                        |
| 2005-01                    | Premier                      | 370                        |
| 2005-02                    | Comfort                      | 10046                      |
| 2005-02                    | CityCab                      | 4968                       |
+----------------------------+------------------------------+----------------------------+--+

Source data: https://www.mytransport.sg/content/mytransport/home/dataMall.html