Monday, December 25, 2017

Hive Partition by Examples

[donghua@cdh-vm ~]$  beeline -u jdbc:hive2://cdh-vm.dbaglobe.com:10000/default -n donghua
Connecting to jdbc:hive2://cdh-vm.dbaglobe.com:10000/default
Connected to: Apache Hive (version 1.1.0-cdh5.13.1)
Driver: Hive JDBC (version 1.1.0-cdh5.13.0)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 1.1.0-cdh5.13.0 by Apache Hive
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select substr(month,0,4) as year,month,company,fleet from monthly_taxi_fleet limit 10;
+-------+----------+-------------------------+--------+--+
| year  |  month   |         company         | fleet  |
+-------+----------+-------------------------+--------+--+
| 2005  | 2005-01  | Comfort                 | 9952   |
| 2005  | 2005-01  | CityCab                 | 4965   |
| 2005  | 2005-01  | SMRT                    | 2441   |
| 2005  | 2005-01  | YTC                     | 1223   |
| 2005  | 2005-01  | Individual Yellow- Top  | 696    |
| 2005  | 2005-01  | Smart                   | 320    |
| 2005  | 2005-01  | TransCab                | 560    |
| 2005  | 2005-01  | Premier                 | 370    |
| 2005  | 2005-02  | Comfort                 | 10046  |
| 2005  | 2005-02  | CityCab                 | 4968   |
+-------+----------+-------------------------+--------+--+

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> create table monthly_taxi_fleet_company_year_part
. . . . . . . . . . . . . . . . . . . . . . .> (month char(7),fleet smallint)
. . . . . . . . . . . . . . . . . . . . . . .> partitioned by (company varchar(50),year char(4))
. . . . . . . . . . . . . . . . . . . . . . .> stored as avro;


0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> desc monthly_taxi_fleet_company_year_part;

+--------------------------+-----------------------+-----------------------+--+
|         col_name         |       data_type       |        comment        |
+--------------------------+-----------------------+-----------------------+--+
| month                    | char(7)               |                       |
| fleet                    | int                   |                       |
| company                  | varchar(50)           |                       |
| year                     | char(4)               |                       |
|                          | NULL                  | NULL                  |
| # Partition Information  | NULL                  | NULL                  |
| # col_name               | data_type             | comment               |
|                          | NULL                  | NULL                  |
| company                  | varchar(50)           |                       |
| year                     | char(4)               |                       |
+--------------------------+-----------------------+-----------------------+--+


0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into monthly_taxi_fleet_company_year_part (month,fleet,company,year)
. . . . . . . . . . . . . . . . . . . . . . .> values('2017-10',10000,'Comfort','2017');
Error: Error while compiling statement: FAILED: SemanticException 1:50 '[year, company]' in insert schema specification are not found among regular columns of default.monthly_taxi_fleet_company_year_part nor dynamic partition columns.. Error encountered near token 'year' (state=42000,code=40000)

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into monthly_taxi_fleet_company_year_part
. . . . . . . . . . . . . . . . . . . . . . .> partition (company='Comfort',year='2017')
. . . . . . . . . . . . . . . . . . . . . . .> values('2017-10',10000);

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select * from monthly_taxi_fleet_company_year_part o;
+----------+----------+------------+---------+--+
| o.month  | o.fleet  | o.company  | o.year  |
+----------+----------+------------+---------+--+
| 2017-10  | 10000    | Comfort    | 2017    |
+----------+----------+------------+---------+--+
1 row selected (0.102 seconds)

[donghua@cdh-vm ~]$ hdfs dfs -ls -R /user/hive/warehouse/monthly_taxi_fleet_company_year_part
drwxrwxrwt   - donghua hive          0 2017-12-25 21:25 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=Comfort
drwxrwxrwt   - donghua hive          0 2017-12-25 21:25 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=Comfort/year=2017
-rwxrwxrwt   1 donghua hive        318 2017-12-25 21:25 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=Comfort/year=2017/000000_0

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> drop table monthly_taxi_fleet_company_year_part;

-- Dynamic partition example

-- Default for current session only,
-- To make it permanently, edit properties in hive-site.xml


0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> set hive.exec.dynamic.partition = true';
No rows affected (0.003 seconds)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> set hive.exec.dynamic.partition.mode = nonstrict;
No rows affected (0.004 seconds)

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into monthly_taxi_fleet_company_year_part
. . . . . . . . . . . . . . . . . . . . . . .> partition (company,year)
. . . . . . . . . . . . . . . . . . . . . . .> values('2017-10',10000,'Comfort','2017');


0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into monthly_taxi_fleet_company_year_part
. . . . . . . . . . . . . . . . . . . . . . .> partition (company,year)
. . . . . . . . . . . . . . . . . . . . . . .> select month,fleet,company,substr(month,0,4) as year from monthly_taxi_fleet;
INFO  : Compiling command(queryId=hive_20171225221515_3c4f0c8e-9d61-40e4-9d21-9e6c3249c230): insert into monthly_taxi_fleet_company_year_part
partition (company,year)
select month,fleet,company,substr(month,0,4) as year from monthly_taxi_fleet
INFO  : Semantic Analysis Completed
INFO  : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:_col0, type:char(7), comment:null), FieldSchema(name:_col1, type:int, comment:null), FieldSchema(name:_col2, type:varchar(50), comment:null), FieldSchema(name:_col3, type:string, comment:null)], properties:null)
INFO  : Completed compiling command(queryId=hive_20171225221515_3c4f0c8e-9d61-40e4-9d21-9e6c3249c230); Time taken: 0.166 seconds
INFO  : Concurrency mode is disabled, not creating a lock manager
INFO  : Executing command(queryId=hive_20171225221515_3c4f0c8e-9d61-40e4-9d21-9e6c3249c230): insert into monthly_taxi_fleet_company_year_part
partition (company,year)
select month,fleet,company,substr(month,0,4) as year from monthly_taxi_fleet
INFO  : Query ID = hive_20171225221515_3c4f0c8e-9d61-40e4-9d21-9e6c3249c230
INFO  : Total jobs = 3
INFO  : Launching Job 1 out of 3
INFO  : Starting task [Stage-1:MAPRED] in serial mode
INFO  : Number of reduce tasks is set to 0 since there's no reduce operator
INFO  : number of splits:1
INFO  : Submitting tokens for job: job_1513984921012_0069
INFO  : The url to track the job:
http://cdh-vm.dbaglobe.com:8088/proxy/application_1513984921012_0069/
INFO  : Starting Job = job_1513984921012_0069, Tracking URL = http://cdh-vm.dbaglobe.com:8088/proxy/application_1513984921012_0069/
INFO  : Kill Command = /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/lib/hadoop/bin/hadoop job  -kill job_1513984921012_0069
INFO  : Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
INFO  : 2017-12-25 22:15:51,245 Stage-1 map = 0%,  reduce = 0%
INFO  : 2017-12-25 22:16:04,336 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 4.81 sec
INFO  : MapReduce Total cumulative CPU time: 4 seconds 810 msec
INFO  : Ended Job = job_1513984921012_0069
INFO  : Starting task [Stage-7:CONDITIONAL] in serial mode
INFO  : Stage-4 is selected by condition resolver.
INFO  : Stage-3 is filtered out by condition resolver.
INFO  : Stage-5 is filtered out by condition resolver.
INFO  : Starting task [Stage-4:MOVE] in serial mode
INFO  : Moving data to: hdfs://cdh-vm.dbaglobe.com:8020/user/hive/warehouse/monthly_taxi_fleet_company_year_part/.hive-staging_hive_2017-12-25_22-15-43_889_5842295461208605082-6/-ext-10000 from hdfs://cdh-vm.dbaglobe.com:8020/user/hive/warehouse/monthly_taxi_fleet_company_year_part/.hive-staging_hive_2017-12-25_22-15-43_889_5842295461208605082-6/-ext-10002
INFO  : Starting task [Stage-0:MOVE] in serial mode
INFO  : Loading data to table default.monthly_taxi_fleet_company_year_part partition (company=null, year=null) from hdfs://cdh-vm.dbaglobe.com:8020/user/hive/warehouse/monthly_taxi_fleet_company_year_part/.hive-staging_hive_2017-12-25_22-15-43_889_5842295461208605082-6/-ext-10000
INFO  :          Time taken for load dynamic partitions : 15475
INFO  :         Loading partition {company=CityCab, year=2005}
INFO  :         Loading partition {company=Prime, year=2008}
INFO  :         Loading partition {company=CityCab, year=2009}
INFO  :         Loading partition {company=SMRT, year=2014}
INFO  :         Loading partition {company=Individual Yellow- Top, year=2013}
INFO  :         Loading partition {company=Comfort, year=2016}
<omitted for reading clarity>
INFO  :         Loading partition {company=Comfort, year=2008}
INFO  :         Loading partition {company=TransCab, year=2007}
INFO  :         Loading partition {company=Individual Yellow- Top, year=2008}
INFO  :         Loading partition {company=Prime, year=2016}
INFO  :         Loading partition {company=Prime, year=2013}
INFO  :         Loading partition {company=TransCab, year=2008}
INFO  :         Loading partition {company=Comfort, year=2009}
INFO  :         Loading partition {company=TransCab, year=2011}
INFO  :          Time taken for adding to write entity : 3
INFO  : Starting task [Stage-2:STATS] in serial mode
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2005} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2006} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2007} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2008} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2009} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2010} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2011} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2012} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2013} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2014} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2015} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=CityCab, year=2016} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=Comfort, year=2005} stats: [numFiles=1, numRows=12, totalSize=462, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=Comfort, year=2006} stats: [numFiles=1, numRows=12, totalSize=462, rawDataSize=0]

<omitted for reading clarity>

numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=YTC, year=2005} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=YTC, year=2006} stats: [numFiles=1, numRows=12, totalSize=450, rawDataSize=0]
INFO  : Partition default.monthly_taxi_fleet_company_year_part{company=YTC, year=2007} stats: [numFiles=1, numRows=12, totalSize=447, rawDataSize=0]
INFO  : MapReduce Jobs Launched:
INFO  : Stage-Stage-1: Map: 1   Cumulative CPU: 4.81 sec   HDFS Read: 31633 HDFS Write: 50547 SUCCESS
INFO  : Total MapReduce CPU Time Spent: 4 seconds 810 msec
INFO  : Completed executing command(queryId=hive_20171225221515_3c4f0c8e-9d61-40e4-9d21-9e6c3249c230); Time taken: 40.317 seconds
INFO  : OK
No rows affected (40.505 seconds)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>


0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> show partitions monthly_taxi_fleet_company_year_part;
+-------------------------------------------+--+
|                 partition                 |
+-------------------------------------------+--+
| company=CityCab/year=2005                 |
| company=CityCab/year=2006                 |
| company=CityCab/year=2007                 |
| company=CityCab/year=2008                 |
| company=CityCab/year=2009                 |
| company=CityCab/year=2010                 |
| company=CityCab/year=2011                 |
| company=CityCab/year=2012                 |
| company=CityCab/year=2013                 |
| company=CityCab/year=2014                 |
<omitted for reading clarity>
| company=TransCab/year=2012                |
| company=TransCab/year=2013                |
| company=TransCab/year=2014                |
| company=TransCab/year=2015                |
| company=TransCab/year=2016                |
| company=YTC/year=2005                     |
| company=YTC/year=2006                     |
| company=YTC/year=2007                     |
+-------------------------------------------+--+
94 rows selected (0.093 seconds)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>

[donghua@cdh-vm ~]$ hdfs dfs -ls -R /user/hive/warehouse/monthly_taxi_fleet_company_year_part
drwxrwxrwt   - donghua hive          0 2017-12-25 22:16 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=CityCab
drwxrwxrwt   - donghua hive          0 2017-12-25 22:16 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=CityCab/year=2005
-rwxrwxrwt   1 donghua hive        450 2017-12-25 22:16 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=CityCab/year=2005/000000_0
drwxrwxrwt   - donghua hive          0 2017-12-25

<omitted for reading clarity>
-rwxrwxrwt   1 donghua hive        450 2017-12-25 22:15 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=TransCab/year=2016/000000_0
drwxrwxrwt   - donghua hive          0 2017-12-25 22:16 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=YTC
drwxrwxrwt   - donghua hive          0 2017-12-25 22:16 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=YTC/year=2005
-rwxrwxrwt   1 donghua hive        450 2017-12-25 22:16 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=YTC/year=2005/000000_0
drwxrwxrwt   - donghua hive          0 2017-12-25 22:16 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=YTC/year=2006
-rwxrwxrwt   1 donghua hive        450 2017-12-25 22:16 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=YTC/year=2006/000000_0
drwxrwxrwt   - donghua hive          0 2017-12-25 22:16 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=YTC/year=2007
-rwxrwxrwt   1 donghua hive        447 2017-12-25 22:16 /user/hive/warehouse/monthly_taxi_fleet_company_year_part/company=YTC/year=2007/000000_0

Possible Errors & Solutions:

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into monthly_taxi_fleet_company_year_part
. . . . . . . . . . . . . . . . . . . . . . .> partition (company,year)
. . . . . . . . . . . . . . . . . . . . . . .> select month,fleet,company,substr(month,0,4) as year from monthly_taxi_fleet;
INFO  : Compiling command(queryId=hive_20171225214545_54b5b6f8-8a03-451a-929f-fb175747a0ec): insert into monthly_taxi_fleet_company_year_part
partition (company,year)
select month,fleet,company,substr(month,0,4) as year from monthly_taxi_fleet
INFO  : Semantic Analysis Completed
INFO  : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:_col0, type:char(7), comment:null), FieldSchema(name:_col1, type:int, comment:null), FieldSchema(name:_col2, type:varchar(50), comment:null), FieldSchema(name:_col3, type:string, comment:null)], properties:null)
INFO  : Completed compiling command(queryId=hive_20171225214545_54b5b6f8-8a03-451a-929f-fb175747a0ec); Time taken: 0.166 seconds
INFO  : Concurrency mode is disabled, not creating a lock manager
INFO  : Executing command(queryId=hive_20171225214545_54b5b6f8-8a03-451a-929f-fb175747a0ec): insert into monthly_taxi_fleet_company_year_part
partition (company,year)
select month,fleet,company,substr(month,0,4) as year from monthly_taxi_fleet
INFO  : Query ID = hive_20171225214545_54b5b6f8-8a03-451a-929f-fb175747a0ec
INFO  : Total jobs = 3
INFO  : Launching Job 1 out of 3
INFO  : Starting task [Stage-1:MAPRED] in serial mode
INFO  : Number of reduce tasks is set to 0 since there's no reduce operator
INFO  : number of splits:1
INFO  : Submitting tokens for job: job_1513984921012_0065
INFO  : The url to track the job:
http://cdh-vm.dbaglobe.com:8088/proxy/application_1513984921012_0065/
INFO  : Starting Job = job_1513984921012_0065, Tracking URL = http://cdh-vm.dbaglobe.com:8088/proxy/application_1513984921012_0065/
INFO  : Kill Command = /opt/cloudera/parcels/CDH-5.13.1-1.cdh5.13.1.p0.2/lib/hadoop/bin/hadoop job  -kill job_1513984921012_0065
INFO  : Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
INFO  : 2017-12-25 21:45:12,194 Stage-1 map = 0%,  reduce = 0%
INFO  : 2017-12-25 21:46:12,857 Stage-1 map = 0%,  reduce = 0%
INFO  : 2017-12-25 21:46:19,235 Stage-1 map = 100%,  reduce = 0%
ERROR : Ended Job = job_1513984921012_0065 with errors
ERROR : FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
INFO  : MapReduce Jobs Launched:
INFO  : Stage-Stage-1: Map: 1   HDFS Read: 0 HDFS Write: 0 FAIL
INFO  : Total MapReduce CPU Time Spent: 0 msec
INFO  : Completed executing command(queryId=hive_20171225214545_54b5b6f8-8a03-451a-929f-fb175747a0ec); Time taken: 76.105 seconds
Error: Error while processing statement: FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask (state=08S01,code=2)

2017-12-25 21:45:24,056 FATAL [IPC Server handler 17 on 44101] org.apache.hadoop.mapred.TaskAttemptListenerImpl: Task: attempt_1513984921012_0065_m_000000_0 - exited : java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"month":"2006-02","company":"YTC","fleet":876}
    at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:179)
    at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
    at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:459)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
    at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1917)
    at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"month":"2006-02","company":"YTC","fleet":876}
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:507)
    at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:170)
    ... 8 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveFatalException: [Error 20004]: Fatal error occurred when node tried to create too many dynamic partitions. The maximum number of dynamic partitions is controlled by hive.exec.max.dynamic.partitions and hive.exec.max.dynamic.partitions.pernode. Maximum was set to: 100
    at org.apache.hadoop.hive.ql.exec.FileSinkOperator.getDynOutPaths(FileSinkOperator.java:897)
    at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:677)
    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:815)
    at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:84)
    at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:815)
    at org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:98)
    at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:157)
    at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:497)
    ... 9 more

   
1 row selected (44.065 seconds)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> set hive.exec.max.dynamic.partition=1000;
No rows affected (0.006 seconds)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> set hive.exec.max.dynamic.partitions.pernode=1000;
No rows affected (0.002 seconds)