Sunday, December 24, 2017

Hive built-in function explode example

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> create table explodeexample (empid smallint, score array<double>);
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> desc explodeexample;
+-----------+----------------+----------+--+
| col_name  |   data_type    | comment  |
+-----------+----------------+----------+--+
| empid     | smallint       |          |
| score     | array<double>  |          |
+-----------+----------------+----------+--+
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into explodeexample values (1,array(80,70.4,99.0,100));
Error: Error while compiling statement: FAILED: SemanticException [Error 10293]: Unable to create temp file for insert values Expression of type TOK_FUNCTION not supported in insert/values (state=42000,code=10293)

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into explodeexample select 1,array(80,70.4,99.0,100);
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into explodeexample select 2,array(70,59.5,80,85,95.6,60);
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select * from explodeexample;
+-----------------------+----------------------------------+--+
| explodeexample.empid  |       explodeexample.score       |
+-----------------------+----------------------------------+--+
| 1                     | [80.0,70.4,99.0,100.0]           |
| 2                     | [70.0,59.5,80.0,85.0,95.6,60.0]  |
+-----------------------+----------------------------------+--+

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select empid,size(score) score_count from explodeexample;;
+--------+--------------+--+
| empid  | score_count  |
+--------+--------------+--+
| 1      | 4            |
| 2      | 6            |
+--------+--------------+--+

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select explode(score) score from explodeexample where empid=1;
+--------+--+
| score  |
+--------+--+
| 80.0   |
| 70.4   |
| 99.0   |
| 100.0  |
+--------+--+

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select empid,explode(score) score from explodeexample where empid=1;
Error: Error while compiling statement: FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions (state=42000,code=10081)

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select empid,exp.score from explodeexample
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def>   lateral view explode(score) exp as score;
+--------+------------+--+
| empid  | exp.score  |
+--------+------------+--+
| 1      | 80.0       |
| 1      | 70.4       |
| 1      | 99.0       |
| 1      | 100.0      |
| 2      | 70.0       |
| 2      | 59.5       |
| 2      | 80.0       |
| 2      | 85.0       |
| 2      | 95.6       |
| 2      | 60.0       |
+--------+------------+—+

Alternative way to insert values into complex data type using UNION ALL

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into explodeexample
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select 3,array(82,73.2,96.0)union all
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select 4,array(56,85.3,82.0,99.9)union all
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select 5,array(65,93.0)union all
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select 6,array(54,55.4,68.0,86);


0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> select * from explodeexample;
+-----------------------+----------------------------------+--+
| explodeexample.empid  |       explodeexample.score       |
+-----------------------+----------------------------------+--+
| 1                     | [80.0,70.4,99.0,100.0]           |
| 2                     | [70.0,59.5,80.0,85.0,95.6,60.0]  |
| 3                     | [82.0,73.2,96.0]                 |
| 4                     | [56.0,85.3,82.0,99.9]            |
| 5                     | [65.0,93.0]                      |
| 6                     | [54.0,55.4,68.0,86.0]            |
+-----------------------+----------------------------------+--+

Other tips to work with Hive functions:

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> desc function case;
+----------------------------------------------------+--+
|                      tab_name                      |
+----------------------------------------------------+--+
| CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - When a = b, returns c; when a = d, return e; else return f |
+----------------------------------------------------+--+
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> desc function extended case;
+----------------------------------------------------+--+
|                      tab_name                      |
+----------------------------------------------------+--+
| CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END - When a = b, returns c; when a = d, return e; else return f |
| Example:                                           |
|  SELECT                                            |
|  CASE deptno                                       |
|    WHEN 1 THEN Engineering                         |
|    WHEN 2 THEN Finance                             |
|    ELSE admin                                      |
|  END,                                              |
|  CASE zone                                         |
|    WHEN 7 THEN Americas                            |
|    ELSE Asia-Pac                                   |
|  END                                               |
|  FROM emp_details                                  |
+----------------------------------------------------+—+

0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> show functions;
+-------------------------+--+
|        tab_name         |
+-------------------------+--+
| !                       |
| !=                      |
| %                       |
| &                       |
| *                       |
| +                       |
| -                       |
| /                       |
| <                       |
| <=                      |
| <=>                     |
| <>                      |
| =                       |
| ==                      |
| >                       |
| >=                      |
| ^                       |
| abs                     |
| acos                    |
| add_months              |
| and                     |
| array                   |
| array_contains          |
| ascii                   |
| asin                    |
| assert_true             |
| atan                    |
| avg                     |
| base64                  |
| between                 |
| bin                     |
| case                    |
| cbrt                    |
| ceil                    |
| ceiling                 |
| coalesce                |
| collect_list            |
| collect_set             |
| compute_stats           |
| concat                  |
| concat_ws               |
| context_ngrams          |
| conv                    |
| corr                    |
| cos                     |
| count                   |
| covar_pop               |
| covar_samp              |
| crc32                   |
| create_union            |
| cume_dist               |
| current_database        |
| current_date            |
| current_timestamp       |
| current_user            |
| date_add                |
| date_format             |
| date_sub                |
| datediff                |
| day                     |
| dayofmonth              |
| dayofweek               |
| decode                  |
| degrees                 |
| dense_rank              |
| div                     |
| e                       |
| elt                     |
| encode                  |
| ewah_bitmap             |
| ewah_bitmap_and         |
| ewah_bitmap_empty       |
| ewah_bitmap_or          |
| exp                     |
| explode                 |
| field                   |
| find_in_set             |
| first_value             |
| floor                   |
| format_number           |
| from_unixtime           |
| from_utc_timestamp      |
| get_json_object         |
| greatest                |
| hash                    |
| hex                     |
| histogram_numeric       |
| hour                    |
| if                      |
| in                      |
| in_file                 |
| index                   |
| initcap                 |
| inline                  |
| instr                   |
| isnotnull               |
| isnull                  |
| java_method             |
| json_tuple              |
| lag                     |
+-------------------------+--+
|        tab_name         |
+-------------------------+--+
| last_day                |
| last_value              |
| lcase                   |
| lead                    |
| least                   |
| length                  |
| levenshtein             |
| like                    |
| ln                      |
| locate                  |
| log                     |
| log10                   |
| log2                    |
| logged_in_user          |
| lower                   |
| lpad                    |
| ltrim                   |
| map                     |
| map_keys                |
| map_values              |
| matchpath               |
| max                     |
| md5                     |
| min                     |
| minute                  |
| month                   |
| months_between          |
| named_struct            |
| negative                |
| next_day                |
| ngrams                  |
| noop                    |
| noopstreaming           |
| noopwithmap             |
| noopwithmapstreaming    |
| not                     |
| ntile                   |
| nvl                     |
| or                      |
| parse_url               |
| parse_url_tuple         |
| percent_rank            |
| percentile              |
| percentile_approx       |
| pi                      |
| pmod                    |
| posexplode              |
| positive                |
| pow                     |
| power                   |
| printf                  |
| radians                 |
| rand                    |
| rank                    |
| reflect                 |
| reflect2                |
| regexp                  |
| regexp_extract          |
| regexp_replace          |
| repeat                  |
| reverse                 |
| rlike                   |
| round                   |
| row_number              |
| rpad                    |
| rtrim                   |
| second                  |
| sentences               |
| sha2                    |
| sign                    |
| sin                     |
| size                    |
| sort_array              |
| soundex                 |
| space                   |
| split                   |
| sqrt                    |
| stack                   |
| std                     |
| stddev                  |
| stddev_pop              |
| stddev_samp             |
| str_to_map              |
| struct                  |
| substr                  |
| substring               |
| sum                     |
| tan                     |
| to_date                 |
| to_unix_timestamp       |
| to_utc_timestamp        |
| translate               |
| trim                    |
| trunc                   |
| ucase                   |
| unbase64                |
| unhex                   |
| unix_timestamp          |
| upper                   |
| uuid                    |
+-------------------------+--+
|        tab_name         |
+-------------------------+--+
| var_pop                 |
| var_samp                |
| variance                |
| version                 |
| weekofyear              |
| when                    |
| windowingtablefunction  |
| xpath                   |
| xpath_boolean           |
| xpath_double            |
| xpath_float             |
| xpath_int               |
| xpath_long              |
| xpath_number            |
| xpath_short             |
| xpath_string            |
| year                    |
| |                       |
| ~                       |
+-------------------------+--+

No comments:

Post a Comment