Sunday, July 8, 2018

Using MySQL 8 with Hue in Cloudera CDH 5.15

Symptom: adding the Hue service fails when Hue's database runs on MySQL 8.

Error reported while adding Hue:

Unable to connect to database on host 'cdh-vm.dbaglobe.com' from host 'cdh-vm.dbaglobe.com' using the credential provided.


Error in cloudera-scm-server.log
+ exec /opt/cloudera/parcels/CDH-5.15.0-1.cdh5.15.0.p0.21/lib/hue/build/env/bin/hue is_db_alive
[08/Jul/2018 19:30:15 +0000] settings     DEBUG    DESKTOP_DB_TEST_NAME SET: /opt/cloudera/parcels/CDH-5.15.0-1.cdh5.15.0.p0.21/lib/hue/desktop/desktop-test.db
[08/Jul/2018 19:30:15 +0000] settings     DEBUG    DESKTOP_DB_TEST_USER SET: hue_test
[08/Jul/2018 04:30:23 +0000] __init__     INFO     Couldn't import snappy. Support for snappy compression disabled.
Error accessing DB: (2059, "Authentication plugin 'caching_sha2_password' cannot be loaded: /usr/lib64/mysql/plugin/caching_sha2_password.so: cannot open shared object file: No such file or directory")

How to fix: the MySQL client library bundled with Hue in CDH 5.15 does not ship MySQL 8's default caching_sha2_password authentication plugin (hence the missing caching_sha2_password.so above), so switch the hue account back to mysql_native_password:

alter user 'hue'@'%' IDENTIFIED WITH mysql_native_password BY 'my_complex_password';
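
To confirm the change, the authentication plugin recorded for the account can be queried from mysql.user (a quick check only; root credentials are assumed):

# the plugin column should now report mysql_native_password instead of caching_sha2_password
mysql -u root -p -e "SELECT user, host, plugin FROM mysql.user WHERE user = 'hue';"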


Sunday, June 3, 2018

Two methods to modify HDFS custom metadata

Two methods to modify HDFS custom metadata with Cloudera Navigator

- Metadata file
    not recommended for production use, as it can lead to small-file problems
    updates provided through metadata files are queued before being merged
    the file is named .<datafile>.navigator and placed in the same HDFS directory as the data file, as in the example below

- Metadata API
    use either the metadata file or the API, not both
    the API overwrites existing metadata and takes effect immediately
    


[donghua@cdh-vm data]$ hdfs dfs -ls /data/donghua/*drink*
-rw-r--r--   1 donghua hive        145 2018-06-03 11:27 /data/donghua/.drinks.csv.navigator
-rw-r--r--   1 donghua hive       5918 2018-06-03 00:07 /data/donghua/drinks.csv

[donghua@cdh-vm data]$ hdfs dfs -cat /data/donghua/.drinks.csv.navigator
{
"name":"drinks dataset"
"description": "metadata example using .drinks.csv.navigator"
"properties":{
"Dept":"myDept"
},
"tags":["external"]
}
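
A sidecar file like the one above is simply copied into HDFS next to the data file it describes, and Navigator merges it on a later extraction run. A minimal sketch, assuming the JSON was prepared locally as /tmp/.drinks.csv.navigator:

# stage the metadata file beside the data file; Navigator picks it up asynchronously
hdfs dfs -put /tmp/.drinks.csv.navigator /data/donghua/.drinks.csv.navigator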

curl -u admin:admin -X GET 'http://cdh01:7187/api/v13/entities/?query=originalName:imdb_1000.csv&limit=100&offset=0'
[donghua@cdh-vm data]$ curl -u admin:admin -X GET 'http://cdh-vm:7187/api/v13/entities/?query=originalName%3D%22imdb_1000.csv%22&limit=100&offset=0'
[ {
  "originalName" : "imdb_1000.csv",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/data/donghua",
  "deleteTime" : 0,
  "extractorRunId" : "5##20",
  "customProperties" : null,
  "name" : null,
  "description" : null,
  "tags" : null,
  "properties" : {
    "__cloudera_internal__hueLink" : "http://cdh-vm:8889/filebrowser/#/data/donghua/imdb_1000.csv"
  },
  "technicalProperties" : null,
  "fileSystemPath" : "/data/donghua/imdb_1000.csv",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T00:07:55.434Z",
  "lastModified" : "2018-06-03T00:07:55.434Z",
  "lastAccessed" : "2018-06-03T00:07:54.880Z",
  "permissions" : "rw-r--r--",
  "owner" : "donghua",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : false,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "20388",
  "internalType" : "fselement"
}, {
  "originalName" : "imdb_1000.csv",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/user/hive/warehouse/testdb.db/imdb_1000",
  "deleteTime" : 0,
  "extractorRunId" : "5##22",
  "customProperties" : null,
  "name" : null,
  "description" : null,
  "tags" : null,
  "properties" : {
    "__cloudera_internal__hueLink" : "http://cdh-vm:8889/filebrowser/#/user/hive/warehouse/testdb.db/imdb_1000/imdb_1000.csv"
  },
  "technicalProperties" : null,
  "fileSystemPath" : "/user/hive/warehouse/testdb.db/imdb_1000/imdb_1000.csv",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T01:06:12.920Z",
  "lastModified" : "2018-06-03T01:06:12.920Z",
  "lastAccessed" : "2018-06-03T01:06:12.920Z",
  "permissions" : "rw-r--r--",
  "owner" : "hive",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : false,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "22303",
  "internalType" : "fselement"
}, {
  "originalName" : "imdb_1000.csv._COPYING_",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/data/donghua",
  "deleteTime" : 1527984475434,
  "extractorRunId" : "5##20",
  "customProperties" : null,
  "name" : null,
  "description" : null,
  "tags" : null,
  "properties" : null,
  "technicalProperties" : null,
  "fileSystemPath" : "/data/donghua/imdb_1000.csv._COPYING_",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T00:07:54.880Z",
  "lastModified" : "2018-06-03T00:07:54.880Z",
  "lastAccessed" : "2018-06-03T00:07:54.880Z",
  "permissions" : "rw-r--r--",
  "owner" : "donghua",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : true,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "20386",
  "internalType" : "fselement"
} ]


curl -u admin:admin -X POST 'http://cdh-vm:7187/api/v13/entities/?query=originalName%3D%22imdb_1000.csv%22&limit=100&offset=0' \
-H "Content-Type:application/json" -d \
'{
"sourceId":"5",
"originalName" : "imdb_1000.csv",
"parentPath" : "/data/donghua",
"name":"imdb dataset",
"description": "metadata example using API",
"properties":{
"Dept":"myDept"
},
"tags":["external"]
}'


[donghua@cdh-vm data]$ curl -u admin:admin -X POST 'http://cdh-vm:7187/api/v13/entities/?query=originalName%3D%22imdb_1000.csv%22&limit=100&offset=0' \
> -H "Content-Type:application/json" -d \
> '{
> "sourceId":"5",
> "originalName" : "imdb_1000.csv",
> "parentPath" : "/data/donghua",
> "name":"imdb dataset",
> "description": "metadata example using API",
> "properties":{
> "Dept":"myDept"
> },
> "tags":["external"]
> }'
{
  "originalName" : "imdb_1000.csv",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/data/donghua",
  "deleteTime" : 0,
  "extractorRunId" : "5##20",
  "customProperties" : null,
  "name" : "imdb dataset",
  "description" : "metadata example using API",
  "tags" : [ "external" ],
  "properties" : {
    "Dept" : "myDept",
    "__cloudera_internal__hueLink" : "http://cdh-vm:8889/filebrowser/#/data/donghua/imdb_1000.csv"
  },
  "technicalProperties" : null,
  "fileSystemPath" : "/data/donghua/imdb_1000.csv",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T00:07:55.434Z",
  "lastModified" : "2018-06-03T00:07:55.434Z",
  "lastAccessed" : "2018-06-03T00:07:54.880Z",
  "permissions" : "rw-r--r--",
  "owner" : "donghua",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : false,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "20388",
  "internalType" : "fselement"
}
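
Re-running the earlier GET confirms that the name, description, tags and the Dept property are now attached to the /data/donghua copy of the file (same credentials and query as before):

# the entity with identity 20388 should now show the custom metadata applied by the POST
curl -u admin:admin -X GET 'http://cdh-vm:7187/api/v13/entities/?query=originalName%3D%22imdb_1000.csv%22&limit=100&offset=0'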





Saturday, June 2, 2018

Oozie command line example


[donghua@cdh-vm ~]$ oozie admin -servers -oozie http://cdh-vm:11000/oozie
cdh-vm : http://cdh-vm:11000/oozie

[donghua@cdh-vm ~]$ oozie admin -configuration -oozie http://cdh-vm:11000/oozie |grep -i jobtracker
oozie.service.HadoopAccessorService.jobTracker.whitelist : cdh-vm:8032
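
Every example below passes -oozie explicitly; as the oozie help output at the end of this post notes, exporting OOZIE_URL sets the default so the flag becomes optional (shown here purely as a convenience):

# with OOZIE_URL exported, the -oozie flag can be omitted from subsequent commands
export OOZIE_URL=http://cdh-vm:11000/oozie
oozie admin -status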


[donghua@cdh-vm ~]$ oozie jobs -oozie http://cdh-vm:11000/oozie
Job ID                                   App Name     Status    User      Group     Started                 Ended
------------------------------------------------------------------------------------------------------------------------------------
0000000-180602134544423-oozie-oozi-W     Append person SUCCEEDED donghua   -         2018-06-02 14:45 GMT    2018-06-02 14:47 GMT
------------------------------------------------------------------------------------------------------------------------------------

[donghua@cdh-vm ~]$ oozie job  -oozie http://cdh-vm:11000/oozie -info 0000000-180602134544423-oozie-oozi-W
Job ID : 0000000-180602134544423-oozie-oozi-W
------------------------------------------------------------------------------------------------------------------------------------
Workflow Name : Append person
App Path      : hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94
Status        : SUCCEEDED
Run           : 0
User          : donghua
Group         : -
Created       : 2018-06-02 14:45 GMT
Started       : 2018-06-02 14:45 GMT
Last Modified : 2018-06-02 14:47 GMT
Ended         : 2018-06-02 14:47 GMT
CoordAction ID: -

Actions
------------------------------------------------------------------------------------------------------------------------------------
ID                                                                            Status    Ext ID                 Ext Status Err Code
------------------------------------------------------------------------------------------------------------------------------------
0000000-180602134544423-oozie-oozi-W@:start:                                  OK        -                      OK         -
------------------------------------------------------------------------------------------------------------------------------------
0000000-180602134544423-oozie-oozi-W@hive2-3f03                               OK        job_1527947069421_0001 SUCCEEDED  -
------------------------------------------------------------------------------------------------------------------------------------
0000000-180602134544423-oozie-oozi-W@End                                      OK        -                      OK         -
------------------------------------------------------------------------------------------------------------------------------------
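
The deployed workflow definition can also be fetched directly from the Oozie server with -definition; it returns the same workflow.xml that is cat'ed from HDFS further down (a sketch, using the same job ID as above):

# print the workflow.xml stored for this workflow job
oozie job -oozie http://cdh-vm:11000/oozie -definition 0000000-180602134544423-oozie-oozi-W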

[donghua@cdh-vm ~]$ oozie job  -oozie http://cdh-vm:11000/oozie -configcontent 0000000-180602134544423-oozie-oozi-W
<configuration>
  <property>
    <name>hue-id-w</name>
    <value>29</value>
  </property>
  <property>
    <name>oozie.wf.application.path</name>
    <value>hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94</value>
  </property>
  <property>
    <name>oozie.use.system.libpath</name>
    <value>True</value>
  </property>
  <property>
    <name>dryrun</name>
    <value>False</value>
  </property>
  <property>
    <name>security_enabled</name>
    <value>True</value>
  </property>
  <property>
    <name>credentials</name>
    <value>{u'hcat': {'xml_name': u'hcat', 'properties': [('hcat.metastore.uri', u'thrift://cdh-vm:9083'), ('hcat.metastore.principal', u'hive/cdh-vm@DBAGLOBE.COM')]}, u'hive2': {'xml_name': u'hive2', 'properties': [('hive2.jdbc.url', 'jdbc:hive2://cdh-vm:10000/default'), ('hive2.server.principal', 'hive/cdh-vm@DBAGLOBE.COM')]}, u'hbase': {'xml_name': u'hbase', 'properties': []}}</value>
  </property>
  <property>
    <name>send_email</name>
    <value>False</value>
  </property>
  <property>
    <name>user.name</name>
    <value>donghua</value>
  </property>
  <property>
    <name>jobTracker</name>
    <value>cdh-vm:8032</value>
  </property>
  <property>
    <name>mapreduce.job.user.name</name>
    <value>donghua</value>
  </property>
  <property>
    <name>nameNode</name>
    <value>hdfs://cdh-vm:8020</value>
  </property>
</configuration>



[donghua@cdh-vm ~]$ oozie job  -oozie http://cdh-vm:11000/oozie -log 0000000-180602134544423-oozie-oozi-W
2018-06-02 14:45:51,754 INFO org.apache.oozie.service.JPAService: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[] No results found
2018-06-02 14:45:51,921 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@:start:] Start action [0000000-180602134544423-oozie-oozi-W@:start:] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10]
2018-06-02 14:45:51,927 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@:start:] [***0000000-180602134544423-oozie-oozi-W@:start:***]Action status=DONE
2018-06-02 14:45:51,927 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@:start:] [***0000000-180602134544423-oozie-oozi-W@:start:***]Action updated in DB!
2018-06-02 14:45:52,457 INFO org.apache.oozie.service.JPAService: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@:start:] No results found
2018-06-02 14:45:52,549 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@:start:] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W@:start:
2018-06-02 14:45:52,552 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W
2018-06-02 14:45:52,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] Start action [0000000-180602134544423-oozie-oozi-W@hive2-3f03] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10]
2018-06-02 14:45:56,354 INFO org.apache.oozie.service.HadoopAccessorService: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] Delegation Token Renewer details: Principal=yarn/_HOST@DBAGLOBE.COM,Target=cdh-vm:8032,Renewer=yarn/cdh-vm@DBAGLOBE.COM
2018-06-02 14:46:02,993 INFO org.apache.oozie.action.hadoop.Hive2ActionExecutor: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] checking action, hadoop job ID [job_1527947069421_0001] status [RUNNING]
2018-06-02 14:46:03,024 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] [***0000000-180602134544423-oozie-oozi-W@hive2-3f03***]Action status=RUNNING
2018-06-02 14:46:03,042 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] [***0000000-180602134544423-oozie-oozi-W@hive2-3f03***]Action updated in DB!
2018-06-02 14:46:03,140 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W@hive2-3f03
2018-06-02 14:47:42,121 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] callback for action [0000000-180602134544423-oozie-oozi-W@hive2-3f03]
2018-06-02 14:47:42,903 INFO org.apache.oozie.action.hadoop.Hive2ActionExecutor: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] Hadoop Jobs launched : [job_1527947069421_0002]
2018-06-02 14:47:42,922 INFO org.apache.oozie.action.hadoop.Hive2ActionExecutor: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] action completed, external ID [job_1527947069421_0001]
2018-06-02 14:47:43,593 INFO org.apache.oozie.service.JPAService: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] No results found
2018-06-02 14:47:43,702 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@End] Start action [0000000-180602134544423-oozie-oozi-W@End] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10]
2018-06-02 14:47:43,708 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@End] [***0000000-180602134544423-oozie-oozi-W@End***]Action status=DONE
2018-06-02 14:47:43,709 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@End] [***0000000-180602134544423-oozie-oozi-W@End***]Action updated in DB!
2018-06-02 14:47:44,041 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@End] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W@End
2018-06-02 14:47:44,041 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W
2018-06-02 14:47:44,041 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W@hive2-3f03


[donghua@cdh-vm ~]$ hdfs dfs -ls hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94
Found 3 items
-rw-r--r--   1 donghua hue        506 2018-06-02 14:45 hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/job.properties
drwxr-xr-x   - donghua hue          0 2018-06-02 14:43 hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/lib
-rw-r--r--   1 donghua hue       1007 2018-06-02 14:45 hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/workflow.xml

[donghua@cdh-vm ~]$ hdfs dfs -cat hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/job.properties
oozie.use.system.libpath=True
send_email=False
dryrun=False
credentials={u'hcat': {'xml_name': u'hcat', 'properties': [('hcat.metastore.uri', u'thrift://cdh-vm:9083'), ('hcat.metastore.principal', u'hive/cdh-vm@DBAGLOBE.COM')]}, u'hive2': {'xml_name': u'hive2', 'properties': [('hive2.jdbc.url', 'jdbc:hive2://cdh-vm:10000/default'), ('hive2.server.principal', 'hive/cdh-vm@DBAGLOBE.COM')]}, u'hbase': {'xml_name': u'hbase', 'properties': []}}
nameNode=hdfs://cdh-vm:8020
jobTracker=cdh-vm:8032
security_enabled=True
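
To resubmit the same workflow from the command line instead of from Hue, the properties file can be copied locally, the application path (taken from the -info output above) appended, and the job started. A sketch only; the local file name is arbitrary:

# copy the Hue-generated properties locally and add the application path
hdfs dfs -get hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/job.properties ./job.properties
echo 'oozie.wf.application.path=hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94' >> ./job.properties
# submit and start the workflow in one step
oozie job -oozie http://cdh-vm:11000/oozie -config ./job.properties -run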

[donghua@cdh-vm ~]$ hdfs dfs -cat hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/workflow.xml
<workflow-app name="Append person" xmlns="uri:oozie:workflow:0.5">
  <credentials>
    <credential name="hive2" type="hive2">
      <property>
        <name>hive2.jdbc.url</name>
        <value>jdbc:hive2://cdh-vm:10000/default</value>
      </property>
      <property>
        <name>hive2.server.principal</name>
        <value>hive/cdh-vm@DBAGLOBE.COM</value>
      </property>
    </credential>
  </credentials>
  <start to="hive2-3f03"/>
  <kill name="Kill">
    <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
  </kill>
  <action name="hive2-3f03" cred="hive2">
    <hive2 xmlns="uri:oozie:hive2-action:0.1">
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>
      <jdbc-url>jdbc:hive2://cdh-vm:10000/default</jdbc-url>
      ...
    </hive2>
    <ok to="End"/>
    <error to="Kill"/>
  </action>
  <end name="End"/>
</workflow-app>



[donghua@cdh-vm ~]$ oozie help
usage:
      the env variable 'OOZIE_URL' is used as default value for the '-oozie' option
      the env variable 'OOZIE_TIMEZONE' is used as default value for the '-timezone' option
      the env variable 'OOZIE_AUTH' is used as default value for the '-auth' option
      custom headers for Oozie web services can be specified using '-Dheader:NAME=VALUE'

      oozie help : display usage for all commands or specified command

      oozie version : show client version

      oozie job : job operations
                -action           coordinator rerun/kill on action ids (requires -rerun/-kill);
                                       coordinator log retrieval on action ids(requires -log)
                -allruns               Get workflow jobs corresponding to a coordinator action
                                       including all the reruns
                -auth             select authentication type [SIMPLE|KERBEROS]
                -change           change a coordinator or bundle job
                -config           job configuration file '.xml' or '.properties'
                -configcontent    job configuration
                -coordinator      bundle rerun on coordinator names (requires -rerun)
                -D     set/override value for given property
                -date             coordinator/bundle rerun on action dates (requires -rerun);
                                       coordinator log retrieval on action dates (requires -log)
                -debug                 Use debug mode to see debugging statements on stdout
                -definition       job definition
                -diff             Show diff of the new coord definition and properties with the
                                       existing one (default true)
                -doas             doAs user, impersonates as the specified user
                -dryrun                Dryrun a workflow (since 3.3.2) or coordinator (since 2.0)
                                       job without actually executing it
                -failed                runs the failed workflow actions of the coordinator actions
                                       (requires -rerun)
                -filter           [;]*
                                       (All Coordinator actions satisfying the filters will be
                                       retreived).
                                       key: status or nominaltime
                                       comparator: =, !=, <, <=, >, >=. = is used as OR and others
                                       as AND
                                       status: values are valid status like SUCCEEDED, KILLED etc.
                                       Only = and != apply for status
                                       nominaltime: time of format yyyy-MM-dd'T'HH:mm'Z'
                -ignore           change status of a coordinator job or action to IGNORED
                                       (-action required to ignore coord actions)
                -info             info of a job
                -interval         polling interval in minutes (default is 5, requires -poll)
                -kill             kill a job (coordinator can mention -action or -date)
                -len              number of actions (default TOTAL ACTIONS, requires -info)
                -localtime             use local time (same as passing your time zone to -timezone).
                                       Overrides -timezone option
                -log              job log
                -logfilter        job log search parameter. Can be specified as -logfilter
                                       opt1=val1;opt2=val1;opt3=val1. Supported options are recent,
                                       start, end, loglevel, text, limit and debug
                -nocleanup             do not clean up output-events of the coordiantor rerun
                                       actions (requires -rerun)
                -offset           job info offset of actions (default '1', requires -info)
                -oozie            Oozie URL
                -order            order to show coord actions (default ascending order, 'desc'
                                       for descending order, requires -info)
                -poll             poll Oozie until a job reaches a terminal state or a timeout
                                       occurs
                -refresh               re-materialize the coordinator rerun actions (requires
                                       -rerun)
                -rerun            rerun a job  (coordinator requires -action or -date, bundle
                                       requires -coordinator or -date)
                -resume           resume a job
                -run                   run a job
                -start            start a job
                -submit                submit a job
                -suspend          suspend a job
                -timeout          timeout in minutes (default is 30, negative values indicate
                                       no timeout, requires -poll)
                -timezone         use time zone with the specified ID (default GMT).
                                       See 'oozie info -timezones' for a list
                -update           Update coord definition and properties
                -value            new endtime/concurrency/pausetime value for changing a
                                       coordinator job
                -verbose    verbose mode

      oozie jobs : jobs status
                 -auth        select authentication type [SIMPLE|KERBEROS]
                 -bulk        key-value pairs to filter bulk jobs response. e.g.
                                   bundle=\;coordinators=\;actionstatus=\;startcreatedtime=
                                   \;endcreatedtime=\;startscheduledtime=\;endscheduledt
                                   ime=\; bundle, coordinators and actionstatus can be multiple
                                   comma separated valuesbundle and coordinators can be id(s) or
                                   appName(s) of those jobs. Specifying bundle is mandatory, other
                                   params are optional
                 -doas        doAs user, impersonates as the specified user
                 -filter
                                   text=<*>\;user=\;name=\;group=\;status=\;frequency=
                                   >\;unit=\;startcreatedtime=\;endcreatedtime=
                                   \;sortBy=
                                   (text filter: matches partially with name and user or complete
                                   match with job IDvalid unit values are 'months', 'days', 'hours'
                                   or 'minutes'. startcreatedtime, endcreatedtime: time of format
                                   yyyy-MM-dd'T'HH:mm'Z'. valid values for sortBy are 'createdTime'
                                   or 'lastModifiedTime'.)
                 -jobtype     job type ('Supported in Oozie-2.0 or later versions ONLY -
                                   'coordinator' or 'bundle' or 'wf'(default))
                 -kill             bulk kill operation
                 -len         number of jobs (default '100')
                 -localtime        use local time (same as passing your time zone to -timezone).
                                   Overrides -timezone option
                 -offset      jobs offset (default '1')
                 -oozie       Oozie URL
                 -resume           bulk resume operation
                 -suspend          bulk suspend operation
                 -timezone    use time zone with the specified ID (default GMT).
                                   See 'oozie info -timezones' for a list
                 -verbose          verbose mode

      oozie admin : admin operations
                  -auth          select authentication type [SIMPLE|KERBEROS]
                  -configuration      show Oozie system configuration
                  -doas          doAs user, impersonates as the specified user
                  -instrumentation    show Oozie system instrumentation
                  -javasysprops       show Oozie Java system properties
                  -metrics            show Oozie system metrics
                  -oozie         Oozie URL
                  -osenv              show Oozie system OS environment
                  -queuedump          show Oozie server queue elements
                  -servers            list available Oozie servers (more than one only if HA is
                                      enabled)
                  -shareliblist       List available sharelib that can be specified in a workflow
                                      action
                  -sharelibupdate     Update server to use a newer version of sharelib
                  -status             show the current system status
                  -systemmode    Supported in Oozie-2.0 or later versions ONLY. Change oozie
                                      system mode [NORMAL|NOWEBSERVICE|SAFEMODE]
                  -version            show Oozie server build version

      oozie validate : validate a workflow, coordinator, bundle XML file
                     -auth     select authentication type [SIMPLE|KERBEROS]
                     -oozie    Oozie URL

      oozie sla : sla operations (Deprecated with Oozie 4.0)
                -auth      select authentication type [SIMPLE|KERBEROS]
                -filter    filter of SLA events. e.g., jobid=\;appname=
                -len       number of results (default '100', max '1000')
                -offset    start offset (default '0')
                -oozie     Oozie URL

      oozie pig -X : submit a pig job, everything after '-X' are pass-through parameters to pig, any '-D' arguments after '-X' are put in
                -auth            select authentication type [SIMPLE|KERBEROS]
                -config          job configuration file '.properties'
                -D    set/override value for given property
                -doas            doAs user, impersonates as the specified user
                -file            pig script
                -oozie           Oozie URL
                -P    set parameters for script

      oozie hive -X : submit a hive job, everything after '-X' are pass-through parameters to hive, any '-D' arguments after '-X' are put in
                 -auth            select authentication type [SIMPLE|KERBEROS]
                 -config          job configuration file '.properties'
                 -D    set/override value for given property
                 -doas            doAs user, impersonates as the specified user
                 -file            hive script
                 -oozie           Oozie URL
                 -P    set parameters for script

      oozie sqoop -X : submit a sqoop job, everything after '-X' are pass-through parameters to sqoop, any '-D' arguments after '-X' are put in
                  -auth            select authentication type [SIMPLE|KERBEROS]
                  -command     sqoop command
                  -config          job configuration file '.properties'
                  -D    set/override value for given property
                  -doas            doAs user, impersonates as the specified user
                  -oozie           Oozie URL

      oozie info : get more detailed info about specific topics
                 -timezones   display a list of available time zones

      oozie mapreduce : submit a mapreduce job
                      -auth            select authentication type [SIMPLE|KERBEROS]
                      -config          job configuration file '.properties'
                      -D    set/override value for given property
                      -doas            doAs user, impersonates as the specified user
                      -oozie           Oozie URL

Sunday, May 27, 2018

Miniconda installation on Linux

[root@cdh01 ~]# ./Miniconda2-latest-Linux-x86_64.sh -b -p /opt/miniconda2 -u
PREFIX=/opt/miniconda2
installing: python-2.7.14-h1571d57_31 ...
Python 2.7.14 :: Anaconda, Inc.
installing: ca-certificates-2018.03.07-0 ...
installing: conda-env-2.6.0-h36134e3_1 ...
installing: libgcc-ng-7.2.0-hdf63c60_3 ...
installing: libstdcxx-ng-7.2.0-hdf63c60_3 ...
installing: libffi-3.2.1-hd88cf55_4 ...
installing: ncurses-6.0-h9df7e31_2 ...
installing: openssl-1.0.2o-h20670df_0 ...
installing: tk-8.6.7-hc745277_3 ...
installing: yaml-0.1.7-had09818_2 ...
installing: zlib-1.2.11-ha838bed_2 ...
installing: libedit-3.1-heed3624_0 ...
installing: readline-7.0-ha6073c6_4 ...
installing: sqlite-3.23.1-he433501_0 ...
installing: asn1crypto-0.24.0-py27_0 ...
installing: certifi-2018.4.16-py27_0 ...
installing: chardet-3.0.4-py27hfa10054_1 ...
installing: enum34-1.1.6-py27h99a27e9_1 ...
installing: futures-3.2.0-py27h7b459c0_0 ...
installing: idna-2.6-py27h5722d68_1 ...
installing: ipaddress-1.0.22-py27_0 ...
installing: pycosat-0.6.3-py27ha4109ae_0 ...
installing: pycparser-2.18-py27hefa08c5_1 ...
installing: pysocks-1.6.8-py27_0 ...
installing: ruamel_yaml-0.15.35-py27h14c3975_1 ...
installing: six-1.11.0-py27h5f960f1_1 ...
installing: cffi-1.11.5-py27h9745a5d_0 ...
installing: setuptools-39.0.1-py27_0 ...
installing: cryptography-2.2.2-py27h14c3975_0 ...
installing: wheel-0.31.0-py27_0 ...
installing: pip-9.0.3-py27_0 ...
installing: pyopenssl-17.5.0-py27hcee3be0_0 ...
installing: urllib3-1.22-py27ha55213b_0 ...
installing: requests-2.18.4-py27hc5b0589_1 ...
installing: conda-4.5.1-py27_0 ...
installation finished.



[donghua@cdh01 ~]$ export PATH=/opt/miniconda2/bin:$PATH
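
To make the conda binaries available in future shells as well, the same export can be appended to ~/.bashrc (optional):

# persist the PATH change for new login shells
echo 'export PATH=/opt/miniconda2/bin:$PATH' >> ~/.bashrc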

# alternatively, create the environment using the base Python version
# /opt/miniconda2/bin/conda create --name py27 

[donghua@cdh01 ~]$ /opt/miniconda2/bin/conda create --name py27 python=2.7
Solving environment: done


==> WARNING: A newer version of conda exists. <==
  current version: 4.5.1
  latest version: 4.5.4

Please update conda by running

    $ conda update -n base conda



## Package Plan ##

  environment location: /home/donghua/.conda/envs/py27

  added / updated specs:
    - python=2.7


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ncurses-6.1                |       hf484d3e_0         943 KB
    pip-10.0.1                 |           py27_0         1.7 MB
    setuptools-39.1.0          |           py27_0         582 KB
    libedit-3.1.20170329       |       h6b74fdf_2         172 KB
    wheel-0.31.1               |           py27_0          62 KB
    python-2.7.15              |       h1571d57_0        12.1 MB
    ------------------------------------------------------------
                                           Total:        15.5 MB

The following NEW packages will be INSTALLED:

    ca-certificates: 2018.03.07-0
    certifi:         2018.4.16-py27_0
    libedit:         3.1.20170329-h6b74fdf_2
    libffi:          3.2.1-hd88cf55_4
    libgcc-ng:       7.2.0-hdf63c60_3
    libstdcxx-ng:    7.2.0-hdf63c60_3
    ncurses:         6.1-hf484d3e_0
    openssl:         1.0.2o-h20670df_0
    pip:             10.0.1-py27_0
    python:          2.7.15-h1571d57_0
    readline:        7.0-ha6073c6_4
    setuptools:      39.1.0-py27_0
    sqlite:          3.23.1-he433501_0
    tk:              8.6.7-hc745277_3
    wheel:           0.31.1-py27_0
    zlib:            1.2.11-ha838bed_2

Proceed ([y]/n)? y


Downloading and Extracting Packages
ncurses 6.1############################################# | 100%
pip 10.0.1############################################## | 100%
setuptools 39.1.0####################################### | 100%
libedit 3.1.20170329#################################### | 100%
wheel 0.31.1############################################ | 100%
python 2.7.15########################################### | 100%
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
#
# To activate this environment, use:
# > source activate py27
#
# To deactivate an active environment, use:
# > source deactivate
#

[donghua@cdh01 ~]$



[donghua@cdh01 ~]$ conda list
# packages in environment at /opt/miniconda2:
#
# Name                    Version                   Build  Channel
asn1crypto                0.24.0                   py27_0
ca-certificates           2018.03.07                    0
certifi                   2018.4.16                py27_0
cffi                      1.11.5           py27h9745a5d_0
chardet                   3.0.4            py27hfa10054_1
conda                     4.5.1                    py27_0
conda-env                 2.6.0                h36134e3_1
cryptography              2.2.2            py27h14c3975_0
enum34                    1.1.6            py27h99a27e9_1
futures                   3.2.0            py27h7b459c0_0
idna                      2.6              py27h5722d68_1
ipaddress                 1.0.22                   py27_0
libedit                   3.1                  heed3624_0
libffi                    3.2.1                hd88cf55_4
libgcc-ng                 7.2.0                hdf63c60_3
libstdcxx-ng              7.2.0                hdf63c60_3
ncurses                   6.0                  h9df7e31_2
openssl                   1.0.2o               h20670df_0
pip                       9.0.3                    py27_0
pycosat                   0.6.3            py27ha4109ae_0
pycparser                 2.18             py27hefa08c5_1
pyopenssl                 17.5.0           py27hcee3be0_0
pysocks                   1.6.8                    py27_0
python                    2.7.14              h1571d57_31
readline                  7.0                  ha6073c6_4
requests                  2.18.4           py27hc5b0589_1
ruamel_yaml               0.15.35          py27h14c3975_1
setuptools                39.0.1                   py27_0
six                       1.11.0           py27h5f960f1_1
sqlite                    3.23.1               he433501_0
tk                        8.6.7                hc745277_3
urllib3                   1.22             py27ha55213b_0
wheel                     0.31.0                   py27_0
yaml                      0.1.7                had09818_2
zlib                      1.2.11               ha838bed_2
[donghua@cdh01 ~]$ conda env list
# conda environments:
#
py27                     /home/donghua/.conda/envs/py27
base                  *  /opt/miniconda2



[donghua@cdh01 ~]$ source activate py27
(py27) [donghua@cdh01 ~]$
(py27) [donghua@cdh01 ~]$
(py27) [donghua@cdh01 ~]$
(py27) [donghua@cdh01 ~]$
(py27) [donghua@cdh01 ~]$ conda install pandas
Solving environment: done


==> WARNING: A newer version of conda exists. <==
  current version: 4.5.1
  latest version: 4.5.4

Please update conda by running

    $ conda update -n base conda



## Package Plan ##

  environment location: /home/donghua/.conda/envs/py27

  added / updated specs:
    - pandas


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    intel-openmp-2018.0.0      |                8         620 KB
    numpy-1.14.3               |   py27hcd700cb_1          41 KB
    mkl_fft-1.0.1              |   py27h3010b51_0         137 KB
    pytz-2018.4                |           py27_0         211 KB
    python-dateutil-2.7.3      |           py27_0         258 KB
    mkl-2018.0.2               |                1       205.2 MB
    numpy-base-1.14.3          |   py27h9be14a7_1         4.1 MB
    mkl_random-1.0.1           |   py27h629b387_0         361 KB
    blas-1.0                   |              mkl           6 KB
    libgfortran-ng-7.2.0       |       hdf63c60_3         1.2 MB
    pandas-0.23.0              |   py27h637b7d7_0        11.8 MB
    ------------------------------------------------------------
                                           Total:       223.9 MB

The following NEW packages will be INSTALLED:

    blas:            1.0-mkl
    intel-openmp:    2018.0.0-8
    libgfortran-ng:  7.2.0-hdf63c60_3
    mkl:             2018.0.2-1
    mkl_fft:         1.0.1-py27h3010b51_0
    mkl_random:      1.0.1-py27h629b387_0
    numpy:           1.14.3-py27hcd700cb_1
    numpy-base:      1.14.3-py27h9be14a7_1
    pandas:          0.23.0-py27h637b7d7_0
    python-dateutil: 2.7.3-py27_0
    pytz:            2018.4-py27_0
    six:             1.11.0-py27h5f960f1_1

Proceed ([y]/n)? y


Downloading and Extracting Packages
intel-openmp 2018.0.0####################################### | 100%
numpy 1.14.3################################################ | 100%
mkl_fft 1.0.1############################################### | 100%
pytz 2018.4################################################# | 100%
python-dateutil 2.7.3####################################### | 100%
mkl 2018.0.2################################################ | 100%
numpy-base 1.14.3########################################### | 100%
mkl_random 1.0.1############################################ | 100%
blas 1.0#################################################### | 100%
libgfortran-ng 7.2.0######################################## | 100%
pandas 0.23.0############################################### | 100%
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
(py27) [donghua@cdh01 ~]$ source deactivate


[donghua@cdh01 ~]$ conda install jupyter -n py27
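
With Jupyter installed into py27, the notebook server can be started from that environment once it is activated (a sketch; add host/port options as the setup requires):

# activate the environment and start a notebook server without opening a browser
source activate py27
jupyter notebook --no-browser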


[donghua@cdh01 ~]$ conda list -n py27
# packages in environment at /home/donghua/.conda/envs/py27:
#
# Name                    Version                   Build  Channel
blas                      1.0                         mkl
ca-certificates           2018.03.07                    0
certifi                   2018.4.16                py27_0
intel-openmp              2018.0.0                      8
libedit                   3.1.20170329         h6b74fdf_2
libffi                    3.2.1                hd88cf55_4
libgcc-ng                 7.2.0                hdf63c60_3
libgfortran-ng            7.2.0                hdf63c60_3
libstdcxx-ng              7.2.0                hdf63c60_3
mkl                       2018.0.2                      1
mkl_fft                   1.0.1            py27h3010b51_0
mkl_random                1.0.1            py27h629b387_0
ncurses                   6.1                  hf484d3e_0
numpy                     1.14.3           py27hcd700cb_1
numpy-base                1.14.3           py27h9be14a7_1
openssl                   1.0.2o               h20670df_0
pandas                    0.23.0           py27h637b7d7_0
pip                       10.0.1                   py27_0
python                    2.7.15               h1571d57_0
python-dateutil           2.7.3                    py27_0
pytz                      2018.4                   py27_0
readline                  7.0                  ha6073c6_4
setuptools                39.1.0                   py27_0
six                       1.11.0           py27h5f960f1_1
sqlite                    3.23.1               he433501_0
tk                        8.6.7                hc745277_3
wheel                     0.31.1                   py27_0
zlib                      1.2.11               ha838bed_2

[donghua@cdh01 ~]$ conda list -n base
# packages in environment at /opt/miniconda2:
#
# Name                    Version                   Build  Channel
asn1crypto                0.24.0                   py27_0
ca-certificates           2018.03.07                    0
certifi                   2018.4.16                py27_0
cffi                      1.11.5           py27h9745a5d_0
chardet                   3.0.4            py27hfa10054_1
conda                     4.5.1                    py27_0
conda-env                 2.6.0                h36134e3_1
cryptography              2.2.2            py27h14c3975_0
enum34                    1.1.6            py27h99a27e9_1
futures                   3.2.0            py27h7b459c0_0
idna                      2.6              py27h5722d68_1
ipaddress                 1.0.22                   py27_0
libedit                   3.1                  heed3624_0
libffi                    3.2.1                hd88cf55_4
libgcc-ng                 7.2.0                hdf63c60_3
libstdcxx-ng              7.2.0                hdf63c60_3
ncurses                   6.0                  h9df7e31_2
openssl                   1.0.2o               h20670df_0
pip                       9.0.3                    py27_0
pycosat                   0.6.3            py27ha4109ae_0
pycparser                 2.18             py27hefa08c5_1
pyopenssl                 17.5.0           py27hcee3be0_0
pysocks                   1.6.8                    py27_0
python                    2.7.14              h1571d57_31
readline                  7.0                  ha6073c6_4
requests                  2.18.4           py27hc5b0589_1
ruamel_yaml               0.15.35          py27h14c3975_1
setuptools                39.0.1                   py27_0
six                       1.11.0           py27h5f960f1_1
sqlite                    3.23.1               he433501_0
tk                        8.6.7                hc745277_3
urllib3                   1.22             py27ha55213b_0
wheel                     0.31.0                   py27_0
yaml                      0.1.7                had09818_2
zlib                      1.2.11               ha838bed_2


Reference: https://conda.io/docs/_downloads/conda-cheatsheet.pdf