Sunday, August 5, 2018

How to enable PAM Authentication in Zeppelin

Step 1: comment out the preconfigured users

/usr/hdp/current/zeppelin-server/conf/shiro.ini

#[users]
# List of users with their password allowed to access Zeppelin.
# To use a different strategy (LDAP / Database / ...) check the shiro doc at http://shiro.apache.org/configuration.html#Configuration-INISections
#admin = $shiro1$SHA-256$500000$p6Be9+t2hdUXJQj2D0b1fg==$bea5JIMqcVF3J6eNZGWQ/3eeDByn5iEZDuGsEip06+M=, admin
#user1 = $shiro1$SHA-256$500000$G2ymy/qmuZnGY6or4v2KfA==$v9fabqWgCNCgechtOUqAQenGDs0OSLP28q2wolPT4wU=, role1, role2
#user2 = $shiro1$SHA-256$500000$aHBgiuwSgAcP3Xt5mEzeFw==$KosBnN2BNKA9/KHBL0hnU/woJFl+xzJFj12NQ0fnjCU=, role3
#user3 = $shiro1$SHA-256$500000$nf0GzH10GbYVoxa7DOlOSw==$ov/IA5W8mRWPwvAoBjNYxg3udJK0EmrVMvFCwcr9eAs=, role2


Step 2: enable PAM


/usr/hdp/current/zeppelin-server/conf/shiro.ini

### A sample PAM configuration
pamRealm=org.apache.zeppelin.realm.PamRealm
pamRealm.service=sshd


Step 3: Grant access to /etc/shadow

[root@hdp30 bin]# setfacl -m user:zeppelin:r /etc/shadow
[root@hdp30 bin]# getfacl /etc/shadow
getfacl: Removing leading '/' from absolute path names
# file: etc/shadow
# owner: root
# group: root
user::---
user:zeppelin:r--
group::---
mask::r--
other::---

Step 4: Restart Zeppelin and log in using OS user donghua


==> /var/log/zeppelin/zeppelin-zeppelin-hdp30.log <==

 WARN [2018-08-05 20:04:43,756] ({qtp110992469-18} LoginRestApi.java[postLogin]:206) - {"status":"OK","message":"","body":{"principal":"donghua","ticket":"fa4817f4-c68a-4c0e-b61a-39295daa3062","roles":"[]"}}


Reference: https://community.hortonworks.com/content/supportkb/167636/how-to-enable-pam-authentication-in-zeppelin.html

Saturday, August 4, 2018

Set timezone in Redhat Linux 7

[root@hdp30 ~]# timedatectl list-timezones|grep -i singapore
Asia/Singapore

[root@hdp30 ~]# timedatectl
      Local time: Sat 2018-08-04 00:56:47 UTC
  Universal time: Sat 2018-08-04 00:56:47 UTC
        RTC time: Sat 2018-08-04 00:56:47
       Time zone: Etc/UTC (UTC, +0000)
     NTP enabled: yes
NTP synchronized: yes
 RTC in local TZ: no
      DST active: n/a


[root@hdp30 ~]# timedatectl set-timezone Asia/Singapore

[root@hdp30 ~]# timedatectl
      Local time: Sat 2018-08-04 08:58:21 +08
  Universal time: Sat 2018-08-04 00:58:21 UTC
        RTC time: Sat 2018-08-04 00:58:22
       Time zone: Asia/Singapore (+08, +0800)
     NTP enabled: yes
NTP synchronized: yes
 RTC in local TZ: no
      DST active: n/a

Sunday, July 29, 2018

Use Jupyter Notebook with Spark2 on Cloudera

Step 1: find out the necessary environment variables

[donghua@cdh-vm ~]$ cat getPythonEnv.sh
import os
print "SPARK_HOME: %s"%(os.environ['SPARK_HOME'])
print "HADOOP_CONF_DIR: %s"%(os.environ['HADOOP_CONF_DIR'])
print "SPARK_CONF_DIR: %s"%(os.environ['SPARK_CONF_DIR'])
print "PYTHONPATH: %s"%(os.environ['PYTHONPATH'])
print "PYTHONSTARTUP: %s"%(os.environ['PYTHONSTARTUP'])
print "PYSPARK_SUBMIT_ARGS: %s"%(os.environ['PYSPARK_SUBMIT_ARGS'])

[donghua@cdh-vm ~]$ pyspark2
SPARK_HOME: /opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2
HADOOP_CONF_DIR: /opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/conf/yarn-conf
SPARK_CONF_DIR: /opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/conf
PYTHONPATH: /opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/python/lib/py4j-0.10.6-src.zip:/opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/python/:
PYTHONSTARTUP: /opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/python/pyspark/shell.py
PYSPARK_SUBMIT_ARGS: "--name" "PySparkShell" "pyspark-shell"
[donghua@cdh-vm ~]$

Step 2: Prepare the kernel file, below is one workable example

mkdir /opt/anaconda2/share/jupyter/kernels/pyspark2/

[root@cdh-vm bin]# cat  /opt/anaconda2/share/jupyter/kernels/pyspark2/kernel.json
    {
      "argv": [
        "python2.7",
        "-m",
        "ipykernel_launcher",
        "-f",
        "{connection_file}"
      ],
      "display_name": "Python2.7 + Pyspark(Spark 2.3.0)",
      "language": "python",
      "env": {
        "PYSPARK_PYTHON": "/opt/anaconda2/bin/python2.7",
        "SPARK_HOME": "/opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2",
        "HADOOP_CONF_DIR": "/opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/conf/yarn-conf",
        "SPARK_CONF_DIR": "/opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/conf",
        "PYTHONPATH": "/opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/python/lib/py4j-0.10.6-src.zip:/opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/python/:",
        "PYTHONSTARTUP": "/opt/cloudera/parcels/SPARK2-2.3.0.cloudera2-1.cdh5.13.3.p0.316101/lib/spark2/python/pyspark/shell.py",
        "PYSPARK_SUBMIT_ARGS": "--name 'Jupyter Notebook' --master yarn --deploy-mode client pyspark-shell"
      }
    }


Start the notebook:

[donghua@cdh-vm ~]$ /opt/anaconda2/bin/jupyter-notebook --ip=192.168.31.238 --port 9999


Sunday, July 8, 2018

Using MySQL 8 with Hue in Cloudera CDH 5.15

Symptom: Hue fails to connect to a MySQL 8 database when being added to Cloudera CDH 5.15

Error during adding Hue:

Unable to connect to database on host 'cdh-vm.dbaglobe.com' from host 'cdh-vm.dbaglobe.com' using the credential provided.


Error in cloudera-scm-server.log
+ exec /opt/cloudera/parcels/CDH-5.15.0-1.cdh5.15.0.p0.21/lib/hue/build/env/bin/hue is_db_alive
[08/Jul/2018 19:30:15 +0000] settings     DEBUG    DESKTOP_DB_TEST_NAME SET: /opt/cloudera/parcels/CDH-5.15.0-1.cdh5.15.0.p0.21/lib/hue/desktop/desktop-test.db
[08/Jul/2018 19:30:15 +0000] settings     DEBUG    DESKTOP_DB_TEST_USER SET: hue_test
[08/Jul/2018 04:30:23 +0000] __init__     INFO     Couldn't import snappy. Support for snappy compression disabled.
Error accessing DB: (2059, "Authentication plugin 'caching_sha2_password' cannot be loaded: /usr/lib64/mysql/plugin/caching_sha2_password.so: cannot open shared object file: No such file or directory")

How to fix:

alter user 'hue'@'%' IDENTIFIED WITH mysql_native_password BY 'my_complex_password';


Sunday, June 3, 2018

Two methods to modify HDFS custom metadata

Two methods to modify HDFS custom metadata with Cloudera Navigator

- Metadata file
    not recommended for production use, as it leads to small-file problems
    updates provided through metadata files are queued before being merged

- Metadata API
    use either metadata file or API, not both
    API overwrites metadata, and takes effect immediately
    


[donghua@cdh-vm data]$ hdfs dfs -ls /data/donghua/*drink*
-rw-r--r--   1 donghua hive        145 2018-06-03 11:27 /data/donghua/.drinks.csv.navigator
-rw-r--r--   1 donghua hive       5918 2018-06-03 00:07 /data/donghua/drinks.csv

[donghua@cdh-vm data]$ hdfs dfs -cat /data/donghua/.drinks.csv.navigator
{
"name":"drinks dataset",
"description": "metadata example using .drinks.csv.navigator",
"properties":{
"Dept":"myDept"
},
"tags":["external"]
}

curl -u admin:admin -X GET 'http://cdh01:7187/api/v13/entities/?query=originalName:imdb_1000.csv&limit=100&offset=0'
[donghua@cdh-vm data]$ curl -u admin:admin -X GET 'http://cdh-vm:7187/api/v13/entities/?query=originalName%3D%22imdb_1000.csv%22&limit=100&offset=0'
[ {
  "originalName" : "imdb_1000.csv",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/data/donghua",
  "deleteTime" : 0,
  "extractorRunId" : "5##20",
  "customProperties" : null,
  "name" : null,
  "description" : null,
  "tags" : null,
  "properties" : {
    "__cloudera_internal__hueLink" : "http://cdh-vm:8889/filebrowser/#/data/donghua/imdb_1000.csv"
  },
  "technicalProperties" : null,
  "fileSystemPath" : "/data/donghua/imdb_1000.csv",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T00:07:55.434Z",
  "lastModified" : "2018-06-03T00:07:55.434Z",
  "lastAccessed" : "2018-06-03T00:07:54.880Z",
  "permissions" : "rw-r--r--",
  "owner" : "donghua",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : false,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "20388",
  "internalType" : "fselement"
}, {
  "originalName" : "imdb_1000.csv",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/user/hive/warehouse/testdb.db/imdb_1000",
  "deleteTime" : 0,
  "extractorRunId" : "5##22",
  "customProperties" : null,
  "name" : null,
  "description" : null,
  "tags" : null,
  "properties" : {
    "__cloudera_internal__hueLink" : "http://cdh-vm:8889/filebrowser/#/user/hive/warehouse/testdb.db/imdb_1000/imdb_1000.csv"
  },
  "technicalProperties" : null,
  "fileSystemPath" : "/user/hive/warehouse/testdb.db/imdb_1000/imdb_1000.csv",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T01:06:12.920Z",
  "lastModified" : "2018-06-03T01:06:12.920Z",
  "lastAccessed" : "2018-06-03T01:06:12.920Z",
  "permissions" : "rw-r--r--",
  "owner" : "hive",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : false,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "22303",
  "internalType" : "fselement"
}, {
  "originalName" : "imdb_1000.csv._COPYING_",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/data/donghua",
  "deleteTime" : 1527984475434,
  "extractorRunId" : "5##20",
  "customProperties" : null,
  "name" : null,
  "description" : null,
  "tags" : null,
  "properties" : null,
  "technicalProperties" : null,
  "fileSystemPath" : "/data/donghua/imdb_1000.csv._COPYING_",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T00:07:54.880Z",
  "lastModified" : "2018-06-03T00:07:54.880Z",
  "lastAccessed" : "2018-06-03T00:07:54.880Z",
  "permissions" : "rw-r--r--",
  "owner" : "donghua",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : true,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "20386",
  "internalType" : "fselement"
} ]


curl -u admin:admin -X POST 'http://cdh-vm:7187/api/v13/entities/?query=originalName%3D%22imdb_1000.csv%22&limit=100&offset=0' \
-H "Content-Type:application/json" -d \
'{
"sourceId":"5",
"originalName" : "imdb_1000.csv",
"parentPath" : "/data/donghua",
"name":"imdb dataset",
"description": "metadata example using API",
"properties":{
"Dept":"myDept"
},
"tags":["external"]
}'


[donghua@cdh-vm data]$ curl -u admin:admin -X POST 'http://cdh-vm:7187/api/v13/entities/?query=originalName%3D%22imdb_1000.csv%22&limit=100&offset=0' \
> -H "Content-Type:application/json" -d \
> '{
> "sourceId":"5",
> "originalName" : "imdb_1000.csv",
> "parentPath" : "/data/donghua",
> "name":"imdb dataset",
> "description": "metadata example using API",
> "properties":{
> "Dept":"myDept"
> },
> "tags":["external"]
> }'
{
  "originalName" : "imdb_1000.csv",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/data/donghua",
  "deleteTime" : 0,
  "extractorRunId" : "5##20",
  "customProperties" : null,
  "name" : "imdb dataset",
  "description" : "metadata example using API",
  "tags" : [ "external" ],
  "properties" : {
    "Dept" : "myDept",
    "__cloudera_internal__hueLink" : "http://cdh-vm:8889/filebrowser/#/data/donghua/imdb_1000.csv"
  },
  "technicalProperties" : null,
  "fileSystemPath" : "/data/donghua/imdb_1000.csv",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T00:07:55.434Z",
  "lastModified" : "2018-06-03T00:07:55.434Z",
  "lastAccessed" : "2018-06-03T00:07:54.880Z",
  "permissions" : "rw-r--r--",
  "owner" : "donghua",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : false,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "20388",
  "internalType" : "fselement"
}





Saturday, June 2, 2018

Oozie command line example


[donghua@cdh-vm ~]$ oozie admin -servers -oozie http://cdh-vm:11000/oozie
cdh-vm : http://cdh-vm:11000/oozie

[donghua@cdh-vm ~]$ oozie admin -configuration -oozie http://cdh-vm:11000/oozie |grep -i jobtracker
oozie.service.HadoopAccessorService.jobTracker.whitelist : cdh-vm:8032


[donghua@cdh-vm ~]$ oozie jobs -oozie http://cdh-vm:11000/oozie
Job ID                                   App Name     Status    User      Group     Started                 Ended
------------------------------------------------------------------------------------------------------------------------------------
0000000-180602134544423-oozie-oozi-W     Append personSUCCEEDED donghua   -         2018-06-02 14:45 GMT    2018-06-02 14:47 GMT
------------------------------------------------------------------------------------------------------------------------------------

[donghua@cdh-vm ~]$ oozie job  -oozie http://cdh-vm:11000/oozie -info 0000000-180602134544423-oozie-oozi-W
Job ID : 0000000-180602134544423-oozie-oozi-W
------------------------------------------------------------------------------------------------------------------------------------
Workflow Name : Append person
App Path      : hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94
Status        : SUCCEEDED
Run           : 0
User          : donghua
Group         : -
Created       : 2018-06-02 14:45 GMT
Started       : 2018-06-02 14:45 GMT
Last Modified : 2018-06-02 14:47 GMT
Ended         : 2018-06-02 14:47 GMT
CoordAction ID: -

Actions
------------------------------------------------------------------------------------------------------------------------------------
ID                                                                            Status    Ext ID                 Ext Status Err Code
------------------------------------------------------------------------------------------------------------------------------------
0000000-180602134544423-oozie-oozi-W@:start:                                  OK        -                      OK         -
------------------------------------------------------------------------------------------------------------------------------------
0000000-180602134544423-oozie-oozi-W@hive2-3f03                               OK        job_1527947069421_0001 SUCCEEDED  -
------------------------------------------------------------------------------------------------------------------------------------
0000000-180602134544423-oozie-oozi-W@End                                      OK        -                      OK         -
------------------------------------------------------------------------------------------------------------------------------------

[donghua@cdh-vm ~]$ oozie job  -oozie http://cdh-vm:11000/oozie -configcontent 0000000-180602134544423-oozie-oozi-W

 
    hue-id-w
    29
 
 
    oozie.wf.application.path
    hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94
 
 
    oozie.use.system.libpath
    True
 
 
    dryrun
    False
 
 
    security_enabled
    True
 
 
    credentials
    {u'hcat': {'xml_name': u'hcat', 'properties': [('hcat.metastore.uri', u'thrift://cdh-vm:9083'), ('hcat.metastore.principal', u'hive/cdh-vm@DBAGLOBE.COM')]}, u'hive2': {'xml_name': u'hive2', 'properties': [('hive2.jdbc.url', 'jdbc:hive2://cdh-vm:10000/default'), ('hive2.server.principal', 'hive/cdh-vm@DBAGLOBE.COM')]}, u'hbase': {'xml_name': u'hbase', 'properties': []}}
 
 
    send_email
    False
 
 
    user.name
    donghua
 
 
    jobTracker
    cdh-vm:8032
 
 
    mapreduce.job.user.name
    donghua
 
 
    nameNode
    hdfs://cdh-vm:8020
 



[donghua@cdh-vm ~]$ oozie job  -oozie http://cdh-vm:11000/oozie -log 0000000-180602134544423-oozie-oozi-W
2018-06-02 14:45:51,754 INFO org.apache.oozie.service.JPAService: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[] No results found
2018-06-02 14:45:51,921 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@:start:] Start action [0000000-180602134544423-oozie-oozi-W@:start:] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10]
2018-06-02 14:45:51,927 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@:start:] [***0000000-180602134544423-oozie-oozi-W@:start:***]Action status=DONE
2018-06-02 14:45:51,927 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@:start:] [***0000000-180602134544423-oozie-oozi-W@:start:***]Action updated in DB!
2018-06-02 14:45:52,457 INFO org.apache.oozie.service.JPAService: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@:start:] No results found
2018-06-02 14:45:52,549 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@:start:] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W@:start:
2018-06-02 14:45:52,552 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W
2018-06-02 14:45:52,648 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] Start action [0000000-180602134544423-oozie-oozi-W@hive2-3f03] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10]
2018-06-02 14:45:56,354 INFO org.apache.oozie.service.HadoopAccessorService: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] Delegation Token Renewer details: Principal=yarn/_HOST@DBAGLOBE.COM,Target=cdh-vm:8032,Renewer=yarn/cdh-vm@DBAGLOBE.COM
2018-06-02 14:46:02,993 INFO org.apache.oozie.action.hadoop.Hive2ActionExecutor: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] checking action, hadoop job ID [job_1527947069421_0001] status [RUNNING]
2018-06-02 14:46:03,024 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] [***0000000-180602134544423-oozie-oozi-W@hive2-3f03***]Action status=RUNNING
2018-06-02 14:46:03,042 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] [***0000000-180602134544423-oozie-oozi-W@hive2-3f03***]Action updated in DB!
2018-06-02 14:46:03,140 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W@hive2-3f03
2018-06-02 14:47:42,121 INFO org.apache.oozie.servlet.CallbackServlet: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] callback for action [0000000-180602134544423-oozie-oozi-W@hive2-3f03]
2018-06-02 14:47:42,903 INFO org.apache.oozie.action.hadoop.Hive2ActionExecutor: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] Hadoop Jobs launched : [job_1527947069421_0002]
2018-06-02 14:47:42,922 INFO org.apache.oozie.action.hadoop.Hive2ActionExecutor: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] action completed, external ID [job_1527947069421_0001]
2018-06-02 14:47:43,593 INFO org.apache.oozie.service.JPAService: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] No results found
2018-06-02 14:47:43,702 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@End] Start action [0000000-180602134544423-oozie-oozi-W@End] with user-retry state : userRetryCount [0], userRetryMax [0], userRetryInterval [10]
2018-06-02 14:47:43,708 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@End] [***0000000-180602134544423-oozie-oozi-W@End***]Action status=DONE
2018-06-02 14:47:43,709 INFO org.apache.oozie.command.wf.ActionStartXCommand: SERVER[cdh-vm] USER[donghua] GROUP[-] TOKEN[] APP[Append person] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@End] [***0000000-180602134544423-oozie-oozi-W@End***]Action updated in DB!
2018-06-02 14:47:44,041 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@End] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W@End
2018-06-02 14:47:44,041 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W
2018-06-02 14:47:44,041 INFO org.apache.oozie.command.wf.WorkflowNotificationXCommand: SERVER[cdh-vm] USER[-] GROUP[-] TOKEN[-] APP[-] JOB[0000000-180602134544423-oozie-oozi-W] ACTION[0000000-180602134544423-oozie-oozi-W@hive2-3f03] No Notification URL is defined. Therefore nothing to notify for job 0000000-180602134544423-oozie-oozi-W@hive2-3f03


[donghua@cdh-vm ~]$ hdfs dfs -ls hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94
Found 3 items
-rw-r--r--   1 donghua hue        506 2018-06-02 14:45 hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/job.properties
drwxr-xr-x   - donghua hue          0 2018-06-02 14:43 hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/lib
-rw-r--r--   1 donghua hue       1007 2018-06-02 14:45 hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/workflow.xml

[donghua@cdh-vm ~]$ hdfs dfs -cat hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/job.properties
oozie.use.system.libpath=True
send_email=False
dryrun=False
credentials={u'hcat': {'xml_name': u'hcat', 'properties': [('hcat.metastore.uri', u'thrift://cdh-vm:9083'), ('hcat.metastore.principal', u'hive/cdh-vm@DBAGLOBE.COM')]}, u'hive2': {'xml_name': u'hive2', 'properties': [('hive2.jdbc.url', 'jdbc:hive2://cdh-vm:10000/default'), ('hive2.server.principal', 'hive/cdh-vm@DBAGLOBE.COM')]}, u'hbase': {'xml_name': u'hbase', 'properties': []}}
nameNode=hdfs://cdh-vm:8020
jobTracker=cdh-vm:8032
security_enabled=True

[donghua@cdh-vm ~]$ hdfs dfs -cat hdfs://cdh-vm:8020/user/hue/oozie/workspaces/hue-oozie-1527950627.94/workflow.xml

 
   
     
        hive2.jdbc.url
        jdbc:hive2://cdh-vm:10000/default
     
     
        hive2.server.principal
        hive/cdh-vm@DBAGLOBE.COM
     
   
 
   
   
        Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]
   
   
       
            ${jobTracker}
            ${nameNode}
            jdbc:hive2://cdh-vm:10000/default
           
       
       
       
   
   



[donghua@cdh-vm ~]$ oozie help
usage:
      the env variable 'OOZIE_URL' is used as default value for the '-oozie' option
      the env variable 'OOZIE_TIMEZONE' is used as default value for the '-timezone' option
      the env variable 'OOZIE_AUTH' is used as default value for the '-auth' option
      custom headers for Oozie web services can be specified using '-Dheader:NAME=VALUE'

      oozie help : display usage for all commands or specified command

      oozie version : show client version

      oozie job : job operations
                -action           coordinator rerun/kill on action ids (requires -rerun/-kill);
                                       coordinator log retrieval on action ids(requires -log)
                -allruns               Get workflow jobs corresponding to a coordinator action
                                       including all the reruns
                -auth             select authentication type [SIMPLE|KERBEROS]
                -change           change a coordinator or bundle job
                -config           job configuration file '.xml' or '.properties'
                -configcontent    job configuration
                -coordinator      bundle rerun on coordinator names (requires -rerun)
                -D     set/override value for given property
                -date             coordinator/bundle rerun on action dates (requires -rerun);
                                       coordinator log retrieval on action dates (requires -log)
                -debug                 Use debug mode to see debugging statements on stdout
                -definition       job definition
                -diff             Show diff of the new coord definition and properties with the
                                       existing one (default true)
                -doas             doAs user, impersonates as the specified user
                -dryrun                Dryrun a workflow (since 3.3.2) or coordinator (since 2.0)
                                       job without actually executing it
                -failed                runs the failed workflow actions of the coordinator actions
                                       (requires -rerun)
                -filter           [;]*
                                       (All Coordinator actions satisfying the filters will be
                                       retreived).
                                       key: status or nominaltime
                                       comparator: =, !=, <, <=, >, >=. = is used as OR and others
                                       as AND
                                       status: values are valid status like SUCCEEDED, KILLED etc.
                                       Only = and != apply for status
                                       nominaltime: time of format yyyy-MM-dd'T'HH:mm'Z'
                -ignore           change status of a coordinator job or action to IGNORED
                                       (-action required to ignore coord actions)
                -info             info of a job
                -interval         polling interval in minutes (default is 5, requires -poll)
                -kill             kill a job (coordinator can mention -action or -date)
                -len              number of actions (default TOTAL ACTIONS, requires -info)
                -localtime             use local time (same as passing your time zone to -timezone).
                                       Overrides -timezone option
                -log              job log
                -logfilter        job log search parameter. Can be specified as -logfilter
                                       opt1=val1;opt2=val1;opt3=val1. Supported options are recent,
                                       start, end, loglevel, text, limit and debug
                -nocleanup             do not clean up output-events of the coordiantor rerun
                                       actions (requires -rerun)
                -offset           job info offset of actions (default '1', requires -info)
                -oozie            Oozie URL
                -order            order to show coord actions (default ascending order, 'desc'
                                       for descending order, requires -info)
                -poll             poll Oozie until a job reaches a terminal state or a timeout
                                       occurs
                -refresh               re-materialize the coordinator rerun actions (requires
                                       -rerun)
                -rerun            rerun a job  (coordinator requires -action or -date, bundle
                                       requires -coordinator or -date)
                -resume           resume a job
                -run                   run a job
                -start            start a job
                -submit                submit a job
                -suspend          suspend a job
                -timeout          timeout in minutes (default is 30, negative values indicate
                                       no timeout, requires -poll)
                -timezone         use time zone with the specified ID (default GMT).
                                       See 'oozie info -timezones' for a list
                -update           Update coord definition and properties
                -value            new endtime/concurrency/pausetime value for changing a
                                       coordinator job
                -verbose    verbose mode

      oozie jobs : jobs status
                 -auth        select authentication type [SIMPLE|KERBEROS]
                 -bulk        key-value pairs to filter bulk jobs response. e.g.
                                   bundle=\;coordinators=\;actionstatus=\;startcreatedtime=
                                   \;endcreatedtime=\;startscheduledtime=\;endscheduledt
                                   ime=\; bundle, coordinators and actionstatus can be multiple
                                   comma separated valuesbundle and coordinators can be id(s) or
                                   appName(s) of those jobs. Specifying bundle is mandatory, other
                                   params are optional
                 -doas        doAs user, impersonates as the specified user
                 -filter
                                   text=<*>\;user=\;name=\;group=\;status=\;frequency=
                                   >\;unit=\;startcreatedtime=\;endcreatedtime=
                                   \;sortBy=
                                   (text filter: matches partially with name and user or complete
                                   match with job IDvalid unit values are 'months', 'days', 'hours'
                                   or 'minutes'. startcreatedtime, endcreatedtime: time of format
                                   yyyy-MM-dd'T'HH:mm'Z'. valid values for sortBy are 'createdTime'
                                   or 'lastModifiedTime'.)
                 -jobtype     job type ('Supported in Oozie-2.0 or later versions ONLY -
                                   'coordinator' or 'bundle' or 'wf'(default))
                 -kill             bulk kill operation
                 -len         number of jobs (default '100')
                 -localtime        use local time (same as passing your time zone to -timezone).
                                   Overrides -timezone option
                 -offset      jobs offset (default '1')
                 -oozie       Oozie URL
                 -resume           bulk resume operation
                 -suspend          bulk suspend operation
                 -timezone    use time zone with the specified ID (default GMT).
                                   See 'oozie info -timezones' for a list
                 -verbose          verbose mode

      oozie admin : admin operations
                  -auth          select authentication type [SIMPLE|KERBEROS]
                  -configuration      show Oozie system configuration
                  -doas          doAs user, impersonates as the specified user
                  -instrumentation    show Oozie system instrumentation
                  -javasysprops       show Oozie Java system properties
                  -metrics            show Oozie system metrics
                  -oozie         Oozie URL
                  -osenv              show Oozie system OS environment
                  -queuedump          show Oozie server queue elements
                  -servers            list available Oozie servers (more than one only if HA is
                                      enabled)
                  -shareliblist       List available sharelib that can be specified in a workflow
                                      action
                  -sharelibupdate     Update server to use a newer version of sharelib
                  -status             show the current system status
                  -systemmode    Supported in Oozie-2.0 or later versions ONLY. Change oozie
                                      system mode [NORMAL|NOWEBSERVICE|SAFEMODE]
                  -version            show Oozie server build version

      oozie validate : validate a workflow, coordinator, bundle XML file
                     -auth     select authentication type [SIMPLE|KERBEROS]
                     -oozie    Oozie URL

      oozie sla : sla operations (Deprecated with Oozie 4.0)
                -auth      select authentication type [SIMPLE|KERBEROS]
                -filter    filter of SLA events. e.g., jobid=\;appname=
                -len       number of results (default '100', max '1000')
                -offset    start offset (default '0')
                -oozie     Oozie URL

      oozie pig -X : submit a pig job, everything after '-X' are pass-through parameters to pig, any '-D' arguments after '-X' are put in
                -auth            select authentication type [SIMPLE|KERBEROS]
                -config          job configuration file '.properties'
                -D    set/override value for given property
                -doas            doAs user, impersonates as the specified user
                -file            pig script
                -oozie           Oozie URL
                -P    set parameters for script

      oozie hive -X : submit a hive job, everything after '-X' are pass-through parameters to hive, any '-D' arguments after '-X' are put in
                 -auth            select authentication type [SIMPLE|KERBEROS]
                 -config          job configuration file '.properties'
                 -D    set/override value for given property
                 -doas            doAs user, impersonates as the specified user
                 -file            hive script
                 -oozie           Oozie URL
                 -P    set parameters for script

      oozie sqoop -X : submit a sqoop job, everything after '-X' are pass-through parameters to sqoop, any '-D' arguments after '-X' are put in
                  -auth            select authentication type [SIMPLE|KERBEROS]
                  -command     sqoop command
                  -config          job configuration file '.properties'
                  -D    set/override value for given property
                  -doas            doAs user, impersonates as the specified user
                  -oozie           Oozie URL

      oozie info : get more detailed info about specific topics
                 -timezones   display a list of available time zones

      oozie mapreduce : submit a mapreduce job
                      -auth            select authentication type [SIMPLE|KERBEROS]
                      -config          job configuration file '.properties'
                      -D    set/override value for given property
                      -doas            doAs user, impersonates as the specified user
                      -oozie           Oozie URL