Sunday, June 3, 2018

Two methods to modify HDFS custom metadata

Two methods to modify HDFS custom metadata with Cloudera Navigator

- Metadata file
    not recommended for production use, as it leads to the small-files problem
    updates provided through metadata files are queued before being merged

- Metadata API
    use either metadata files or the API, not both
    the API overwrites existing metadata, and changes take effect immediately
    


[donghua@cdh-vm data]$ hdfs dfs -ls /data/donghua/*drink*
-rw-r--r--   1 donghua hive        145 2018-06-03 11:27 /data/donghua/.drinks.csv.navigator
-rw-r--r--   1 donghua hive       5918 2018-06-03 00:07 /data/donghua/drinks.csv

[donghua@cdh-vm data]$ hdfs dfs -cat /data/donghua/.drinks.csv.navigator
{
"name":"drinks dataset",
"description": "metadata example using .drinks.csv.navigator",
"properties":{
"Dept":"myDept"
},
"tags":["external"]
}

curl -u admin:admin -X GET 'http://cdh01:7187/api/v13/entities/?query=originalName:imdb_1000.csv&limit=100&offset=0'
[donghua@cdh-vm data]$ curl -u admin:admin -X GET 'http://cdh-vm:7187/api/v13/entities/?query=originalName%3D%22imdb_1000.csv%22&limit=100&offset=0'
[ {
  "originalName" : "imdb_1000.csv",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/data/donghua",
  "deleteTime" : 0,
  "extractorRunId" : "5##20",
  "customProperties" : null,
  "name" : null,
  "description" : null,
  "tags" : null,
  "properties" : {
    "__cloudera_internal__hueLink" : "http://cdh-vm:8889/filebrowser/#/data/donghua/imdb_1000.csv"
  },
  "technicalProperties" : null,
  "fileSystemPath" : "/data/donghua/imdb_1000.csv",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T00:07:55.434Z",
  "lastModified" : "2018-06-03T00:07:55.434Z",
  "lastAccessed" : "2018-06-03T00:07:54.880Z",
  "permissions" : "rw-r--r--",
  "owner" : "donghua",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : false,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "20388",
  "internalType" : "fselement"
}, {
  "originalName" : "imdb_1000.csv",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/user/hive/warehouse/testdb.db/imdb_1000",
  "deleteTime" : 0,
  "extractorRunId" : "5##22",
  "customProperties" : null,
  "name" : null,
  "description" : null,
  "tags" : null,
  "properties" : {
    "__cloudera_internal__hueLink" : "http://cdh-vm:8889/filebrowser/#/user/hive/warehouse/testdb.db/imdb_1000/imdb_1000.csv"
  },
  "technicalProperties" : null,
  "fileSystemPath" : "/user/hive/warehouse/testdb.db/imdb_1000/imdb_1000.csv",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T01:06:12.920Z",
  "lastModified" : "2018-06-03T01:06:12.920Z",
  "lastAccessed" : "2018-06-03T01:06:12.920Z",
  "permissions" : "rw-r--r--",
  "owner" : "hive",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : false,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "22303",
  "internalType" : "fselement"
}, {
  "originalName" : "imdb_1000.csv._COPYING_",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/data/donghua",
  "deleteTime" : 1527984475434,
  "extractorRunId" : "5##20",
  "customProperties" : null,
  "name" : null,
  "description" : null,
  "tags" : null,
  "properties" : null,
  "technicalProperties" : null,
  "fileSystemPath" : "/data/donghua/imdb_1000.csv._COPYING_",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T00:07:54.880Z",
  "lastModified" : "2018-06-03T00:07:54.880Z",
  "lastAccessed" : "2018-06-03T00:07:54.880Z",
  "permissions" : "rw-r--r--",
  "owner" : "donghua",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : true,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "20386",
  "internalType" : "fselement"
} ]


curl -u admin:admin -X POST 'http://cdh-vm:7187/api/v13/entities/?query=originalName%3D%22imdb_1000.csv%22&limit=100&offset=0' \
-H "Content-Type:application/json" -d \
'{
"sourceId":"5",
"originalName" : "imdb_1000.csv",
"parentPath" : "/data/donghua",
"name":"imdb dataset",
"description": "metadata example using API",
"properties":{
"Dept":"myDept"
},
"tags":["external"]
}'


[donghua@cdh-vm data]$ curl -u admin:admin -X POST 'http://cdh-vm:7187/api/v13/entities/?query=originalName%3D%22imdb_1000.csv%22&limit=100&offset=0' \
> -H "Content-Type:application/json" -d \
> '{
> "sourceId":"5",
> "originalName" : "imdb_1000.csv",
> "parentPath" : "/data/donghua",
> "name":"imdb dataset",
> "description": "metadata example using API",
> "properties":{
> "Dept":"myDept"
> },
> "tags":["external"]
> }'
{
  "originalName" : "imdb_1000.csv",
  "originalDescription" : null,
  "sourceId" : "5",
  "firstClassParentId" : null,
  "parentPath" : "/data/donghua",
  "deleteTime" : 0,
  "extractorRunId" : "5##20",
  "customProperties" : null,
  "name" : "imdb dataset",
  "description" : "metadata example using API",
  "tags" : [ "external" ],
  "properties" : {
    "Dept" : "myDept",
    "__cloudera_internal__hueLink" : "http://cdh-vm:8889/filebrowser/#/data/donghua/imdb_1000.csv"
  },
  "technicalProperties" : null,
  "fileSystemPath" : "/data/donghua/imdb_1000.csv",
  "type" : "FILE",
  "size" : 91499,
  "created" : "2018-06-03T00:07:55.434Z",
  "lastModified" : "2018-06-03T00:07:55.434Z",
  "lastAccessed" : "2018-06-03T00:07:54.880Z",
  "permissions" : "rw-r--r--",
  "owner" : "donghua",
  "group" : "hive",
  "blockSize" : 134217728,
  "mimeType" : "application/octet-stream",
  "ezkeyName" : null,
  "replication" : 1,
  "metaClassName" : "fselement",
  "deleted" : false,
  "packageName" : "nav",
  "userEntity" : false,
  "sourceType" : "HDFS",
  "identity" : "20388",
  "internalType" : "fselement"
}