Sunday, May 27, 2018

Miniconda installation on Linux

[root@cdh01 ~]# ./Miniconda2-latest-Linux-x86_64.sh -b -p /opt/miniconda2 -u
PREFIX=/opt/miniconda2
installing: python-2.7.14-h1571d57_31 ...
Python 2.7.14 :: Anaconda, Inc.
installing: ca-certificates-2018.03.07-0 ...
installing: conda-env-2.6.0-h36134e3_1 ...
installing: libgcc-ng-7.2.0-hdf63c60_3 ...
installing: libstdcxx-ng-7.2.0-hdf63c60_3 ...
installing: libffi-3.2.1-hd88cf55_4 ...
installing: ncurses-6.0-h9df7e31_2 ...
installing: openssl-1.0.2o-h20670df_0 ...
installing: tk-8.6.7-hc745277_3 ...
installing: yaml-0.1.7-had09818_2 ...
installing: zlib-1.2.11-ha838bed_2 ...
installing: libedit-3.1-heed3624_0 ...
installing: readline-7.0-ha6073c6_4 ...
installing: sqlite-3.23.1-he433501_0 ...
installing: asn1crypto-0.24.0-py27_0 ...
installing: certifi-2018.4.16-py27_0 ...
installing: chardet-3.0.4-py27hfa10054_1 ...
installing: enum34-1.1.6-py27h99a27e9_1 ...
installing: futures-3.2.0-py27h7b459c0_0 ...
installing: idna-2.6-py27h5722d68_1 ...
installing: ipaddress-1.0.22-py27_0 ...
installing: pycosat-0.6.3-py27ha4109ae_0 ...
installing: pycparser-2.18-py27hefa08c5_1 ...
installing: pysocks-1.6.8-py27_0 ...
installing: ruamel_yaml-0.15.35-py27h14c3975_1 ...
installing: six-1.11.0-py27h5f960f1_1 ...
installing: cffi-1.11.5-py27h9745a5d_0 ...
installing: setuptools-39.0.1-py27_0 ...
installing: cryptography-2.2.2-py27h14c3975_0 ...
installing: wheel-0.31.0-py27_0 ...
installing: pip-9.0.3-py27_0 ...
installing: pyopenssl-17.5.0-py27hcee3be0_0 ...
installing: urllib3-1.22-py27ha55213b_0 ...
installing: requests-2.18.4-py27hc5b0589_1 ...
installing: conda-4.5.1-py27_0 ...
installation finished.



[donghua@cdh01 ~]$ export PATH=/opt/miniconda2/bin:$PATH

# alternative: create using the base python version
# /opt/miniconda2/bin/conda create --name py27 

[donghua@cdh01 ~]$ /opt/miniconda2/bin/conda create --name py27 python=2.7
Solving environment: done


==> WARNING: A newer version of conda exists. <==
  current version: 4.5.1
  latest version: 4.5.4

Please update conda by running

    $ conda update -n base conda



## Package Plan ##

  environment location: /home/donghua/.conda/envs/py27

  added / updated specs:
    - python=2.7


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ncurses-6.1                |       hf484d3e_0         943 KB
    pip-10.0.1                 |           py27_0         1.7 MB
    setuptools-39.1.0          |           py27_0         582 KB
    libedit-3.1.20170329       |       h6b74fdf_2         172 KB
    wheel-0.31.1               |           py27_0          62 KB
    python-2.7.15              |       h1571d57_0        12.1 MB
    ------------------------------------------------------------
                                           Total:        15.5 MB

The following NEW packages will be INSTALLED:

    ca-certificates: 2018.03.07-0
    certifi:         2018.4.16-py27_0
    libedit:         3.1.20170329-h6b74fdf_2
    libffi:          3.2.1-hd88cf55_4
    libgcc-ng:       7.2.0-hdf63c60_3
    libstdcxx-ng:    7.2.0-hdf63c60_3
    ncurses:         6.1-hf484d3e_0
    openssl:         1.0.2o-h20670df_0
    pip:             10.0.1-py27_0
    python:          2.7.15-h1571d57_0
    readline:        7.0-ha6073c6_4
    setuptools:      39.1.0-py27_0
    sqlite:          3.23.1-he433501_0
    tk:              8.6.7-hc745277_3
    wheel:           0.31.1-py27_0
    zlib:            1.2.11-ha838bed_2

Proceed ([y]/n)? y


Downloading and Extracting Packages
ncurses 6.1############################################# | 100%
pip 10.0.1############################################## | 100%
setuptools 39.1.0####################################### | 100%
libedit 3.1.20170329#################################### | 100%
wheel 0.31.1############################################ | 100%
python 2.7.15########################################### | 100%
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
#
# To activate this environment, use:
# > source activate py27
#
# To deactivate an active environment, use:
# > source deactivate
#

[donghua@cdh01 ~]$



[donghua@cdh01 ~]$ conda list
# packages in environment at /opt/miniconda2:
#
# Name                    Version                   Build  Channel
asn1crypto                0.24.0                   py27_0
ca-certificates           2018.03.07                    0
certifi                   2018.4.16                py27_0
cffi                      1.11.5           py27h9745a5d_0
chardet                   3.0.4            py27hfa10054_1
conda                     4.5.1                    py27_0
conda-env                 2.6.0                h36134e3_1
cryptography              2.2.2            py27h14c3975_0
enum34                    1.1.6            py27h99a27e9_1
futures                   3.2.0            py27h7b459c0_0
idna                      2.6              py27h5722d68_1
ipaddress                 1.0.22                   py27_0
libedit                   3.1                  heed3624_0
libffi                    3.2.1                hd88cf55_4
libgcc-ng                 7.2.0                hdf63c60_3
libstdcxx-ng              7.2.0                hdf63c60_3
ncurses                   6.0                  h9df7e31_2
openssl                   1.0.2o               h20670df_0
pip                       9.0.3                    py27_0
pycosat                   0.6.3            py27ha4109ae_0
pycparser                 2.18             py27hefa08c5_1
pyopenssl                 17.5.0           py27hcee3be0_0
pysocks                   1.6.8                    py27_0
python                    2.7.14              h1571d57_31
readline                  7.0                  ha6073c6_4
requests                  2.18.4           py27hc5b0589_1
ruamel_yaml               0.15.35          py27h14c3975_1
setuptools                39.0.1                   py27_0
six                       1.11.0           py27h5f960f1_1
sqlite                    3.23.1               he433501_0
tk                        8.6.7                hc745277_3
urllib3                   1.22             py27ha55213b_0
wheel                     0.31.0                   py27_0
yaml                      0.1.7                had09818_2
zlib                      1.2.11               ha838bed_2
[donghua@cdh01 ~]$ conda env list
# conda environments:
#
py27                     /home/donghua/.conda/envs/py27
base                  *  /opt/miniconda2



[donghua@cdh01 ~]$ source activate py27
(py27) [donghua@cdh01 ~]$
(py27) [donghua@cdh01 ~]$
(py27) [donghua@cdh01 ~]$
(py27) [donghua@cdh01 ~]$
(py27) [donghua@cdh01 ~]$ conda install pandas
Solving environment: done


==> WARNING: A newer version of conda exists. <==
  current version: 4.5.1
  latest version: 4.5.4

Please update conda by running

    $ conda update -n base conda



## Package Plan ##

  environment location: /home/donghua/.conda/envs/py27

  added / updated specs:
    - pandas


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    intel-openmp-2018.0.0      |                8         620 KB
    numpy-1.14.3               |   py27hcd700cb_1          41 KB
    mkl_fft-1.0.1              |   py27h3010b51_0         137 KB
    pytz-2018.4                |           py27_0         211 KB
    python-dateutil-2.7.3      |           py27_0         258 KB
    mkl-2018.0.2               |                1       205.2 MB
    numpy-base-1.14.3          |   py27h9be14a7_1         4.1 MB
    mkl_random-1.0.1           |   py27h629b387_0         361 KB
    blas-1.0                   |              mkl           6 KB
    libgfortran-ng-7.2.0       |       hdf63c60_3         1.2 MB
    pandas-0.23.0              |   py27h637b7d7_0        11.8 MB
    ------------------------------------------------------------
                                           Total:       223.9 MB

The following NEW packages will be INSTALLED:

    blas:            1.0-mkl
    intel-openmp:    2018.0.0-8
    libgfortran-ng:  7.2.0-hdf63c60_3
    mkl:             2018.0.2-1
    mkl_fft:         1.0.1-py27h3010b51_0
    mkl_random:      1.0.1-py27h629b387_0
    numpy:           1.14.3-py27hcd700cb_1
    numpy-base:      1.14.3-py27h9be14a7_1
    pandas:          0.23.0-py27h637b7d7_0
    python-dateutil: 2.7.3-py27_0
    pytz:            2018.4-py27_0
    six:             1.11.0-py27h5f960f1_1

Proceed ([y]/n)? y


Downloading and Extracting Packages
intel-openmp 2018.0.0####################################### | 100%
numpy 1.14.3################################################ | 100%
mkl_fft 1.0.1############################################### | 100%
pytz 2018.4################################################# | 100%
python-dateutil 2.7.3####################################### | 100%
mkl 2018.0.2################################################ | 100%
numpy-base 1.14.3########################################### | 100%
mkl_random 1.0.1############################################ | 100%
blas 1.0#################################################### | 100%
libgfortran-ng 7.2.0######################################## | 100%
pandas 0.23.0############################################### | 100%
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
(py27) [donghua@cdh01 ~]$ source deactivate


[donghua@cdh01 ~]$ conda install jupyter -n py27


[donghua@cdh01 ~]$ conda list -n py27
# packages in environment at /home/donghua/.conda/envs/py27:
#
# Name                    Version                   Build  Channel
blas                      1.0                         mkl
ca-certificates           2018.03.07                    0
certifi                   2018.4.16                py27_0
intel-openmp              2018.0.0                      8
libedit                   3.1.20170329         h6b74fdf_2
libffi                    3.2.1                hd88cf55_4
libgcc-ng                 7.2.0                hdf63c60_3
libgfortran-ng            7.2.0                hdf63c60_3
libstdcxx-ng              7.2.0                hdf63c60_3
mkl                       2018.0.2                      1
mkl_fft                   1.0.1            py27h3010b51_0
mkl_random                1.0.1            py27h629b387_0
ncurses                   6.1                  hf484d3e_0
numpy                     1.14.3           py27hcd700cb_1
numpy-base                1.14.3           py27h9be14a7_1
openssl                   1.0.2o               h20670df_0
pandas                    0.23.0           py27h637b7d7_0
pip                       10.0.1                   py27_0
python                    2.7.15               h1571d57_0
python-dateutil           2.7.3                    py27_0
pytz                      2018.4                   py27_0
readline                  7.0                  ha6073c6_4
setuptools                39.1.0                   py27_0
six                       1.11.0           py27h5f960f1_1
sqlite                    3.23.1               he433501_0
tk                        8.6.7                hc745277_3
wheel                     0.31.1                   py27_0
zlib                      1.2.11               ha838bed_2

[donghua@cdh01 ~]$ conda list -n base
# packages in environment at /opt/miniconda2:
#
# Name                    Version                   Build  Channel
asn1crypto                0.24.0                   py27_0
ca-certificates           2018.03.07                    0
certifi                   2018.4.16                py27_0
cffi                      1.11.5           py27h9745a5d_0
chardet                   3.0.4            py27hfa10054_1
conda                     4.5.1                    py27_0
conda-env                 2.6.0                h36134e3_1
cryptography              2.2.2            py27h14c3975_0
enum34                    1.1.6            py27h99a27e9_1
futures                   3.2.0            py27h7b459c0_0
idna                      2.6              py27h5722d68_1
ipaddress                 1.0.22                   py27_0
libedit                   3.1                  heed3624_0
libffi                    3.2.1                hd88cf55_4
libgcc-ng                 7.2.0                hdf63c60_3
libstdcxx-ng              7.2.0                hdf63c60_3
ncurses                   6.0                  h9df7e31_2
openssl                   1.0.2o               h20670df_0
pip                       9.0.3                    py27_0
pycosat                   0.6.3            py27ha4109ae_0
pycparser                 2.18             py27hefa08c5_1
pyopenssl                 17.5.0           py27hcee3be0_0
pysocks                   1.6.8                    py27_0
python                    2.7.14              h1571d57_31
readline                  7.0                  ha6073c6_4
requests                  2.18.4           py27hc5b0589_1
ruamel_yaml               0.15.35          py27h14c3975_1
setuptools                39.0.1                   py27_0
six                       1.11.0           py27h5f960f1_1
sqlite                    3.23.1               he433501_0
tk                        8.6.7                hc745277_3
urllib3                   1.22             py27ha55213b_0
wheel                     0.31.0                   py27_0
yaml                      0.1.7                had09818_2
zlib                      1.2.11               ha838bed_2


Reference: https://conda.io/docs/_downloads/conda-cheatsheet.pdf


Saturday, May 26, 2018

Workaround using Hive+Sentry without Kerberos/LDAP

[donghua@cdh01 ~]$ beeline -u jdbc:hive2://cdh01:10000/testdb
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
scan complete in 3ms
Connecting to jdbc:hive2://cdh01:10000/testdb
Connected to: Apache Hive (version 1.1.0-cdh5.14.2)
Driver: Hive JDBC (version 1.1.0-cdh5.14.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 1.1.0-cdh5.14.2 by Apache Hive
0: jdbc:hive2://cdh01:10000/testdb> select current_user();
Error: Error while compiling statement: FAILED: InvalidConfigurationException hive.server2.authentication can't be none in non-testing mode (state=42000,code=40000)
0: jdbc:hive2://cdh01:10000/testdb>

[HiveServer2-Handler-Pool: Thread-58]: FAILED: InvalidConfigurationException hive.server2.authentication can't be none in non-testing mode
org.apache.sentry.binding.hive.conf.InvalidConfigurationException: hive.server2.authentication can't be none in non-testing mode
    at org.apache.sentry.binding.hive.authz.HiveAuthzBinding.validateHiveServer2Config(HiveAuthzBinding.java:180)
    at org.apache.sentry.binding.hive.authz.HiveAuthzBinding.validateHiveConfig(HiveAuthzBinding.java:148)
    at org.apache.sentry.binding.hive.authz.HiveAuthzBinding.<init>(HiveAuthzBinding.java:84)
    at org.apache.sentry.binding.hive.authz.HiveAuthzBinding.<init>(HiveAuthzBinding.java:80)
    at org.apache.sentry.binding.hive.HiveAuthzBindingHook.<init>(HiveAuthzBindingHook.java:121)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
    at java.lang.Class.newInstance(Class.java:442)
    at org.apache.hadoop.hive.ql.hooks.HooksLoader.getHooks(HooksLoader.java:100)
    at org.apache.hadoop.hive.ql.hooks.HooksLoader.getHooks(HooksLoader.java:64)

[screenshots omitted]


[donghua@cdh01 ~]$ beeline -n donghua -u jdbc:hive2://cdh01:10000/testdb
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=512M; support was removed in 8.0
scan complete in 2ms
Connecting to jdbc:hive2://cdh01:10000/testdb
Connected to: Apache Hive (version 1.1.0-cdh5.14.2)
Driver: Hive JDBC (version 1.1.0-cdh5.14.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 1.1.0-cdh5.14.2 by Apache Hive
0: jdbc:hive2://cdh01:10000/testdb> select current_user();
INFO  : Compiling command(queryId=hive_20180526200404_ef3e8dd5-4281-4338-b85d-386db2122937): select current_user()
INFO  : Semantic Analysis Completed
INFO  : Returning Hive schema: Schema(fieldSchemas:[FieldSchema(name:_c0, type:string, comment:null)], properties:null)
INFO  : Completed compiling command(queryId=hive_20180526200404_ef3e8dd5-4281-4338-b85d-386db2122937); Time taken: 0.132 seconds
INFO  : Executing command(queryId=hive_20180526200404_ef3e8dd5-4281-4338-b85d-386db2122937): select current_user()
INFO  : Completed executing command(queryId=hive_20180526200404_ef3e8dd5-4281-4338-b85d-386db2122937); Time taken: 0.0 seconds
INFO  : OK
+-------+--+
|  _c0  |
+-------+--+
| hive  |
+-------+--+
1 row selected (0.261 seconds)

ntp configuration to avoid KUDU ntptime error in lab/poc environment

[root@cdh01 ~]# tail -n 5 /etc/ntp.conf
# Undisciplined Local Clock. This is a fake driver intended for backup
# and when no outside source of synchronized time is available.
server  127.127.1.0     # local clock
fudge   127.127.1.0 stratum 10


[root@cdh01 ~]# ntptime
ntp_gettime() returns code 0 (OK)
  time deb2b920.6f031890  Sat, May 26 2018  0:32:32.433, (.433641853),
  maximum error 133377 us, estimated error 527 us, TAI offset 0
ntp_adjtime() returns code 0 (OK)
  modes 0x0 (),
  offset 0.000 us, frequency -12.739 ppm, interval 1 s,
  maximum error 133377 us, estimated error 527 us,
  status 0x2000 (NANO),
  time constant 2, precision 0.001 us, tolerance 500 ppm,
=====================================

Alternative workaround:

Note: Use a reliable clock in Linux. Normally you need to install and configure NTP so Kudu has a reliable clock. Without a reliable clock you may get errors starting and running Kudu that NTP would resolve. For development only, it is possible to avoid setting up and running ntpd with a reliable time server by running Kudu with the setting --use-hybrid-clock=false. However, that setting has a serious effect on transactional consistency, so it is not something we recommend for production or load testing. Furthermore, without a reliable clock provided by NTP, Kudu skips cleanup work it would normally do: your disk usage will grow beyond what is normal and performance will be negatively impacted. For just this short, lab-only environment — because it is non-production and we don't care about performance in the lab — we will set the not-recommended --use-hybrid-clock=false parameter. Normally, never forget to set up NTP for Kudu in production!

Click on the Kudu Service > Configuration. Look for the setting, "Kudu Service Advanced Configuration Snippet (Safety Valve) for gflagfile" and add the setting:

--use-hybrid-clock=false

Note: if you don’t set this clock setting correctly (or setup NTP), Kudu will fail to start intermittently and the full log file for the Kudu service will have errors indicating timesync problems such as:
Check failed:_s.ok() Bad status: Service unavailable: Cannot initialize clock: Error reading clock. Clock considered unsynchronized

Friday, May 25, 2018

Kudu partition example

create table kudu_partition
(  id string,
load_ts timestamp,
col1 string,
col2 string,
col3 string, 
primary key (id,load_ts))
partition by hash (id) partitions 20,
range (load_ts) 
( partition '2018-05-01' <= values < '2018-06-01',
    partition '2018-06-01' <= values < '2018-07-01',
    partition '2018-07-01' <= values < '2018-08-01',
    partition '2018-08-01' <= values < '2018-09-01',
partition '2018-09-01' <= values < '2018-10-01',
    partition '2018-10-01' <= values < '2018-11-01',
    partition '2018-11-01' <= values < '2018-12-01',
    partition '2018-12-01' <= values < '2019-01-01',
    partition '2019-01-01' <= values < '2019-02-01',
    partition '2019-02-01' <= values < '2019-03-01',
    partition '2019-03-01' <= values < '2019-04-01',
    partition '2019-04-01' <= values < '2019-05-01'
)stored as kudu;


insert into kudu_partition values('abc1',now(),'a','b','c');
insert into kudu_partition values('abc2','2018-05-01','a','b','c');
insert into kudu_partition values('abc3','2018-05-05 21:03:05','a','b','c');

alter table kudu_partition add range partition '2019-05-01' <= values < '2019-06-01';
alter table kudu_partition drop range partition '2018-05-01' <= values < '2018-06-01';


[cdh-vm.dbaglobe.com:21000] > show range partitions kudu_partition;
Query: show range partitions kudu_partition
+---------------------------------------------------------------------+
| RANGE (load_ts)                                                     |
+---------------------------------------------------------------------+
| 2018-06-01T00:00:00.000000Z <= VALUES < 2018-07-01T00:00:00.000000Z |
| 2018-07-01T00:00:00.000000Z <= VALUES < 2018-08-01T00:00:00.000000Z |
| 2018-08-01T00:00:00.000000Z <= VALUES < 2018-09-01T00:00:00.000000Z |
| 2018-09-01T00:00:00.000000Z <= VALUES < 2018-10-01T00:00:00.000000Z |
| 2018-10-01T00:00:00.000000Z <= VALUES < 2018-11-01T00:00:00.000000Z |
| 2018-11-01T00:00:00.000000Z <= VALUES < 2018-12-01T00:00:00.000000Z |
| 2018-12-01T00:00:00.000000Z <= VALUES < 2019-01-01T00:00:00.000000Z |
| 2019-01-01T00:00:00.000000Z <= VALUES < 2019-02-01T00:00:00.000000Z |
| 2019-02-01T00:00:00.000000Z <= VALUES < 2019-03-01T00:00:00.000000Z |
| 2019-03-01T00:00:00.000000Z <= VALUES < 2019-04-01T00:00:00.000000Z |
| 2019-04-01T00:00:00.000000Z <= VALUES < 2019-05-01T00:00:00.000000Z |
| 2019-05-01T00:00:00.000000Z <= VALUES < 2019-06-01T00:00:00.000000Z |
+---------------------------------------------------------------------+

select * from kudu_partition;

Monday, May 21, 2018

Kafka hands-on with Docker

Reference URL: https://github.com/Landoop/fast-data-dev

docker run --rm --net=host -e ADV_HOST=192.168.1.86 landoop/fast-data-dev

# Start docker (change IP address to your Linux IP if external access needed, otherwise leave it to 127.0.0.1)
docker run --rm -it -p 2181:2181 -p 3030:3030 -p 8081:8081 -p 8082:8082 -p 8083:8083 -p 9092:9092 -e ADV_HOST=192.168.1.86 landoop/fast-data-dev

# Login the docker
docker run --rm -it --net=host landoop/fast-data-dev bash

root@fast-data-dev / $ kafka-topics --zookeeper 127.0.0.1:2181 --create --topic first_topic --partitions 3 --replication-factor 1
WARNING: Due to limitations in metric names, topics with a period ('.') or underscore ('_') could collide. To avoid issues it is best to use either, but not both.
Created topic "first_topic".

root@fast-data-dev / $ kafka-topics --zookeeper 127.0.0.1:2181 --list
__consumer_offsets
_schemas
backblaze_smart
connect-configs
connect-offsets
connect-statuses
coyote-test-avro
coyote-test-binary
coyote-test-json
first_topic
logs_broker
nyc_yellow_taxi_trip_data
reddit_posts
sea_vessel_position_reports
telecom_italia_data
telecom_italia_grid

root@fast-data-dev / $ kafka-topics --zookeeper 127.0.0.1:2181 --create --topic second_topic --partitions 3 --replication-factor 1
WARNING: Due to limitations in metric names, topics with a period ('.') or underscore ('_') could collide. To avoid issues it is best to use either, but not both.
Created topic "second_topic".
root@fast-data-dev / $ kafka-topics --zookeeper 127.0.0.1:2181 --delete --topic second_topic
Topic second_topic is marked for deletion.
Note: This will have no impact if delete.topic.enable is not set to true.

root@fast-data-dev / $ kafka-topics --zookeeper 127.0.0.1:2181 --describe --topic first_topic
Topic:first_topic PartitionCount:3 ReplicationFactor:1 Configs:
Topic: first_topic Partition: 0 Leader: 0 Replicas: 0 Isr: 0
Topic: first_topic Partition: 1 Leader: 0 Replicas: 0 Isr: 0
Topic: first_topic Partition: 2 Leader: 0 Replicas: 0 Isr: 0

root@fast-data-dev / $ kafka-console-producer --broker-list 127.0.0.1:9092 --topic first_topic
>first message
>second message
>^C


root@fast-data-dev / $ kafka-console-consumer --bootstrap-server 127.0.0.1:9092 --topic first_topic

root@fast-data-dev / $ kafka-console-consumer --bootstrap-server 127.0.0.1:9092 --topic first_topic --from-beginning

root@fast-data-dev / $ kafka-console-consumer --bootstrap-server 127.0.0.1:9092 --topic first_topic --from-beginning --partition 0

# named consumer group commits its position in kafka hidden queue
root@fast-data-dev / $ kafka-console-consumer --bootstrap-server 127.0.0.1:9092 --topic first_topic --consumer-property group.id=mygroup1 --from-beginning
second message
test
first message
test
test2
^CProcessed a total of 5 messages
# it will not read old message consumed before
root@fast-data-dev / $ kafka-console-consumer --bootstrap-server 127.0.0.1:9092 --topic first_topic --consumer-property group.id=mygroup1 --from-beginning







Friday, May 11, 2018

/etc/init.d/ambari-server start REASON: Server not yet listening on http port 8080 after 50 seconds. Exiting.

.
The following error just indicates that your Ambari Server is taking slightly more than the default 50 seconds to open port 8080, and hence you see this message.

ERROR: Exiting with exit code 1.
REASON: Server not yet listening on http port 8080 after 50 seconds. Exiting.



You should try the following to fix this:

Edit the "/etc/ambari-server/conf/ambari.properties" and increase the following property value to 120 or 150 seconds.

server.startup.web.timeout=120 

Then restart the ambari-server again.

# ambari-server restart

Tuesday, May 8, 2018

flatMap & flatMapValues explained in example

In [101]: rdd = sc.parallelize([2, 3, 4])

In [102]: rdd.map(lambda x: range(1, x)).collect()
Out[102]: [[1], [1, 2], [1, 2, 3]]                                             

In [103]: rdd.flatMap(lambda x: range(1, x)).collect()
Out[103]: [1, 1, 2, 1, 2, 3]


In [104]: x = sc.parallelize([("a", ["x", "y", "z"]), ("b", ["p", "r"])])

In [106]: x.flatMapValues(lambda value:value).collect()
Out[106]: [('a', 'x'), ('a', 'y'), ('a', 'z'), ('b', 'p'), ('b', 'r')] 

Monday, May 7, 2018

Read CSV data into Spark (RDD and DataFrame comparatively)


# Sample data
[donghua@cdh-vm data]$ hdfs dfs -cat /data/salesmen.csv
Date,Salesman,Revenue
1/11/16,Bob,1053
1/12/16,Bob,4362
1/13/16,Bob,6812

Method 1: Using RDD directly

from pyspark.sql.types import *
from datetime import datetime


salesmanSchema = StructType(
[
StructField("Date",DateType()),
StructField("Salesman",StringType()),
StructField("Revenue",IntegerType())
])


salesmanRDD = sc.textFile('/data/salesmen.csv')

header = salesmanRDD.first()

dataRDD = salesmanRDD.filter(lambda line: line <> header)

salesmanSchemaRDD=dataRDD.map(lambda line: line.split(',')).\
map(lambda values:[datetime(2000+int(values[0].split('/')[2]),int(values[0].split('/')[0]),int(values[0].split('/')[1])),values[1],int(values[2])])

salesmanDF1=spark.createDataFrame(salesmanSchemaRDD,salesmanSchema)

salesmanDF1.show(5)


In [55]: salesmanDF1.show(5)
+----------+--------+-------+
|      Date|Salesman|Revenue|
+----------+--------+-------+
|2016-01-01|     Bob|   7172|
|2016-01-02|     Bob|   6362|
|2016-01-03|     Bob|   5982|
|2016-01-04|     Bob|   7917|
|2016-01-05|     Bob|   7837|
+----------+--------+-------+
only showing top 5 rows

Method 2: Using DataFrame with predefined scheme directly

from pyspark.sql.types import *

# Schema matching the CSV columns: Date, Salesman, Revenue.
salesmanSchema = StructType([
    StructField("Date", DateType()),
    StructField("Salesman", StringType()),
    StructField("Revenue", IntegerType()),
])

In [59]: salesmanDF2 = spark.read.schema(salesmanSchema).csv('/data/salesmen.csv',header=True,dateFormat='MM/dd/yy')

In [60]: salesmanDF2.show(5)
+----------+--------+-------+
|      Date|Salesman|Revenue|
+----------+--------+-------+
|2016-01-01|     Bob|   7172|
|2016-01-02|     Bob|   6362|
|2016-01-03|     Bob|   5982|
|2016-01-04|     Bob|   7917|
|2016-01-05|     Bob|   7837|
+----------+--------+-------+
only showing top 5 rows


Tuesday, May 1, 2018

Elastic Search Example: Part 4

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/indices/blogs?v'
health status index uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   blogs NWKFGM2GQe-fS481ZI7EuA   5   1          3            0     16.4kb         16.4kb

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/blogs/_settings?pretty'
{
  "blogs" : {
    "settings" : {
      "index" : {
        "creation_date" : "1525127903277",
        "number_of_shards" : "5",
        "number_of_replicas" : "1",
        "uuid" : "NWKFGM2GQe-fS481ZI7EuA",
        "version" : {
          "created" : "6020499"
        },
        "provided_name" : "blogs"
      }
    }
  }
}

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XPUT  -H 'Content-Type: application/json' -u elastic:elastic 'localhost:9200/blogs/_settings?pretty' -d '{
>   "index" : {
>     "number_of_replicas" : 0
>   }
> }'
{
  "acknowledged" : true
}

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/blogs/_settings?pretty'
{
  "blogs" : {
    "settings" : {
      "index" : {
        "creation_date" : "1525127903277",
        "number_of_shards" : "5",
        "number_of_replicas" : "0",
        "uuid" : "NWKFGM2GQe-fS481ZI7EuA",
        "version" : {
          "created" : "6020499"
        },
        "provided_name" : "blogs"
      }
    }
  }
}
Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/indices/blogs?v'
health status index uuid                   pri rep docs.count docs.deleted store.size pri.store.size
green  open   blogs NWKFGM2GQe-fS481ZI7EuA   5   0          3            0     16.4kb         16.4kb
Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ 


Symptoms if "index.number_of_replicas: 0" in "config/elasticsearch.yml"

Found index level settings on node level configuration.

Since elasticsearch 5.x index level settings can NOT be set on the nodes 
configuration like the elasticsearch.yaml, in system properties or command line 
arguments.In order to upgrade all indices the settings must be updated via the 
/${index}/_settings API. Unless all settings are dynamic all indices must be closed 
in order to apply the upgradeIndices created in the future should use index templates 
to set default values. 

Please ensure all required values are updated on all indices by executing: 

curl -XPUT 'http://localhost:9200/_all/_settings?preserve_existing=true' -d '{
  "index.number_of_replicas" : "0"
}'
*************************************************************************************

[2018-05-01T08:21:55,016][WARN ][o.e.b.ElasticsearchUncaughtExceptionHandler] [] uncaught exception in thread [main]
org.elasticsearch.bootstrap.StartupException: java.lang.IllegalArgumentException: node settings must not contain any index level settings
at org.elasticsearch.bootstrap.Elasticsearch.init(Elasticsearch.java:125) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.bootstrap.Elasticsearch.execute(Elasticsearch.java:112) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cli.EnvironmentAwareCommand.execute(EnvironmentAwareCommand.java:86) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.cli.Command.mainWithoutErrorHandling(Command.java:124) ~[elasticsearch-cli-6.2.4.jar:6.2.4]
at org.elasticsearch.cli.Command.main(Command.java:90) ~[elasticsearch-cli-6.2.4.jar:6.2.4]
at org.elasticsearch.bootstrap.Elasticsearch.main(Elasticsearch.java:92) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.bootstrap.Elasticsearch.main(Elasticsearch.java:85) ~[elasticsearch-6.2.4.jar:6.2.4]
Caused by: java.lang.IllegalArgumentException: node settings must not contain any index level settings
at org.elasticsearch.common.settings.SettingsModule.(SettingsModule.java:128) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.node.Node.(Node.java:331) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.node.Node.(Node.java:246) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.bootstrap.Bootstrap$5.(Bootstrap.java:213) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.bootstrap.Bootstrap.setup(Bootstrap.java:213) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.bootstrap.Bootstrap.init(Bootstrap.java:323) ~[elasticsearch-6.2.4.jar:6.2.4]
at org.elasticsearch.bootstrap.Elasticsearch.init(Elasticsearch.java:121) ~[elasticsearch-6.2.4.jar:6.2.4]

... 6 more

Elastic Search Example: Part 3


Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/allocation?help'
shards       | s              | number of shards on node      
disk.indices | di,diskIndices | disk used by ES indices       
disk.used    | du,diskUsed    | disk used (total, not just ES)
disk.avail   | da,diskAvail   | disk available                
disk.total   | dt,diskTotal   | total capacity of all volumes 
disk.percent | dp,diskPercent | percent disk used             
host         | h              | host of node                  
ip           |                | ip of node                    
node         | n              | name of node                  

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/allocation'
11 8.8mb 52gb 60.7gb 112.8gb 46 127.0.0.1 127.0.0.1 0rArjNg
 5                                                  UNASSIGNED

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/allocation?v&h=host,disk.total,disk.avail'
host      disk.total disk.avail
127.0.0.1    112.8gb     60.7gb
                               
Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/allocation?v'
shards disk.indices disk.used disk.avail disk.total disk.percent host      ip        node
    11        8.8mb      52gb     60.7gb    112.8gb           46 127.0.0.1 127.0.0.1 0rArjNg
     5                                                                               UNASSIGNED
Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/shards?v'
index                         shard prirep state      docs  store ip        node
.watcher-history-7-2018.04.30 0     p      STARTED     910    1mb 127.0.0.1 0rArjNg
.watches                      0     p      STARTED       6 33.2kb 127.0.0.1 0rArjNg
.security-6                   0     p      STARTED       3  9.8kb 127.0.0.1 0rArjNg
.monitoring-es-6-2018.04.30   0     p      STARTED    8123  7.8mb 127.0.0.1 0rArjNg
.monitoring-alerts-6          0     p      STARTED       3   18kb 127.0.0.1 0rArjNg
blogs                         3     p      STARTED       1  5.2kb 127.0.0.1 0rArjNg
blogs                         3     r      UNASSIGNED                       
blogs                         2     p      STARTED       1  5.3kb 127.0.0.1 0rArjNg
blogs                         2     r      UNASSIGNED                       
blogs                         1     p      STARTED       1  5.3kb 127.0.0.1 0rArjNg
blogs                         1     r      UNASSIGNED                       
blogs                         4     p      STARTED       0   230b 127.0.0.1 0rArjNg
blogs                         4     r      UNASSIGNED                       
blogs                         0     p      STARTED       0   230b 127.0.0.1 0rArjNg
blogs                         0     r      UNASSIGNED                       
.triggered_watches            0     p      STARTED       0 63.2kb 127.0.0.1 0rArjNg

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/shards/blogs/?v'
index shard prirep state      docs store ip        node
blogs 3     p      STARTED       1 5.2kb 127.0.0.1 0rArjNg
blogs 3     r      UNASSIGNED                      
blogs 2     p      STARTED       1 5.3kb 127.0.0.1 0rArjNg
blogs 2     r      UNASSIGNED                      
blogs 1     p      STARTED       1 5.3kb 127.0.0.1 0rArjNg
blogs 1     r      UNASSIGNED                      
blogs 4     p      STARTED       0  230b 127.0.0.1 0rArjNg
blogs 4     r      UNASSIGNED                      
blogs 0     p      STARTED       0  230b 127.0.0.1 0rArjNg
blogs 0     r      UNASSIGNED                      

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/master?v'
id                     host      ip        node
0rArjNgoSY2LaVL62XU5rQ 127.0.0.1 127.0.0.1 0rArjNg
Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ 
Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/nodes?v'
ip        heap.percent ram.percent cpu load_1m load_5m load_15m node.role master name
127.0.0.1           38         100  34    2.95                  mdi       *      0rArjNg

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/indices?v'
health status index                         uuid                   pri rep docs.count docs.deleted store.size pri.store.size
green  open   .triggered_watches            6MzxbYxdQ_C0njT4053-TA   1   0          0            0     63.2kb         63.2kb
green  open   .monitoring-alerts-6          KWHtzA78S2CK4wTYNer1zA   1   0          4            0     24.5kb         24.5kb
green  open   .monitoring-es-6-2018.04.30   ynwB-RSOStO_4f7wAFfVVg   1   0       8145           16      7.5mb          7.5mb
green  open   .watches                      GPsfCDzLSUydWzKEFfA4-Q   1   0          6            0     75.2kb         75.2kb
green  open   .security-6                   ax9wqeX6S3qNK8RzapGfwA   1   0          3            0      9.8kb          9.8kb
green  open   .watcher-history-7-2018.04.30 ov_uuro6QuKD4M5q1VdvBg   1   0        920            0      1.1mb          1.1mb
yellow open   blogs                         NWKFGM2GQe-fS481ZI7EuA   5   1          3            0     16.4kb         16.4kb

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/segments?v'
index                         shard prirep ip        segment generation docs.count docs.deleted   size size.memory committed searchable version compound
.triggered_watches            0     p      127.0.0.1 _2               2        474          474 62.9kb           0 true      false      7.2.1   true
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _nw            860       7953           16  3.5mb           0 true      false      7.2.1   false
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _nx            861         27           16 57.1kb           0 true      false      7.2.1   true
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _ny            862         27           16 57.2kb           0 true      false      7.2.1   true
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _nz            863         27           16 57.2kb           0 true      false      7.2.1   true
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _o0            864         27           16 57.1kb           0 true      false      7.2.1   true
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _o1            865         27           16 57.2kb           0 true      false      7.2.1   true
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _o2            866         27           16 57.1kb           0 true      false      7.2.1   true
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _o3            867         27           16 57.1kb           0 true      false      7.2.1   true
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _o4            868         27            0   57kb           0 true      false      7.2.1   true
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _og            880       8118           16  3.5mb       21904 false     true       7.2.1   false
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _oh            881         11           16 56.7kb        6896 false     true       7.2.1   true
.monitoring-es-6-2018.04.30   0     p      127.0.0.1 _oi            882         27            0 56.7kb        6896 false     true       7.2.1   true
.watcher-history-7-2018.04.30 0     p      127.0.0.1 _b4            400        910            0    1mb       22205 true      true       7.2.1   false
.watcher-history-7-2018.04.30 0     p      127.0.0.1 _b5            401          1            0 22.5kb       11958 false     true       7.2.1   true
.watcher-history-7-2018.04.30 0     p      127.0.0.1 _b6            402          1            0 22.6kb       11958 false     true       7.2.1   true
.watcher-history-7-2018.04.30 0     p      127.0.0.1 _b7            403          4            0 33.4kb       17085 false     true       7.2.1   true
.watcher-history-7-2018.04.30 0     p      127.0.0.1 _b8            404          4            0 32.6kb       16899 false     true       7.2.1   true
.monitoring-alerts-6          0     p      127.0.0.1 _0               0          1            0  5.8kb        2153 true      true       7.2.1   true
.monitoring-alerts-6          0     p      127.0.0.1 _1               1          1            0  5.8kb        2153 true      true       7.2.1   true
.monitoring-alerts-6          0     p      127.0.0.1 _21             73          1            0  5.9kb           0 true      false      7.2.1   true
.monitoring-alerts-6          0     p      127.0.0.1 _22             74          2            0  6.5kb        2155 false     true       7.2.1   true
blogs                         1     p      127.0.0.1 _0               0          1            0    5kb        2726 true      true       7.2.1   true
blogs                         2     p      127.0.0.1 _0               0          1            0  4.9kb        2726 true      true       7.2.1   true
blogs                         3     p      127.0.0.1 _2               2          1            0  4.9kb        2726 true      true       7.2.1   true
.watches                      0     p      127.0.0.1 _8w            320          3            0   12kb           0 true      false      7.2.1   true
.watches                      0     p      127.0.0.1 _8x            321          1            0  9.6kb           0 true      false      7.2.1   true
.watches                      0     p      127.0.0.1 _8y            322          2            0 11.2kb           0 true      false      7.2.1   true
.watches                      0     p      127.0.0.1 _8z            323          1            0  9.7kb        3757 false     true       7.2.1   true
.watches                      0     p      127.0.0.1 _90            324          2            0 11.4kb        3757 false     true       7.2.1   true
.watches                      0     p      127.0.0.1 _91            325          1            0  9.6kb        3757 false     true       7.2.1   true
.watches                      0     p      127.0.0.1 _92            326          2            0 11.2kb        3943 false     true       7.2.1   true
.security-6                   0     p      127.0.0.1 _0               0          1            0  3.1kb        1120 true      true       7.2.1   true
.security-6                   0     p      127.0.0.1 _1               1          1            0  3.1kb        1120 true      true       7.2.1   true
.security-6                   0     p      127.0.0.1 _3               3          1            0  3.1kb        1120 true      true       7.2.1   true

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/segments/blogs?v'
index shard prirep ip        segment generation docs.count docs.deleted  size size.memory committed searchable version compound
blogs 1     p      127.0.0.1 _0               0          1            0   5kb        2726 true      true       7.2.1   true
blogs 2     p      127.0.0.1 _0               0          1            0 4.9kb        2726 true      true       7.2.1   true
blogs 3     p      127.0.0.1 _2               2          1            0 4.9kb        2726 true      true       7.2.1   true

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/count?v'
epoch      timestamp count
1525132485 07:54:45  8951

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/count/blogs?v'
epoch      timestamp count
1525132491 07:54:51  3

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/recovery?v'
index                         shard time  type           stage source_host source_node target_host target_node repository snapshot files files_recovered files_percent files_total bytes bytes_recovered bytes_percent bytes_total translog_ops translog_ops_recovered translog_ops_percent
.triggered_watches            0     472ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        5           0     0               100.0%        64788       0            0                      100.0%
.monitoring-es-6-2018.04.30   0     381ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        44          0     0               100.0%        4176166     0            0                      100.0%
.watcher-history-7-2018.04.30 0     180ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        15          0     0               100.0%        1082799     0            0                      100.0%
.monitoring-alerts-6          0     330ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        10          0     0               100.0%        18450       0            0                      100.0%
blogs                         0     121ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        1           0     0               100.0%        230         0            0                      100.0%
blogs                         1     179ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        4           0     0               100.0%        5524        0            0                      100.0%
blogs                         2     193ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        4           0     0               100.0%        5435        0            0                      100.0%
blogs                         3     198ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        4           0     0               100.0%        5394        0            0                      100.0%
blogs                         4     538ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        1           0     0               100.0%        230         0            0                      100.0%
.watches                      0     600ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        10          0     0               100.0%        34088       0            0                      100.0%
.security-6                   0     543ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        10          0     0               100.0%        10095       0            0                      100.0%

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/recovery/blogs?v'
index shard time  type           stage source_host source_node target_host target_node repository snapshot files files_recovered files_percent files_total bytes bytes_recovered bytes_percent bytes_total translog_ops translog_ops_recovered translog_ops_percent
blogs 0     121ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        1           0     0               100.0%        230         0            0                      100.0%
blogs 1     179ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        4           0     0               100.0%        5524        0            0                      100.0%
blogs 2     193ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        4           0     0               100.0%        5435        0            0                      100.0%
blogs 3     198ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        4           0     0               100.0%        5394        0            0                      100.0%
blogs 4     538ms existing_store done  n/a         n/a         127.0.0.1   0rArjNg     n/a        n/a      0     0               100.0%        1           0     0               100.0%        230         0            0                      100.0%

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/health?v'
epoch      timestamp cluster       status node.total node.data shards pri relo init unassign pending_tasks max_task_wait_time active_shards_percent
1525132506 07:55:06  elasticsearch yellow          1         1     11  11    0    0        5             0                  -                 68.8%

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/pending_tasks?v'
insertOrder timeInQueue priority source

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/aliases?v'
alias     index       filter routing.index routing.search
.security .security-6 -      -             -

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/thread_pool?v'
node_name name                active queue rejected
0rArjNg   bulk                     0     0        0
0rArjNg   fetch_shard_started      0     0        0
0rArjNg   fetch_shard_store        0     0        0
0rArjNg   flush                    0     0        0
0rArjNg   force_merge              0     0        0
0rArjNg   generic                  0     0        0
0rArjNg   get                      0     0        0
0rArjNg   index                    0     0        0
0rArjNg   listener                 0     0        0
0rArjNg   management               1     0        0
0rArjNg   ml_autodetect            0     0        0
0rArjNg   ml_datafeed              0     0        0
0rArjNg   ml_utility               0     0        0
0rArjNg   refresh                  0     0        0
0rArjNg   search                   0     0        0
0rArjNg   security-token-key       0     0        0
0rArjNg   snapshot                 0     0        0
0rArjNg   warmer                   0     0        0
0rArjNg   watcher                  0     0        0
Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ 
Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/plugins?v'
name    component          version
0rArjNg x-pack-core        6.2.4
0rArjNg x-pack-deprecation 6.2.4
0rArjNg x-pack-graph       6.2.4
0rArjNg x-pack-logstash    6.2.4
0rArjNg x-pack-ml          6.2.4
0rArjNg x-pack-monitoring  6.2.4
0rArjNg x-pack-security    6.2.4
0rArjNg x-pack-upgrade     6.2.4
0rArjNg x-pack-watcher     6.2.4

Donghuas-MacBook-Air:elasticsearch-6.2.4 donghua$ curl -XGET  -u elastic:elastic 'localhost:9200/_cat/fielddata?v'
id host ip node field size