Sunday, October 20, 2019

How to fix block device permission issue without asmlib or asmfd


[root@oracle19c ~]# cat /etc/redhat-release 
CentOS Linux release 7.7.1908 (Core)

[root@oracle19c ~]# cat /etc/udev/rules.d/55-oracle.rules 
KERNEL=="sdc1" OWNER="oracle", GROUP="oinstall", MODE="0660"
KERNEL=="sdd1" OWNER="oracle", GROUP="oinstall", MODE="0660"
KERNEL=="sde1" OWNER="oracle", GROUP="oinstall", MODE="0660"

[root@oracle19c ~]# udevadm trigger

[root@oracle19c ~]# ls -l /dev/sd[cde]1
brw-rw----. 1 oracle oinstall 8, 33 Oct 20 20:17 /dev/sdc1
brw-rw----. 1 oracle oinstall 8, 49 Oct 20 20:17 /dev/sdd1
brw-rw----. 1 oracle oinstall 8, 65 Oct 20 20:17 /dev/sde1
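
If the rules file is edited later on a running system, the change can be applied without a reboot. A minimal sketch (udevadm test only simulates event handling, so it is safe on a live host):

# Reload the rule files after editing, then replay the device events
udevadm control --reload-rules
udevadm trigger
# Dry-run rule processing for one partition to confirm OWNER/GROUP/MODE apply
udevadm test /sys/class/block/sdc1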

Friday, October 18, 2019

Oracle characterset, char semantics & storage length

[oracle@oracle19c admin]$ sql donghua@orclpdb

SQLcl: Release 19.1 Production on Fri Oct 18 23:03:04 2019

Copyright (c) 1982, 2019, Oracle.  All rights reserved.

Password? (**********?) ********
Last Successful login time: Fri Oct 18 2019 23:03:08 +08:00

Connected to:
Oracle Database 19c Enterprise Edition Release 19.0.0.0.0 - Production
Version 19.3.0.0.0

SQL> select /*ansiconsole*/ * from nls_database_parameters where parameter like '%CHARACTERSET%';
PARAMETER                VALUE       
NLS_NCHAR_CHARACTERSET   AL16UTF16   
NLS_CHARACTERSET         AL32UTF8    


SQL> create table t1 (c1 varchar2(20 byte), c2 varchar2(20 char), c3 nvarchar2(20));

Table created.

SQL> desc t1;

Name                 Null?    Type                        
-------------------- -------- ----------------------------
C1                            VARCHAR2(20)                
C2                            VARCHAR2(20 CHAR)     
C3                            NVARCHAR2(20)      

SQL> insert into t1 values ('a','a','a');

1 row created.

SQL> insert into t1 values ('中','中','中');

1 row created.

SQL> commit;

Commit complete.

SQL> select /*ansiconsole*/ * from t1;

C1                   C2                   C3                  
-------------------- -------------------- --------------------
a                    a                    a                   
中                    中                    中                   

SQL> select /*ansiconsole*/ length(c1),length(c2),length(c3) from t1;

LENGTH(C1) LENGTH(C2) LENGTH(C3)
---------- ---------- ----------
         1          1          1
         1          1          1

SQL> select /*ansiconsole*/ lengthb(c1),lengthb(c2),lengthb(c3) from t1;

LENGTHB(C1) LENGTHB(C2) LENGTHB(C3)
----------- ----------- -----------
          1           1           2
          3           3           2
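
This matches the character sets above: in AL32UTF8 (columns C1 and C2) 'a' is stored as 1 byte and '中' as 3 bytes, while in AL16UTF16 (the NVARCHAR2 column C3) every character, ASCII or Chinese, is stored as 2 bytes.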


SQL> update t1 set c1='中中中中中中';

2 rows updated.

SQL> update t1 set c1='中中中中中中中';

update t1 set c1='中中中中中中中'
                 *
ERROR at line 1:
ORA-12899: value too large for column "DONGHUA"."T1"."C1" (actual: 21, maximum: 20)



SQL> update t1 set c2='中中中中中中中中中中中中中中中中中中中中';

2 rows updated.

SQL> update t1 set c2='中中中中中中中中中中中中中中中中中中中中中';

update t1 set c2='中中中中中中中中中中中中中中中中中中中中中'
                 *
ERROR at line 1:
ORA-12899: value too large for column "DONGHUA"."T1"."C2" (actual: 21, maximum: 20)



SQL> update t1 set c3='中中中中中中中中中中中中中中中中中中中中';

2 rows updated.

SQL> update t1 set c3='中中中中中中中中中中中中中中中中中中中中中';

update t1 set c3='中中中中中中中中中中中中中中中中中中中中中'
                 *
ERROR at line 1:
ORA-12899: value too large for column "DONGHUA"."T1"."C3" (actual: 21, maximum: 20)
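
To summarize: C1 (VARCHAR2(20 BYTE)) overflows at 7 Chinese characters because 7 x 3 = 21 bytes exceeds the 20-byte limit, while C2 (VARCHAR2(20 CHAR)) and C3 (NVARCHAR2, which always uses character semantics) accept exactly 20 characters and reject the 21st. In every case ORA-12899 reports "actual" and "maximum" in the column's own length semantics.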




Friday, August 16, 2019

How to limit NiFi resource usage

File to modify: /opt/nifi-1.9.2/bin/nifi.sh

    if [ "$1" = "start" ]; then
        ( eval "cd ${NIFI_HOME} && ${run_nifi_cmd}" & )> /dev/null 1>&-
        sleep 10
        taskset -pc 0 `cat /opt/nifi-1.9.2/run/nifi.pid`
    else


To verify:

[root@devops bin]# ./nifi.sh start
nifi.sh: JAVA_HOME not set; results may vary

Java home:
NiFi home: /opt/nifi-1.9.2

Bootstrap Config File: /opt/nifi-1.9.2/conf/bootstrap.conf

pid 4427's current affinity list: 0-2
pid 4427's new affinity list: 0

[root@devops bin]# taskset  -p `cat /opt/nifi-1.9.2/run/nifi.pid`
pid 4427's current affinity mask: 1
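
Note that taskset only controls which cores NiFi may run on, not how much CPU time it receives there. As an alternative sketch, a cgroup can enforce a hard quota (this assumes the libcgroup-tools package is installed; the group name nifi and the 50% quota are illustrative):

# Create a cpu cgroup capped at 50ms of CPU per 100ms period (~50% of one core)
cgcreate -g cpu:/nifi
cgset -r cpu.cfs_period_us=100000 nifi
cgset -r cpu.cfs_quota_us=50000 nifi
# Move the running NiFi JVM into the cgroup
cgclassify -g cpu:nifi `cat /opt/nifi-1.9.2/run/nifi.pid`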



Tuesday, August 6, 2019

Tips to identify YARN applications from the Resource Manager for Hive Tez sessions

How To:

Use "hive.query.tag" at the connection string or set using "set hive.query.tag=usertag"

beeline -n hive -u jdbc:hive2://hdp:10000/default?hive.query.tag=donghuatag

Output:

[root@hdp ~]# yarn application -list -appTags donghuatag
19/08/06 21:52:30 INFO client.RMProxy: Connecting to ResourceManager at hdp.dbaglobe.com/192.168.31.22:8050
Total number of applications (application-types: [], states: [SUBMITTED, ACCEPTED, RUNNING] and tags: [donghuatag]):1
                Application-Id      Application-Name      Application-Type        User       Queue               State         Final-State         Progress                        Tracking-URL
application_1565096253064_0004  HIVE-7c5ec7e6-6944-483d-bf2d-b7a2d18ad7c1                  TEZ        hive     default             RUNNING           UNDEFINED               0%                http://hdp:41592/ui/
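
The tag is also handy for cleanup: the application id returned by the listing can be fed straight to yarn application -kill. A sketch reusing the id from the output above:

# The tag can also be set inside an existing beeline session:
#   set hive.query.tag=donghuatag;
# Then kill the tagged session's YARN application by id
yarn application -kill application_1565096253064_0004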

Monday, June 3, 2019

Ansible playbook - prepare the target demo machine

More Scripts:


prepare_ansible_target.yaml
---
# Run with ansible-playbook  -k
#  (make sure to add the IPs of machines you want to manage to /etc/ansible/hosts first)

- hosts: gw
  gather_facts: False
  remote_user: root
  # become: yes
  # become_user: root
  # become_method: sudo

  tasks:
    - name: Install python 2
      raw: test -e /usr/bin/python || (yum update -y && yum install -y python)

    - name: Fancy way of doing authorized_keys
      authorized_key: user=root
                      exclusive=no
                      key="{{ lookup('file', '~/.ssh/id_rsa.pub') }}"

    #- name: Create /root/.ssh
    #  file: path=/root/.ssh state=directory mode=0700

    #- name: Create /root/.ssh/authorized_keys from our local ssh pubkey
    #  lineinfile: dest=/root/.ssh/authorized_keys lines="{{ lookup('file','~/.ssh/id_rsa.pub') }}"

    - name: Set timezone to Asia/Singapore
      raw: ( timedatectl |grep "Time zone" |grep -i Singapore) || timedatectl set-timezone Asia/Singapore
      
    - name: Add lines to avoid "LC_CTYPE" error in macOS terminal
      lineinfile: dest=/etc/environment line='{{ item }}' state=present backup=yes
      with_items:
        - '# Add following 2 lines to fix LC_CTYPE error'
        - 'LANG=en_US.utf-8'
        - 'LC_ALL=en_US.utf-8'

host
[all]
Donghuas-MacBook-Air.local

[gw]
192.168.31.78 ansible_python_interpreter=/usr/bin/python ansible_user=root

Execute the playbook

Donghuas-MacBook-Air:ansible donghua$ ansible-playbook prepare_ansible_target.yaml -i host

PLAY [gw] ****************************************************************************************************************************************

TASK [Install python 2] **************************************************************************************************************************
changed: [192.168.31.78]

TASK [Fancy way of doing authorized_keys] ********************************************************************************************************
ok: [192.168.31.78]

TASK [Set timezone to Asia/Singapore] ************************************************************************************************************
changed: [192.168.31.78]

TASK [Add lines to avoid "LC_CTYPE" error in macOS terminal] ************************************************************************************
ok: [192.168.31.78] => (item=# Add following 2 lines to fix LC_CTYPE error)
ok: [192.168.31.78] => (item=LANG=en_US.utf-8)
ok: [192.168.31.78] => (item=LC_ALL=en_US.utf-8)

PLAY RECAP ***************************************************************************************************************************************
192.168.31.78              : ok=4    changed=2    unreachable=0    failed=0    skipped=0    rescued=0    ignored=0   
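
Once the playbook has pushed the public key, passwordless access can be confirmed with Ansible's built-in ping module (ad-hoc command, no playbook or -k needed):

ansible gw -i host -m ping -u root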

Saturday, May 4, 2019

Convert tables in all PDF pages into CSV


(bigdata) Donghuas-MacBook-Air:Documents donghua$ pip install tabula-py
(bigdata) Donghuas-MacBook-Air:Documents donghua$ python
Python 2.7.15 |Anaconda, Inc.| (default, Dec 14 2018, 13:10:39) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import tabula
>>> tabula.convert_into("/Users/donghua/Documents/SASMO-Result-Grade-3.pdf", "/Users/donghua/Documents/SASMO-Result-Grade-3.csv", output_format="csv",pages="all")
May 04, 2019 12:36:19 AM org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB suggestKCMS
INFO: To get higher rendering speed on JDK8 or later,
May 04, 2019 12:36:19 AM org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB suggestKCMS
INFO:   use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider
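
To sanity-check what tabula detects before writing the CSV, the same tables can be loaded as pandas DataFrames first. A sketch reusing the same PDF path (with multiple_tables=True, read_pdf returns one DataFrame per detected table):

python - <<'EOF'
# Inspect the extracted tables before converting to CSV
import tabula
dfs = tabula.read_pdf("/Users/donghua/Documents/SASMO-Result-Grade-3.pdf",
                      pages="all", multiple_tables=True)
print("tables detected:", len(dfs))
print(dfs[0].head())
EOF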



Wednesday, April 17, 2019

Differences between Parquet and ORC formats
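
The short version: ALTER TABLE ... REPLACE COLUMNS can drop a trailing column from a Parquet-backed table, but fails against an ORC-backed table because the ORC SerDe rejects column removal, as the two transcripts below show.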

ORC format
0: jdbc:hive2://hdp.dbaglobe.com:10000/demodb> create external table managed_t1 (id int, name string, remark string) stored as orc location '/data/managed_t1' tblproperties('transactional'='false');

0: jdbc:hive2://hdp.dbaglobe.com:10000/demodb> alter table managed_t1 replace columns (id int, name string);
INFO  : Compiling command(queryId=hive_20190417210521_b1634dd1-280f-41c2-96a1-d0e921c64041): alter table managed_t1 replace columns (id int, name string)
INFO  : Semantic Analysis Completed (retrial = false)
INFO  : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
INFO  : Completed compiling command(queryId=hive_20190417210521_b1634dd1-280f-41c2-96a1-d0e921c64041); Time taken: 0.047 seconds
INFO  : Executing command(queryId=hive_20190417210521_b1634dd1-280f-41c2-96a1-d0e921c64041): alter table managed_t1 replace columns (id int, name string)
INFO  : Starting task [Stage-0:DDL] in serial mode
ERROR : FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Replacing columns cannot drop columns for table demodb.managed_t1. SerDe may be incompatible
INFO  : Completed executing command(queryId=hive_20190417210521_b1634dd1-280f-41c2-96a1-d0e921c64041); Time taken: 0.021 seconds
Error: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Replacing columns cannot drop columns for table demodb.managed_t1. SerDe may be incompatible (state=42000,code=1)

Parquet format

0: jdbc:hive2://hdp.dbaglobe.com:10000/demodb> create external table managed_t1 (id int, name string, remark string) stored as parquet location '/data/managed_t1' tblproperties('transactional'='false');

0: jdbc:hive2://hdp.dbaglobe.com:10000/demodb> alter table managed_t1 replace columns (id int, name string);
INFO  : Compiling command(queryId=hive_20190417210231_90e8f8bd-bb03-4aa3-b138-e693ca59dc1b): alter table managed_t1 replace columns (id int, name string)
INFO  : Semantic Analysis Completed (retrial = false)
INFO  : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
INFO  : Completed compiling command(queryId=hive_20190417210231_90e8f8bd-bb03-4aa3-b138-e693ca59dc1b); Time taken: 0.088 seconds
INFO  : Executing command(queryId=hive_20190417210231_90e8f8bd-bb03-4aa3-b138-e693ca59dc1b): alter table managed_t1 replace columns (id int, name string)
INFO  : Starting task [Stage-0:DDL] in serial mode
INFO  : Completed executing command(queryId=hive_20190417210231_90e8f8bd-bb03-4aa3-b138-e693ca59dc1b); Time taken: 0.148 seconds
INFO  : OK
No rows affected (0.278 seconds)
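
For the ORC table, a workaround sketch: since the table is EXTERNAL and non-transactional, dropping it removes only the metadata, so the definition can simply be recreated without the trailing remark column (the data files under /data/managed_t1 stay in place, and the remaining columns still resolve correctly because only the last column is removed):

# Drop the metadata only (EXTERNAL table), then recreate without 'remark'
beeline -u jdbc:hive2://hdp.dbaglobe.com:10000/demodb -e "
drop table managed_t1;
create external table managed_t1 (id int, name string)
stored as orc location '/data/managed_t1'
tblproperties('transactional'='false');"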