Monday, October 25, 2021

How to estimate PostgreSQL table size

Looking at the example below: what is the formula by which this table of 4,000,000 rows, each row carrying 9 bytes of data (4 bytes for the int column, 5 bytes for the variable-length text value), ends up consuming 169 MB of disk storage?


CREATE TABLE t_test (id serial, name text);
INSERT INTO t_test (name) SELECT 'hans' FROM generate_series(1, 2000000);
INSERT INTO t_test (name) SELECT 'paul' FROM generate_series(1, 2000000);
mytest=> \dt+ t_test
                            List of relations
 Schema |  Name  | Type  |  Owner   | Persistence |  Size  | Description 
--------+--------+-------+----------+-------------+--------+-------------
 public | t_test | table | postgres | permanent   | 169 MB | 
(1 row)

How many rows fit in an 8K page? The output below shows there are 185 rows (tuples) per page (block).

mytest=> create extension pageinspect;
CREATE EXTENSION
mytest=> SELECT count(*) FROM heap_page_items(get_raw_page('t_test', 0));
 count 
-------
   185

mytest=> SELECT count(*) FROM heap_page_items(get_raw_page('t_test', 1));
 count 
-------
   185

The tuple length (lp_len) is 33 bytes; the storage actually consumed is padded up to a multiple of 8, i.e. 40 bytes. This can be confirmed from consecutive line pointer offsets: 8152 - 8112 = 40.

mytest=> SELECT * FROM heap_page_items(get_raw_page('t_test', 1)) where lp=1;
-[ RECORD 1 ]---------------------
lp          | 1
lp_off      | 8152
lp_flags    | 1
lp_len      | 33
t_xmin      | 584
t_xmax      | 0
t_field3    | 0
t_ctid      | (1,1)
t_infomask2 | 2
t_infomask  | 2306
t_hoff      | 24
t_bits      | 
t_oid       | 
t_data      | \xba0000000b68616e73

mytest=> SELECT * FROM heap_page_items(get_raw_page('t_test', 1)) where lp=2;
-[ RECORD 1 ]---------------------
lp          | 2
lp_off      | 8112
lp_flags    | 1
lp_len      | 33
t_xmin      | 584
t_xmax      | 0
t_field3    | 0
t_ctid      | (1,2)
t_infomask2 | 2
t_infomask  | 2306
t_hoff      | 24
t_bits      | 
t_oid       | 
t_data      | \xbb0000000b68616e73

mytest=> SELECT * FROM heap_page_items(get_raw_page('t_test', 1)) where lp=185;
-[ RECORD 1 ]---------------------
lp          | 185
lp_off      | 792
lp_flags    | 1
lp_len      | 33
t_xmin      | 584
t_xmax      | 0
t_field3    | 0
t_ctid      | (1,185)
t_infomask2 | 2
t_infomask  | 2306
t_hoff      | 24
t_bits      | 
t_oid       | 
t_data      | \x720100000b68616e73
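
Rather than eyeballing lp_off record by record, the 40-byte stride can be computed over the whole page; a quick sketch using the same pageinspect function:

SELECT lp, lp_off,
       lag(lp_off) OVER (ORDER BY lp) - lp_off AS stride   -- 40 for every tuple after the first
FROM heap_page_items(get_raw_page('t_test', 1))
ORDER BY lp;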


Reference: https://www.postgresql.org/docs/14/storage-page-layout.html


==> Block Size                  8192 bytes

--> PageHeaderData              24 bytes

  --> Per-row data              ceiling((24 + 9) / 8) * 8 + 4 = 44 bytes

    --> ItemId:                 4 bytes per row

    --> HeapTupleHeaderData:    24 bytes per row

    --> Actual data:            9 bytes (4 bytes for column "id", 5 bytes for column "name")


Here is how much space is actually used in each data block in our scenario: 24 bytes of page header + 44 × 185 = 8,164 bytes (out of 8,192).

Total size: (2,000,000 + 2,000,000) rows / 185 rows per page × 8 KB per page / 1024 ≈ 168 MB, which is very close to the actual size of 169 MB.
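
The same arithmetic can be wrapped into one query; a minimal sketch with all the overheads above hard-coded (8,192-byte block, 24-byte page header, 4-byte ItemId, 24-byte tuple header, 9 bytes of data MAXALIGN-padded to 40):

-- rows per page: floor(8168 / 44) = 185; pages: ceil(4000000 / 185) = 21622
SELECT pg_size_pretty(
         ceil(4000000 / floor((8192 - 24) / (4 + ceil((24 + 9) / 8.0) * 8)))
         * 8192) AS estimated_size;   -- reports 169 MB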

Sunday, October 24, 2021

Timestamp and Timestamp with Time Zone in PostgreSQL

-- Demonstration Script 
-- (RDS PostgreSQL default is UTC)

create table t (timezone varchar(20),ts timestamp, tstz timestamp with time zone);

set timezone='UTC';
insert into t values ('UTC', '2021-11-01 08:00:00'::timestamp,'2021-11-01 08:00:00'::timestamp);
insert into t values ('UTC', '2021-10-01 08:00:00'::timestamp,'2021-10-01 08:00:00'::timestamp);

set timezone='Australia/Sydney';
insert into t values ('Sydney', '2021-11-01 08:00:00'::timestamp,'2021-11-01 08:00:00'::timestamp);
insert into t values ('Sydney', '2021-10-01 08:00:00'::timestamp,'2021-10-01 08:00:00'::timestamp);

set timezone='Asia/Singapore';
insert into t values ('Singapore', '2021-11-01 08:00:00'::timestamp,'2021-11-01 08:00:00'::timestamp);
insert into t values ('Singapore', '2021-10-01 08:00:00'::timestamp,'2021-10-01 08:00:00'::timestamp);



mytest=> set timezone='UTC';
SET
mytest=> select * from t;
 timezone  |         ts          |          tstz          
-----------+---------------------+------------------------
 UTC       | 2021-11-01 08:00:00 | 2021-11-01 08:00:00+00
 UTC       | 2021-10-01 08:00:00 | 2021-10-01 08:00:00+00
 Sydney    | 2021-11-01 08:00:00 | 2021-10-31 21:00:00+00
 Sydney    | 2021-10-01 08:00:00 | 2021-09-30 22:00:00+00
 Singapore | 2021-11-01 08:00:00 | 2021-11-01 00:00:00+00
 Singapore | 2021-10-01 08:00:00 | 2021-10-01 00:00:00+00
(6 rows)

mytest=> set timezone='Australia/Sydney';
SET
mytest=> select * from t;
 timezone  |         ts          |          tstz          
-----------+---------------------+------------------------
 UTC       | 2021-11-01 08:00:00 | 2021-11-01 19:00:00+11
 UTC       | 2021-10-01 08:00:00 | 2021-10-01 18:00:00+10
 Sydney    | 2021-11-01 08:00:00 | 2021-11-01 08:00:00+11
 Sydney    | 2021-10-01 08:00:00 | 2021-10-01 08:00:00+10
 Singapore | 2021-11-01 08:00:00 | 2021-11-01 11:00:00+11
 Singapore | 2021-10-01 08:00:00 | 2021-10-01 10:00:00+10
(6 rows)


mytest=> set timezone='Asia/Singapore';
SET
mytest=> select * from t;
 timezone  |         ts          |          tstz          
-----------+---------------------+------------------------
 UTC       | 2021-11-01 08:00:00 | 2021-11-01 16:00:00+08
 UTC       | 2021-10-01 08:00:00 | 2021-10-01 16:00:00+08
 Sydney    | 2021-11-01 08:00:00 | 2021-11-01 05:00:00+08
 Sydney    | 2021-10-01 08:00:00 | 2021-10-01 06:00:00+08
 Singapore | 2021-11-01 08:00:00 | 2021-11-01 08:00:00+08
 Singapore | 2021-10-01 08:00:00 | 2021-10-01 08:00:00+08
(6 rows)

There is no extra storage used for "timestamp with time zone": internally both types are implemented as a 64-bit integer. PostgreSQL converts "timestamp with time zone" values to UTC before storing them on disk.

As the example demonstrates, "timestamp with time zone" handles both the time zone offset and DST (Daylight Saving Time).
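
Because the stored value is an absolute instant (normalised to UTC), it can be rendered as local wall-clock time in any zone at query time; a quick sketch against the same table:

SELECT tstz,
       tstz AT TIME ZONE 'Australia/Sydney' AS sydney_local,
       tstz AT TIME ZONE 'Asia/Singapore'   AS singapore_local
FROM t
WHERE timezone = 'UTC';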

mytest=> select timezone, pg_column_size(ts) ts_size, pg_column_size(tstz) tstz_size from t;
 timezone  | ts_size | tstz_size 
-----------+---------+-----------
 UTC       |       8 |         8
 UTC       |       8 |         8
 Sydney    |       8 |         8
 Sydney    |       8 |         8
 Singapore |       8 |         8
 Singapore |       8 |         8
(6 rows)

pageinspect extension to analyse PostgreSQL btree index leaf block

1. Setup

mytest=> create extension pageinspect;
CREATE EXTENSION
mytest=> \dx
                                List of installed extensions
    Name     | Version |   Schema   |                      Description                      
-------------+---------+------------+-------------------------------------------------------
 pageinspect | 1.8     | public     | inspect the contents of database pages at a low level
 plpgsql     | 1.0     | pg_catalog | PL/pgSQL procedural language
(2 rows)

create table t (id int, name varchar(10), extra text);
create unique index t_id on t(id);
create index t_name on t(name);
create index t_id_name on t(id,name);


mytest=> \d t
                        Table "public.t"
 Column |         Type          | Collation | Nullable | Default 
--------+-----------------------+-----------+----------+---------
 id     | integer               |           |          | 
 name   | character varying(10) |           |          | 
 extra  | text                  |           |          | 
Indexes:
    "t_id" UNIQUE, btree (id)
    "t_id_name" btree (id, name)
    "t_name" btree (name)

insert into t values(1,'a','xxxx');
insert into t values(2,'a','xxxx');
insert into t values(3,'b','xxxx');
insert into t values(4,'c','xxxx');


mytest=> select ctid,* from t;
 ctid  | id | name | extra 
-------+----+------+-------
 (0,1) |  1 | a    | xxxx
 (0,2) |  2 | a    | xxxx
 (0,3) |  3 | b    | xxxx
 (0,4) |  4 | c    | xxxx
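
The ctid column shows where each row physically lives: (block number, line pointer within the block). All four rows sit in block 0 at items 1 to 4, which is why block 0 is passed to get_raw_page() below.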


2. Analyse Heap (Table) Block:


Decode t_data \x0100000005610b78787878 (the row id=1, name='a', extra='xxxx'):
 01 00 00 00    -> id = 1 (int4, little-endian)
 05 61          -> 1-byte varlena header (total length 2) followed by 0x61 = 'a'
 0b 78 78 78 78 -> 1-byte varlena header (total length 5) followed by 'xxxx'

mytest=> select convert_from('\x78','utf-8');
 convert_from 
--------------
 x
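
The little-endian int4 at the front of t_data can also be reassembled in SQL; a minimal sketch:

SELECT get_byte(d, 0)
       | (get_byte(d, 1) << 8)
       | (get_byte(d, 2) << 16)
       | (get_byte(d, 3) << 24) AS id   -- returns 1
FROM (SELECT '\x0100000005610b78787878'::bytea AS d) s;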

mytest=> \x
Expanded display is on.

mytest=> SELECT * FROM page_header(get_raw_page('t', 0));
-[ RECORD 1 ]---------
lsn       | 0/1C020408
checksum  | 0
flags     | 0
lower     | 40
upper     | 8032
special   | 8192
pagesize  | 8192
version   | 4
prune_xid | 0

mytest=> SELECT * FROM heap_page_items(get_raw_page('t', 0));
-[ RECORD 1 ]-------------------------
lp          | 1
lp_off      | 8152
lp_flags    | 1
lp_len      | 35
t_xmin      | 586
t_xmax      | 0
t_field3    | 0
t_ctid      | (0,1)
t_infomask2 | 3
t_infomask  | 2306
t_hoff      | 24
t_bits      | 
t_oid       | 
t_data      | \x0100000005610b78787878
-[ RECORD 2 ]-------------------------
lp          | 2
lp_off      | 8112
lp_flags    | 1
lp_len      | 35
t_xmin      | 587
t_xmax      | 0
t_field3    | 0
t_ctid      | (0,2)
t_infomask2 | 3
t_infomask  | 2306
t_hoff      | 24
t_bits      | 
t_oid       | 
t_data      | \x0200000005610b78787878
-[ RECORD 3 ]-------------------------
lp          | 3
lp_off      | 8072
lp_flags    | 1
lp_len      | 35
t_xmin      | 588
t_xmax      | 0
t_field3    | 0
t_ctid      | (0,3)
t_infomask2 | 3
t_infomask  | 2306
t_hoff      | 24
t_bits      | 
t_oid       | 
t_data      | \x0300000005620b78787878
-[ RECORD 4 ]-------------------------
lp          | 4
lp_off      | 8032
lp_flags    | 1
lp_len      | 35
t_xmin      | 589
t_xmax      | 0
t_field3    | 0
t_ctid      | (0,4)
t_infomask2 | 3
t_infomask  | 2306
t_hoff      | 24
t_bits      | 
t_oid       | 
t_data      | \x0400000005630b78787878

3. Analyse Index t_id

mytest=> SELECT * FROM bt_metap('t_id');
-[ RECORD 1 ]-----------+-------
magic                   | 340322
version                 | 4
root                    | 1
level                   | 0
fastroot                | 1
fastlevel               | 0
oldest_xact             | 0
last_cleanup_num_tuples | -1
allequalimage           | t


mytest=> SELECT * FROM bt_page_stats('t_id', 1);
-[ RECORD 1 ]-+-----
blkno         | 1
type          | l
live_items    | 4
dead_items    | 0
avg_item_size | 16
page_size     | 8192
free_size     | 8068
btpo_prev     | 0
btpo_next     | 0
btpo          | 0
btpo_flags    | 3

Check the "data" and "htid" mapping on the page items

mytest=> SELECT itemoffset, ctid, itemlen, nulls, vars, data, dead, htid, tids[0:2] AS some_tids
mytest-> FROM bt_page_items('t_id', 1);
-[ RECORD 1 ]-----------------------
itemoffset | 1
ctid       | (0,1)
itemlen    | 16
nulls      | f
vars       | f
data       | 01 00 00 00 00 00 00 00
dead       | f
htid       | (0,1)
some_tids  | 
-[ RECORD 2 ]-----------------------
itemoffset | 2
ctid       | (0,2)
itemlen    | 16
nulls      | f
vars       | f
data       | 02 00 00 00 00 00 00 00
dead       | f
htid       | (0,2)
some_tids  | 
-[ RECORD 3 ]-----------------------
itemoffset | 3
ctid       | (0,3)
itemlen    | 16
nulls      | f
vars       | f
data       | 03 00 00 00 00 00 00 00
dead       | f
htid       | (0,3)
some_tids  | 
-[ RECORD 4 ]-----------------------
itemoffset | 4
ctid       | (0,4)
itemlen    | 16
nulls      | f
vars       | f
data       | 04 00 00 00 00 00 00 00
dead       | f
htid       | (0,4)
some_tids  | 


4. Analyse Index t_name


mytest=> SELECT * FROM bt_metap('t_name');
-[ RECORD 1 ]-----------+-------
magic                   | 340322
version                 | 4
root                    | 1
level                   | 0
fastroot                | 1
fastlevel               | 0
oldest_xact             | 0
last_cleanup_num_tuples | -1
allequalimage           | t

mytest=> SELECT * FROM bt_page_stats('t_name', 1);
-[ RECORD 1 ]-+-----
blkno         | 1
type          | l
live_items    | 4
dead_items    | 0
avg_item_size | 16
page_size     | 8192
free_size     | 8068
btpo_prev     | 0
btpo_next     | 0
btpo          | 0
btpo_flags    | 3

mytest=> SELECT itemoffset, ctid, itemlen, nulls, vars, data, dead, htid, tids[0:2] AS some_tids
mytest-> FROM bt_page_items('t_name', 1);
-[ RECORD 1 ]-----------------------
itemoffset | 1
ctid       | (0,1)
itemlen    | 16
nulls      | f
vars       | t
data       | 05 61 00 00 00 00 00 00
dead       | f
htid       | (0,1)
some_tids  | 
-[ RECORD 2 ]-----------------------
itemoffset | 2
ctid       | (0,2)
itemlen    | 16
nulls      | f
vars       | t
data       | 05 61 00 00 00 00 00 00
dead       | f
htid       | (0,2)
some_tids  | 
-[ RECORD 3 ]-----------------------
itemoffset | 3
ctid       | (0,3)
itemlen    | 16
nulls      | f
vars       | t
data       | 05 62 00 00 00 00 00 00
dead       | f
htid       | (0,3)
some_tids  | 
-[ RECORD 4 ]-----------------------
itemoffset | 4
ctid       | (0,4)
itemlen    | 16
nulls      | f
vars       | t
data       | 05 63 00 00 00 00 00 00
dead       | f
htid       | (0,4)
some_tids  | 

5. Analyse composite index t_id_name


mytest=> SELECT * FROM bt_metap('t_id_name');
-[ RECORD 1 ]-----------+-------
magic                   | 340322
version                 | 4
root                    | 1
level                   | 0
fastroot                | 1
fastlevel               | 0
oldest_xact             | 0
last_cleanup_num_tuples | -1
allequalimage           | t


mytest=> SELECT * FROM bt_page_stats('t_id_name', 1);
-[ RECORD 1 ]-+-----
blkno         | 1
type          | l
live_items    | 4
dead_items    | 0
avg_item_size | 16
page_size     | 8192
free_size     | 8068
btpo_prev     | 0
btpo_next     | 0
btpo          | 0
btpo_flags    | 3


mytest=> SELECT itemoffset, ctid, itemlen, nulls, vars, data, dead, htid, tids[0:2] AS some_tids
mytest-> FROM bt_page_items('t_id_name', 1);
-[ RECORD 1 ]-----------------------
itemoffset | 1
ctid       | (0,1)
itemlen    | 16
nulls      | f
vars       | t
data       | 01 00 00 00 05 61 00 00
dead       | f
htid       | (0,1)
some_tids  | 
-[ RECORD 2 ]-----------------------
itemoffset | 2
ctid       | (0,2)
itemlen    | 16
nulls      | f
vars       | t
data       | 02 00 00 00 05 61 00 00
dead       | f
htid       | (0,2)
some_tids  | 
-[ RECORD 3 ]-----------------------
itemoffset | 3
ctid       | (0,3)
itemlen    | 16
nulls      | f
vars       | t
data       | 03 00 00 00 05 62 00 00
dead       | f
htid       | (0,3)
some_tids  | 
-[ RECORD 4 ]-----------------------
itemoffset | 4
ctid       | (0,4)
itemlen    | 16
nulls      | f
vars       | t
data       | 04 00 00 00 05 63 00 00
dead       | f
htid       | (0,4)
some_tids  | 

6. What if the data size is more than 16 bytes?

mytest=> insert into t values(1000000000,'abcdefghij','yyyy');

mytest=> SELECT itemoffset, ctid, itemlen, nulls, vars, data, dead, htid, tids[0:2] AS some_tids
mytest-> FROM bt_page_items('t_id', 1);
 itemoffset | ctid  | itemlen | nulls | vars |          data           | dead | htid  | some_tids 
------------+-------+---------+-------+------+-------------------------+------+-------+-----------
          1 | (0,1) |      16 | f     | f    | 01 00 00 00 00 00 00 00 | f    | (0,1) | 
          2 | (0,2) |      16 | f     | f    | 02 00 00 00 00 00 00 00 | f    | (0,2) | 
          3 | (0,3) |      16 | f     | f    | 03 00 00 00 00 00 00 00 | f    | (0,3) | 
          4 | (0,4) |      16 | f     | f    | 04 00 00 00 00 00 00 00 | f    | (0,4) | 
          5 | (0,5) |      16 | f     | f    | 00 ca 9a 3b 00 00 00 00 | f    | (0,5) | 
(5 rows)

mytest=> SELECT itemoffset, ctid, itemlen, nulls, vars, data, dead, htid, tids[0:2] AS some_tids
FROM bt_page_items('t_name', 1);
 itemoffset | ctid  | itemlen | nulls | vars |                      data                       | dead | htid  | some_tids 
------------+-------+---------+-------+------+-------------------------------------------------+------+-------+-----------
          1 | (0,1) |      16 | f     | t    | 05 61 00 00 00 00 00 00                         | f    | (0,1) | 
          2 | (0,2) |      16 | f     | t    | 05 61 00 00 00 00 00 00                         | f    | (0,2) | 
          3 | (0,5) |      24 | f     | t    | 17 61 62 63 64 65 66 67 68 69 6a 00 00 00 00 00 | f    | (0,5) | 
          4 | (0,3) |      16 | f     | t    | 05 62 00 00 00 00 00 00                         | f    | (0,3) | 
          5 | (0,4) |      16 | f     | t    | 05 63 00 00 00 00 00 00                         | f    | (0,4) | 
(5 rows)


mytest=> SELECT itemoffset, ctid, itemlen, nulls, vars, data, dead, htid, tids[0:2] AS some_tids
mytest-> FROM bt_page_items('t_id_name', 1);
 itemoffset | ctid  | itemlen | nulls | vars |                      data                       | dead | htid  | some_tids 
------------+-------+---------+-------+------+-------------------------------------------------+------+-------+-----------
          1 | (0,1) |      16 | f     | t    | 01 00 00 00 05 61 00 00                         | f    | (0,1) | 
          2 | (0,2) |      16 | f     | t    | 02 00 00 00 05 61 00 00                         | f    | (0,2) | 
          3 | (0,3) |      16 | f     | t    | 03 00 00 00 05 62 00 00                         | f    | (0,3) | 
          4 | (0,4) |      16 | f     | t    | 04 00 00 00 05 63 00 00                         | f    | (0,4) | 
          5 | (0,5) |      24 | f     | t    | 00 ca 9a 3b 17 61 62 63 64 65 66 67 68 69 6a 00 | f    | (0,5) | 
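
The 24-byte entries follow from the index tuple layout: an 8-byte IndexTupleData header followed by the key data, rounded up to a MAXALIGN (8-byte) boundary. For t_id_name that is 8 + 4 (int4 id) + 1 (varlena header) + 10 ('abcdefghij') = 23, padded to 24; the earlier entries are at most 8 + 4 + 2 = 14 bytes, padded to 16.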

Saturday, October 23, 2021

Upgrade PostgreSQL 13 to PostgreSQL 14 on Oracle Linux 8 using YUM/RPM

 1. Install the PostgreSQL 14 Packages

#  yum install postgresql14-server.x86_64  postgresql14.x86_64  postgresql14-contrib.x86_64

2. Initialize PostgreSQL 14 database

# /usr/pgsql-14/bin/postgresql-14-setup initdb

3. Start PostgreSQL 13 database (required for pre-upgrade check)

 $ /usr/pgsql-13/bin/pg_ctl -D /var/lib/pgsql/13/data/ start

4. Run pre-upgrade check:

/usr/pgsql-14/bin/pg_upgrade \
  --old-bindir=/usr/pgsql-13/bin \
  --new-bindir=/usr/pgsql-14/bin \
  --old-datadir=/var/lib/pgsql/13/data \
  --new-datadir=/var/lib/pgsql/14/data \
  --user=postgres \
  --check

"--check" means check clusters only, don't change any data, additional parameter "--verbose' useful for troubleshoting.

[postgres@ol8 ~]$ /usr/pgsql-14/bin/pg_upgrade \
>   --old-bindir=/usr/pgsql-13/bin \
>   --new-bindir=/usr/pgsql-14/bin \
>   --old-datadir=/var/lib/pgsql/13/data \
>   --new-datadir=/var/lib/pgsql/14/data \
>   --user=postgres \
>   --check
Performing Consistency Checks on Old Live Server
------------------------------------------------
Checking cluster versions                                   ok
Checking database user is the install user                  ok
Checking database connection settings                       ok
Checking for prepared transactions                          ok
Checking for system-defined composite types in user tables  ok
Checking for reg* data types in user tables                 ok
Checking for contrib/isn with bigint-passing mismatch       ok
Checking for user-defined encoding conversions              ok
Checking for user-defined postfix operators                 ok
Checking for presence of required libraries                 ok
Checking database user is the install user                  ok
Checking for prepared transactions                          ok
Checking for new cluster tablespace directories             ok

*Clusters are compatible*

5. Shutdown original PostgreSQL 13
 
$ /usr/pgsql-13/bin/pg_ctl -D /var/lib/pgsql/13/data/ stop

6. Run the actual upgrade

/usr/pgsql-14/bin/pg_upgrade \
  --old-bindir=/usr/pgsql-13/bin \
  --new-bindir=/usr/pgsql-14/bin \
  --old-datadir=/var/lib/pgsql/13/data \
  --new-datadir=/var/lib/pgsql/14/data \
  --user=postgres 

Additional useful parameters are:
  -j, --jobs=NUM    number of simultaneous processes or threads to use
  -k, --link        link instead of copying files to the new cluster
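
For a large cluster the two can be combined to shorten the outage considerably; a sketch of the same run using hard links and four parallel jobs (note that --link requires the old and new data directories to be on the same filesystem, and the old cluster should not be started again once the new one has run):

/usr/pgsql-14/bin/pg_upgrade \
  --old-bindir=/usr/pgsql-13/bin \
  --new-bindir=/usr/pgsql-14/bin \
  --old-datadir=/var/lib/pgsql/13/data \
  --new-datadir=/var/lib/pgsql/14/data \
  --user=postgres \
  --link --jobs=4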

[postgres@ol8 ~]$ /usr/pgsql-14/bin/pg_upgrade \
>   --old-bindir=/usr/pgsql-13/bin \
>   --new-bindir=/usr/pgsql-14/bin \
>   --old-datadir=/var/lib/pgsql/13/data \
>   --new-datadir=/var/lib/pgsql/14/data \
>   --user=postgres 
Performing Consistency Checks
-----------------------------
Checking cluster versions                                   ok
Checking database user is the install user                  ok
Checking database connection settings                       ok
Checking for prepared transactions                          ok
Checking for system-defined composite types in user tables  ok
Checking for reg* data types in user tables                 ok
Checking for contrib/isn with bigint-passing mismatch       ok
Checking for user-defined encoding conversions              ok
Checking for user-defined postfix operators                 ok
Creating dump of global objects                             ok
Creating dump of database schemas
                                                            ok
Checking for presence of required libraries                 ok
Checking database user is the install user                  ok
Checking for prepared transactions                          ok
Checking for new cluster tablespace directories             ok

If pg_upgrade fails after this point, you must re-initdb the
new cluster before continuing.

Performing Upgrade
------------------
Analyzing all rows in the new cluster                       ok
Freezing all rows in the new cluster                        ok
Deleting files from new pg_xact                             ok
Copying old pg_xact to new server                           ok
Setting oldest XID for new cluster                          ok
Setting next transaction ID and epoch for new cluster       ok
Deleting files from new pg_multixact/offsets                ok
Copying old pg_multixact/offsets to new server              ok
Deleting files from new pg_multixact/members                ok
Copying old pg_multixact/members to new server              ok
Setting next multixact ID and offset for new cluster        ok
Resetting WAL archives                                      ok
Setting frozenxid and minmxid counters in new cluster       ok
Restoring global objects in the new cluster                 ok
Restoring database schemas in the new cluster
                                                            ok
Copying user relation files
                                                            ok
Setting next OID for new cluster                            ok
Sync data directory to disk                                 ok
Creating script to delete old cluster                       ok
Checking for extension updates                              notice

Your installation contains extensions that should be updated
with the ALTER EXTENSION command.  The file
    update_extensions.sql
when executed by psql by the database superuser will update
these extensions.


Upgrade Complete
----------------
Optimizer statistics are not transferred by pg_upgrade.
Once you start the new server, consider running:
    /usr/pgsql-14/bin/vacuumdb -U postgres --all --analyze-in-stages

Running this script will delete the old cluster's data files:
    ./delete_old_cluster.sh

7. Start PostgreSQL 14 cluster

[root@ol8 ~]# systemctl start postgresql-14.service

[root@ol8 ~]# systemctl status postgresql-14.service

● postgresql-14.service - PostgreSQL 14 database server
   Loaded: loaded (/usr/lib/systemd/system/postgresql-14.service; enabled; vendor preset: disabled)
   Active: active (running) since Sat 2021-10-23 14:41:05 +08; 6s ago
     Docs: https://www.postgresql.org/docs/14/static/
  Process: 62779 ExecStartPre=/usr/pgsql-14/bin/postgresql-14-check-db-dir ${PGDATA} (code=exited, status=0/SUCCESS)
 Main PID: 62787 (postmaster)
    Tasks: 8 (limit: 48578)
   Memory: 17.0M
   CGroup: /system.slice/postgresql-14.service
           ├─62787 /usr/pgsql-14/bin/postmaster -D /var/lib/pgsql/14/data/
           ├─62788 postgres: logger 
           ├─62790 postgres: checkpointer 
           ├─62791 postgres: background writer 
           ├─62792 postgres: walwriter 
           ├─62793 postgres: autovacuum launcher 
           ├─62794 postgres: stats collector 
           └─62795 postgres: logical replication launcher 

Oct 23 14:41:05 ol8.lab.net systemd[1]: Starting PostgreSQL 14 database server...
Oct 23 14:41:05 ol8.lab.net postmaster[62787]: 2021-10-23 14:41:05.238 +08 [62787] LOG:  redirecting log output to logging collector process
Oct 23 14:41:05 ol8.lab.net postmaster[62787]: 2021-10-23 14:41:05.238 +08 [62787] HINT:  Future log output will appear in directory "log".
Oct 23 14:41:05 ol8.lab.net systemd[1]: Started PostgreSQL 14 database server.

8. Upgrade the extension

[postgres@ol8 ~]$ cat update_extensions.sql
\connect test
ALTER EXTENSION "pageinspect" UPDATE;

[postgres@ol8 ~]$ psql -U postgres
psql (14.0)
Type "help" for help.

postgres=# \i update_extensions.sql
You are now connected to database "test" as user "postgres".
ALTER EXTENSION
test=# 


9. Update optimiser statistics

[postgres@ol8 ~]$  /usr/pgsql-14/bin/vacuumdb -U postgres --all --analyze-in-stages
vacuumdb: processing database "postgres": Generating minimal optimizer statistics (1 target)
vacuumdb: processing database "template1": Generating minimal optimizer statistics (1 target)
vacuumdb: processing database "test": Generating minimal optimizer statistics (1 target)
vacuumdb: processing database "postgres": Generating medium optimizer statistics (10 targets)
vacuumdb: processing database "template1": Generating medium optimizer statistics (10 targets)
vacuumdb: processing database "test": Generating medium optimizer statistics (10 targets)
vacuumdb: processing database "postgres": Generating default (full) optimizer statistics
vacuumdb: processing database "template1": Generating default (full) optimizer statistics
vacuumdb: processing database "test": Generating default (full) optimizer statistics

10. Clean up

# yum remove postgresql13-13.4-1PGDG.rhel8.x86_64 postgresql13-contrib-13.4-1PGDG.rhel8.x86_64 postgresql13-libs-13.4-1PGDG.rhel8.x86_64 postgresql13-server-13.4-1PGDG.rhel8.x86_64
# rm -rf '/var/lib/pgsql/13/data'




Index size becomes larger after rebuild in Oracle

Let's look at this scenario in Oracle: after 5% of the rows are randomly deleted from a table, will the index become smaller if we rebuild it?

The answer is: maybe not, and the size can even end up larger if the index is built on a sequence or identity column.

Here are the steps to reproduce it:

create table t (
    id int generated as identity,
    value varchar2(10),
    last_update date default sysdate,
    constraint t_pk primary key(id));

-- set sort_area_size to avoid "ORA-30009: Not enough memory for CONNECT BY operation"
alter session set workarea_size_policy=manual;
alter session set sort_area_size=1000000000;

insert into t (value) select rpad('a',10,'a') from dual connect by level <= 10000000;

commit;

-- randomly delete 5% records
begin
  for i in 1..10000000*0.05 loop
    delete from t where id>dbms_random.value(1,10000000) and rownum=1;
    if mod(i,1000) = 0 then
      commit;
    end if;
  end loop;
  commit;
end;
/

SQL> select bytes from user_segments where segment_name ='T_PK';

           BYTES
----------------
     167,772,160

SQL> alter index t_pk rebuild;

Index T_PK altered.

SQL> select bytes from user_segments where segment_name ='T_PK';

           BYTES
----------------
     176,160,768


The index size increased even though 5% of the rows were removed.

Why? When the rows were originally inserted in ascending key order, the index leaf blocks were filled to nearly 100%. The default index rebuild reserves 10% free space in each block (PCTFREE 10).
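
The reserved space is visible in the data dictionary; a quick check (PCT_FREE defaults to 10 unless the rebuild specifies otherwise):

select index_name, pct_free from user_indexes where index_name = 'T_PK';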

Before Rebuild:

SQL> analyze index t_pk validate structure;

Index T_PK analyzed.

SQL> select blocks,lf_rows,lf_blks,pct_used from index_stats where name='T_PK';

    BLOCKS    LF_ROWS    LF_BLKS   PCT_USED
---------- ---------- ---------- ----------
     20480    9842163      20000         99

After Rebuild:

SQL> analyze index t_pk validate structure;

Index T_PK analyzed.

SQL> select blocks,lf_rows,lf_blks,pct_used from index_stats where name='T_PK';

    BLOCKS    LF_ROWS    LF_BLKS   PCT_USED
---------- ---------- ---------- ----------
     21504    9500000      21159         90

Rebuild with PCTFREE=0

SQL> alter index t_pk rebuild pctfree 0;
Index T_PK altered.

SQL> select bytes from user_segments where segment_name ='T_PK';
           BYTES
----------------
     159,383,552

SQL> analyze index t_pk validate structure;
Index T_PK analyzed.

SQL> select blocks,lf_rows,lf_blks,pct_used from index_stats where name='T_PK';
    BLOCKS    LF_ROWS    LF_BLKS   PCT_USED
---------- ---------- ---------- ----------
     19456    9500000      19001        100

ORA-30009: Not enough memory for CONNECT BY operation

Symptom:

SQL> insert into t (value) select rpad('a',10,'a') from dual connect by level <= 10000000;

Error starting at line : 1 in command -
insert into t (value) select rpad('a',10,'a') from dual connect by level <= 10000000
Error report -
ORA-30009: Not enough memory for CONNECT BY operation


Solution: increase the sort area size

SQL> alter session set workarea_size_policy=manual;

Session altered.


SQL> alter session set sort_area_size=1000000000;

Session altered.


SQL> insert into t (value) select rpad('a',10,'a') from dual connect by level <= 10000000;

10,000,000 rows inserted.


SQL> select pga_used_mem,pga_alloc_mem,pga_max_mem from v$process
  2* where addr=(select paddr from v$session where sid=userenv('SID'));

PGA_USED_MEM PGA_ALLOC_MEM PGA_MAX_MEM
------------ ------------- -----------
     6678135       7287743   654783423

Thursday, October 14, 2021

"All" Operator in Oracle Database

Environment Setup

create table t (id integer, order_date date, 
                constraint t_pk primary key(id));
insert into t 
select rownum, rownum+sysdate from dual connect by level <= 1000000;
commit;
exec dbms_stats.gather_table_stats('ADMIN','T');

Execution Time and Plan for “ALL” operator

SQL> select * from t where order_date > ALL (select order_date from t where id=999995);

        ID    ORDER_DATE 
__________ _____________ 
    999996 07/09/59      
    999997 08/09/59      
    999998 09/09/59      
    999999 10/09/59      
   1000000 11/09/59      

Elapsed: 00:00:00.613

SQL> explain plan for select * from t where order_date > ALL (select order_date from t where id=999995);
Explained.

SQL> select * from table(dbms_xplan.display());

                                                                                PLAN_TABLE_OUTPUT 
_________________________________________________________________________________________________ 
Plan hash value: 86575764                                                                         
                                                                                                  
----------------------------------------------------------------------------------------------    
| Id  | Operation                     | Name | Rows  | Bytes |TempSpc| Cost (%CPU)| Time     |    
----------------------------------------------------------------------------------------------    
|   0 | SELECT STATEMENT              |      | 10000 |   253K|       |  5443   (1)| 00:00:01 |    
|   1 |  MERGE JOIN ANTI NA           |      | 10000 |   253K|       |  5443   (1)| 00:00:01 |    
|   2 |   SORT JOIN                   |      |  1000K|    12M|    45M|  5439   (1)| 00:00:01 |    
|   3 |    TABLE ACCESS FULL          | T    |  1000K|    12M|       |   688   (1)| 00:00:01 |    
|*  4 |   SORT UNIQUE                 |      |     1 |    13 |       |     4  (25)| 00:00:01 |    
|   5 |    TABLE ACCESS BY INDEX ROWID| T    |     1 |    13 |       |     3   (0)| 00:00:01 |    
|*  6 |     INDEX UNIQUE SCAN         | T_PK |     1 |       |       |     2   (0)| 00:00:01 |    
----------------------------------------------------------------------------------------------    
 
Predicate Information (identified by operation id):                                               
---------------------------------------------------                                               
                                                                                                  
   4 - access("ORDER_DATE"<="ORDER_DATE")                                                         
       filter("ORDER_DATE"<="ORDER_DATE")                                                         
   6 - access("ID"=999995)                                                                        

20 rows selected. 

Execution Time and Plan for MAX+Subquery

SQL> select * from t where order_date > (select max(order_date) from t where id=999995);

        ID    ORDER_DATE 
__________ _____________ 
    999996 07/09/59      
    999997 08/09/59      
    999998 09/09/59      
    999999 10/09/59      
   1000000 11/09/59      

Elapsed: 00:00:00.286

SQL> explain plan for select * from t where order_date > (select max(order_date) from t where id=999995);

Explained.

SQL> select * from table(dbms_xplan.display());

                                                                        PLAN_TABLE_OUTPUT 
_________________________________________________________________________________________ 
Plan hash value: 3070503375                                                               
                                                                                          
--------------------------------------------------------------------------------------    
| Id  | Operation                     | Name | Rows  | Bytes | Cost (%CPU)| Time     |    
--------------------------------------------------------------------------------------    
|   0 | SELECT STATEMENT              |      | 50000 |   634K|   692   (2)| 00:00:01 |    
|*  1 |  TABLE ACCESS FULL            | T    | 50000 |   634K|   689   (2)| 00:00:01 |    
|   2 |   SORT AGGREGATE              |      |     1 |    13 |            |          |    
|   3 |    TABLE ACCESS BY INDEX ROWID| T    |     1 |    13 |     3   (0)| 00:00:01 |    
|*  4 |     INDEX UNIQUE SCAN         | T_PK |     1 |       |     2   (0)| 00:00:01 |    
--------------------------------------------------------------------------------------    
                                                                                          
Predicate Information (identified by operation id):                                       
---------------------------------------------------                                       
                                                                                          
   1 - filter("ORDER_DATE"> (SELECT MAX("ORDER_DATE") FROM "T" "T" WHERE                  
              "ID"=999995))                                                               
   4 - access("ID"=999995)                                                                

18 rows selected. 

The behavioural difference between the ALL operator and MAX+subquery (> ALL over an empty result set evaluates to TRUE)


SQL> select order_date from t where id=1000001;

no rows selected

SQL> select count(*) from t where order_date > ALL (select order_date from t where id=1000001);

   COUNT(*) 
___________ 
    1000000 

SQL> select * from t where order_date > (select max(order_date) from t where id=1000001);

no rows selected
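
The subquery returns no rows here, so MAX(order_date) yields NULL and the comparison order_date > NULL evaluates to UNKNOWN, filtering out every row. The > ALL predicate over an empty result set, by contrast, is defined to be TRUE, so all 1,000,000 rows qualify.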

Friday, October 8, 2021

Examples to generate large datasets in Oracle, PostgreSQL and MySQL

In Oracle:

create table t_test (id integer generated as identity, name varchar2(20));

insert into t_test (name) select 'hans' from dual connect by level <= 2000000;
insert into t_test (name) select 'paul' from dual connect by level <= 2000000;
COMMIT;

CREATE INDEX idx_id ON t_test (id);

SQL> select segment_name,bytes from user_segments where segment_name in ('IDX_ID','T_TEST');

   SEGMENT_NAME       BYTES 
_______________ ___________ 
IDX_ID             83886080 
T_TEST             75497472 

In PostgreSQL:

CREATE TABLE t_test (id serial, name text);

INSERT INTO t_test (name) SELECT 'hans' FROM generate_series(1, 2000000);
INSERT INTO t_test (name) SELECT 'paul' FROM generate_series(1, 2000000);

CREATE INDEX idx_id ON t_test (id);

mytest=> SELECT pg_size_pretty(pg_relation_size('T_TEST')) AS "T_TEST",
         pg_size_pretty(pg_relation_size('IDX_ID')) AS "IDX_ID";
 T_TEST | IDX_ID 
--------+--------
 169 MB | 86 MB
(1 row)

In MySQL:

CREATE TABLE t_test (id int auto_increment, name varchar(10), primary key (id));

set @@cte_max_recursion_depth=100000000;

INSERT INTO t_test (name)
WITH RECURSIVE cte (n) AS
(
  SELECT 1
  UNION ALL
  SELECT n + 1 FROM cte WHERE n < 2000000
)
SELECT 'hans' FROM cte;

INSERT INTO t_test (name)
WITH RECURSIVE cte (n) AS
(
  SELECT 1
  UNION ALL
  SELECT n + 1 FROM cte WHERE n < 2000000
)
SELECT 'paul' FROM cte;

------ Redo the test without depending on MySQL's "PRIMARY" (clustered) index

CREATE TABLE t_test (id int, name varchar(10));

set @@cte_max_recursion_depth=100000000;

INSERT INTO t_test
WITH RECURSIVE cte (n) AS
(
  SELECT 1
  UNION ALL
  SELECT n + 1 FROM cte WHERE n < 2000000
)
SELECT *, 'hans' FROM cte;

INSERT INTO t_test
WITH RECURSIVE cte (n) AS
(
  SELECT 1
  UNION ALL
  SELECT n + 1 FROM cte WHERE n < 2000000
)
SELECT *, 'paul' FROM cte;

CREATE INDEX idx_id ON t_test (id);

mysql> SELECT database_name, table_name, index_name,
       ROUND(stat_value * @@innodb_page_size / 1024 / 1024, 2) size_in_mb FROM mysql.innodb_index_stats
       WHERE stat_name = 'size' AND index_name != 'PRIMARY' and table_name='t_test' ORDER BY size_in_mb DESC;
+---------------+------------+-----------------+------------+
| database_name | table_name | index_name      | size_in_mb |
+---------------+------------+-----------------+------------+
| test2         | t_test     | GEN_CLUST_INDEX |     142.67 |
| test2         | t_test     | idx_id          |      73.61 |
+---------------+------------+-----------------+------------+
2 rows in set (0.00 sec)
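
Note: because the second t_test has no primary key, GEN_CLUST_INDEX is the hidden clustered index InnoDB creates automatically when a table has no PRIMARY KEY or suitable non-null UNIQUE index; its size here effectively represents the table data itself.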