Monday, July 8, 2024

Performance validation for UUIDv7 implemented in PL/pgSQL on PostgreSQL 16

Source code for UUIDv7 for PostgreSQL: https://gist.github.com/fabiolimace/515a0440e3e40efeb234e12644a6a346

create or replace function uuid7() returns uuid as $$
begin
    -- zero-argument overload: generate a UUIDv7 from the current clock_timestamp()
    return uuid7(clock_timestamp());
end $$ language plpgsql;
create or replace function uuid7(p_timestamp timestamp with time zone) returns uuid as $$
declare

    v_time double precision := null;

    v_unix_t bigint := null;
    v_rand_a bigint := null;
    v_rand_b bigint := null;

    v_unix_t_hex varchar := null;
    v_rand_a_hex varchar := null;
    v_rand_b_hex varchar := null;

    c_milli double precision := 10^3;  -- 1 000
    c_micro double precision := 10^6;  -- 1 000 000
    c_scale double precision := 4.096; -- 4.0 * (1024 / 1000)
    
    c_version bigint := x'0000000000007000'::bigint; -- RFC-9562 version: b'0111...'
    c_variant bigint := x'8000000000000000'::bigint; -- RFC-9562 variant: b'10xx...'

begin

    v_time := extract(epoch from p_timestamp);

    v_unix_t := trunc(v_time * c_milli);                                   -- 48-bit Unix timestamp in milliseconds
    v_rand_a := trunc((v_time * c_micro - v_unix_t * c_milli) * c_scale);  -- sub-millisecond fraction scaled into 12 bits
    v_rand_b := trunc(random() * 2^30)::bigint << 32 | trunc(random() * 2^32)::bigint; -- 62 random bits

    v_unix_t_hex := lpad(to_hex(v_unix_t), 12, '0');
    v_rand_a_hex := lpad(to_hex((v_rand_a | c_version)::bigint), 4, '0');
    v_rand_b_hex := lpad(to_hex((v_rand_b | c_variant)::bigint), 16, '0');

    return (v_unix_t_hex || v_rand_a_hex || v_rand_b_hex)::uuid;
    
end $$ language plpgsql;
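
A quick sanity check (a sketch, not part of the gist): generate a handful of values, confirm the version nibble is 7, and check the sort order. Because the high bits hold the millisecond timestamp plus a sub-millisecond fraction in rand_a, values generated back to back should generally sort in generation order.

select u,
       substring(u::text, 15, 1) as version_nibble  -- 13th hex digit, expected to be '7'
from (select uuid7() as u from generate_series(1, 5)) t
order by u;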

Performance Testing

Testing for UUID generation

Setup

do
$$
declare 
u uuid;
begin
    for i in 1..10000000
    loop
        select gen_random_uuid() into u;
--        raise notice '%', u;
    end loop;
end;
$$;
do
$$
declare 
u uuid;
begin
    for i in 1..10000000
    loop
        select uuid7() into u;
--        raise notice '%', u;
    end loop;
end;
$$;

Test

mytest=# do
mytest-# $$
mytest$# declare
mytest$# u uuid;
mytest$# begin
mytest$#     for i in 1..10000000
mytest$#     loop
mytest$#         select gen_random_uuid() into u;
mytest$# --        raise notice '%', u;
mytest$#     end loop;
mytest$# end;
mytest$# $$;
DO
Time: 31743.056 ms (00:31.743)
-- each execution 0.0031743056 ms (about 3.2 µs)
mytest=# do
mytest-# $$
mytest$# declare
mytest$# u uuid;
mytest$# begin
mytest$#     for i in 1..10000000
mytest$#     loop
mytest$#         select uuid7() into u;
mytest$# --        raise notice '%', u;
mytest$#     end loop;
mytest$# end;
mytest$# $$;
DO
Time: 54132.676 ms (00:54.133)
-- each execution 0.0054132676 ms (about 5.4 µs)

Testing for UUID insertion

Setup

create table tbl_uuidv4 
(u uuid primary key, c text);
insert into tbl_uuidv4
select gen_random_uuid(), 'additional data' from generate_series(1,10000000);
create table tbl_uuidv7 
(u uuid primary key, c text);
insert into tbl_uuidv7
select uuid7(), 'additional data' from generate_series(1,10000000);

Test

mytest=# insert into tbl_uuidv4
mytest-# select gen_random_uuid(), 'additional data' from generate_series(1,10000000);
INSERT 0 10000000
Time: 60405.272 ms (01:00.405)
mytest=# insert into tbl_uuidv7
mytest-# select uuid7(), 'additional data' from generate_series(1,10000000);

INSERT 0 10000000
Time: 56902.619 ms (00:56.903)
mytest=# \dt+ tbl_uuid*
                                      List of relations
 Schema |    Name    | Type  |  Owner   | Persistence | Access method |  Size  | Description
--------+------------+-------+----------+-------------+---------------+--------+-------------
 public | tbl_uuidv4 | table | donghua  | permanent   | heap          | 575 MB |
 public | tbl_uuidv7 | table | donghua  | permanent   | heap          | 575 MB |
(2 rows)
mytest=# \di+ tbl_uuid*
                                               List of relations
 Schema |      Name       | Type  |  Owner   |   Table    | Persistence | Access method |  Size  | Description
--------+-----------------+-------+----------+------------+-------------+---------------+--------+-------------
 public | tbl_uuidv4_pkey | index | donghua  | tbl_uuidv4 | permanent   | btree         | 392 MB |
 public | tbl_uuidv7_pkey | index | donghua  | tbl_uuidv7 | permanent   | btree         | 301 MB |
(2 rows)
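
The smaller UUIDv7 index reflects better leaf-page fill: time-ordered keys keep appending to the rightmost leaf page, while random UUIDv4 keys cause 50/50 page splits throughout the tree. A rough way to confirm this (a sketch, assuming the pgstattuple extension is available) is to compare the average leaf density of the two primary-key indexes:

create extension if not exists pgstattuple;
select 'tbl_uuidv4_pkey' as index_name, avg_leaf_density
from pgstatindex('tbl_uuidv4_pkey')
union all
select 'tbl_uuidv7_pkey', avg_leaf_density
from pgstatindex('tbl_uuidv7_pkey');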

Testing how many buffers are dirtied during UUID insertion

Setup

explain (analyze,buffers)
insert into tbl_uuidv4
select gen_random_uuid(), 'additional data' from generate_series(1,100);
explain (analyze,buffers)
insert into tbl_uuidv7
select uuid7(), 'additional data' from generate_series(1,100);

Test

mytest=# explain (analyze,buffers)
mytest-# insert into tbl_uuidv4
mytest-# select gen_random_uuid(), 'additional data' from generate_series(1,100);
                                                          QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------
 Insert on tbl_uuidv4  (cost=0.00..2.25 rows=0 width=0) (actual time=40.337..40.338 rows=0 loops=1)
   Buffers: shared hit=231 read=186 dirtied=106 written=1
   ->  Subquery Scan on "*SELECT*"  (cost=0.00..2.25 rows=100 width=48) (actual time=0.038..1.379 rows=100 loops=1)
         ->  Function Scan on generate_series  (cost=0.00..1.25 rows=100 width=48) (actual time=0.037..1.306 rows=100 loops=1)
 Planning Time: 0.048 ms
 Execution Time: 40.365 ms
(6 rows)

Time: 41.754 ms
mytest=# explain (analyze,buffers)
mytest-# insert into tbl_uuidv7
mytest-# select uuid7(), 'additional data' from generate_series(1,100);
                                                           QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------
 Insert on tbl_uuidv7  (cost=0.00..27.00 rows=0 width=0) (actual time=1.204..1.205 rows=0 loops=1)
   Buffers: shared hit=219 dirtied=1 written=1
   ->  Subquery Scan on "*SELECT*"  (cost=0.00..27.00 rows=100 width=48) (actual time=0.077..0.614 rows=100 loops=1)
         ->  Function Scan on generate_series  (cost=0.00..26.00 rows=100 width=48) (actual time=0.076..0.594 rows=100 loops=1)
 Planning Time: 0.036 ms
 Execution Time: 1.227 ms
(6 rows)

Time: 1.904 ms

Sunday, May 12, 2024

Install PostgreSQL Server or Client on macOS for testing purposes

The default PostgreSQL installation for macOS is via the installer packaged by EDB, which provides more features and is easier for beginners. Personally, I prefer to install PostgreSQL into a specific folder, which makes it easier to have multiple versions co-exist on my laptop and easier to uninstall or upgrade.

  1. Download the PostgreSQL binaries archive.
  2. Unzip it to a folder. Here it will be unzipped into /Users/donghua/Tools/pgsql
unzip postgresql-16.3-1-osx-binaries.zip -d /Users/donghua/Tools
  3. Fix the com.apple.quarantine issue on macOS

sudo xattr -dr com.apple.quarantine /Users/donghua/Tools/pgsql

for i in `find /Users/donghua/Tools/pgsql/bin -type f`
do
    ls -l@ $i | grep com.apple.quarantine > /dev/null
    if [[ $? -eq 0 ]]; then
        print "File quarantined: $i"
        sudo xattr -d com.apple.quarantine $i
    fi
done

for i in `find /Users/donghua/Tools/pgsql/lib -type f`
do
    ls -l@ $i | grep com.apple.quarantine > /dev/null
    if [[ $? -eq 0 ]]; then
        print "File quarantined: $i"
        sudo xattr -d com.apple.quarantine $i
    fi
done
  4. Set up your environment variables.

Here I use zsh; put the entries below into ~/.zprofile:


export PGHOME=/Users/donghua/Tools/pgsql
export PGDATA=/Users/donghua/Tools/pgdata/16/data
export PATH=$PATH:$PGHOME/bin
alias pgadmin4="open -a '/Users/donghua/Tools/pgsql/pgAdmin 4.app'"
alias pglog='cd $PGDATA/log/;ls -1t  | head -n 1  | xargs tail -f'

Saturday, February 10, 2024

"ERROR: duplicate key value violates unique constraint" in PostgreSQL but not in Oracle/SQLServer

In both Oracle and SQL Server, these non-deferrable constraints are evaluated at statement completion, so intermediate states that would break the constraint are allowed as long as the final state at statement completion is compliant.

PostgreSQL implements this differently: the uniqueness check happens row by row as the statement proceeds, so the outcome can depend on the physical order of the rows.

PostgreSQL Example

mytest=# select version();
                                                     version
------------------------------------------------------------------------------------------------------------------
 PostgreSQL 16.1 on x86_64-apple-darwin20.6.0, compiled by Apple clang version 12.0.5 (clang-1205.0.22.9), 64-bit
(1 row)
mytest=# drop table p;
DROP TABLE
mytest=# create table p (id int primary key);
CREATE TABLE
mytest=# insert into p values(1),(2),(3);
INSERT 0 3
mytest=# select * from p;
 id
----
  1
  2
  3
(3 rows)
mytest=# update p set id=id+1;
ERROR:  duplicate key value violates unique constraint "p_pkey"
DETAIL:  Key (id)=(2) already exists.

If we update the table backwards (in descending id order), it succeeds, as shown in the following code:

mytest=# do
$$
declare
  rec_p record;
  cur_p cursor for
       select id from p order by id desc;
begin
  open cur_p;
  loop
    fetch cur_p into rec_p;
    exit when not found;
      update p set id=id+1 where id=rec_p.id;
  end loop;
end;
$$ language plpgsql;
DO
mytest=# select * from p;
 id
----
  4
  3
  2
(3 rows)

If the data is stored in a different physical order, it also succeeds.

mytest=# truncate table p;
TRUNCATE TABLE
mytest=# insert into p values(3),(2),(1);
INSERT 0 3
mytest=# select * from p;
 id
----
  3
  2
  1
(3 rows)
mytest=# update p set id=id+1;
UPDATE 3
mytest=# select * from p;
 id
----
  4
  3
  2
(3 rows)
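
If the Oracle/SQL Server behaviour is needed, PostgreSQL offers deferrable unique constraints: a primary key declared DEFERRABLE (INITIALLY IMMEDIATE) is checked at the end of the statement rather than row by row, so the original forward update succeeds. A minimal sketch:

drop table if exists p;
create table p (id int primary key deferrable initially immediate);
insert into p values (1),(2),(3);
update p set id = id + 1;  -- unique check runs at end of statement, so this succeeds
select * from p;

Note that a deferrable primary key cannot be referenced by a foreign key, so this is a trade-off rather than a drop-in default.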

MS SQL Server Example

[oracle@ol ~]$ sqlcmd  -S localhost -U sa -C -P p_ssw0rd -d TestDB
1> select @@version;
2> go

-------------------------------------------------------------------
Microsoft SQL Server 2022 (RTM-CU11) (KB5032679) - 16.0.4105.2 (X64)
  Nov 14 2023 18:33:19
  Copyright (C) 2022 Microsoft Corporation
  Developer Edition (64-bit) on Linux (Oracle Linux Server 9.3) <X64>

(1 rows affected)
1> create table p (id int primary key);
2> go

1> select * from p;
2> go
id
-----------

(0 rows affected)

1> insert into p values(1),(2),(3);
2> go

(3 rows affected)
1> update p set id=id+1;
2> go

(3 rows affected)
1> select * from p;
2> go

id
-----------
          2
          3
          4

(3 rows affected)

Oracle Example

Oracle Database 23c Free, Release 23.0.0.0.0 - Developer-Release Version 23.2.0.0.0

SQL> create table p (id int primary key);

Table created.

SQL> insert into p values(1),(2),(3);

3 rows created.

SQL> commit;

Commit complete.

SQL> update p set id=id+1;

3 rows updated.

SQL> commit;

Commit complete.
SQL> select * from p;

  ID
----------
   2
   3
   4

Tuesday, February 6, 2024

How to keep PostgreSQL sequences' value in sync via script/automation

Sample scripts to demonstrate how to generate the DDL needed to keep sequences in sync between two PostgreSQL databases.

One assumption is made: both databases contain the same sequence definitions, and the only differences are the last_value of each sequence.

Method 1 - Generate DDL on source

  1. To retrieve the last_value of each sequence, create the following helper function.
create or replace function get_sequence_last_value 
(schema_name name, sequence_name name)
returns table (sequence_value bigint)
language 'plpgsql'
AS
$$
begin
   return query execute format ('select last_value from %I.%I',schema_name,sequence_name);
end;
$$;

Test the function to make sure it works:

mytest=# select get_sequence_last_value('public','s1');
 get_sequence_last_value
-------------------------
                      35
(1 row)
  2. Generate the DDL script.
select 'alter sequence '||seq.schema_name||'.'||seq.sequence_name||' restart with '||v.sequence_value||';'
from
(select s.nspname schema_name, c.relname sequence_name
       from pg_class c, pg_namespace s
       where c.relnamespace=s.oid 
         and c.relkind='S'
         and s.nspname in ('public')) seq 
join lateral get_sequence_last_value(seq.schema_name,seq.sequence_name) v
on true
order by seq.schema_name,seq.sequence_name;

Sample output:

mytest=# select 'alter sequence '||seq.schema_name||'.'||seq.sequence_name||' restart with '||v.sequence_value||';'
mytest-# from
mytest-# (select s.nspname schema_name, c.relname sequence_name
mytest(#        from pg_class c, pg_namespace s
mytest(#        where c.relnamespace=s.oid
mytest(#          and c.relkind='S'
mytest(#          and s.nspname in ('public')) seq
mytest-# join lateral get_sequence_last_value(seq.schema_name,seq.sequence_name) v
mytest-# on true
mytest-# order by seq.schema_name,seq.sequence_name;
                          ?column?
-------------------------------------------------------------
 alter sequence public.s1 restart with 35;
 alter sequence public.s2 restart with 28;
 alter sequence public.sporting_event_id_seq restart with 1;
(3 rows)

  3. Apply these DDLs to the target database

The script below uses the dblink extension: it creates a dblink connection, retrieves the latest value for each sequence over the dblink, and sets the restart value for the corresponding sequences in the current (target) database.

do
$$
declare
  seq_value bigint;
  rec_seq record;
  cur_seq cursor for 
       select s.nspname schema_name, c.relname sequence_name
       from pg_class c, pg_namespace s
       where c.relnamespace=s.oid 
         and c.relkind='S'
         and s.nspname in ('public');
begin
       begin
         execute 'SELECT dblink_connect($1,$2)' using 'myconn','dbname=mytest';
       exception when others then 
         raise notice 'Connection creation exception, safely ignore if subsequent code works';
       end;
  
       open cur_seq;
       loop
         fetch cur_seq into rec_seq;
         exit when not found;
         raise notice 'Processing: %', rec_seq.schema_name||'.'||rec_seq.sequence_name;
         -- process for each sequence found on the target
         begin
           select id into seq_value 
           from dblink('myconn', 'select last_value from '||rec_seq.schema_name||'.'||rec_seq.sequence_name) 
           as p(id bigint);
           
           begin
             raise notice 'alter sequence %.% restart with %', rec_seq.schema_name,rec_seq.sequence_name,seq_value;
             execute format('alter sequence %I.%I restart with %s',rec_seq.schema_name,rec_seq.sequence_name,seq_value);
           exception when others then 
             raise notice 'Exception occurred to set sequence %', rec_seq.schema_name||'.'||rec_seq.sequence_name; 
           end;
           
         exception when others then 
           raise notice 'Exception occurred to fetch sequence %', rec_seq.schema_name||'.'||rec_seq.sequence_name;
         end;
     
       end loop;
       begin
         execute 'SELECT dblink_disconnect($1)' using 'myconn';
       exception when others then 
         raise notice 'Connection disconnect exception, safely ignore.';
       end;
       
end;
$$ language plpgsql;

Sample output:


NOTICE:  Processing: public.sporting_event_id_seq
NOTICE:  alter sequence public.sporting_event_id_seq restart with 1
NOTICE:  Processing: public.s3
NOTICE:  Exception occurred to fetch sequence public.s3
NOTICE:  Processing: public.s1
NOTICE:  alter sequence public.s1 restart with 35
NOTICE:  Processing: public.s2
NOTICE:  alter sequence public.s2 restart with 28
DO

Query returned successfully in 87 msec.
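
To verify the result afterwards, here is a sketch that reuses the helper function and the 'myconn' dblink connection from above (re-run dblink_connect first if it is no longer open) to compare last_value on both sides. Unlike the script above, it does not trap exceptions, so a sequence that exists only on the target will raise an error.

select n.nspname as schema_name,
       c.relname as sequence_name,
       v.sequence_value as target_last_value,
       s.last_value     as source_last_value
from pg_class c
join pg_namespace n on c.relnamespace = n.oid
join lateral get_sequence_last_value(n.nspname, c.relname) v on true
join lateral dblink('myconn',
       format('select last_value from %I.%I', n.nspname, c.relname))
       as s(last_value bigint) on true
where c.relkind = 'S'
  and n.nspname in ('public')
order by 1, 2;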