Sunday, March 19, 2023

Compare [NOT] IN/EXISTS query optimisation between Oracle and PostgreSQL

 

Overview

We will compare "IN", "EXISTS", "JOIN", "NOT IN", "NOT EXISTS" and "OUTER JOIN" between Oracle 21c and PostgresQL 15.

The execution summary is as below:

QueryID
Query
OraclePostgreSQL
Query 1: "IN"select sum(v) from c where pid in (select pid from p where v>0.95);00:00:00.0176.142 ms
Query 2: "EXISTS"select sum(v) from c where exists (select pid from p where c.pid = p.pid and v>0.95);Same plan as query 1Same plan as query 1
Query 3: "JOIN"select sum(c.v) from c join p on c.pid=p.pid and p.v>0.95;Same plan as query 1Same plan as query 1
Query 4: "NOT IN"select sum(v) from c where pid not in (select pid from p where v<=0.95);00:00:00.01112.662 ms
Query 5: "NOT EXISTS"select sum(v) from c where not exists (select pid from p where c.pid = p.pid and v<=0.95);Same plan as query 4103.857 ms
Query 6: "OUTER JOIN"select sum(c.v) from c left outer join (select * from p where p.v<=0.95 ) p2 on c.pid=p2.pid where p2.pid is null;00:00:00.02Same plan as query 5

Setup before testing

Oracle

-- setup demo table

drop table c;
drop table p;

create table p(pid int primary key, v number(3,2), extra char(100));
create table c(cid int primary key, pid int, v number(3,2), extra char(100), 
                constraint c_p_fk foreign key (pid) references p(pid));

insert into p select level, round(dbms_random.value,2), 'x' 
                from dual connect by level <= 1000;
insert into c select level, floor(dbms_random.value(1,1000)),round(dbms_random.value,2), 'x' 
                from dual connect by level <= 100000;
commit;

create index idx_p_v on p(v);
create index idx_c_pid on c(pid);

exec dbms_stats.gather_table_stats('','P');
exec dbms_stats.gather_table_stats('','C');

-- formating related
set timing on;
set autotrace on explain
set linesize 120
set tab off

PostgreSQL

-- setup demo table

drop table c;
drop table p;

create table p(pid int primary key, v numeric(3,2), extra char(100));
create table c(cid int primary key, pid int, v numeric(3,2), extra char(100), 
                constraint c_p_fk foreign key (pid) references p(pid));

insert into p select level, round(random()::numeric,2), 'x' 
                from generate_series(1,1000) level;
insert into c select level, floor(random()::numeric*1000)+1,round(random()::numeric,2), 'x' 
                from generate_series(1,1000000) level;

create index idx_p_v on p(v);
create index idx_c_pid on c(pid);

vacuum analyze p;
vacuum analyze c;

-- formating related
\timing

Database Version

SQL> select banner_full from v$version;

BANNER_FULL
----------------------------------------------------------------------
Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
Version 21.9.0.0.0
mytest=# select version();
                                                 version
---------------------------------------------------------------------------------------------------------
 PostgreSQL 15.2 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-16), 64-bit
(1 row)

Query 1: "IN"

select sum(v) from c
  where pid in (select pid from p where v>0.95);

Oracle

SQL> select sum(v) from c
  2     where pid in (select pid from p where v>0.95);

    SUM(V)
----------
    2484.1

Elapsed: 00:00:00.01

Execution Plan
----------------------------------------------------------
Plan hash value: 423065292

---------------------------------------------------------------------------------------------
| Id  | Operation                | Name             | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT         |                  |     1 |    14 |   484   (1)| 00:00:01 |
|   1 |  SORT AGGREGATE          |                  |     1 |    14 |            |          |
|*  2 |   HASH JOIN              |                  |  5005 | 70070 |   484   (1)| 00:00:01 |
|*  3 |    VIEW                  | index$_join$_002 |    50 |   350 |     5   (0)| 00:00:01 |
|*  4 |     HASH JOIN            |                  |       |       |            |          |
|*  5 |      INDEX RANGE SCAN    | IDX_P_V          |    50 |   350 |     2   (0)| 00:00:01 |
|   6 |      INDEX FAST FULL SCAN| SYS_C008978      |    50 |   350 |     4   (0)| 00:00:01 |
|   7 |    TABLE ACCESS FULL     | C                |   100K|   683K|   479   (1)| 00:00:01 |
---------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("PID"="PID")
   3 - filter("V">0.95)
   4 - access(ROWID=ROWID)
   5 - access("V">0.95)

Note
-----
   - this is an adaptive plan

PostgreSQL

mytest=# select sum(v) from c
mytest-#   where pid in (select pid from p where v>0.95);
   sum
----------
 24233.38
(1 row)

Time: 76.142 ms

mytest=# explain
mytest-# select sum(v) from c
mytest-#    where pid in (select pid from p where v>0.95);
                                             QUERY PLAN
-----------------------------------------------------------------------------------------------------
 Finalize Aggregate  (cost=24522.20..24522.21 rows=1 width=32)
   ->  Gather  (cost=24521.98..24522.19 rows=2 width=32)
         Workers Planned: 2
         ->  Partial Aggregate  (cost=23521.98..23521.99 rows=1 width=32)
               ->  Hash Join  (cost=23.88..23470.94 rows=20417 width=4)
                     Hash Cond: (c.pid = p.pid)
                     ->  Parallel Seq Scan on c  (cost=0.00..22348.67 rows=416667 width=8)
                     ->  Hash  (cost=23.27..23.27 rows=49 width=4)
                           ->  Bitmap Heap Scan on p  (cost=4.65..23.27 rows=49 width=4)
                                 Recheck Cond: (v > 0.95)
                                 ->  Bitmap Index Scan on idx_p_v  (cost=0.00..4.64 rows=49 width=0)
                                       Index Cond: (v > 0.95)
(12 rows)

Time: 0.715 ms

Query 2: "EXISTS"

select sum(v) from c 
  where exists (select pid from p where c.pid = p.pid and v>0.95);

Oracle

SQL> select sum(v) from c
  2  where exists (select pid from p where c.pid = p.pid and v>0.95);

    SUM(V)
----------
    2484.1

Elapsed: 00:00:00.01

Execution Plan
----------------------------------------------------------
Plan hash value: 423065292

---------------------------------------------------------------------------------------------
| Id  | Operation                | Name             | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT         |                  |     1 |    14 |   484   (1)| 00:00:01 |
|   1 |  SORT AGGREGATE          |                  |     1 |    14 |            |          |
|*  2 |   HASH JOIN              |                  |  5005 | 70070 |   484   (1)| 00:00:01 |
|*  3 |    VIEW                  | index$_join$_002 |    50 |   350 |     5   (0)| 00:00:01 |
|*  4 |     HASH JOIN            |                  |       |       |            |          |
|*  5 |      INDEX RANGE SCAN    | IDX_P_V          |    50 |   350 |     2   (0)| 00:00:01 |
|   6 |      INDEX FAST FULL SCAN| SYS_C008978      |    50 |   350 |     4   (0)| 00:00:01 |
|   7 |    TABLE ACCESS FULL     | C                |   100K|   683K|   479   (1)| 00:00:01 |
---------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("C"."PID"="P"."PID")
   3 - filter("V">0.95)
   4 - access(ROWID=ROWID)
   5 - access("V">0.95)

Note
-----
   - this is an adaptive plan

PostgreSQL

mytest=# select sum(v) from c
mytest-#   where exists (select pid from p where c.pid = p.pid and v>0.95);
   sum
----------
 24233.38
(1 row)

Time: 76.109 ms
mytest=# explain
mytest-# select sum(v) from c
mytest-#   where exists (select pid from p where c.pid = p.pid and v>0.95);
                                             QUERY PLAN
-----------------------------------------------------------------------------------------------------
 Finalize Aggregate  (cost=24522.20..24522.21 rows=1 width=32)
   ->  Gather  (cost=24521.98..24522.19 rows=2 width=32)
         Workers Planned: 2
         ->  Partial Aggregate  (cost=23521.98..23521.99 rows=1 width=32)
               ->  Hash Join  (cost=23.88..23470.94 rows=20417 width=4)
                     Hash Cond: (c.pid = p.pid)
                     ->  Parallel Seq Scan on c  (cost=0.00..22348.67 rows=416667 width=8)
                     ->  Hash  (cost=23.27..23.27 rows=49 width=4)
                           ->  Bitmap Heap Scan on p  (cost=4.65..23.27 rows=49 width=4)
                                 Recheck Cond: (v > 0.95)
                                 ->  Bitmap Index Scan on idx_p_v  (cost=0.00..4.64 rows=49 width=0)
                                       Index Cond: (v > 0.95)
(12 rows)

Time: 0.723 ms

Query 3: "JOIN"

select sum(c.v) from c join p on c.pid=p.pid and p.v>0.95;

Oracle

SQL> select sum(c.v) from c join p on c.pid=p.pid and p.v>0.95;

  SUM(C.V)
----------
    2484.1

Elapsed: 00:00:00.01

Execution Plan
----------------------------------------------------------
Plan hash value: 423065292

---------------------------------------------------------------------------------------------
| Id  | Operation                | Name             | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT         |                  |     1 |    14 |   484   (1)| 00:00:01 |
|   1 |  SORT AGGREGATE          |                  |     1 |    14 |            |          |
|*  2 |   HASH JOIN              |                  |  5005 | 70070 |   484   (1)| 00:00:01 |
|*  3 |    VIEW                  | index$_join$_002 |    50 |   350 |     5   (0)| 00:00:01 |
|*  4 |     HASH JOIN            |                  |       |       |            |          |
|*  5 |      INDEX RANGE SCAN    | IDX_P_V          |    50 |   350 |     2   (0)| 00:00:01 |
|   6 |      INDEX FAST FULL SCAN| SYS_C008978      |    50 |   350 |     4   (0)| 00:00:01 |
|   7 |    TABLE ACCESS FULL     | C                |   100K|   683K|   479   (1)| 00:00:01 |
---------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("C"."PID"="P"."PID")
   3 - filter("P"."V">0.95)
   4 - access(ROWID=ROWID)
   5 - access("P"."V">0.95)

Note
-----
   - this is an adaptive plan

PostgreSQL

mytest=# select sum(c.v) from c join p on c.pid=p.pid and p.v>0.95;
   sum
----------
 24233.38
(1 row)

Time: 75.596 ms
mytest=# explain
mytest-# select sum(c.v) from c join p on c.pid=p.pid and p.v>0.95;
                                             QUERY PLAN
-----------------------------------------------------------------------------------------------------
 Finalize Aggregate  (cost=24522.20..24522.21 rows=1 width=32)
   ->  Gather  (cost=24521.98..24522.19 rows=2 width=32)
         Workers Planned: 2
         ->  Partial Aggregate  (cost=23521.98..23521.99 rows=1 width=32)
               ->  Hash Join  (cost=23.88..23470.94 rows=20417 width=4)
                     Hash Cond: (c.pid = p.pid)
                     ->  Parallel Seq Scan on c  (cost=0.00..22348.67 rows=416667 width=8)
                     ->  Hash  (cost=23.27..23.27 rows=49 width=4)
                           ->  Bitmap Heap Scan on p  (cost=4.65..23.27 rows=49 width=4)
                                 Recheck Cond: (v > 0.95)
                                 ->  Bitmap Index Scan on idx_p_v  (cost=0.00..4.64 rows=49 width=0)
                                       Index Cond: (v > 0.95)
(12 rows)

Time: 0.707 ms

Query 4: "NOT IN"

select sum(v) from c
  where pid not in (select pid from p where v<=0.95);

Oracle

SQL> select sum(v) from c
  2    where pid not in (select pid from p where v<=0.95);

    SUM(V)
----------
    2484.1

Elapsed: 00:00:00.01

Execution Plan
----------------------------------------------------------
Plan hash value: 257737780

----------------------------------------------------------------------------------------------
| Id  | Operation                 | Name             | Rows  | Bytes | Cost (%CPU)| Time     |
----------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT          |                  |     1 |    14 |   485   (1)| 00:00:01 |
|   1 |  SORT AGGREGATE           |                  |     1 |    14 |            |          |
|*  2 |   HASH JOIN RIGHT ANTI SNA|                  |  4905 | 68670 |   485   (1)| 00:00:01 |
|*  3 |    VIEW                   | index$_join$_002 |   950 |  6650 |     6   (0)| 00:00:01 |
|*  4 |     HASH JOIN             |                  |       |       |            |          |
|*  5 |      INDEX RANGE SCAN     | IDX_P_V          |   950 |  6650 |     3   (0)| 00:00:01 |
|   6 |      INDEX FAST FULL SCAN | SYS_C008978      |   950 |  6650 |     4   (0)| 00:00:01 |
|   7 |    TABLE ACCESS FULL      | C                |   100K|   683K|   479   (1)| 00:00:01 |
----------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("PID"="PID")
   3 - filter("V"<=0.95)
   4 - access(ROWID=ROWID)
   5 - access("V"<=0.95)

PostgreSQL

mytest=# select sum(v) from c
mytest-#   where pid not in (select pid from p where v<=0.95);
   sum
----------
 24233.38
(1 row)
mytest=# explain
mytest-# select sum(v) from c
mytest-#   where pid not in (select pid from p where v<=0.95);
                                      QUERY PLAN
--------------------------------------------------------------------------------------
 Finalize Aggregate  (cost=24944.27..24944.28 rows=1 width=32)
   ->  Gather  (cost=24944.05..24944.26 rows=2 width=32)
         Workers Planned: 2
         ->  Partial Aggregate  (cost=23944.05..23944.06 rows=1 width=32)
               ->  Parallel Seq Scan on c  (cost=32.88..23423.21 rows=208333 width=4)
                     Filter: (NOT (hashed SubPlan 1))
                     SubPlan 1
                       ->  Seq Scan on p  (cost=0.00..30.50 rows=951 width=4)
                             Filter: (v <= 0.95)
(9 rows)

Time: 0.493 ms

Query 5: "NOT EXISTS"

select sum(v) from c 
  where not exists (select pid from p where c.pid = p.pid and v<=0.95);

Oracle

SQL> select sum(v) from c
  2    where not exists (select pid from p where c.pid = p.pid and v<=0.95);

    SUM(V)
----------
    2484.1

Elapsed: 00:00:00.02

Execution Plan
----------------------------------------------------------
Plan hash value: 3094349839

---------------------------------------------------------------------------------------------
| Id  | Operation                | Name             | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT         |                  |     1 |    14 |   485   (1)| 00:00:01 |
|   1 |  SORT AGGREGATE          |                  |     1 |    14 |            |          |
|*  2 |   HASH JOIN RIGHT ANTI   |                  |  4905 | 68670 |   485   (1)| 00:00:01 |
|*  3 |    VIEW                  | index$_join$_002 |   950 |  6650 |     6   (0)| 00:00:01 |
|*  4 |     HASH JOIN            |                  |       |       |            |          |
|*  5 |      INDEX RANGE SCAN    | IDX_P_V          |   950 |  6650 |     3   (0)| 00:00:01 |
|   6 |      INDEX FAST FULL SCAN| SYS_C008978      |   950 |  6650 |     4   (0)| 00:00:01 |
|   7 |    TABLE ACCESS FULL     | C                |   100K|   683K|   479   (1)| 00:00:01 |
---------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("C"."PID"="P"."PID")
   3 - filter("V"<=0.95)
   4 - access(ROWID=ROWID)
   5 - access("V"<=0.95)

PostgreSQL

mytest=# select sum(v) from c
mytest-#   where not exists (select pid from p where c.pid = p.pid and v<=0.95);
   sum
----------
 24233.38
(1 row)

Time: 103.857 ms
mytest=# explain
mytest-# select sum(v) from c
mytest-#   where not exists (select pid from p where c.pid = p.pid and v<=0.95);
                                        QUERY PLAN
-------------------------------------------------------------------------------------------
 Finalize Aggregate  (cost=25186.02..25186.03 rows=1 width=32)
   ->  Gather  (cost=25185.80..25186.01 rows=2 width=32)
         Workers Planned: 2
         ->  Partial Aggregate  (cost=24185.80..24185.81 rows=1 width=32)
               ->  Hash Anti Join  (cost=42.39..24134.76 rows=20417 width=4)
                     Hash Cond: (c.pid = p.pid)
                     ->  Parallel Seq Scan on c  (cost=0.00..22348.67 rows=416667 width=8)
                     ->  Hash  (cost=30.50..30.50 rows=951 width=4)
                           ->  Seq Scan on p  (cost=0.00..30.50 rows=951 width=4)
                                 Filter: (v <= 0.95)
(10 rows)

Time: 0.751 ms

Query 6: "OUTER JOIN""

select sum(c.v) from c left outer join (select * from p where p.v<=0.95 ) p2 
    on c.pid=p2.pid where p2.pid is null;

Oracle

SQL> select sum(c.v) from c left outer join (select * from p where p.v<=0.95 ) p2
  2      on c.pid=p2.pid where p2.pid is null;

  SUM(C.V)
----------
    2484.1

Elapsed: 00:00:00.02

Execution Plan
----------------------------------------------------------
Plan hash value: 3112675040

----------------------------------------------------------------------------------------------
| Id  | Operation                 | Name             | Rows  | Bytes | Cost (%CPU)| Time     |
----------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT          |                  |     1 |    14 |   485   (1)| 00:00:01 |
|   1 |  SORT AGGREGATE           |                  |     1 |    14 |            |          |
|*  2 |   FILTER                  |                  |       |       |            |          |
|*  3 |    HASH JOIN RIGHT OUTER  |                  |  4905 | 68670 |   485   (1)| 00:00:01 |
|*  4 |     VIEW                  | index$_join$_003 |   950 |  6650 |     6   (0)| 00:00:01 |
|*  5 |      HASH JOIN            |                  |       |       |            |          |
|*  6 |       INDEX RANGE SCAN    | IDX_P_V          |   950 |  6650 |     3   (0)| 00:00:01 |
|   7 |       INDEX FAST FULL SCAN| SYS_C008978      |   950 |  6650 |     4   (0)| 00:00:01 |
|   8 |     TABLE ACCESS FULL     | C                |   100K|   683K|   479   (1)| 00:00:01 |
----------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - filter("P"."PID" IS NULL)
   3 - access("C"."PID"="P"."PID"(+))
   4 - filter("P"."V"(+)<=0.95)
   5 - access(ROWID=ROWID)
   6 - access("P"."V"<=0.95)

PostgreSQL

mytest=# select sum(c.v) from c left outer join (select * from p where p.v<=0.95 ) p2
mytest-#     on c.pid=p2.pid where p2.pid is null;
   sum
----------
 24233.38
(1 row)

Time: 104.988 ms
mytest=# explain
mytest-# select sum(c.v) from c left outer join (select * from p where p.v<=0.95 ) p2
mytest-#     on c.pid=p2.pid where p2.pid is null;
                                        QUERY PLAN
-------------------------------------------------------------------------------------------
 Finalize Aggregate  (cost=25186.02..25186.03 rows=1 width=32)
   ->  Gather  (cost=25185.80..25186.01 rows=2 width=32)
         Workers Planned: 2
         ->  Partial Aggregate  (cost=24185.80..24185.81 rows=1 width=32)
               ->  Hash Anti Join  (cost=42.39..24134.76 rows=20417 width=4)
                     Hash Cond: (c.pid = p.pid)
                     ->  Parallel Seq Scan on c  (cost=0.00..22348.67 rows=416667 width=8)
                     ->  Hash  (cost=30.50..30.50 rows=951 width=4)
                           ->  Seq Scan on p  (cost=0.00..30.50 rows=951 width=4)
                                 Filter: (v <= 0.95)
(10 rows)

Time: 0.844 ms