git.postgresql.org Git - postgresql.git/commitdiff

git projects / postgresql.git / commitdiff

Consider the "LIMIT 1" optimization with parallel DISTINCT

author David Rowley <drowley@postgresql.org>

2024-01-31 04:22:02 +0000 (17:22 +1300)

committer David Rowley <drowley@postgresql.org>

2024-01-31 04:22:02 +0000 (17:22 +1300)

Similar to what was done in 5543677ec for non-parallel DISTINCT, apply
the "LIMIT 1" optimization to the partial paths as well when
distinct_pathkeys is empty.

This can be faster than the non-parallel version when the first row
matching the WHERE clause of the query takes a while to find. Parallel
workers could speed that process up considerably.

Author: Richard Guo
Reviewed-by: David Rowley
Discussion: https://postgr.es/m/CAMbWs49JC0qvfUbzs-TVzgMpSSBiMJ_6sN=BaA9iohBgYkr=LA@mail.gmail.com

src/backend/optimizer/plan/planner.c patch | blob | blame | history

src/test/regress/expected/select_distinct.out patch | blob | blame | history

src/test/regress/sql/select_distinct.sql patch | blob | blame | history

diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 01fa45b9255150153dd7ac8782f78f3ee36c3c0f..342f5ad8d0a11f6d00a49e7b8c2a1c2cdafcc987 100644 (file)

--- a/src/backend/optimizer/plan/planner.c

+++ b/src/backend/optimizer/plan/planner.c

@@ -4737,11 +4737,45 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,

-1.0);

}

- add_partial_path(partial_distinct_rel, (Path *)

- create_upper_unique_path(root, partial_distinct_rel,

- sorted_path,

- list_length(root->distinct_pathkeys),

- numDistinctRows));

+ /*

+ * An empty distinct_pathkeys means all tuples have the same value

+ * for the DISTINCT clause. See create_final_distinct_paths()

+ */

+ if (root->distinct_pathkeys == NIL)

+ {

+ Node *limitCount;

+ limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,

+ sizeof(int64),

+ Int64GetDatum(1), false,

+ FLOAT8PASSBYVAL);

+ /*

+ * Apply a LimitPath onto the partial path to restrict the

+ * tuples from each worker to 1. create_final_distinct_paths

+ * will need to apply an additional LimitPath to restrict this

+ * to a single row after the Gather node. If the query

+ * already has a LIMIT clause, then we could end up with three

+ * Limit nodes in the final plan. Consolidating the top two

+ * of these could be done, but does not seem worth troubling

+ * over.

+ */

+ add_partial_path(partial_distinct_rel, (Path *)

+ create_limit_path(root, partial_distinct_rel,

+ sorted_path,

+ NULL,

+ limitCount,

+ LIMIT_OPTION_COUNT,

+ 0, 1));

+ }

+ else

+ {

+ add_partial_path(partial_distinct_rel, (Path *)

+ create_upper_unique_path(root, partial_distinct_rel,

+ sorted_path,

+ list_length(root->distinct_pathkeys),

+ numDistinctRows));

+ }

}

diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out

index 9d44ea8056d1949b66bd591ac2633f3d4e9e75f1..1f72756ccb4c60e0873da354966a792097acb12e 100644 (file)

--- a/src/test/regress/expected/select_distinct.out

+++ b/src/test/regress/expected/select_distinct.out

@@ -348,6 +348,26 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;

0 | 1 | 2 | 3

(1 row)

+SET parallel_setup_cost=0;

+SET min_parallel_table_scan_size=0;

+SET max_parallel_workers_per_gather=2;

+-- Ensure we get a plan with a Limit 1 in both partial distinct and final

+-- distinct

+EXPLAIN (COSTS OFF)

+SELECT DISTINCT four FROM tenk1 WHERE four = 10;

+ QUERY PLAN

+----------------------------------------------

+ Limit

+ -> Gather

+ Workers Planned: 2

+ -> Limit

+ -> Parallel Seq Scan on tenk1

+ Filter: (four = 10)

+(6 rows)

+RESET max_parallel_workers_per_gather;

+RESET min_parallel_table_scan_size;

+RESET parallel_setup_cost;

-- Also, some tests of IS DISTINCT FROM, which doesn't quite deserve its

-- very own regression file.

diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql

index 1643526d991807b38c7d651d3a4390a784f02edb..da92c197aba5c9102ef57ec3f47045593526820c 100644 (file)

--- a/src/test/regress/sql/select_distinct.sql

+++ b/src/test/regress/sql/select_distinct.sql

@@ -180,6 +180,19 @@ SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;

-- Ensure we only get 1 row

SELECT DISTINCT four,1,2,3 FROM tenk1 WHERE four = 0;