Skip to content

Commit e2debb6

Browse files
author
Richard Guo
committed
Reduce "Var IS [NOT] NULL" quals during constant folding
In commit b262ad4, we introduced an optimization that reduces an IS [NOT] NULL qual on a NOT NULL column to constant true or constant false, provided we can prove that the input expression of the NullTest is not nullable by any outer joins or grouping sets. This deduction happens quite late in the planner, during the distribution of quals to rels in query_planner. However, this approach has some drawbacks: we can't perform any further folding with the constant, and it turns out to be prone to bugs. Ideally, this deduction should happen during constant folding. However, the per-relation information about which columns are defined as NOT NULL is not available at that point. This information is currently collected from catalogs when building RelOptInfos for base or "other" relations. This patch moves the collection of NOT NULL attribute information for relations before pull_up_sublinks, storing it in a hash table keyed by relation OID. It then uses this information to perform the NullTest deduction for Vars during constant folding. This also makes it possible to leverage this information to pull up NOT IN subqueries. Note that this patch does not get rid of restriction_is_always_true and restriction_is_always_false. Removing them would prevent us from reducing some IS [NOT] NULL quals that we were previously able to reduce, because (a) the self-join elimination may introduce new IS NOT NULL quals after constant folding, and (b) if some outer joins are converted to inner joins, previously irreducible NullTest quals may become reducible. Author: Richard Guo <guofenglinux@gmail.com> Reviewed-by: Robert Haas <robertmhaas@gmail.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/CAMbWs4-bFJ1At4btk5wqbezdu8PLtQ3zv-aiaY3ry9Ymm=jgFQ@mail.gmail.com
1 parent 904f6a5 commit e2debb6

File tree

17 files changed

+336
-81
lines changed

17 files changed

+336
-81
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -710,12 +710,12 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = -c1; -- Op
710710
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" = (- "C 1")))
711711
(3 rows)
712712

713-
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c1 IS NOT NULL) IS DISTINCT FROM (c1 IS NOT NULL); -- DistinctExpr
714-
QUERY PLAN
715-
--------------------------------------------------------------------------------------------------------------------------------------------
713+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL); -- DistinctExpr
714+
QUERY PLAN
715+
--------------------------------------------------------------------------------------------------------------------------------------
716716
Foreign Scan on public.ft1 t1
717717
Output: c1, c2, c3, c4, c5, c6, c7, c8
718-
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE ((("C 1" IS NOT NULL) IS DISTINCT FROM ("C 1" IS NOT NULL)))
718+
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (((c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL)))
719719
(3 rows)
720720

721721
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, c1 + 0]); -- ScalarArrayOpExpr

contrib/postgres_fdw/sql/postgres_fdw.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS NULL; -- Nu
352352
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c3 IS NOT NULL; -- NullTest
353353
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE round(abs(c1), 0) = 1; -- FuncExpr
354354
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = -c1; -- OpExpr(l)
355-
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c1 IS NOT NULL) IS DISTINCT FROM (c1 IS NOT NULL); -- DistinctExpr
355+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE (c3 IS NOT NULL) IS DISTINCT FROM (c3 IS NOT NULL); -- DistinctExpr
356356
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = ANY(ARRAY[c2, 1, c1 + 0]); -- ScalarArrayOpExpr
357357
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c1 = (ARRAY[c1,c2,3])[1]; -- SubscriptingRef
358358
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft1 t1 WHERE c6 = E'foo''s\\bar'; -- check special chars

src/backend/optimizer/plan/initsplan.c

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3048,36 +3048,16 @@ add_base_clause_to_rel(PlannerInfo *root, Index relid,
30483048
* expr_is_nonnullable
30493049
* Check to see if the Expr cannot be NULL
30503050
*
3051-
* If the Expr is a simple Var that is defined NOT NULL and meanwhile is not
3052-
* nulled by any outer joins, then we can know that it cannot be NULL.
3051+
* Currently we only support simple Vars.
30533052
*/
30543053
static bool
30553054
expr_is_nonnullable(PlannerInfo *root, Expr *expr)
30563055
{
3057-
RelOptInfo *rel;
3058-
Var *var;
3059-
30603056
/* For now only check simple Vars */
30613057
if (!IsA(expr, Var))
30623058
return false;
30633059

3064-
var = (Var *) expr;
3065-
3066-
/* could the Var be nulled by any outer joins? */
3067-
if (!bms_is_empty(var->varnullingrels))
3068-
return false;
3069-
3070-
/* system columns cannot be NULL */
3071-
if (var->varattno < 0)
3072-
return true;
3073-
3074-
/* is the column defined NOT NULL? */
3075-
rel = find_base_rel(root, var->varno);
3076-
if (var->varattno > 0 &&
3077-
bms_is_member(var->varattno, rel->notnullattnums))
3078-
return true;
3079-
3080-
return false;
3060+
return var_is_nonnullable(root, (Var *) expr, true);
30813061
}
30823062

30833063
/*

src/backend/optimizer/plan/planner.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
342342
glob->transientPlan = false;
343343
glob->dependsOnRole = false;
344344
glob->partition_directory = NULL;
345+
glob->rel_notnullatts_hash = NULL;
345346

346347
/*
347348
* Assess whether it's feasible to use parallel mode for this query. We
@@ -723,11 +724,12 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
723724
/*
724725
* Scan the rangetable for relation RTEs and retrieve the necessary
725726
* catalog information for each relation. Using this information, clear
726-
* the inh flag for any relation that has no children, and expand virtual
727-
* generated columns for any relation that contains them. Note that this
728-
* step does not descend into sublinks and subqueries; if we pull up any
729-
* sublinks or subqueries below, their relation RTEs are processed just
730-
* before pulling them up.
727+
* the inh flag for any relation that has no children, collect not-null
728+
* attribute numbers for any relation that has column not-null
729+
* constraints, and expand virtual generated columns for any relation that
730+
* contains them. Note that this step does not descend into sublinks and
731+
* subqueries; if we pull up any sublinks or subqueries below, their
732+
* relation RTEs are processed just before pulling them up.
731733
*/
732734
parse = root->parse = preprocess_relation_rtes(root);
733735

src/backend/optimizer/plan/subselect.c

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1519,8 +1519,10 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
15191519
/*
15201520
* Scan the rangetable for relation RTEs and retrieve the necessary
15211521
* catalog information for each relation. Using this information, clear
1522-
* the inh flag for any relation that has no children, and expand virtual
1523-
* generated columns for any relation that contains them.
1522+
* the inh flag for any relation that has no children, collect not-null
1523+
* attribute numbers for any relation that has column not-null
1524+
* constraints, and expand virtual generated columns for any relation that
1525+
* contains them.
15241526
*
15251527
* Note: we construct up an entirely dummy PlannerInfo for use here. This
15261528
* is fine because only the "glob" and "parse" links will be used in this
@@ -1760,6 +1762,7 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
17601762
Node **testexpr, List **paramIds)
17611763
{
17621764
Node *whereClause;
1765+
PlannerInfo subroot;
17631766
List *leftargs,
17641767
*rightargs,
17651768
*opids,
@@ -1819,12 +1822,15 @@ convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect,
18191822
* parent aliases were flattened already, and we're not going to pull any
18201823
* child Vars (of any description) into the parent.
18211824
*
1822-
* Note: passing the parent's root to eval_const_expressions is
1823-
* technically wrong, but we can get away with it since only the
1824-
* boundParams (if any) are used, and those would be the same in a
1825-
* subroot.
1825+
* Note: we construct up an entirely dummy PlannerInfo to pass to
1826+
* eval_const_expressions. This is fine because only the "glob" and
1827+
* "parse" links are used by eval_const_expressions.
18261828
*/
1827-
whereClause = eval_const_expressions(root, whereClause);
1829+
MemSet(&subroot, 0, sizeof(subroot));
1830+
subroot.type = T_PlannerInfo;
1831+
subroot.glob = root->glob;
1832+
subroot.parse = subselect;
1833+
whereClause = eval_const_expressions(&subroot, whereClause);
18281834
whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false);
18291835
whereClause = (Node *) make_ands_implicit((Expr *) whereClause);
18301836

src/backend/optimizer/prep/prepjointree.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "optimizer/clauses.h"
3737
#include "optimizer/optimizer.h"
3838
#include "optimizer/placeholder.h"
39+
#include "optimizer/plancat.h"
3940
#include "optimizer/prep.h"
4041
#include "optimizer/subselect.h"
4142
#include "optimizer/tlist.h"
@@ -401,8 +402,9 @@ transform_MERGE_to_join(Query *parse)
401402
*
402403
* This scans the rangetable for relation RTEs and retrieves the necessary
403404
* catalog information for each relation. Using this information, it clears
404-
* the inh flag for any relation that has no children, and expands virtual
405-
* generated columns for any relation that contains them.
405+
* the inh flag for any relation that has no children, collects not-null
406+
* attribute numbers for any relation that has column not-null constraints, and
407+
* expands virtual generated columns for any relation that contains them.
406408
*
407409
* Note that expanding virtual generated columns may cause the query tree to
408410
* have new copies of rangetable entries. Therefore, we have to use list_nth
@@ -447,6 +449,13 @@ preprocess_relation_rtes(PlannerInfo *root)
447449
if (rte->inh)
448450
rte->inh = relation->rd_rel->relhassubclass;
449451

452+
/*
453+
* Check to see if the relation has any column not-null constraints;
454+
* if so, retrieve the constraint information and store it in a
455+
* relation OID based hash table.
456+
*/
457+
get_relation_notnullatts(root, relation);
458+
450459
/*
451460
* Check to see if the relation has any virtual generated columns; if
452461
* so, replace all Var nodes in the query that reference these columns
@@ -1384,8 +1393,10 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
13841393
/*
13851394
* Scan the rangetable for relation RTEs and retrieve the necessary
13861395
* catalog information for each relation. Using this information, clear
1387-
* the inh flag for any relation that has no children, and expand virtual
1388-
* generated columns for any relation that contains them.
1396+
* the inh flag for any relation that has no children, collect not-null
1397+
* attribute numbers for any relation that has column not-null
1398+
* constraints, and expand virtual generated columns for any relation that
1399+
* contains them.
13891400
*/
13901401
subquery = subroot->parse = preprocess_relation_rtes(subroot);
13911402

src/backend/optimizer/util/clauses.c

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "postgres.h"
2121

2222
#include "access/htup_details.h"
23+
#include "catalog/pg_class.h"
2324
#include "catalog/pg_language.h"
2425
#include "catalog/pg_operator.h"
2526
#include "catalog/pg_proc.h"
@@ -36,13 +37,15 @@
3637
#include "optimizer/clauses.h"
3738
#include "optimizer/cost.h"
3839
#include "optimizer/optimizer.h"
40+
#include "optimizer/pathnode.h"
3941
#include "optimizer/plancat.h"
4042
#include "optimizer/planmain.h"
4143
#include "parser/analyze.h"
4244
#include "parser/parse_coerce.h"
4345
#include "parser/parse_collate.h"
4446
#include "parser/parse_func.h"
4547
#include "parser/parse_oper.h"
48+
#include "parser/parsetree.h"
4649
#include "rewrite/rewriteHandler.h"
4750
#include "rewrite/rewriteManip.h"
4851
#include "tcop/tcopprot.h"
@@ -2242,7 +2245,8 @@ rowtype_field_matches(Oid rowtypeid, int fieldnum,
22422245
* only operators and functions that are reasonable to try to execute.
22432246
*
22442247
* NOTE: "root" can be passed as NULL if the caller never wants to do any
2245-
* Param substitutions nor receive info about inlined functions.
2248+
* Param substitutions nor receive info about inlined functions nor reduce
2249+
* NullTest for Vars to constant true or constant false.
22462250
*
22472251
* NOTE: the planner assumes that this will always flatten nested AND and
22482252
* OR clauses into N-argument form. See comments in prepqual.c.
@@ -3544,6 +3548,31 @@ eval_const_expressions_mutator(Node *node,
35443548

35453549
return makeBoolConst(result, false);
35463550
}
3551+
if (!ntest->argisrow && arg && IsA(arg, Var) && context->root)
3552+
{
3553+
Var *varg = (Var *) arg;
3554+
bool result;
3555+
3556+
if (var_is_nonnullable(context->root, varg, false))
3557+
{
3558+
switch (ntest->nulltesttype)
3559+
{
3560+
case IS_NULL:
3561+
result = false;
3562+
break;
3563+
case IS_NOT_NULL:
3564+
result = true;
3565+
break;
3566+
default:
3567+
elog(ERROR, "unrecognized nulltesttype: %d",
3568+
(int) ntest->nulltesttype);
3569+
result = false; /* keep compiler quiet */
3570+
break;
3571+
}
3572+
3573+
return makeBoolConst(result, false);
3574+
}
3575+
}
35473576

35483577
newntest = makeNode(NullTest);
35493578
newntest->arg = (Expr *) arg;
@@ -4162,6 +4191,67 @@ simplify_function(Oid funcid, Oid result_type, int32 result_typmod,
41624191
return newexpr;
41634192
}
41644193

4194+
/*
4195+
* var_is_nonnullable: check to see if the Var cannot be NULL
4196+
*
4197+
* If the Var is defined NOT NULL and meanwhile is not nulled by any outer
4198+
* joins or grouping sets, then we can know that it cannot be NULL.
4199+
*
4200+
* use_rel_info indicates whether the corresponding RelOptInfo is available for
4201+
* use.
4202+
*/
4203+
bool
4204+
var_is_nonnullable(PlannerInfo *root, Var *var, bool use_rel_info)
4205+
{
4206+
Relids notnullattnums = NULL;
4207+
4208+
Assert(IsA(var, Var));
4209+
4210+
/* skip upper-level Vars */
4211+
if (var->varlevelsup != 0)
4212+
return false;
4213+
4214+
/* could the Var be nulled by any outer joins or grouping sets? */
4215+
if (!bms_is_empty(var->varnullingrels))
4216+
return false;
4217+
4218+
/* system columns cannot be NULL */
4219+
if (var->varattno < 0)
4220+
return true;
4221+
4222+
/*
4223+
* Check if the Var is defined as NOT NULL. We retrieve the column NOT
4224+
* NULL constraint information from the corresponding RelOptInfo if it is
4225+
* available; otherwise, we search the hash table for this information.
4226+
*/
4227+
if (use_rel_info)
4228+
{
4229+
RelOptInfo *rel = find_base_rel(root, var->varno);
4230+
4231+
notnullattnums = rel->notnullattnums;
4232+
}
4233+
else
4234+
{
4235+
RangeTblEntry *rte = planner_rt_fetch(var->varno, root);
4236+
4237+
/*
4238+
* We must skip inheritance parent tables, as some child tables may
4239+
* have a NOT NULL constraint for a column while others may not. This
4240+
* cannot happen with partitioned tables, though.
4241+
*/
4242+
if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
4243+
return false;
4244+
4245+
notnullattnums = find_relation_notnullatts(root, rte->relid);
4246+
}
4247+
4248+
if (var->varattno > 0 &&
4249+
bms_is_member(var->varattno, notnullattnums))
4250+
return true;
4251+
4252+
return false;
4253+
}
4254+
41654255
/*
41664256
* expand_function_arguments: convert named-notation args to positional args
41674257
* and/or insert default args, as needed

src/backend/optimizer/util/inherit.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -466,8 +466,7 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
466466
Index *childRTindex_p)
467467
{
468468
Query *parse = root->parse;
469-
Oid parentOID PG_USED_FOR_ASSERTS_ONLY =
470-
RelationGetRelid(parentrel);
469+
Oid parentOID = RelationGetRelid(parentrel);
471470
Oid childOID = RelationGetRelid(childrel);
472471
RangeTblEntry *childrte;
473472
Index childRTindex;
@@ -513,6 +512,13 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte,
513512
*childrte_p = childrte;
514513
*childRTindex_p = childRTindex;
515514

515+
/*
516+
* Retrieve column not-null constraint information for the child relation
517+
* if its relation OID is different from the parent's.
518+
*/
519+
if (childOID != parentOID)
520+
get_relation_notnullatts(root, childrel);
521+
516522
/*
517523
* Build an AppendRelInfo struct for each parent/child pair.
518524
*/

0 commit comments

Comments
 (0)