Skip to content

Commit 44ea445

Browse files
Nikita Glukhovl-wang
authored andcommitted
Implement read-only dot notation for jsonb
This patch introduces JSONB member access using dot notation, wildcard access, and array subscripting with slicing, aligning with the JSON simplified accessor specified in SQL:2023. Specifically, the following syntax enhancements are added: 1. Simple dot-notation access to JSONB object fields 2. Wildcard dot-notation access to JSONB object fields 2. Subscripting for index range access to JSONB array elements Examples: -- Setup create table t(x int, y jsonb); insert into t select 1, '{"a": 1, "b": 42}'::jsonb; insert into t select 1, '{"a": 2, "b": {"c": 42}}'::jsonb; insert into t select 1, '{"a": 3, "b": {"c": "42"}, "d":[11, 12]}'::jsonb; -- Existing syntax predates the SQL standard: select (t.y)->'b' from t; select (t.y)->'b'->'c' from t; select (t.y)->'d'->0 from t; -- JSON simplified accessor specified by the SQL standard: select (t.y).b from t; select (t.y).b.c from t; select (t.y).d[0] from t; The SQL standard states that simplified access is equivalent to: JSON_QUERY (VEP, 'lax $.JC' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) where: VEP = <value expression primary> JC = <JSON simplified accessor op chain> For example, the JSON_QUERY equivalents of the above queries are: select json_query(y, 'lax $.b' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) from t; select json_query(y, 'lax $.b.c' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) from t; select json_query(y, 'lax $.d[0]' WITH CONDITIONAL ARRAY WRAPPER NULL ON EMPTY NULL ON ERROR) from t; Implementation details: Extends the existing container subscripting interface to support container-specific information, specifically a JSONPath expression for jsonb. During query transformation, detects dot-notation, wildcard access, and sliced subscripting. If any of these accessors are present, constructs a JSONPath expression representing the access chain. During execution, if a JSONPath expression is present in JsonbSubWorkspace, executes it via JsonPathQuery(). Does not transform accessors directly into JSON_QUERY during transformation to preserve the original query structure for EXPLAIN and CREATE VIEW.
1 parent c0bcf7e commit 44ea445

File tree

4 files changed

+556
-26
lines changed

4 files changed

+556
-26
lines changed

src/backend/utils/adt/jsonbsubs.c

Lines changed: 276 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,30 @@
1515
#include "postgres.h"
1616

1717
#include "executor/execExpr.h"
18+
#include "nodes/makefuncs.h"
1819
#include "nodes/nodeFuncs.h"
1920
#include "nodes/subscripting.h"
2021
#include "parser/parse_coerce.h"
2122
#include "parser/parse_expr.h"
2223
#include "utils/builtins.h"
2324
#include "utils/jsonb.h"
25+
#include "utils/jsonpath.h"
2426

2527

26-
/* SubscriptingRefState.workspace for jsonb subscripting execution */
28+
/*
29+
* SubscriptingRefState.workspace for generic jsonb subscripting execution.
30+
*
31+
* Stores state for both jsonb simple subscripting and dot notation access.
32+
* Dot notation additionally uses `jsonpath` for JsonPath evaluation.
33+
*/
2734
typedef struct JsonbSubWorkspace
2835
{
2936
bool expectArray; /* jsonb root is expected to be an array */
3037
Oid *indexOid; /* OID of coerced subscript expression, could
3138
* be only integer or text */
3239
Datum *index; /* Subscript values in Datum format */
40+
JsonPath *jsonpath; /* JsonPath for dot notation execution via
41+
* JsonPathQuery() */
3342
} JsonbSubWorkspace;
3443

3544
static Oid
@@ -110,6 +119,228 @@ coerce_jsonpath_subscript(ParseState *pstate, Node *subExpr, Oid numtype)
110119
return subExpr;
111120
}
112121

122+
/*
123+
* During transformation, determine whether to build a JsonPath
124+
* for JsonPathQuery() execution.
125+
*
126+
* JsonPath is needed if the indirection list includes:
127+
* - String-based access (dot notation)
128+
* - Wildcard (`*`)
129+
* - Slice-based subscripting
130+
*
131+
* Otherwise, simple jsonb subscripting is sufficient.
132+
*/
133+
static bool
134+
jsonb_check_jsonpath_needed(List *indirection)
135+
{
136+
ListCell *lc;
137+
138+
foreach(lc, indirection)
139+
{
140+
Node *accessor = lfirst(lc);
141+
142+
if (IsA(accessor, String) ||
143+
IsA(accessor, A_Star))
144+
return true;
145+
else
146+
{
147+
A_Indices *ai;
148+
149+
Assert(IsA(accessor, A_Indices));
150+
ai = castNode(A_Indices, accessor);
151+
152+
if (!ai->uidx || ai->lidx)
153+
{
154+
Assert(ai->is_slice);
155+
return true;
156+
}
157+
}
158+
}
159+
160+
return false;
161+
}
162+
163+
/*
164+
* Helper functions for constructing JsonPath expressions.
165+
*
166+
* The following functions create various types of JsonPathParseItem nodes,
167+
* which are used to build JsonPath expressions for jsonb simplified accessor.
168+
*/
169+
170+
static JsonPathParseItem *
171+
make_jsonpath_item(JsonPathItemType type)
172+
{
173+
JsonPathParseItem *v = palloc(sizeof(*v));
174+
175+
v->type = type;
176+
v->next = NULL;
177+
178+
return v;
179+
}
180+
181+
static JsonPathParseItem *
182+
make_jsonpath_item_int(int32 val, List **exprs)
183+
{
184+
JsonPathParseItem *jpi = make_jsonpath_item(jpiNumeric);
185+
186+
jpi->value.numeric =
187+
DatumGetNumeric(DirectFunctionCall1(int4_numeric, Int32GetDatum(val)));
188+
189+
*exprs = lappend(*exprs, makeConst(INT4OID, -1, InvalidOid, 4,
190+
Int32GetDatum(val), false, true));
191+
192+
return jpi;
193+
}
194+
195+
/*
196+
* Convert an expression into a JsonPathParseItem.
197+
* If the expression is a constant integer, create a direct numeric item.
198+
* Otherwise, create a variable reference and add it to the expression list.
199+
*/
200+
static JsonPathParseItem *
201+
make_jsonpath_item_expr(ParseState *pstate, Node *expr, List **exprs)
202+
{
203+
Const *cnst;
204+
205+
expr = transformExpr(pstate, expr, pstate->p_expr_kind);
206+
207+
if (!IsA(expr, Const))
208+
ereport(ERROR,
209+
(errcode(ERRCODE_DATATYPE_MISMATCH),
210+
errmsg("jsonb simplified accessor supports subscripting in const int4, got type: %s",
211+
format_type_be(exprType(expr))),
212+
parser_errposition(pstate, exprLocation(expr))));
213+
214+
cnst = (Const *) expr;
215+
216+
if (cnst->consttype == INT4OID && !cnst->constisnull)
217+
{
218+
int32 val = DatumGetInt32(cnst->constvalue);
219+
220+
return make_jsonpath_item_int(val, exprs);
221+
}
222+
223+
ereport(ERROR,
224+
(errcode(ERRCODE_DATATYPE_MISMATCH),
225+
errmsg("jsonb simplified accessor supports subscripting in type: INT4, got type: %s",
226+
format_type_be(cnst->consttype)),
227+
parser_errposition(pstate, exprLocation(expr))));
228+
}
229+
230+
/*
231+
* jsonb_subscript_make_jsonpath
232+
*
233+
* Constructs a JsonPath expression from a list of indirections.
234+
* This function is used when jsonb subscripting involves dot notation,
235+
* wildcards (*), or slice-based subscripting, requiring JsonPath-based
236+
* evaluation.
237+
*
238+
* The function modifies the indirection list in place, removing processed
239+
* elements as it converts them into JsonPath components, as follows:
240+
* - String keys (dot notation) -> jpiKey items.
241+
* - Wildcard (*) -> jpiAnyKey item.
242+
* - Array indices and slices -> jpiIndexArray items.
243+
*
244+
* Parameters:
245+
* - pstate: Parse state context.
246+
* - indirection: List of subscripting expressions (modified in-place).
247+
* - uexprs: Upper-bound expressions extracted from subscripts.
248+
* - lexprs: Lower-bound expressions extracted from subscripts.
249+
* Returns:
250+
* - a Const node containing the transformed JsonPath expression.
251+
*/
252+
static Node *
253+
jsonb_subscript_make_jsonpath(ParseState *pstate, List **indirection,
254+
List **uexprs, List **lexprs)
255+
{
256+
JsonPathParseResult jpres;
257+
JsonPathParseItem *path = make_jsonpath_item(jpiRoot);
258+
ListCell *lc;
259+
Datum jsp;
260+
int pathlen = 0;
261+
262+
*uexprs = NIL;
263+
*lexprs = NIL;
264+
265+
jpres.expr = path;
266+
jpres.lax = true;
267+
268+
foreach(lc, *indirection)
269+
{
270+
Node *accessor = lfirst(lc);
271+
JsonPathParseItem *jpi;
272+
273+
if (IsA(accessor, String))
274+
{
275+
char *field = strVal(accessor);
276+
277+
jpi = make_jsonpath_item(jpiKey);
278+
jpi->value.string.val = field;
279+
jpi->value.string.len = strlen(field);
280+
281+
*uexprs = lappend(*uexprs, accessor);
282+
}
283+
else if (IsA(accessor, A_Star))
284+
{
285+
jpi = make_jsonpath_item(jpiAnyKey);
286+
287+
*uexprs = lappend(*uexprs, NULL);
288+
}
289+
else if (IsA(accessor, A_Indices))
290+
{
291+
A_Indices *ai = castNode(A_Indices, accessor);
292+
293+
jpi = make_jsonpath_item(jpiIndexArray);
294+
jpi->value.array.nelems = 1;
295+
jpi->value.array.elems = palloc(sizeof(jpi->value.array.elems[0]));
296+
297+
if (ai->is_slice)
298+
{
299+
while (list_length(*lexprs) < list_length(*uexprs))
300+
*lexprs = lappend(*lexprs, NULL);
301+
302+
if (ai->lidx)
303+
jpi->value.array.elems[0].from = make_jsonpath_item_expr(pstate, ai->lidx, lexprs);
304+
else
305+
jpi->value.array.elems[0].from = make_jsonpath_item_int(0, lexprs);
306+
307+
if (ai->uidx)
308+
jpi->value.array.elems[0].to = make_jsonpath_item_expr(pstate, ai->uidx, uexprs);
309+
else
310+
{
311+
jpi->value.array.elems[0].to = make_jsonpath_item(jpiLast);
312+
*uexprs = lappend(*uexprs, NULL);
313+
}
314+
}
315+
else
316+
{
317+
Assert(ai->uidx && !ai->lidx);
318+
jpi->value.array.elems[0].from = make_jsonpath_item_expr(pstate, ai->uidx, uexprs);
319+
jpi->value.array.elems[0].to = NULL;
320+
}
321+
}
322+
else
323+
break;
324+
325+
/* append path item */
326+
path->next = jpi;
327+
path = jpi;
328+
pathlen++;
329+
}
330+
331+
if (*lexprs)
332+
{
333+
while (list_length(*lexprs) < list_length(*uexprs))
334+
*lexprs = lappend(*lexprs, NULL);
335+
}
336+
337+
*indirection = list_delete_first_n(*indirection, pathlen);
338+
339+
jsp = jsonPathFromParseResult(&jpres, 0, NULL);
340+
341+
return (Node *) makeConst(JSONPATHOID, -1, InvalidOid, -1, jsp, false, false);
342+
}
343+
113344
/*
114345
* Finish parse analysis of a SubscriptingRef expression for a jsonb.
115346
*
@@ -126,19 +357,32 @@ jsonb_subscript_transform(SubscriptingRef *sbsref,
126357
List *upperIndexpr = NIL;
127358
ListCell *idx;
128359

360+
/* Determine the result type of the subscripting operation; always jsonb */
361+
sbsref->refrestype = JSONBOID;
362+
sbsref->reftypmod = -1;
363+
364+
if (jsonb_check_jsonpath_needed(*indirection))
365+
{
366+
sbsref->refjsonbpath =
367+
jsonb_subscript_make_jsonpath(pstate, indirection,
368+
&sbsref->refupperindexpr,
369+
&sbsref->reflowerindexpr);
370+
return;
371+
}
372+
129373
/*
130374
* Transform and convert the subscript expressions. Jsonb subscripting
131375
* does not support slices, look only and the upper index.
132376
*/
133377
foreach(idx, *indirection)
134378
{
379+
Node *i = lfirst(idx);
135380
A_Indices *ai;
136381
Node *subExpr;
137382

138-
if (!IsA(lfirst(idx), A_Indices))
139-
break;
383+
Assert(IsA(i, A_Indices));
140384

141-
ai = lfirst_node(A_Indices, idx);
385+
ai = castNode(A_Indices, i);
142386

143387
if (isSlice)
144388
{
@@ -175,10 +419,6 @@ jsonb_subscript_transform(SubscriptingRef *sbsref,
175419
sbsref->refupperindexpr = upperIndexpr;
176420
sbsref->reflowerindexpr = NIL;
177421

178-
/* Determine the result type of the subscripting operation; always jsonb */
179-
sbsref->refrestype = JSONBOID;
180-
sbsref->reftypmod = -1;
181-
182422
/* Remove processed elements */
183423
if (upperIndexpr)
184424
*indirection = list_delete_first_n(*indirection, list_length(upperIndexpr));
@@ -233,7 +473,7 @@ jsonb_subscript_check_subscripts(ExprState *state,
233473
* For jsonb fetch and assign functions we need to provide path in
234474
* text format. Convert if it's not already text.
235475
*/
236-
if (workspace->indexOid[i] == INT4OID)
476+
if (!workspace->jsonpath && workspace->indexOid[i] == INT4OID)
237477
{
238478
Datum datum = sbsrefstate->upperindex[i];
239479
char *cs = DatumGetCString(DirectFunctionCall1(int4out, datum));
@@ -261,17 +501,32 @@ jsonb_subscript_fetch(ExprState *state,
261501
{
262502
SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
263503
JsonbSubWorkspace *workspace = (JsonbSubWorkspace *) sbsrefstate->workspace;
264-
Jsonb *jsonbSource;
265504

266505
/* Should not get here if source jsonb (or any subscript) is null */
267506
Assert(!(*op->resnull));
268507

269-
jsonbSource = DatumGetJsonbP(*op->resvalue);
270-
*op->resvalue = jsonb_get_element(jsonbSource,
271-
workspace->index,
272-
sbsrefstate->numupper,
273-
op->resnull,
274-
false);
508+
if (workspace->jsonpath)
509+
{
510+
bool empty = false;
511+
bool error = false;
512+
513+
*op->resvalue = JsonPathQuery(*op->resvalue, workspace->jsonpath,
514+
JSW_CONDITIONAL,
515+
&empty, &error, NULL,
516+
NULL);
517+
518+
*op->resnull = empty || error;
519+
}
520+
else
521+
{
522+
Jsonb *jsonbSource = DatumGetJsonbP(*op->resvalue);
523+
524+
*op->resvalue = jsonb_get_element(jsonbSource,
525+
workspace->index,
526+
sbsrefstate->numupper,
527+
op->resnull,
528+
false);
529+
}
275530
}
276531

277532
/*
@@ -381,13 +636,17 @@ jsonb_exec_setup(const SubscriptingRef *sbsref,
381636
ListCell *lc;
382637
int nupper = sbsref->refupperindexpr->length;
383638
char *ptr;
639+
bool useJsonpath = sbsref->refjsonbpath != NULL;
384640

385641
/* Allocate type-specific workspace with space for per-subscript data */
386642
workspace = palloc0(MAXALIGN(sizeof(JsonbSubWorkspace)) +
387643
nupper * (sizeof(Datum) + sizeof(Oid)));
388644
workspace->expectArray = false;
389645
ptr = ((char *) workspace) + MAXALIGN(sizeof(JsonbSubWorkspace));
390646

647+
if (useJsonpath)
648+
workspace->jsonpath = DatumGetJsonPathP(castNode(Const, sbsref->refjsonbpath)->constvalue);
649+
391650
/*
392651
* This coding assumes sizeof(Datum) >= sizeof(Oid), else we might
393652
* misalign the indexOid pointer
@@ -404,7 +663,7 @@ jsonb_exec_setup(const SubscriptingRef *sbsref,
404663
Node *expr = lfirst(lc);
405664
int i = foreach_current_index(lc);
406665

407-
workspace->indexOid[i] = exprType(expr);
666+
workspace->indexOid[i] = jsonb_subscript_type(expr);
408667
}
409668

410669
/*

src/include/nodes/primnodes.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,13 @@ typedef struct SubscriptingRef
718718
Expr *refexpr;
719719
/* expression for the source value, or NULL if fetch */
720720
Expr *refassgnexpr;
721+
722+
/*
723+
* container-specific extra information, currently used only by jsonb.
724+
* stores a JsonPath expression when jsonb dot notation is used. NULL for
725+
* simple subscripting.
726+
*/
727+
Node *refjsonbpath;
721728
} SubscriptingRef;
722729

723730
/*

0 commit comments

Comments
 (0)