Skip to content

Commit c91ed7b

Browse files
committed
Added iterative search for IVFFlat [skip ci]
1 parent 48fe70c commit c91ed7b

File tree

6 files changed

+258
-27
lines changed

6 files changed

+258
-27
lines changed

src/ivfflat.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,16 @@
1717
#endif
1818

1919
int ivfflat_probes;
20+
int ivfflat_iterative_search;
21+
int ivfflat_iterative_search_max_probes;
2022
static relopt_kind ivfflat_relopt_kind;
2123

24+
static const struct config_enum_entry ivfflat_iterative_search_options[] = {
25+
{"off", IVFFLAT_ITERATIVE_SEARCH_OFF, false},
26+
{"on", IVFFLAT_ITERATIVE_SEARCH_RELAXED, false},
27+
{NULL, 0, false}
28+
};
29+
2230
/*
2331
* Initialize index options and variables
2432
*/
@@ -33,6 +41,14 @@ IvfflatInit(void)
3341
"Valid range is 1..lists.", &ivfflat_probes,
3442
IVFFLAT_DEFAULT_PROBES, IVFFLAT_MIN_LISTS, IVFFLAT_MAX_LISTS, PGC_USERSET, 0, NULL, NULL, NULL);
3543

44+
DefineCustomEnumVariable("ivfflat.iterative_search", "Sets whether to use iterative search",
45+
NULL, &ivfflat_iterative_search,
46+
IVFFLAT_ITERATIVE_SEARCH_OFF, ivfflat_iterative_search_options, PGC_USERSET, 0, NULL, NULL, NULL);
47+
48+
DefineCustomIntVariable("ivfflat.iterative_search_max_probes", "Sets the max number of probes for iterative search",
49+
"Zero sets to the number of lists", &ivfflat_iterative_search_max_probes,
50+
0, 0, IVFFLAT_MAX_LISTS, PGC_USERSET, 0, NULL, NULL, NULL);
51+
3652
MarkGUCPrefixReserved("ivfflat");
3753
}
3854

src/ivfflat.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,14 @@
8080

8181
/* Variables */
8282
extern int ivfflat_probes;
83+
extern int ivfflat_iterative_search;
84+
extern int ivfflat_iterative_search_max_probes;
85+
86+
typedef enum IvfflatIterativeSearchType
87+
{
88+
IVFFLAT_ITERATIVE_SEARCH_OFF,
89+
IVFFLAT_ITERATIVE_SEARCH_RELAXED
90+
} IvfflatIterativeSearchType;
8391

8492
typedef struct VectorArrayData
8593
{
@@ -248,8 +256,10 @@ typedef struct IvfflatScanOpaqueData
248256
{
249257
const IvfflatTypeInfo *typeInfo;
250258
int probes;
259+
int maxProbes;
251260
int dimensions;
252261
bool first;
262+
Datum value;
253263

254264
/* Sorting */
255265
Tuplesortstate *sortstate;
@@ -266,6 +276,8 @@ typedef struct IvfflatScanOpaqueData
266276

267277
/* Lists */
268278
pairingheap *listQueue;
279+
BlockNumber *listPages;
280+
int listIndex;
269281
IvfflatScanList lists[FLEXIBLE_ARRAY_MEMBER]; /* must come last */
270282
} IvfflatScanOpaqueData;
271283

src/ivfscan.c

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ GetScanLists(IndexScanDesc scan, Datum value)
6565
/* Use procinfo from the index instead of scan key for performance */
6666
distance = DatumGetFloat8(so->distfunc(so->procinfo, so->collation, PointerGetDatum(&list->center), value));
6767

68-
if (listCount < so->probes)
68+
if (listCount < so->maxProbes)
6969
{
7070
IvfflatScanList *scanlist;
7171

@@ -78,7 +78,7 @@ GetScanLists(IndexScanDesc scan, Datum value)
7878
pairingheap_add(so->listQueue, &scanlist->ph_node);
7979

8080
/* Calculate max distance */
81-
if (listCount == so->probes)
81+
if (listCount == so->maxProbes)
8282
maxDistance = GetScanList(pairingheap_first(so->listQueue))->distance;
8383
}
8484
else if (distance < maxDistance)
@@ -102,6 +102,11 @@ GetScanLists(IndexScanDesc scan, Datum value)
102102

103103
UnlockReleaseBuffer(cbuf);
104104
}
105+
106+
for (int i = listCount - 1; i >= 0; i--)
107+
so->listPages[i] = GetScanList(pairingheap_remove_first(so->listQueue))->startPage;
108+
109+
Assert(pairingheap_is_empty(so->listQueue));
105110
}
106111

107112
/*
@@ -114,11 +119,14 @@ GetScanItems(IndexScanDesc scan, Datum value)
114119
TupleDesc tupdesc = RelationGetDescr(scan->indexRelation);
115120
double tuples = 0;
116121
TupleTableSlot *slot = so->vslot;
122+
int batchProbes = 0;
123+
124+
tuplesort_reset(so->sortstate);
117125

118126
/* Search closest probes lists */
119-
while (!pairingheap_is_empty(so->listQueue))
127+
while (so->listIndex < so->maxProbes && (++batchProbes) <= so->probes)
120128
{
121-
BlockNumber searchPage = GetScanList(pairingheap_remove_first(so->listQueue))->startPage;
129+
BlockNumber searchPage = so->listPages[so->listIndex++];
122130

123131
/* Search all entry pages for list */
124132
while (BlockNumberIsValid(searchPage))
@@ -166,13 +174,17 @@ GetScanItems(IndexScanDesc scan, Datum value)
166174
}
167175
}
168176

169-
if (tuples < 100)
177+
if (tuples < 100 && ivfflat_iterative_search == IVFFLAT_ITERATIVE_SEARCH_OFF)
170178
ereport(DEBUG1,
171179
(errmsg("index scan found few tuples"),
172180
errdetail("Index may have been created with little data."),
173181
errhint("Recreate the index and possibly decrease lists.")));
174182

175183
tuplesort_performsort(so->sortstate);
184+
185+
#if defined(IVFFLAT_MEMORY)
186+
elog(INFO, "memory: %zu MB", MemoryContextMemAllocated(CurrentMemoryContext, true) / (1024 * 1024));
187+
#endif
176188
}
177189

178190
/*
@@ -240,6 +252,7 @@ ivfflatbeginscan(Relation index, int nkeys, int norderbys)
240252
int lists;
241253
int dimensions;
242254
int probes = ivfflat_probes;
255+
int maxProbes;
243256

244257
scan = RelationGetIndexScan(index, nkeys, norderbys);
245258

@@ -249,10 +262,21 @@ ivfflatbeginscan(Relation index, int nkeys, int norderbys)
249262
if (probes > lists)
250263
probes = lists;
251264

252-
so = (IvfflatScanOpaque) palloc(offsetof(IvfflatScanOpaqueData, lists) + probes * sizeof(IvfflatScanList));
265+
if (ivfflat_iterative_search != IVFFLAT_ITERATIVE_SEARCH_OFF)
266+
{
267+
if (ivfflat_iterative_search_max_probes == 0)
268+
maxProbes = lists;
269+
else
270+
maxProbes = Min(ivfflat_iterative_search_max_probes, lists);
271+
}
272+
else
273+
maxProbes = probes;
274+
275+
so = (IvfflatScanOpaque) palloc(offsetof(IvfflatScanOpaqueData, lists) + maxProbes * sizeof(IvfflatScanList));
253276
so->typeInfo = IvfflatGetTypeInfo(index);
254277
so->first = true;
255278
so->probes = probes;
279+
so->maxProbes = maxProbes;
256280
so->dimensions = dimensions;
257281

258282
/* Set support functions */
@@ -280,6 +304,8 @@ ivfflatbeginscan(Relation index, int nkeys, int norderbys)
280304
so->bas = GetAccessStrategy(BAS_BULKREAD);
281305

282306
so->listQueue = pairingheap_allocate(CompareLists, scan);
307+
so->listPages = palloc(maxProbes * sizeof(BlockNumber));
308+
so->listIndex = 0;
283309

284310
scan->opaque = so;
285311

@@ -294,11 +320,9 @@ ivfflatrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int
294320
{
295321
IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
296322

297-
if (!so->first)
298-
tuplesort_reset(so->sortstate);
299-
300323
so->first = true;
301324
pairingheap_reset(so->listQueue);
325+
so->listIndex = 0;
302326

303327
if (keys && scan->numberOfKeys > 0)
304328
memmove(scan->keyData, keys, scan->numberOfKeys * sizeof(ScanKeyData));
@@ -314,6 +338,8 @@ bool
314338
ivfflatgettuple(IndexScanDesc scan, ScanDirection dir)
315339
{
316340
IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
341+
ItemPointer heaptid;
342+
bool isnull;
317343

318344
/*
319345
* Index can be used to scan backward, but Postgres doesn't support
@@ -341,28 +367,25 @@ ivfflatgettuple(IndexScanDesc scan, ScanDirection dir)
341367
IvfflatBench("GetScanLists", GetScanLists(scan, value));
342368
IvfflatBench("GetScanItems", GetScanItems(scan, value));
343369
so->first = false;
370+
so->value = value;
344371

345-
#if defined(IVFFLAT_MEMORY)
346-
elog(INFO, "memory: %zu MB", MemoryContextMemAllocated(CurrentMemoryContext, true) / (1024 * 1024));
347-
#endif
348-
349-
/* Clean up if we allocated a new value */
350-
if (value != scan->orderByData->sk_argument)
351-
pfree(DatumGetPointer(value));
372+
/* TODO clean up if we allocated a new value */
352373
}
353374

354-
if (tuplesort_gettupleslot(so->sortstate, true, false, so->mslot, NULL))
375+
while (!tuplesort_gettupleslot(so->sortstate, true, false, so->mslot, NULL))
355376
{
356-
bool isnull;
357-
ItemPointer heaptid = (ItemPointer) DatumGetPointer(slot_getattr(so->mslot, 2, &isnull));
377+
if (so->listIndex == so->maxProbes)
378+
return false;
358379

359-
scan->xs_heaptid = *heaptid;
360-
scan->xs_recheck = false;
361-
scan->xs_recheckorderby = false;
362-
return true;
380+
IvfflatBench("GetScanItems", GetScanItems(scan, so->value));
363381
}
364382

365-
return false;
383+
heaptid = (ItemPointer) DatumGetPointer(slot_getattr(so->mslot, 2, &isnull));
384+
385+
scan->xs_heaptid = *heaptid;
386+
scan->xs_recheck = false;
387+
scan->xs_recheckorderby = false;
388+
return true;
366389
}
367390

368391
/*
@@ -374,6 +397,7 @@ ivfflatendscan(IndexScanDesc scan)
374397
IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
375398

376399
pairingheap_free(so->listQueue);
400+
pfree(so->listPages);
377401
tuplesort_end(so->sortstate);
378402
FreeAccessStrategy(so->bas);
379403
FreeTupleDesc(so->tupdesc);

src/ivfvacuum.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ ivfflatbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
2626
Page cpage;
2727
OffsetNumber coffno;
2828
OffsetNumber cmaxoffno;
29-
BlockNumber startPages[MaxOffsetNumber];
29+
BlockNumber listPages[MaxOffsetNumber];
3030
ListInfo listInfo;
3131

3232
cbuf = ReadBuffer(index, blkno);
@@ -40,7 +40,7 @@ ivfflatbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
4040
{
4141
IvfflatList list = (IvfflatList) PageGetItem(cpage, PageGetItemId(cpage, coffno));
4242

43-
startPages[coffno - FirstOffsetNumber] = list->startPage;
43+
listPages[coffno - FirstOffsetNumber] = list->startPage;
4444
}
4545

4646
listInfo.blkno = blkno;
@@ -50,7 +50,7 @@ ivfflatbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
5050

5151
for (coffno = FirstOffsetNumber; coffno <= cmaxoffno; coffno = OffsetNumberNext(coffno))
5252
{
53-
BlockNumber searchPage = startPages[coffno - FirstOffsetNumber];
53+
BlockNumber searchPage = listPages[coffno - FirstOffsetNumber];
5454
BlockNumber insertPage = InvalidBlockNumber;
5555

5656
/* Iterate over entry pages */
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
use strict;
2+
use warnings FATAL => 'all';
3+
use PostgreSQL::Test::Cluster;
4+
use PostgreSQL::Test::Utils;
5+
use Test::More;
6+
7+
my $dim = 3;
8+
my $array_sql = join(",", ('random()') x $dim);
9+
10+
# Initialize node
11+
my $node = PostgreSQL::Test::Cluster->new('node');
12+
$node->init;
13+
$node->start;
14+
15+
# Create table
16+
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
17+
$node->safe_psql("postgres", "CREATE TABLE tst (i int4 PRIMARY KEY, v vector($dim));");
18+
$node->safe_psql("postgres",
19+
"INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 100000) i;"
20+
);
21+
$node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v vector_l2_ops);");
22+
23+
my $count = $node->safe_psql("postgres", qq(
24+
SET enable_seqscan = off;
25+
SET ivfflat.probes = 10;
26+
SET ivfflat.iterative_search = on;
27+
SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst LIMIT 1) LIMIT 11) t;
28+
));
29+
is($count, 10);
30+
31+
foreach ((30, 50, 70))
32+
{
33+
my $max_probes = $_;
34+
my $expected = $max_probes / 10;
35+
my $sum = 0;
36+
37+
for my $i (1 .. 20)
38+
{
39+
$count = $node->safe_psql("postgres", qq(
40+
SET enable_seqscan = off;
41+
SET ivfflat.probes = 10;
42+
SET ivfflat.iterative_search = on;
43+
SET ivfflat.iterative_search_max_probes = $max_probes;
44+
SELECT COUNT(*) FROM (SELECT v FROM tst WHERE i % 10000 = 0 ORDER BY v <-> (SELECT v FROM tst WHERE i = $i) LIMIT 11) t;
45+
));
46+
$sum += $count;
47+
}
48+
49+
my $avg = $sum / 20;
50+
cmp_ok($avg, '>', $expected - 2);
51+
cmp_ok($avg, '<', $expected + 2);
52+
}
53+
54+
done_testing();

0 commit comments

Comments
 (0)