Skip to content

Commit 8dde14a

Browse files
ankanehlinnaka
andcommitted
Reduced memory usage for HNSW index scans
Co-authored-by: Heikki Linnakangas <heikki.linnakangas@iki.fi>
1 parent d74d306 commit 8dde14a

File tree

1 file changed

+121
-71
lines changed

1 file changed

+121
-71
lines changed

src/hnswutils.c

Lines changed: 121 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -580,13 +580,12 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
580580
}
581581

582582
/*
583-
* Get the distance for a candidate
583+
* Get the distance for an element
584584
*/
585585
static float
586-
GetCandidateDistance(char *base, HnswCandidate * hc, Datum q, FmgrInfo *procinfo, Oid collation)
586+
GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, Oid collation)
587587
{
588-
HnswElement hce = HnswPtrAccess(base, hc->element);
589-
Datum value = HnswGetValue(base, hce);
588+
Datum value = HnswGetValue(base, element);
590589

591590
return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, q, value));
592591
}
@@ -601,7 +600,7 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index,
601600

602601
HnswPtrStore(base, hc->element, entryPoint);
603602
if (index == NULL)
604-
hc->distance = GetCandidateDistance(base, hc, q, procinfo, collation);
603+
hc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation);
605604
else
606605
HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL);
607606
return hc;
@@ -706,20 +705,87 @@ AddToVisited(char *base, visited_hash * v, HnswCandidate * hc, Relation index, b
706705
* Count element towards ef
707706
*/
708707
static inline bool
709-
CountElement(char *base, HnswElement skipElement, HnswCandidate * hc)
708+
CountElement(char *base, HnswElement skipElement, HnswElement e)
710709
{
711-
HnswElement e;
712-
713710
if (skipElement == NULL)
714711
return true;
715712

716713
/* Ensure does not access heaptidsLength during in-memory build */
717714
pg_memory_barrier();
718715

719-
e = HnswPtrAccess(base, hc->element);
720716
return e->heaptidsLength != 0;
721717
}
722718

719+
/*
720+
* Load unvisited neighbors from memory
721+
*/
722+
static void
723+
HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswElement * unvisited, int *unvisitedLength, visited_hash * v, int lc, HnswNeighborArray * neighborhoodData, Size neighborhoodSize)
724+
{
725+
/* Get the neighborhood at layer lc */
726+
HnswNeighborArray *neighborhood = HnswGetNeighbors(base, element, lc);
727+
728+
/* Copy neighborhood to local memory */
729+
LWLockAcquire(&element->lock, LW_SHARED);
730+
memcpy(neighborhoodData, neighborhood, neighborhoodSize);
731+
LWLockRelease(&element->lock);
732+
neighborhood = neighborhoodData;
733+
734+
*unvisitedLength = 0;
735+
736+
for (int i = 0; i < neighborhood->length; i++)
737+
{
738+
HnswCandidate *hc = &neighborhood->items[i];
739+
bool found;
740+
741+
AddToVisited(base, v, hc, NULL, &found);
742+
743+
if (!found)
744+
unvisited[(*unvisitedLength)++] = HnswPtrAccess(base, hc->element);
745+
}
746+
}
747+
748+
/*
749+
* Load unvisited neighbors from disk
750+
*/
751+
static void
752+
HnswLoadUnvisitedFromDisk(HnswElement element, HnswElement * unvisited, int *unvisitedLength, visited_hash * v, Relation index, int m, int lm, int lc)
753+
{
754+
Buffer buf;
755+
Page page;
756+
HnswNeighborTuple ntup;
757+
int start;
758+
ItemPointerData indextids[HNSW_MAX_M * 2];
759+
760+
buf = ReadBuffer(index, element->neighborPage);
761+
LockBuffer(buf, BUFFER_LOCK_SHARE);
762+
page = BufferGetPage(buf);
763+
764+
ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, element->neighborOffno));
765+
start = (element->level - lc) * m;
766+
767+
/* Copy to minimize lock time */
768+
memcpy(&indextids, ntup->indextids + start, lm * sizeof(ItemPointerData));
769+
770+
UnlockReleaseBuffer(buf);
771+
772+
*unvisitedLength = 0;
773+
774+
for (int i = 0; i < lm; i++)
775+
{
776+
ItemPointer indextid = &indextids[i];
777+
bool found;
778+
779+
if (!ItemPointerIsValid(indextid))
780+
break;
781+
782+
tidhash_insert(v->tids, *indextid, &found);
783+
784+
if (!found)
785+
unvisited[(*unvisitedLength)++] = HnswInitElementFromBlock(ItemPointerGetBlockNumber(indextid), ItemPointerGetOffsetNumber(indextid));
786+
}
787+
}
788+
723789
/*
724790
* Algorithm 2 from paper
725791
*/
@@ -734,13 +800,16 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
734800
ListCell *lc2;
735801
HnswNeighborArray *neighborhoodData = NULL;
736802
Size neighborhoodSize = 0;
803+
int lm = HnswGetLayerM(m, lc);
804+
HnswElement *unvisited = palloc(lm * sizeof(HnswElement));
805+
int unvisitedLength;
737806

738807
InitVisited(base, &v, index, ef, m);
739808

740809
/* Create local memory for neighborhood if needed */
741810
if (index == NULL)
742811
{
743-
neighborhoodSize = HNSW_NEIGHBOR_ARRAY_SIZE(HnswGetLayerM(m, lc));
812+
neighborhoodSize = HNSW_NEIGHBOR_ARRAY_SIZE(lm);
744813
neighborhoodData = palloc(neighborhoodSize);
745814
}
746815

@@ -762,13 +831,12 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
762831
* would be ideal to do this for inserts as well, but this could
763832
* affect insert performance.
764833
*/
765-
if (CountElement(base, skipElement, hc))
834+
if (CountElement(base, skipElement, HnswPtrAccess(base, hc->element)))
766835
wlen++;
767836
}
768837

769838
while (!pairingheap_is_empty(C))
770839
{
771-
HnswNeighborArray *neighborhood;
772840
HnswCandidate *c = HnswGetPairingHeapCandidate(c_node, pairingheap_remove_first(C));
773841
HnswCandidate *f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W));
774842
HnswElement cElement;
@@ -778,74 +846,56 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
778846

779847
cElement = HnswPtrAccess(base, c->element);
780848

781-
if (HnswPtrIsNull(base, cElement->neighbors))
782-
HnswLoadNeighbors(cElement, index, m);
783-
784-
/* Get the neighborhood at layer lc */
785-
neighborhood = HnswGetNeighbors(base, cElement, lc);
786-
787-
/* Copy neighborhood to local memory if needed */
788849
if (index == NULL)
789-
{
790-
LWLockAcquire(&cElement->lock, LW_SHARED);
791-
memcpy(neighborhoodData, neighborhood, neighborhoodSize);
792-
LWLockRelease(&cElement->lock);
793-
neighborhood = neighborhoodData;
794-
}
850+
HnswLoadUnvisitedFromMemory(base, cElement, unvisited, &unvisitedLength, &v, lc, neighborhoodData, neighborhoodSize);
851+
else
852+
HnswLoadUnvisitedFromDisk(cElement, unvisited, &unvisitedLength, &v, index, m, lm, lc);
795853

796-
for (int i = 0; i < neighborhood->length; i++)
854+
for (int i = 0; i < unvisitedLength; i++)
797855
{
798-
HnswCandidate *e = &neighborhood->items[i];
799-
bool visited;
856+
HnswElement eElement = unvisited[i];
857+
float eDistance;
858+
bool alwaysAdd = wlen < ef;
859+
860+
f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W));
800861

801-
AddToVisited(base, &v, e, index, &visited);
862+
if (index == NULL)
863+
eDistance = GetElementDistance(base, eElement, q, procinfo, collation);
864+
else
865+
HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting, alwaysAdd ? NULL : &f->distance);
802866

803-
if (!visited)
867+
if (eDistance < f->distance || alwaysAdd)
804868
{
805-
float eDistance;
806-
HnswElement eElement = HnswPtrAccess(base, e->element);
807-
bool alwaysAdd = wlen < ef;
869+
HnswCandidate *e;
870+
HnswPairingHeapNode *node;
808871

809-
f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W));
872+
Assert(!eElement->deleted);
810873

811-
if (index == NULL)
812-
eDistance = GetCandidateDistance(base, e, q, procinfo, collation);
813-
else
814-
HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting, alwaysAdd ? NULL : &f->distance);
874+
/* Make robust to issues */
875+
if (eElement->level < lc)
876+
continue;
877+
878+
/* Create a new candidate */
879+
e = palloc(sizeof(HnswCandidate));
880+
HnswPtrStore(base, e->element, eElement);
881+
e->distance = eDistance;
882+
883+
node = CreatePairingHeapNode(e);
884+
pairingheap_add(C, &node->c_node);
885+
pairingheap_add(W, &node->w_node);
815886

816-
if (eDistance < f->distance || alwaysAdd)
887+
/*
888+
* Do not count elements being deleted towards ef when
889+
* vacuuming. It would be ideal to do this for inserts as
890+
* well, but this could affect insert performance.
891+
*/
892+
if (CountElement(base, skipElement, eElement))
817893
{
818-
HnswCandidate *ec;
819-
HnswPairingHeapNode *node;
820-
821-
Assert(!eElement->deleted);
822-
823-
/* Make robust to issues */
824-
if (eElement->level < lc)
825-
continue;
826-
827-
/* Copy e */
828-
ec = palloc(sizeof(HnswCandidate));
829-
HnswPtrStore(base, ec->element, eElement);
830-
ec->distance = eDistance;
831-
832-
node = CreatePairingHeapNode(ec);
833-
pairingheap_add(C, &node->c_node);
834-
pairingheap_add(W, &node->w_node);
835-
836-
/*
837-
* Do not count elements being deleted towards ef when
838-
* vacuuming. It would be ideal to do this for inserts as
839-
* well, but this could affect insert performance.
840-
*/
841-
if (CountElement(base, skipElement, e))
842-
{
843-
wlen++;
844-
845-
/* No need to decrement wlen */
846-
if (wlen > ef)
847-
pairingheap_remove_first(W);
848-
}
894+
wlen++;
895+
896+
/* No need to decrement wlen */
897+
if (wlen > ef)
898+
pairingheap_remove_first(W);
849899
}
850900
}
851901
}
@@ -1117,7 +1167,7 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm
11171167
if (HnswPtrIsNull(base, hc3Element->value))
11181168
HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL);
11191169
else
1120-
hc3->distance = GetCandidateDistance(base, hc3, q, procinfo, collation);
1170+
hc3->distance = GetElementDistance(base, hc3Element, q, procinfo, collation);
11211171

11221172
/* Prune element if being deleted */
11231173
if (hc3Element->heaptidsLength == 0)

0 commit comments

Comments
 (0)