Skip to content

Commit 33508c8

Browse files
committed
Experimental branch - not storing the affixes in the shared segment
Conditions requiring non-trivial regular expressions, which are translated into regex_t structures, don't work well with the shared memory architecture, and it is rather difficult to perform a deep copy of them, so they were unsupported by shared_ispell so far. This branch takes a different approach - it does not store the affixes in shared memory at all, so each backend needs to load them independently. That should not be a big deal though, as this should be very fast (the number of affixes is rather small). The changes were rather trivial and it seems to work, but there should be at least some basic safeguards (e.g. changing the affixes after loading the dictionary seems like a bad idea).
1 parent 8e3f3e1 commit 33508c8

File tree

1 file changed

+35
-148
lines changed

1 file changed

+35
-148
lines changed

src/shared_ispell.c

Lines changed: 35 additions & 148 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
*
2727
* dispell_init
2828
* -> init_shared_dict
29-
* -> get_shared_dict
29+
* -> get_shared_dict (not found)
3030
* -> NIStartBuild
3131
* -> NIImportDictionary
3232
* -> NIImportAffixes
@@ -35,11 +35,16 @@
3535
* -> NIFinishBuild
3636
* -> sizeIspellDict
3737
* -> copyIspellDict
38-
* -> copyAffixNode (prefixes)
39-
* -> copyAffixNode (suffixes)
4038
* -> copySPNode
4139
* -> copy affix data
4240
* -> copy compound affixes
41+
*
42+
* -> get_shared_dict (found -> reload affixes)
43+
* -> NIStartBuild
44+
* -> NIImportAffixes
45+
* -> NISortAffixes
46+
* -> NIFinishBuild
47+
*
4348
* -> get_shared_stop_list
4449
* -> readstoplist
4550
* -> copyStopList
@@ -362,6 +367,28 @@ void init_shared_dict(DictInfo * info, char * dictFile, char * affFile, char * s
362367
shdict->next = segment_info->dict;
363368
segment_info->dict = shdict;
364369

370+
} else {
371+
372+
/* we got the dictionary, but we need to reload the affixes (to handle regex_t rules) */
373+
374+
dict = (IspellDict *)palloc0(sizeof(IspellDict));
375+
376+
NIStartBuild(dict);
377+
378+
NIImportAffixes(dict,
379+
get_tsearch_config_filename(affFile, "affix"));
380+
381+
dict->AffixData = shdict->AffixData;
382+
dict->lenAffixData = shdict->lenAffixData;
383+
dict->nAffixData = shdict->nAffixData;
384+
385+
NISortAffixes(dict);
386+
387+
NIFinishBuild(dict);
388+
389+
shdict->Suffix = dict->Suffix;
390+
shdict->Prefix = dict->Prefix;
391+
365392
}
366393

367394
/* STOP WORDS */
@@ -723,139 +750,6 @@ int sizeSPNode(SPNode * node) {
723750
return size;
724751
}
725752

726-
/* RegisNode - simple regular expressions */
727-
728-
static
729-
RegisNode * copyRegisNode(RegisNode * node) {
730-
731-
RegisNode * copy = (RegisNode *)shalloc(offsetof(RegisNode, data) + node->len);
732-
733-
memcpy(copy, node, offsetof(RegisNode, data) + node->len);
734-
735-
if (node->next != NULL) {
736-
copy->next = copyRegisNode(node->next);
737-
}
738-
739-
return copy;
740-
}
741-
742-
static
743-
int sizeRegisNode(RegisNode * node) {
744-
745-
int size = MAXALIGN(offsetof(RegisNode, data) + node->len);
746-
747-
if (node->next != NULL) {
748-
size += sizeRegisNode(node->next);
749-
}
750-
751-
return size;
752-
}
753-
754-
/* AFFIX - affix rules (simple, regis or full regular expressions). */
755-
756-
static
757-
AFFIX * copyAffix(AFFIX * affix) {
758-
759-
AFFIX * copy = (AFFIX*)shalloc(sizeof(AFFIX));
760-
761-
memcpy(copy, affix, sizeof(AFFIX));
762-
763-
copy->find = shstrcpy(affix->find);
764-
copy->repl = shstrcpy(affix->repl);
765-
766-
if (affix->isregis) {
767-
copy->reg.regis.node = copyRegisNode(affix->reg.regis.node);
768-
} else if (! affix->issimple) {
769-
770-
/*FIXME Need to copy the regex_t properly. But a plain copy would not be
771-
* safe tu use by multiple processes at the same time, so each backend
772-
* needs to create it's own copy. */
773-
elog(ERROR, "This extension can't handle regex_t affixes yet.");
774-
775-
}
776-
777-
return copy;
778-
779-
}
780-
781-
static
782-
int sizeAffix(AFFIX * affix) {
783-
784-
int size = MAXALIGN(sizeof(AFFIX));
785-
786-
size += MAXALIGN(strlen(affix->find)+1);
787-
size += MAXALIGN(strlen(affix->repl)+1);
788-
789-
if (affix->isregis) {
790-
size += sizeRegisNode(affix->reg.regis.node);
791-
} else if (! affix->issimple) {
792-
793-
/*FIXME Need to copy the regex_t properly. But would a plain copy be
794-
* safe tu use by multiple processes at the same time? */
795-
elog(ERROR, "This extension can't handle regex_t affixes yet.");
796-
797-
}
798-
799-
return size;
800-
801-
}
802-
803-
/* AffixNode */
804-
805-
static
806-
AffixNode * copyAffixNode(AffixNode * node) {
807-
808-
int i, j;
809-
AffixNode * copy = NULL;
810-
811-
if (node == NULL) {
812-
return NULL;
813-
}
814-
815-
copy = (AffixNode *)shalloc(offsetof(AffixNode,data) + sizeof(AffixNodeData) * node->length);
816-
memcpy(copy, node, offsetof(AffixNode,data) + sizeof(AffixNodeData) * node->length);
817-
818-
for (i = 0; i < node->length; i++) {
819-
820-
copy->data[i].node = copyAffixNode(node->data[i].node);
821-
822-
copy->data[i].val = node->data[i].val;
823-
copy->data[i].naff = node->data[i].naff;
824-
copy->data[i].aff = (AFFIX**)shalloc(sizeof(AFFIX*) * node->data[i].naff);
825-
826-
for (j = 0; j < node->data[i].naff; j++) {
827-
copy->data[i].aff[j] = copyAffix(node->data[i].aff[j]);
828-
}
829-
}
830-
831-
return copy;
832-
}
833-
834-
static
835-
int sizeAffixNode(AffixNode * node) {
836-
837-
int i, j;
838-
int size = 0;
839-
840-
if (node == NULL) {
841-
return 0;
842-
}
843-
844-
size = MAXALIGN(offsetof(AffixNode,data) + sizeof(AffixNodeData) * node->length);
845-
846-
for (i = 0; i < node->length; i++) {
847-
848-
size += sizeAffixNode(node->data[i].node);
849-
size += MAXALIGN(sizeof(AFFIX*) * node->data[i].naff);
850-
851-
for (j = 0; j < node->data[i].naff; j++) {
852-
size += sizeAffix(node->data[i].aff[j]);
853-
}
854-
}
855-
856-
return size;
857-
}
858-
859753
/* StopList */
860754

861755
static
@@ -932,13 +826,6 @@ SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * aff
932826
strcpy(copy->dictFile, dictFile);
933827
strcpy(copy->affixFile, affixFile);
934828

935-
copy->naffixes = dict->naffixes;
936-
937-
copy->Affix = (AFFIX*)shalloc(sizeof(AFFIX) * dict->naffixes);
938-
939-
copy->Suffix = copyAffixNode(dict->Suffix);
940-
copy->Prefix = copyAffixNode(dict->Prefix);
941-
942829
copy->Dictionary = copySPNode(dict->Dictionary);
943830

944831
/* copy affix data */
@@ -958,6 +845,11 @@ SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * aff
958845

959846
copy->nbytes = size;
960847
copy->nwords = words;
848+
849+
/* use the affixes directly (no copy, we'll reload it anyway to handle regular expressions) */
850+
copy->naffixes = dict->naffixes;
851+
copy->Suffix = dict->Suffix;
852+
copy->Prefix = dict->Prefix;
961853

962854
return copy;
963855

@@ -975,11 +867,6 @@ int sizeIspellDict(IspellDict * dict, char * dictFile, char * affixFile) {
975867
size += MAXALIGN(strlen(dictFile)+1);
976868
size += MAXALIGN(strlen(affixFile)+1);
977869

978-
size += MAXALIGN(sizeof(AFFIX) * dict->naffixes);
979-
980-
size += MAXALIGN(sizeAffixNode(dict->Suffix));
981-
size += MAXALIGN(sizeAffixNode(dict->Prefix));
982-
983870
size += sizeSPNode(dict->Dictionary);
984871

985872
/* copy affix data */

0 commit comments

Comments
 (0)