Commit 161a3e8
pg_upgrade: Use COPY for large object metadata.
Presently, pg_dump generates commands like

    SELECT pg_catalog.lo_create('5432');
    ALTER LARGE OBJECT 5432 OWNER TO alice;
    GRANT SELECT ON LARGE OBJECT 5432 TO bob;

for each large object. This is particularly slow at restore time, especially when there are tens or hundreds of millions of large objects. From reports and personal experience, such slow restores seem to be most painful when encountered during pg_upgrade.

This commit teaches pg_dump to instead dump pg_largeobject_metadata and the corresponding pg_shdepend rows when in binary upgrade mode, i.e., pg_dump now generates commands like

    COPY pg_catalog.pg_largeobject_metadata (oid, lomowner, lomacl) FROM stdin;
    5432    16384   {alice=rw/alice,bob=r/alice}
    \.

    COPY pg_catalog.pg_shdepend (dbid, classid, objid, objsubid, refclassid, refobjid, deptype) FROM stdin;
    5       2613    5432    0       1260    16384   o
    5       2613    5432    0       1260    16385   a
    \.

Testing indicates the COPY approach can be significantly faster. To do any better, we'd probably need to find a way to copy/link pg_largeobject_metadata's files during pg_upgrade, which would be limited to upgrades from >= v16 (since commit 7b37823 changed the storage format for aclitem, which is used for pg_largeobject_metadata.lomacl).

Note that this change only applies to binary upgrade mode (i.e., dumps initiated by pg_upgrade) since it inserts rows directly into catalogs. Also, this optimization can only be used for upgrades from >= v12 because pg_largeobject_metadata was created WITH OIDS in older versions, which prevents pg_dump from handling pg_largeobject_metadata.oid properly. With some extra effort, it might be possible to support upgrades from older versions, but the added complexity didn't seem worth it to support versions that will have been out of support for nearly 3 years by the time this change is released.

Experienced hackers may remember that prior to v12, pg_upgrade copied/linked pg_largeobject_metadata's files (see commit 12a53c7). Besides the aforementioned storage format issues, this approach failed to transfer the relevant pg_shdepend rows, and pg_dump still had to generate an lo_create() command per large object so that creating the dependent comments and security labels worked. We could perhaps adopt a hybrid approach for upgrades from v16 and newer (i.e., generate lo_create() commands for each large object, copy/link pg_largeobject_metadata's files, and COPY the relevant pg_shdepend rows), but further testing is needed.

Reported-by: Hannu Krosing <hannuk@google.com>
Suggested-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Hannu Krosing <hannuk@google.com>
Reviewed-by: Nitin Motiani <nitinmotiani@google.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CAMT0RQSS-6qLH%2BzYsOeUbAYhop3wmQTkNmQpo5--QRDUR%2BqYmQ%40mail.gmail.com
1 parent 4c5159a commit 161a3e8
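To see the difference at scale, one can populate a scratch database with many large objects before running pg_upgrade. A minimal sketch, not from the commit itself: the object counts and the role `bob` are illustrative assumptions.

```sql
-- Create a million empty large objects; lo_create(0) makes the server
-- assign each OID.  The old restore path replays per-object commands
-- for every one of these.
SELECT pg_catalog.lo_create(0) FROM generate_series(1, 1000000);

-- Grant on a sample of them so that pg_shdepend rows exist as well
-- (assumes a pre-existing role "bob").
DO $$
DECLARE
    lo oid;
BEGIN
    FOR lo IN SELECT oid FROM pg_largeobject_metadata LIMIT 1000 LOOP
        EXECUTE format('GRANT SELECT ON LARGE OBJECT %s TO bob', lo);
    END LOOP;
END $$;
```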

File tree

3 files changed: +103 −6 lines

src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 15 additions & 0 deletions

```diff
@@ -31,6 +31,8 @@
 #endif
 
 #include "catalog/pg_class_d.h"
+#include "catalog/pg_largeobject_metadata_d.h"
+#include "catalog/pg_shdepend_d.h"
 #include "common/string.h"
 #include "compress_io.h"
 #include "dumputils.h"
@@ -2974,6 +2976,19 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
 	int			res = REQ_SCHEMA | REQ_DATA;
 	RestoreOptions *ropt = AH->public.ropt;
 
+	/*
+	 * For binary upgrade mode, dump pg_largeobject_metadata and the
+	 * associated pg_shdepend rows.  This is faster to restore than the
+	 * equivalent set of large object commands.  We can only do this for
+	 * upgrades from v12 and newer; in older versions, pg_largeobject_metadata
+	 * was created WITH OIDS, so the OID column is hidden and won't be dumped.
+	 */
+	if (ropt->binary_upgrade && AH->public.remoteVersion >= 120000 &&
+		strcmp(te->desc, "TABLE DATA") == 0 &&
+		(te->catalogId.oid == LargeObjectMetadataRelationId ||
+		 te->catalogId.oid == SharedDependRelationId))
+		return REQ_DATA;
+
 	/* These items are treated specially */
 	if (strcmp(te->desc, "ENCODING") == 0 ||
 		strcmp(te->desc, "STDSTRINGS") == 0 ||
```

src/bin/pg_dump/pg_dump.c

Lines changed: 85 additions & 5 deletions

```diff
@@ -49,8 +49,10 @@
 #include "catalog/pg_class_d.h"
 #include "catalog/pg_default_acl_d.h"
 #include "catalog/pg_largeobject_d.h"
+#include "catalog/pg_largeobject_metadata_d.h"
 #include "catalog/pg_proc_d.h"
 #include "catalog/pg_publication_d.h"
+#include "catalog/pg_shdepend_d.h"
 #include "catalog/pg_subscription_d.h"
 #include "catalog/pg_type_d.h"
 #include "common/connect.h"
@@ -209,6 +211,12 @@ static int nbinaryUpgradeClassOids = 0;
 static SequenceItem *sequences = NULL;
 static int	nsequences = 0;
 
+/*
+ * For binary upgrade, the dump ID of pg_largeobject_metadata is saved for use
+ * as a dependency for pg_shdepend and any large object comments/seclabels.
+ */
+static DumpId lo_metadata_dumpId;
+
 /* Maximum number of relations to fetch in a fetchAttributeStats() call. */
 #define MAX_ATTR_STATS_RELS	64
 
```

```diff
@@ -1085,6 +1093,36 @@ main(int argc, char **argv)
 	if (!dopt.dumpData && dopt.sequence_data)
 		getTableData(&dopt, tblinfo, numTables, RELKIND_SEQUENCE);
 
+	/*
+	 * For binary upgrade mode, dump pg_largeobject_metadata and the
+	 * associated pg_shdepend rows.  This is faster to restore than the
+	 * equivalent set of large object commands.  We can only do this for
+	 * upgrades from v12 and newer; in older versions, pg_largeobject_metadata
+	 * was created WITH OIDS, so the OID column is hidden and won't be dumped.
+	 */
+	if (dopt.binary_upgrade && fout->remoteVersion >= 120000)
+	{
+		TableInfo  *lo_metadata = findTableByOid(LargeObjectMetadataRelationId);
+		TableInfo  *shdepend = findTableByOid(SharedDependRelationId);
+
+		makeTableDataInfo(&dopt, lo_metadata);
+		makeTableDataInfo(&dopt, shdepend);
+
+		/*
+		 * Save pg_largeobject_metadata's dump ID for use as a dependency for
+		 * pg_shdepend and any large object comments/seclabels.
+		 */
+		lo_metadata_dumpId = lo_metadata->dataObj->dobj.dumpId;
+		addObjectDependency(&shdepend->dataObj->dobj, lo_metadata_dumpId);
+
+		/*
+		 * Only dump large object shdepend rows for this database.
+		 */
+		shdepend->dataObj->filtercond = "WHERE classid = 'pg_largeobject'::regclass "
+			"AND dbid = (SELECT oid FROM pg_database "
+			"            WHERE datname = current_database())";
+	}
+
 	/*
 	 * In binary-upgrade mode, we do not have to worry about the actual LO
 	 * data or the associated metadata that resides in the pg_largeobject and
```
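The filter condition above restricts the pg_shdepend data dump to the current database's large-object dependency rows. For inspection, the equivalent standalone query would look like this (a sketch; the column list is taken from the COPY command shown in the commit message):

```sql
-- Rows pg_dump carries over: large-object dependencies recorded for
-- the database being dumped.
SELECT dbid, classid, objid, objsubid, refclassid, refobjid, deptype
FROM pg_catalog.pg_shdepend
WHERE classid = 'pg_largeobject'::regclass
  AND dbid = (SELECT oid FROM pg_database
              WHERE datname = current_database());
```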
```diff
@@ -3924,10 +3962,37 @@ getLOs(Archive *fout)
 		 * as it will be copied by pg_upgrade, which simply copies the
 		 * pg_largeobject table. We *do* however dump out anything but the
 		 * data, as pg_upgrade copies just pg_largeobject, but not
-		 * pg_largeobject_metadata, after the dump is restored.
+		 * pg_largeobject_metadata, after the dump is restored.  In versions
+		 * before v12, this is done via proper large object commands.  In
+		 * newer versions, we dump the content of pg_largeobject_metadata and
+		 * any associated pg_shdepend rows, which is faster to restore.  (On
+		 * <v12, pg_largeobject_metadata was created WITH OIDS, so the OID
+		 * column is hidden and won't be dumped.)
 		 */
 		if (dopt->binary_upgrade)
-			loinfo->dobj.dump &= ~DUMP_COMPONENT_DATA;
+		{
+			if (fout->remoteVersion >= 120000)
+			{
+				/*
+				 * We should've saved pg_largeobject_metadata's dump ID before
+				 * this point.
+				 */
+				Assert(lo_metadata_dumpId);
+
+				loinfo->dobj.dump &= ~(DUMP_COMPONENT_DATA | DUMP_COMPONENT_ACL | DUMP_COMPONENT_DEFINITION);
+
+				/*
+				 * Mark the large object as dependent on
+				 * pg_largeobject_metadata so that any large object
+				 * comments/seclabels are dumped after it.
+				 */
+				loinfo->dobj.dependencies = (DumpId *) pg_malloc(sizeof(DumpId));
+				loinfo->dobj.dependencies[0] = lo_metadata_dumpId;
+				loinfo->dobj.nDeps = loinfo->dobj.allocDeps = 1;
+			}
+			else
+				loinfo->dobj.dump &= ~DUMP_COMPONENT_DATA;
+		}
 
 		/*
 		 * Create a "BLOBS" data item for the group, too. This is just a
```
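The explicit dependency matters because comments and security labels on large objects can only be restored once the corresponding pg_largeobject_metadata rows exist. A hedged illustration of such dependent commands (the OID, label provider, and label value are hypothetical):

```sql
-- Both of these presume large object 5432 already has a row in
-- pg_largeobject_metadata in the new cluster.
COMMENT ON LARGE OBJECT 5432 IS 'scanned invoice';
SECURITY LABEL FOR selinux ON LARGE OBJECT 5432
    IS 'system_u:object_r:sepgsql_blob_t:s0';
```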
```diff
@@ -9039,8 +9104,20 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
 		if (tbinfo->relkind == RELKIND_SEQUENCE)
 			continue;
 
-		/* Don't bother with uninteresting tables, either */
-		if (!tbinfo->interesting)
+		/*
+		 * Don't bother with uninteresting tables, either.  For binary
+		 * upgrades, this is bypassed for pg_largeobject_metadata and
+		 * pg_shdepend so that the column names are collected for the
+		 * corresponding COPY commands.  Restoring the data for those catalogs
+		 * is faster than restoring the equivalent set of large object
+		 * commands.  We can only do this for upgrades from v12 and newer; in
+		 * older versions, pg_largeobject_metadata was created WITH OIDS, so
+		 * the OID column is hidden and won't be dumped.
+		 */
+		if (!tbinfo->interesting &&
+			!(fout->dopt->binary_upgrade && fout->remoteVersion >= 120000 &&
+			  (tbinfo->dobj.catId.oid == LargeObjectMetadataRelationId ||
+			   tbinfo->dobj.catId.oid == SharedDependRelationId)))
 			continue;
 
 		/* OK, we need info for this table */
@@ -9244,7 +9321,10 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables)
 			pg_fatal("unrecognized table OID %u", attrelid);
 		/* cross-check that we only got requested tables */
 		if (tbinfo->relkind == RELKIND_SEQUENCE ||
-			!tbinfo->interesting)
+			(!tbinfo->interesting &&
+			 !(fout->dopt->binary_upgrade && fout->remoteVersion >= 120000 &&
+			   (tbinfo->dobj.catId.oid == LargeObjectMetadataRelationId ||
+				tbinfo->dobj.catId.oid == SharedDependRelationId))))
 			pg_fatal("unexpected column data for table \"%s\"",
 					 tbinfo->dobj.name);
 
```
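Because getTableAttrs now collects column information for these catalogs, the emitted COPY carries an explicit column list, and the transferred metadata can be spot-checked in the new cluster after an upgrade. A small verification sketch (output depends entirely on your data):

```sql
-- Owners and ACLs should match the old cluster, row for row.
SELECT oid, lomowner::regrole AS owner, lomacl
FROM pg_catalog.pg_largeobject_metadata
ORDER BY oid
LIMIT 10;
```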

src/bin/pg_dump/t/002_pg_dump.pl

Lines changed: 3 additions & 1 deletion

```diff
@@ -1087,6 +1087,7 @@
 			test_schema_plus_large_objects => 1,
 		},
 		unlike => {
+			binary_upgrade => 1,
 			no_large_objects => 1,
 			no_owner => 1,
 			schema_only => 1,
@@ -1605,6 +1606,7 @@
 			test_schema_plus_large_objects => 1,
 		},
 		unlike => {
+			binary_upgrade => 1,
 			schema_only => 1,
 			schema_only_with_statistics => 1,
 			no_large_objects => 1,
@@ -4612,9 +4614,9 @@
 			no_schema => 1,
 			section_data => 1,
 			test_schema_plus_large_objects => 1,
-			binary_upgrade => 1,
 		},
 		unlike => {
+			binary_upgrade => 1,
 			no_large_objects => 1,
 			no_privs => 1,
 			schema_only => 1,
```
