Skip to content

Commit 5e503e1

Browse files
committed
Revert "WAL-log inplace update before revealing it to other sessions."
This reverts commit bfd5c6e (v17) and counterparts in each other non-master branch. This unblocks reverting a commit on which it depends. Discussion: https://postgr.es/m/10ec0bc3-5933-1189-6bb8-5dec4114558e@gmail.com
1 parent cd64941 commit 5e503e1

File tree

3 files changed

+18
-46
lines changed

3 files changed

+18
-46
lines changed

src/backend/access/heap/README.tuplock

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,4 +203,6 @@ Inplace updates create an exception to the rule that tuple data won't change
203203
under a reader holding a pin. A reader of a heap_fetch() result tuple may
204204
witness a torn read. Current inplace-updated fields are aligned and are no
205205
wider than four bytes, and current readers don't need consistency across
206-
fields. Hence, they get by with just fetching each field once.
206+
fields. Hence, they get by with just fetching each field once. XXX such a
207+
caller may also read a value that has not reached WAL; see
208+
systable_inplace_update_finish().

src/backend/access/heap/heapam.c

Lines changed: 13 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -6139,18 +6139,13 @@ heap_inplace_update_and_unlock(Relation relation,
61396139
HeapTupleHeader htup = oldtup->t_data;
61406140
uint32 oldlen;
61416141
uint32 newlen;
6142-
char *dst;
6143-
char *src;
61446142

61456143
Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
61466144
oldlen = oldtup->t_len - htup->t_hoff;
61476145
newlen = tuple->t_len - tuple->t_data->t_hoff;
61486146
if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
61496147
elog(ERROR, "wrong tuple length");
61506148

6151-
dst = (char *) htup + htup->t_hoff;
6152-
src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6153-
61546149
/*
61556150
* Construct shared cache inval if necessary. Note that because we only
61566151
* pass the new version of the tuple, this mustn't be used for any
@@ -6169,15 +6164,15 @@ heap_inplace_update_and_unlock(Relation relation,
61696164
*/
61706165
PreInplace_Inval();
61716166

6167+
/* NO EREPORT(ERROR) from here till changes are logged */
6168+
START_CRIT_SECTION();
6169+
6170+
memcpy((char *) htup + htup->t_hoff,
6171+
(char *) tuple->t_data + tuple->t_data->t_hoff,
6172+
newlen);
6173+
61726174
/*----------
6173-
* NO EREPORT(ERROR) from here till changes are complete
6174-
*
6175-
* Our buffer lock won't stop a reader having already pinned and checked
6176-
* visibility for this tuple. Hence, we write WAL first, then mutate the
6177-
* buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6178-
* checkpoint delay makes that acceptable. With the usual order of
6179-
* changes, a crash after memcpy() and before XLogInsert() could allow
6180-
* datfrozenxid to overtake relfrozenxid:
6175+
* XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
61816176
*
61826177
* ["D" is a VACUUM (ONLY_DATABASE_STATS)]
61836178
* ["R" is a VACUUM tbl]
@@ -6187,57 +6182,31 @@ heap_inplace_update_and_unlock(Relation relation,
61876182
* D: raise pg_database.datfrozenxid, XLogInsert(), finish
61886183
* [crash]
61896184
* [recovery restores datfrozenxid w/o relfrozenxid]
6190-
*
6191-
* Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6192-
* the buffer to the stack before logging. Here, that facilitates a FPI
6193-
* of the post-mutation block before we accept other sessions seeing it.
61946185
*/
6195-
Assert(!MyPgXact->delayChkpt);
6196-
START_CRIT_SECTION();
6197-
MyPgXact->delayChkpt = true;
6186+
6187+
MarkBufferDirty(buffer);
61986188

61996189
/* XLOG stuff */
62006190
if (RelationNeedsWAL(relation))
62016191
{
62026192
xl_heap_inplace xlrec;
6203-
PGAlignedBlock copied_buffer;
6204-
char *origdata = (char *) BufferGetBlock(buffer);
6205-
Page page = BufferGetPage(buffer);
6206-
uint16 lower = ((PageHeader) page)->pd_lower;
6207-
uint16 upper = ((PageHeader) page)->pd_upper;
6208-
uintptr_t dst_offset_in_block;
6209-
RelFileNode rnode;
6210-
ForkNumber forkno;
6211-
BlockNumber blkno;
62126193
XLogRecPtr recptr;
62136194

62146195
xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
62156196

62166197
XLogBeginInsert();
62176198
XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
62186199

6219-
/* register block matching what buffer will look like after changes */
6220-
memcpy(copied_buffer.data, origdata, lower);
6221-
memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
6222-
dst_offset_in_block = dst - origdata;
6223-
memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
6224-
BufferGetTag(buffer, &rnode, &forkno, &blkno);
6225-
Assert(forkno == MAIN_FORKNUM);
6226-
XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer.data,
6227-
REGBUF_STANDARD);
6228-
XLogRegisterBufData(0, src, newlen);
6200+
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6201+
XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
62296202

62306203
/* inplace updates aren't decoded atm, don't log the origin */
62316204

62326205
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
62336206

6234-
PageSetLSN(page, recptr);
6207+
PageSetLSN(BufferGetPage(buffer), recptr);
62356208
}
62366209

6237-
memcpy(dst, src, newlen);
6238-
6239-
MarkBufferDirty(buffer);
6240-
62416210
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
62426211

62436212
/*
@@ -6250,7 +6219,6 @@ heap_inplace_update_and_unlock(Relation relation,
62506219
*/
62516220
AtInplace_Inval();
62526221

6253-
MyPgXact->delayChkpt = false;
62546222
END_CRIT_SECTION();
62556223
UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
62566224

src/backend/access/transam/xloginsert.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,8 @@ XLogRegisterBlock(uint8 block_id, RelFileNode *rnode, ForkNumber forknum,
268268
{
269269
registered_buffer *regbuf;
270270

271+
/* This is currently only used to WAL-log a full-page image of a page */
272+
Assert(flags & REGBUF_FORCE_IMAGE);
271273
Assert(begininsert_called);
272274

273275
if (block_id >= max_registered_block_id)

0 commit comments

Comments
 (0)