@@ -6135,13 +6135,18 @@ heap_inplace_update_and_unlock(Relation relation,
6135
6135
HeapTupleHeader htup = oldtup -> t_data ;
6136
6136
uint32 oldlen ;
6137
6137
uint32 newlen ;
6138
+ char * dst ;
6139
+ char * src ;
6138
6140
6139
6141
Assert (ItemPointerEquals (& oldtup -> t_self , & tuple -> t_self ));
6140
6142
oldlen = oldtup -> t_len - htup -> t_hoff ;
6141
6143
newlen = tuple -> t_len - tuple -> t_data -> t_hoff ;
6142
6144
if (oldlen != newlen || htup -> t_hoff != tuple -> t_data -> t_hoff )
6143
6145
elog (ERROR , "wrong tuple length" );
6144
6146
6147
+ dst = (char * ) htup + htup -> t_hoff ;
6148
+ src = (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ;
6149
+
6145
6150
/*
6146
6151
* Construct shared cache inval if necessary. Note that because we only
6147
6152
* pass the new version of the tuple, this mustn't be used for any
@@ -6160,15 +6165,15 @@ heap_inplace_update_and_unlock(Relation relation,
6160
6165
*/
6161
6166
PreInplace_Inval ();
6162
6167
6163
- /* NO EREPORT(ERROR) from here till changes are logged */
6164
- START_CRIT_SECTION ();
6165
-
6166
- memcpy ((char * ) htup + htup -> t_hoff ,
6167
- (char * ) tuple -> t_data + tuple -> t_data -> t_hoff ,
6168
- newlen );
6169
-
6170
6168
/*----------
6171
- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid:
6169
+ * NO EREPORT(ERROR) from here till changes are complete
6170
+ *
6171
+ * Our buffer lock won't stop a reader having already pinned and checked
6172
+ * visibility for this tuple. Hence, we write WAL first, then mutate the
6173
+ * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6174
+ * checkpoint delay makes that acceptable. With the usual order of
6175
+ * changes, a crash after memcpy() and before XLogInsert() could allow
6176
+ * datfrozenxid to overtake relfrozenxid:
6172
6177
*
6173
6178
* ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6174
6179
* ["R" is a VACUUM tbl]
@@ -6178,31 +6183,57 @@ heap_inplace_update_and_unlock(Relation relation,
6178
6183
* D: raise pg_database.datfrozenxid, XLogInsert(), finish
6179
6184
* [crash]
6180
6185
* [recovery restores datfrozenxid w/o relfrozenxid]
6186
+ *
6187
+ * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6188
+ * the buffer to the stack before logging. Here, that facilitates a FPI
6189
+ * of the post-mutation block before we accept other sessions seeing it.
6181
6190
*/
6182
-
6183
- MarkBufferDirty (buffer );
6191
+ Assert (!MyPgXact -> delayChkpt );
6192
+ START_CRIT_SECTION ();
6193
+ MyPgXact -> delayChkpt = true;
6184
6194
6185
6195
/* XLOG stuff */
6186
6196
if (RelationNeedsWAL (relation ))
6187
6197
{
6188
6198
xl_heap_inplace xlrec ;
6199
+ PGAlignedBlock copied_buffer ;
6200
+ char * origdata = (char * ) BufferGetBlock (buffer );
6201
+ Page page = BufferGetPage (buffer );
6202
+ uint16 lower = ((PageHeader ) page )-> pd_lower ;
6203
+ uint16 upper = ((PageHeader ) page )-> pd_upper ;
6204
+ uintptr_t dst_offset_in_block ;
6205
+ RelFileNode rnode ;
6206
+ ForkNumber forkno ;
6207
+ BlockNumber blkno ;
6189
6208
XLogRecPtr recptr ;
6190
6209
6191
6210
xlrec .offnum = ItemPointerGetOffsetNumber (& tuple -> t_self );
6192
6211
6193
6212
XLogBeginInsert ();
6194
6213
XLogRegisterData ((char * ) & xlrec , SizeOfHeapInplace );
6195
6214
6196
- XLogRegisterBuffer (0 , buffer , REGBUF_STANDARD );
6197
- XLogRegisterBufData (0 , (char * ) htup + htup -> t_hoff , newlen );
6215
+ /* register block matching what buffer will look like after changes */
6216
+ memcpy (copied_buffer .data , origdata , lower );
6217
+ memcpy (copied_buffer .data + upper , origdata + upper , BLCKSZ - upper );
6218
+ dst_offset_in_block = dst - origdata ;
6219
+ memcpy (copied_buffer .data + dst_offset_in_block , src , newlen );
6220
+ BufferGetTag (buffer , & rnode , & forkno , & blkno );
6221
+ Assert (forkno == MAIN_FORKNUM );
6222
+ XLogRegisterBlock (0 , & rnode , forkno , blkno , copied_buffer .data ,
6223
+ REGBUF_STANDARD );
6224
+ XLogRegisterBufData (0 , src , newlen );
6198
6225
6199
6226
/* inplace updates aren't decoded atm, don't log the origin */
6200
6227
6201
6228
recptr = XLogInsert (RM_HEAP_ID , XLOG_HEAP_INPLACE );
6202
6229
6203
- PageSetLSN (BufferGetPage ( buffer ) , recptr );
6230
+ PageSetLSN (page , recptr );
6204
6231
}
6205
6232
6233
+ memcpy (dst , src , newlen );
6234
+
6235
+ MarkBufferDirty (buffer );
6236
+
6206
6237
LockBuffer (buffer , BUFFER_LOCK_UNLOCK );
6207
6238
6208
6239
/*
@@ -6215,6 +6246,7 @@ heap_inplace_update_and_unlock(Relation relation,
6215
6246
*/
6216
6247
AtInplace_Inval ();
6217
6248
6249
+ MyPgXact -> delayChkpt = false;
6218
6250
END_CRIT_SECTION ();
6219
6251
UnlockTuple (relation , & tuple -> t_self , InplaceUpdateTupleLock );
6220
6252
0 commit comments