@@ -401,10 +401,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
401
401
402
402
if (current .backup_mode != BACKUP_MODE_FULL )
403
403
{
404
- elog (LOG , "current_tli: %X" , current .tli );
405
- elog (LOG , "prev_backup-> start_lsn: %X/%X" ,
404
+ elog (LOG , "Current tli: %X" , current .tli );
405
+ elog (LOG , "Parent start_lsn: %X/%X" ,
406
406
(uint32 ) (prev_backup -> start_lsn >> 32 ), (uint32 ) (prev_backup -> start_lsn ));
407
- elog (LOG , "current. start_lsn: %X/%X" ,
407
+ elog (LOG , "start_lsn: %X/%X" ,
408
408
(uint32 ) (current .start_lsn >> 32 ), (uint32 ) (current .start_lsn ));
409
409
}
410
410
@@ -583,9 +583,6 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
583
583
/* Notify end of backup */
584
584
pg_stop_backup (& current , pg_startbackup_conn , nodeInfo );
585
585
586
- elog (LOG , "current.stop_lsn: %X/%X" ,
587
- (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
588
-
589
586
/* In case of backup from replica >= 9.6 we must fix minRecPoint,
590
587
* First we must find pg_control in backup_files_list.
591
588
*/
@@ -1742,65 +1739,66 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
1742
1739
/* Calculate LSN */
1743
1740
stop_backup_lsn_tmp = ((uint64 ) lsn_hi ) << 32 | lsn_lo ;
1744
1741
1742
+ /* It is ok for replica to return invalid STOP LSN
1743
+ * UPD: Apparently it is ok even for a master.
1744
+ */
1745
1745
if (!XRecOffIsValid (stop_backup_lsn_tmp ))
1746
1746
{
1747
- /* It is ok for replica to return STOP LSN with NullXRecOff
1748
- * UPD: Apparently it is ok even for master.
1749
- */
1750
- if (XRecOffIsNull (stop_backup_lsn_tmp ))
1751
- {
1752
- char * xlog_path ,
1753
- stream_xlog_path [MAXPGPATH ];
1754
- XLogSegNo segno = 0 ;
1755
- XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
1747
+ char * xlog_path ,
1748
+ stream_xlog_path [MAXPGPATH ];
1749
+ XLogSegNo segno = 0 ;
1750
+ XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
1756
1751
1757
- /*
1758
- * Even though the value is invalid, it's expected postgres behaviour
1759
- * and we're trying to fix it below.
1760
- */
1761
- elog (LOG , "Null offset in stop_backup_lsn value %X/%X, trying to fix" ,
1762
- (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
1752
+ /*
1753
+ * Even though the value is invalid, it's expected postgres behaviour
1754
+ * and we're trying to fix it below.
1755
+ */
1756
+ elog (LOG , "Invalid offset in stop_lsn value %X/%X, trying to fix" ,
1757
+ (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
1763
1758
1764
- /*
1765
- * Note: even with gdb it is very hard to produce automated tests for
1766
- * contrecord + NullXRecOff , so emulate it for manual testing.
1767
- */
1768
- //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1769
- //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1770
- // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
1759
+ /*
1760
+ * Note: even with gdb it is very hard to produce automated tests for
1761
+ * contrecord + invalid LSN , so emulate it for manual testing.
1762
+ */
1763
+ //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1764
+ //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1765
+ // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
1771
1766
1772
- if (stream_wal )
1773
- {
1774
- pgBackupGetPath2 (backup , stream_xlog_path ,
1775
- lengthof (stream_xlog_path ),
1776
- DATABASE_DIR , PG_XLOG_DIR );
1777
- xlog_path = stream_xlog_path ;
1778
- }
1779
- else
1780
- xlog_path = arclog_path ;
1767
+ if (stream_wal )
1768
+ {
1769
+ pgBackupGetPath2 (backup , stream_xlog_path ,
1770
+ lengthof (stream_xlog_path ),
1771
+ DATABASE_DIR , PG_XLOG_DIR );
1772
+ xlog_path = stream_xlog_path ;
1773
+ }
1774
+ else
1775
+ xlog_path = arclog_path ;
1781
1776
1782
- GetXLogSegNo (stop_backup_lsn_tmp , segno , instance_config .xlog_seg_size );
1777
+ GetXLogSegNo (stop_backup_lsn_tmp , segno , instance_config .xlog_seg_size );
1783
1778
1784
- /*
1785
- * Note, that there is no guarantee that corresponding WAL file even exists.
1786
- * Replica may return LSN from future and keep staying in present.
1787
- * Or it can return LSN with NullXRecOff .
1788
- *
1789
- * That's bad, since we want to get real LSN to save it in backup label file
1790
- * and to use it in WAL validation.
1791
- *
1792
- * So we try to do the following:
1793
- * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1794
- * look for the first valid record in it.
1795
- * It solves the problem of occasional invalid XRecOff on write-busy system.
1796
- * 2. Failing that, look for record in previous segment with endpoint
1797
- * equal or greater than stop_lsn. It may(!) solve the problem of NullXRecOff
1798
- * on write-idle system. If that fails too, error out.
1799
- */
1779
+ /*
1780
+ * Note that there is no guarantee that the corresponding WAL file even exists.
1781
+ * Replica may return LSN from future and keep staying in present.
1782
+ * Or it can return invalid LSN .
1783
+ *
1784
+ * That's bad, since we want to get real LSN to save it in backup label file
1785
+ * and to use it in WAL validation.
1786
+ *
1787
+ * So we try to do the following:
1788
+ * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1789
+ * look for the first valid record in it.
1790
+ * It solves the problem of occasional invalid LSN on write-busy system.
1791
+ * 2. Failing that, look for record in previous segment with endpoint
1792
+ * equal or greater than stop_lsn. It may(!) solve the problem of invalid LSN
1793
+ * on write-idle system. If that fails too, error out.
1794
+ */
1800
1795
1796
+ /* stop_lsn points to byte 0 of an xlog segment (i.e. it is a multiple of the segment size) */
1797
+ if (stop_backup_lsn_tmp % instance_config .xlog_seg_size == 0 )
1798
+ {
1801
1799
/* Wait for segment with current stop_lsn, it is ok for it to never arrive */
1802
1800
wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1803
- false, true, WARNING , stream_wal );
1801
+ false, true, WARNING , stream_wal );
1804
1802
1805
1803
/* Get the first record in segment with current stop_lsn */
1806
1804
lsn_tmp = get_first_record_lsn (xlog_path , segno , backup -> tli ,
@@ -1836,17 +1834,39 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
1836
1834
(uint32 ) (stop_backup_lsn_tmp >> 32 ),
1837
1835
(uint32 ) (stop_backup_lsn_tmp ));
1838
1836
}
1837
+ }
1838
+ /* stop_lsn is a multiple of the xlog block size: look up the next record after it */
1839
+ else if (stop_backup_lsn_tmp % XLOG_BLCKSZ == 0 )
1840
+ {
1841
+ /* Wait for segment with current stop_lsn */
1842
+ wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1843
+ false, true, ERROR , stream_wal );
1844
+
1845
+ /* Get the next closest record in segment with current stop_lsn */
1846
+ lsn_tmp = get_next_record_lsn (xlog_path , segno , backup -> tli ,
1847
+ instance_config .xlog_seg_size ,
1848
+ instance_config .archive_timeout ,
1849
+ stop_backup_lsn_tmp );
1839
1850
1840
- /* Setting stop_backup_lsn will set stop point for streaming */
1841
- stop_backup_lsn = lsn_tmp ;
1842
- stop_lsn_exists = true;
1851
+ /* sanity */
1852
+ if (!XRecOffIsValid (lsn_tmp ) || XLogRecPtrIsInvalid (lsn_tmp ))
1853
+ elog (ERROR , "Failed to get WAL record next to %X/%X" ,
1854
+ (uint32 ) (stop_backup_lsn_tmp >> 32 ),
1855
+ (uint32 ) (stop_backup_lsn_tmp ));
1843
1856
}
1844
1857
/* PostgreSQL returned something very illegal as STOP_LSN, error out */
1845
1858
else
1846
1859
elog (ERROR , "Invalid stop_backup_lsn value %X/%X" ,
1847
1860
(uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
1861
+
1862
+ /* Setting stop_backup_lsn will set stop point for streaming */
1863
+ stop_backup_lsn = lsn_tmp ;
1864
+ stop_lsn_exists = true;
1848
1865
}
1849
1866
1867
+ elog (LOG , "stop_lsn: %X/%X" ,
1868
+ (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
1869
+
1850
1870
/* Write backup_label and tablespace_map */
1851
1871
if (!exclusive_backup )
1852
1872
{
0 commit comments