@@ -192,7 +192,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
192
192
{
193
193
/* try to setup multi-timeline backup chain */
194
194
elog (WARNING , "Valid backup on current timeline %u is not found, "
195
- "try to look up on previous timelines" ,
195
+ "trying to look up on previous timelines" ,
196
196
current .tli );
197
197
198
198
tli_list = catalog_get_timelines (& instance_config );
@@ -333,7 +333,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
333
333
334
334
/* list files with the logical path. omit $PGDATA */
335
335
dir_list_file (backup_files_list , instance_config .pgdata ,
336
- true, true, false, 0 , FIO_DB_HOST );
336
+ true, true, false, true, 0 , FIO_DB_HOST );
337
337
338
338
/*
339
339
* Get database_map (name to oid) for use in partial restore feature.
@@ -350,7 +350,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
350
350
/* External dirs numeration starts with 1.
351
351
* 0 value is not external dir */
352
352
dir_list_file (backup_files_list , parray_get (external_dirs , i ),
353
- false, true, false, i + 1 , FIO_DB_HOST );
353
+ false, true, false, true, i + 1 , FIO_DB_HOST );
354
354
355
355
/* close ssh session in main thread */
356
356
fio_disconnect ();
@@ -401,10 +401,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
401
401
402
402
if (current .backup_mode != BACKUP_MODE_FULL )
403
403
{
404
- elog (LOG , "current_tli: %X" , current .tli );
405
- elog (LOG , "prev_backup-> start_lsn: %X/%X" ,
404
+ elog (LOG , "Current tli: %X" , current .tli );
405
+ elog (LOG , "Parent start_lsn: %X/%X" ,
406
406
(uint32 ) (prev_backup -> start_lsn >> 32 ), (uint32 ) (prev_backup -> start_lsn ));
407
- elog (LOG , "current. start_lsn: %X/%X" ,
407
+ elog (LOG , "start_lsn: %X/%X" ,
408
408
(uint32 ) (current .start_lsn >> 32 ), (uint32 ) (current .start_lsn ));
409
409
}
410
410
@@ -436,10 +436,11 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
436
436
/*
437
437
* Build the page map from ptrack information.
438
438
*/
439
- if (nodeInfo -> ptrack_version_num = = 20 )
439
+ if (nodeInfo -> ptrack_version_num > = 20 )
440
440
make_pagemap_from_ptrack_2 (backup_files_list , backup_conn ,
441
- nodeInfo -> ptrack_schema ,
442
- prev_backup_start_lsn );
441
+ nodeInfo -> ptrack_schema ,
442
+ nodeInfo -> ptrack_version_num ,
443
+ prev_backup_start_lsn );
443
444
else if (nodeInfo -> ptrack_version_num == 15 ||
444
445
nodeInfo -> ptrack_version_num == 16 ||
445
446
nodeInfo -> ptrack_version_num == 17 )
@@ -582,9 +583,6 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
582
583
/* Notify end of backup */
583
584
pg_stop_backup (& current , pg_startbackup_conn , nodeInfo );
584
585
585
- elog (LOG , "current.stop_lsn: %X/%X" ,
586
- (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
587
-
588
586
/* In case of backup from replica >= 9.6 we must fix minRecPoint,
589
587
* First we must find pg_control in backup_files_list.
590
588
*/
@@ -626,7 +624,7 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync)
626
624
/* Scan backup PG_XLOG_DIR */
627
625
xlog_files_list = parray_new ();
628
626
join_path_components (pg_xlog_path , database_path , PG_XLOG_DIR );
629
- dir_list_file (xlog_files_list , pg_xlog_path , false, true, false, 0 ,
627
+ dir_list_file (xlog_files_list , pg_xlog_path , false, true, false, true, 0 ,
630
628
FIO_BACKUP_HOST );
631
629
632
630
/* TODO: Drop streamed WAL segments greater than stop_lsn */
@@ -884,15 +882,10 @@ do_backup(time_t start_time, bool no_validate,
884
882
#endif
885
883
886
884
get_ptrack_version (backup_conn , & nodeInfo );
887
- // elog(WARNING, "ptrack_version_num %d", ptrack_version_num);
885
+ // elog(WARNING, "ptrack_version_num %d", ptrack_version_num);
888
886
889
887
if (nodeInfo .ptrack_version_num > 0 )
890
- {
891
- if (nodeInfo .ptrack_version_num >= 20 )
892
- nodeInfo .is_ptrack_enable = pg_ptrack_enable2 (backup_conn );
893
- else
894
- nodeInfo .is_ptrack_enable = pg_ptrack_enable (backup_conn );
895
- }
888
+ nodeInfo .is_ptrack_enable = pg_ptrack_enable (backup_conn , nodeInfo .ptrack_version_num );
896
889
897
890
if (current .backup_mode == BACKUP_MODE_DIFF_PTRACK )
898
891
{
@@ -1746,65 +1739,66 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
1746
1739
/* Calculate LSN */
1747
1740
stop_backup_lsn_tmp = ((uint64 ) lsn_hi ) << 32 | lsn_lo ;
1748
1741
1742
+ /* It is ok for replica to return invalid STOP LSN
1743
+ * UPD: Apparently it is ok even for a master.
1744
+ */
1749
1745
if (!XRecOffIsValid (stop_backup_lsn_tmp ))
1750
1746
{
1751
- /* It is ok for replica to return STOP LSN with NullXRecOff
1752
- * UPD: Apparently it is ok even for master.
1753
- */
1754
- if (XRecOffIsNull (stop_backup_lsn_tmp ))
1755
- {
1756
- char * xlog_path ,
1757
- stream_xlog_path [MAXPGPATH ];
1758
- XLogSegNo segno = 0 ;
1759
- XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
1747
+ char * xlog_path ,
1748
+ stream_xlog_path [MAXPGPATH ];
1749
+ XLogSegNo segno = 0 ;
1750
+ XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
1760
1751
1761
- /*
1762
- * Even though the value is invalid, it's expected postgres behaviour
1763
- * and we're trying to fix it below.
1764
- */
1765
- elog (LOG , "Null offset in stop_backup_lsn value %X/%X, trying to fix" ,
1766
- (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
1752
+ /*
1753
+ * Even though the value is invalid, it's expected postgres behaviour
1754
+ * and we're trying to fix it below.
1755
+ */
1756
+ elog (LOG , "Invalid offset in stop_lsn value %X/%X, trying to fix" ,
1757
+ (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
1767
1758
1768
- /*
1769
- * Note: even with gdb it is very hard to produce automated tests for
1770
- * contrecord + NullXRecOff , so emulate it for manual testing.
1771
- */
1772
- //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1773
- //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1774
- // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
1759
+ /*
1760
+ * Note: even with gdb it is very hard to produce automated tests for
1761
+ * contrecord + invalid LSN , so emulate it for manual testing.
1762
+ */
1763
+ //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1764
+ //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1765
+ // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
1775
1766
1776
- if (stream_wal )
1777
- {
1778
- pgBackupGetPath2 (backup , stream_xlog_path ,
1779
- lengthof (stream_xlog_path ),
1780
- DATABASE_DIR , PG_XLOG_DIR );
1781
- xlog_path = stream_xlog_path ;
1782
- }
1783
- else
1784
- xlog_path = arclog_path ;
1767
+ if (stream_wal )
1768
+ {
1769
+ pgBackupGetPath2 (backup , stream_xlog_path ,
1770
+ lengthof (stream_xlog_path ),
1771
+ DATABASE_DIR , PG_XLOG_DIR );
1772
+ xlog_path = stream_xlog_path ;
1773
+ }
1774
+ else
1775
+ xlog_path = arclog_path ;
1785
1776
1786
- GetXLogSegNo (stop_backup_lsn_tmp , segno , instance_config .xlog_seg_size );
1777
+ GetXLogSegNo (stop_backup_lsn_tmp , segno , instance_config .xlog_seg_size );
1787
1778
1788
- /*
1789
- * Note, that there is no guarantee that corresponding WAL file even exists.
1790
- * Replica may return LSN from future and keep staying in present.
1791
- * Or it can return LSN with NullXRecOff .
1792
- *
1793
- * That's bad, since we want to get real LSN to save it in backup label file
1794
- * and to use it in WAL validation.
1795
- *
1796
- * So we try to do the following:
1797
- * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1798
- * look for the first valid record in it.
1799
- * It solves the problem of occasional invalid XRecOff on write-busy system.
1800
- * 2. Failing that, look for record in previous segment with endpoint
1801
- * equal or greater than stop_lsn. It may(!) solve the problem of NullXRecOff
1802
- * on write-idle system. If that fails too, error out.
1803
- */
1779
+ /*
1780
+ * Note that there is no guarantee that the corresponding WAL file even exists.
1781
+ * A replica may return an LSN from the future while itself staying in the present.
1782
+ * Or it can return an invalid LSN.
1783
+ *
1784
+ * That's bad, since we want to get real LSN to save it in backup label file
1785
+ * and to use it in WAL validation.
1786
+ *
1787
+ * So we try to do the following:
1788
+ * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1789
+ * look for the first valid record in it.
1790
+ * It solves the problem of occasional invalid LSN on write-busy system.
1791
+ * 2. Failing that, look for record in previous segment with endpoint
1792
+ * equal or greater than stop_lsn. It may(!) solve the problem of invalid LSN
1793
+ * on write-idle system. If that fails too, error out.
1794
+ */
1804
1795
1796
+ /* stop_lsn points to byte 0 (the very start) of an xlog segment */
1797
+ if (stop_backup_lsn_tmp % instance_config .xlog_seg_size == 0 )
1798
+ {
1805
1799
/* Wait for segment with current stop_lsn, it is ok for it to never arrive */
1806
1800
wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1807
- false, true, WARNING , stream_wal );
1801
+ false, true, WARNING , stream_wal );
1808
1802
1809
1803
/* Get the first record in segment with current stop_lsn */
1810
1804
lsn_tmp = get_first_record_lsn (xlog_path , segno , backup -> tli ,
@@ -1840,17 +1834,39 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
1840
1834
(uint32 ) (stop_backup_lsn_tmp >> 32 ),
1841
1835
(uint32 ) (stop_backup_lsn_tmp ));
1842
1836
}
1837
+ }
1838
+ /* stop lsn is aligned to xlog block size, just find next lsn */
1839
+ else if (stop_backup_lsn_tmp % XLOG_BLCKSZ == 0 )
1840
+ {
1841
+ /* Wait for segment with current stop_lsn */
1842
+ wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1843
+ false, true, ERROR , stream_wal );
1844
+
1845
+ /* Get the next closest record in segment with current stop_lsn */
1846
+ lsn_tmp = get_next_record_lsn (xlog_path , segno , backup -> tli ,
1847
+ instance_config .xlog_seg_size ,
1848
+ instance_config .archive_timeout ,
1849
+ stop_backup_lsn_tmp );
1843
1850
1844
- /* Setting stop_backup_lsn will set stop point for streaming */
1845
- stop_backup_lsn = lsn_tmp ;
1846
- stop_lsn_exists = true;
1851
+ /* sanity */
1852
+ if (!XRecOffIsValid (lsn_tmp ) || XLogRecPtrIsInvalid (lsn_tmp ))
1853
+ elog (ERROR , "Failed to get WAL record next to %X/%X" ,
1854
+ (uint32 ) (stop_backup_lsn_tmp >> 32 ),
1855
+ (uint32 ) (stop_backup_lsn_tmp ));
1847
1856
}
1848
1857
/* PostgreSQL returned something very illegal as STOP_LSN, error out */
1849
1858
else
1850
1859
elog (ERROR , "Invalid stop_backup_lsn value %X/%X" ,
1851
1860
(uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
1861
+
1862
+ /* Setting stop_backup_lsn will set stop point for streaming */
1863
+ stop_backup_lsn = lsn_tmp ;
1864
+ stop_lsn_exists = true;
1852
1865
}
1853
1866
1867
+ elog (LOG , "stop_lsn: %X/%X" ,
1868
+ (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
1869
+
1854
1870
/* Write backup_label and tablespace_map */
1855
1871
if (!exclusive_backup )
1856
1872
{
0 commit comments