Skip to content

Commit a5eb8db

Browse files
committed
[Issue #92] Refactoring of catalog_get_last_data_backup(). First we look for the latest valid FULL backup, which is the chain ancestor. Then the latest valid child of the chain ancestor becomes the parent of the current backup.
1 parent 4217bc2 commit a5eb8db

File tree

4 files changed

+246
-9
lines changed

4 files changed

+246
-9
lines changed

src/backup.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ do_backup_instance(PGconn *backup_conn)
198198
/* get list of backups already taken */
199199
backup_list = catalog_get_backup_list(INVALID_BACKUP_ID);
200200

201-
prev_backup = catalog_get_last_data_backup(backup_list, current.tli);
201+
prev_backup = catalog_get_last_data_backup(backup_list, current.tli, current.start_time);
202202
if (prev_backup == NULL)
203203
elog(ERROR, "Valid backup on current timeline is not found. "
204204
"Create new FULL backup before an incremental one.");

src/catalog.c

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -444,10 +444,12 @@ catalog_lock_backup_list(parray *backup_list, int from_idx, int to_idx)
444444
* Find the latest valid child of latest valid FULL backup on given timeline
445445
*/
446446
pgBackup *
447-
catalog_get_last_data_backup(parray *backup_list, TimeLineID tli)
447+
catalog_get_last_data_backup(parray *backup_list, TimeLineID tli, time_t current_start_time)
448448
{
449449
int i;
450450
pgBackup *full_backup = NULL;
451+
pgBackup *tmp_backup = NULL;
452+
char *invalid_backup_id;
451453

452454
/* backup_list is sorted in order of descending ID */
453455
for (i = 0; i < parray_num(backup_list); i++)
@@ -465,23 +467,65 @@ catalog_get_last_data_backup(parray *backup_list, TimeLineID tli)
465467

466468
/* Failed to find valid FULL backup to fulfill ancestor role */
467469
if (!full_backup)
470+
{
471+
elog(WARNING, "Failed to find a valid backup chain");
468472
return NULL;
473+
}
474+
475+
elog(INFO, "Latest valid FULL backup: %s",
476+
base36enc(full_backup->start_time));
469477

470478
/* FULL backup is found, lets find his latest child */
471479
for (i = 0; i < parray_num(backup_list); i++)
472480
{
473481
pgBackup *backup = (pgBackup *) parray_get(backup_list, i);
474482

475-
if (is_parent(full_backup->start_time, backup, true))
483+
/* only valid descendants are acceptable */
484+
if ((backup->status == BACKUP_STATUS_OK ||
485+
backup->status == BACKUP_STATUS_DONE))
476486
{
477-
478-
/* only valid descendants are acceptable */
479-
if (backup->status == BACKUP_STATUS_OK ||
480-
backup->status == BACKUP_STATUS_DONE)
487+
switch (scan_parent_chain(backup, &tmp_backup))
481488
{
482-
return backup;
489+
/* broken chain */
490+
case 0:
491+
invalid_backup_id = base36enc_dup(tmp_backup->parent_backup);
492+
493+
elog(WARNING, "Backup %s has missing parent: %s. Cannot be a parent",
494+
base36enc(backup->start_time), invalid_backup_id);
495+
pg_free(invalid_backup_id);
496+
continue;
497+
498+
/* chain is intact, but at least one parent is invalid */
499+
case 1:
500+
invalid_backup_id = base36enc_dup(tmp_backup->start_time);
501+
502+
elog(WARNING, "Backup %s has invalid parent: %s. Cannot be a parent",
503+
base36enc(backup->start_time), invalid_backup_id);
504+
pg_free(invalid_backup_id);
505+
continue;
506+
507+
/* chain is ok */
508+
case 2 :
509+
/* Yes, we could call is_parent() earlier, after choosing the ancestor,
510+
* but this way we have an opportunity to report about all possible
511+
* anomalies.
512+
*/
513+
if (is_parent(full_backup->start_time, backup, true))
514+
{
515+
elog(INFO, "Parent backup: %s",
516+
base36enc(backup->start_time));
517+
return backup;
518+
}
483519
}
484520
}
521+
/* skip yourself */
522+
else if (backup->start_time == current_start_time)
523+
continue;
524+
else
525+
{
526+
elog(WARNING, "Backup %s has status: %s. Cannot be a parent.",
527+
base36enc(backup->start_time), status2str(backup->status));
528+
}
485529
}
486530

487531
return NULL;

src/pg_probackup.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,8 @@ extern parray *catalog_get_backup_list(time_t requested_backup_id);
553553
extern void catalog_lock_backup_list(parray *backup_list, int from_idx,
554554
int to_idx);
555555
extern pgBackup *catalog_get_last_data_backup(parray *backup_list,
556-
TimeLineID tli);
556+
TimeLineID tli,
557+
time_t current_start_time);
557558
extern void pgBackupWriteControl(FILE *out, pgBackup *backup);
558559
extern void write_backup_filelist(pgBackup *backup, parray *files,
559560
const char *root, parray *external_list);

tests/backup.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33
from time import sleep
44
from .helpers.ptrack_helpers import ProbackupTest, ProbackupException
5+
import shutil
56

67

78
module_name = 'backup'
@@ -1475,3 +1476,194 @@ def test_backup_with_least_privileges_role(self):
14751476

14761477
# Clean after yourself
14771478
self.del_test_dir(module_name, fname)
1479+
1480+
# @unittest.skip("skip")
1481+
def test_parent_choosing(self):
1482+
"""
1483+
PAGE3 <- RUNNING(parent should be FULL)
1484+
PAGE2 <- OK
1485+
PAGE1 <- CORRUPT
1486+
FULL
1487+
"""
1488+
fname = self.id().split('.')[3]
1489+
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
1490+
node = self.make_simple_node(
1491+
base_dir=os.path.join(module_name, fname, 'node'),
1492+
set_replication=True,
1493+
initdb_params=['--data-checksums'])
1494+
1495+
self.init_pb(backup_dir)
1496+
self.add_instance(backup_dir, 'node', node)
1497+
self.set_archiving(backup_dir, 'node', node)
1498+
node.slow_start()
1499+
1500+
full_id = self.backup_node(backup_dir, 'node', node)
1501+
1502+
# PAGE1
1503+
page1_id = self.backup_node(
1504+
backup_dir, 'node', node, backup_type='page')
1505+
1506+
# PAGE2
1507+
page2_id = self.backup_node(
1508+
backup_dir, 'node', node, backup_type='page')
1509+
1510+
# Change PAGE1 to ERROR
1511+
self.change_backup_status(backup_dir, 'node', page1_id, 'ERROR')
1512+
1513+
# PAGE3
1514+
page3_id = self.backup_node(
1515+
backup_dir, 'node', node,
1516+
backup_type='page', options=['--log-level-file=LOG'])
1517+
1518+
log_file_path = os.path.join(backup_dir, 'log', 'pg_probackup.log')
1519+
with open(log_file_path) as f:
1520+
log_file_content = f.read()
1521+
1522+
self.assertIn(
1523+
"WARNING: Backup {0} has invalid parent: {1}. "
1524+
"Cannot be a parent".format(page2_id, page1_id),
1525+
log_file_content)
1526+
1527+
self.assertIn(
1528+
"WARNING: Backup {0} has status: ERROR. "
1529+
"Cannot be a parent".format(page1_id),
1530+
log_file_content)
1531+
1532+
self.assertIn(
1533+
"Parent backup: {0}".format(full_id),
1534+
log_file_content)
1535+
1536+
self.assertEqual(
1537+
self.show_pb(
1538+
backup_dir, 'node', backup_id=page3_id)['parent-backup-id'],
1539+
full_id)
1540+
1541+
# Clean after yourself
1542+
self.del_test_dir(module_name, fname)
1543+
1544+
# @unittest.skip("skip")
1545+
def test_parent_choosing_1(self):
1546+
"""
1547+
PAGE3 <- RUNNING(parent should be FULL)
1548+
PAGE2 <- OK
1549+
PAGE1 <- (missing)
1550+
FULL
1551+
"""
1552+
fname = self.id().split('.')[3]
1553+
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
1554+
node = self.make_simple_node(
1555+
base_dir=os.path.join(module_name, fname, 'node'),
1556+
set_replication=True,
1557+
initdb_params=['--data-checksums'])
1558+
1559+
self.init_pb(backup_dir)
1560+
self.add_instance(backup_dir, 'node', node)
1561+
self.set_archiving(backup_dir, 'node', node)
1562+
node.slow_start()
1563+
1564+
full_id = self.backup_node(backup_dir, 'node', node)
1565+
1566+
# PAGE1
1567+
page1_id = self.backup_node(
1568+
backup_dir, 'node', node, backup_type='page')
1569+
1570+
# PAGE2
1571+
page2_id = self.backup_node(
1572+
backup_dir, 'node', node, backup_type='page')
1573+
1574+
# Delete PAGE1
1575+
shutil.rmtree(
1576+
os.path.join(backup_dir, 'backups', 'node', page1_id))
1577+
1578+
# PAGE3
1579+
page3_id = self.backup_node(
1580+
backup_dir, 'node', node,
1581+
backup_type='page', options=['--log-level-file=LOG'])
1582+
1583+
log_file_path = os.path.join(backup_dir, 'log', 'pg_probackup.log')
1584+
with open(log_file_path) as f:
1585+
log_file_content = f.read()
1586+
1587+
self.assertIn(
1588+
"WARNING: Backup {0} has missing parent: {1}. "
1589+
"Cannot be a parent".format(page2_id, page1_id),
1590+
log_file_content)
1591+
1592+
self.assertIn(
1593+
"Parent backup: {0}".format(full_id),
1594+
log_file_content)
1595+
1596+
self.assertEqual(
1597+
self.show_pb(
1598+
backup_dir, 'node', backup_id=page3_id)['parent-backup-id'],
1599+
full_id)
1600+
1601+
# Clean after yourself
1602+
self.del_test_dir(module_name, fname)
1603+
1604+
# @unittest.skip("skip")
1605+
def test_parent_choosing_2(self):
1606+
"""
1607+
PAGE3 <- RUNNING(backup should fail)
1608+
PAGE2 <- OK
1609+
PAGE1 <- OK
1610+
FULL <- (missing)
1611+
"""
1612+
fname = self.id().split('.')[3]
1613+
backup_dir = os.path.join(self.tmp_path, module_name, fname, 'backup')
1614+
node = self.make_simple_node(
1615+
base_dir=os.path.join(module_name, fname, 'node'),
1616+
set_replication=True,
1617+
initdb_params=['--data-checksums'])
1618+
1619+
self.init_pb(backup_dir)
1620+
self.add_instance(backup_dir, 'node', node)
1621+
self.set_archiving(backup_dir, 'node', node)
1622+
node.slow_start()
1623+
1624+
full_id = self.backup_node(backup_dir, 'node', node)
1625+
1626+
# PAGE1
1627+
page1_id = self.backup_node(
1628+
backup_dir, 'node', node, backup_type='page')
1629+
1630+
# PAGE2
1631+
page2_id = self.backup_node(
1632+
backup_dir, 'node', node, backup_type='page')
1633+
1634+
# Delete FULL
1635+
shutil.rmtree(
1636+
os.path.join(backup_dir, 'backups', 'node', full_id))
1637+
1638+
# PAGE3
1639+
try:
1640+
self.backup_node(
1641+
backup_dir, 'node', node,
1642+
backup_type='page', options=['--log-level-file=LOG'])
1643+
# we should die here because exception is what we expect to happen
1644+
self.assertEqual(
1645+
1, 0,
1646+
"Expecting Error because FULL backup is missing"
1647+
"\n Output: {0} \n CMD: {1}".format(
1648+
repr(self.output), self.cmd))
1649+
except ProbackupException as e:
1650+
self.assertIn(
1651+
'WARNING: Failed to find a valid backup chain',
1652+
e.message,
1653+
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
1654+
repr(e.message), self.cmd))
1655+
1656+
self.assertIn(
1657+
'ERROR: Valid backup on current timeline is not found. '
1658+
'Create new FULL backup before an incremental one.',
1659+
e.message,
1660+
'\n Unexpected Error Message: {0}\n CMD: {1}'.format(
1661+
repr(e.message), self.cmd))
1662+
1663+
self.assertEqual(
1664+
self.show_pb(
1665+
backup_dir, 'node')[2]['status'],
1666+
'ERROR')
1667+
1668+
# Clean after yourself
1669+
self.del_test_dir(module_name, fname)

0 commit comments

Comments
 (0)