Skip to content

Make pg_basebackup work with encrypted WAL #473

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: TDE_REL_17_STABLE
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions contrib/pg_tde/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ tap_tests = [
't/pg_waldump_fullpage.pl',
't/replication.pl',
't/rotate_key.pl',
't/standby_source.pl',
't/tde_heap.pl',
't/unlogged_tables.pl',
't/wal_encrypt.pl',
Expand Down
20 changes: 17 additions & 3 deletions contrib/pg_tde/src/access/pg_tde_tdemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#define TDE_FILE_HEADER_SIZE sizeof(TDEFileHeader)

#define MaxXLogRecPtr (~(XLogRecPtr)0)
#define MaxTimeLineID (~(TimeLineID)0)

typedef struct TDEFileHeader
{
Expand Down Expand Up @@ -369,13 +370,19 @@ pg_tde_delete_principal_key(Oid dbOid)
* needs keyfile_path
*/
void
pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, TimeLineID tli, const char *keyfile_path)
{
LWLock *lock_pk = tde_lwlock_enc_keys();
int fd;
off_t read_pos,
write_pos,
last_key_idx;
struct
{
XLogRecPtr start_lsn;
TimeLineID tli;
} lsn_tli;


LWLockAcquire(lock_pk, LW_EXCLUSIVE);

Expand All @@ -384,7 +391,10 @@ pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
last_key_idx = ((lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / MAP_ENTRY_SIZE) - 1;
write_pos = TDE_FILE_HEADER_SIZE + (last_key_idx * MAP_ENTRY_SIZE) + offsetof(TDEMapEntry, enc_key) + offsetof(InternalKey, start_lsn);

if (pg_pwrite(fd, &lsn, sizeof(XLogRecPtr), write_pos) != sizeof(XLogRecPtr))
lsn_tli.start_lsn = lsn;
lsn_tli.tli = tli;

if (pg_pwrite(fd, &lsn_tli, sizeof(lsn_tli), write_pos) != sizeof(lsn_tli))
{
ereport(ERROR,
errcode_for_file_access(),
Expand All @@ -408,7 +418,7 @@ pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
errmsg("could not read previous WAL key: %m"));
}

if (prev_map_entry.enc_key.start_lsn >= lsn)
if (prev_map_entry.enc_key.start_lsn >= lsn && prev_map_entry.enc_key.tli >= tli)
{
prev_map_entry.enc_key.type = TDE_KEY_TYPE_WAL_INVALID;

Expand Down Expand Up @@ -1071,6 +1081,7 @@ pg_tde_fetch_wal_keys(XLogRecPtr start_lsn)
WALKeyCacheRec *wal_rec;
InternalKey stub_key = {
.start_lsn = InvalidXLogRecPtr,
.tli = 0,
};

wal_rec = pg_tde_add_wal_key_to_cache(&stub_key, InvalidXLogRecPtr);
Expand Down Expand Up @@ -1132,8 +1143,10 @@ pg_tde_add_wal_key_to_cache(InternalKey *key, XLogRecPtr start_lsn)
MemoryContextSwitchTo(oldCtx);
#endif

wal_rec->start_tli = key->tli;
wal_rec->start_lsn = start_lsn;
wal_rec->end_lsn = MaxXLogRecPtr;
wal_rec->end_tli = MaxTimeLineID;
wal_rec->key = *key;
wal_rec->crypt_ctx = NULL;
if (!tde_wal_key_last_rec)
Expand All @@ -1145,6 +1158,7 @@ pg_tde_add_wal_key_to_cache(InternalKey *key, XLogRecPtr start_lsn)
{
tde_wal_key_last_rec->next = wal_rec;
tde_wal_key_last_rec->end_lsn = wal_rec->start_lsn;
tde_wal_key_last_rec->end_tli = wal_rec->start_tli;
tde_wal_key_last_rec = wal_rec;
}

Expand Down
94 changes: 73 additions & 21 deletions contrib/pg_tde/src/access/pg_tde_xlog_smgr.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ typedef struct EncryptionStateData
{
char db_map_path[MAXPGPATH];
pg_atomic_uint64 enc_key_lsn; /* to sync with readers */
pg_atomic_uint64 enc_key_tli; /* to sync with readers */
} EncryptionStateData;

static EncryptionStateData *EncryptionState = NULL;
Expand All @@ -85,6 +86,18 @@ TDEXLogSetEncKeyLsn(XLogRecPtr start_lsn)
pg_atomic_write_u64(&EncryptionState->enc_key_lsn, start_lsn);
}

/*
 * Return the timeline ID currently stored in EncryptionState->enc_key_tli.
 *
 * The field is a 64-bit atomic, so the value is read atomically and then
 * narrowed back to TimeLineID.
 */
static TimeLineID
TDEXLogGetEncKeyTli(void)
{
	return (TimeLineID) pg_atomic_read_u64(&EncryptionState->enc_key_tli);
}

/*
 * Atomically store tli into EncryptionState->enc_key_tli (a 64-bit atomic),
 * which is the field TDEXLogGetEncKeyTli() reads back.
 */
static void
TDEXLogSetEncKeyTli(TimeLineID tli)
{
pg_atomic_write_u64(&EncryptionState->enc_key_tli, tli);
}

static Size TDEXLogEncryptBuffSize(void);

static int XLOGChooseNumBuffers(void);
Expand Down Expand Up @@ -159,6 +172,7 @@ TDEXLogShmemInit(void)
}

pg_atomic_init_u64(&EncryptionState->enc_key_lsn, 0);
pg_atomic_init_u64(&EncryptionState->enc_key_tli, 0);

elog(DEBUG1, "pg_tde: initialized encryption buffer %lu bytes", TDEXLogEncryptStateSize());
}
Expand All @@ -169,6 +183,7 @@ typedef struct EncryptionStateData
{
char db_map_path[MAXPGPATH];
XLogRecPtr enc_key_lsn; /* to sync with reader */
XLogRecPtr enc_key_tli; /* to sync with reader */
} EncryptionStateData;

static EncryptionStateData EncryptionStateD = {0};
Expand All @@ -186,7 +201,19 @@ TDEXLogGetEncKeyLsn()
static void
TDEXLogSetEncKeyLsn(XLogRecPtr start_lsn)
{
EncryptionState->enc_key_lsn = EncryptionKey.start_lsn;
EncryptionState->enc_key_lsn = start_lsn;
}

/*
 * Return the timeline ID stored in EncryptionState->enc_key_tli.
 *
 * This is the variant for the build in which EncryptionStateData uses plain
 * (non-atomic) fields, so a direct read is sufficient.
 */
static TimeLineID
TDEXLogGetEncKeyTli(void)
{
	return (TimeLineID) EncryptionState->enc_key_tli;
}

/*
 * Record the timeline ID that accompanies the current WAL key's start LSN.
 *
 * The value must go into enc_key_tli. The start LSN lives separately in
 * enc_key_lsn (see TDEXLogSetEncKeyLsn()) and must not be overwritten here,
 * otherwise TDEXLogGetEncKeyLsn() would return a timeline number and
 * TDEXLogGetEncKeyTli() would never see the update.
 */
static void
TDEXLogSetEncKeyTli(TimeLineID tli)
{
	EncryptionState->enc_key_tli = tli;
}

#endif /* FRONTEND */
Expand Down Expand Up @@ -221,6 +248,7 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
{
EncryptionKey = *key;
TDEXLogSetEncKeyLsn(EncryptionKey.start_lsn);
TDEXLogSetEncKeyTli(EncryptionKey.tli);
}

if (key)
Expand All @@ -245,8 +273,8 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
#endif

#ifdef TDE_XLOG_DEBUG
elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX], seg: %X/%X, key_start_lsn: %X/%X",
count, offset, offset, LSN_FORMAT_ARGS(segno), LSN_FORMAT_ARGS(key->start_lsn));
elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX] tli %u, seg: %X/%X, key_start: %u_%X/%X",
count, offset, offset, tli, LSN_FORMAT_ARGS(segno), key->tli, LSN_FORMAT_ARGS(key->start_lsn));
#endif

CalcXLogPageIVPrefix(tli, segno, key->base_iv, iv_prefix);
Expand All @@ -272,9 +300,11 @@ tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset,

XLogSegNoOffsetToRecPtr(segno, offset, segSize, lsn);

pg_tde_wal_last_key_set_lsn(lsn, EncryptionState->db_map_path);
pg_tde_wal_last_key_set_lsn(lsn, tli, EncryptionState->db_map_path);
EncryptionKey.start_lsn = lsn;
EncryptionKey.tli = tli;
TDEXLogSetEncKeyLsn(lsn);
TDEXLogSetEncKeyTli(tli);
}

if (EncryptionKey.type == TDE_KEY_TYPE_WAL_ENCRYPTED)
Expand All @@ -291,21 +321,36 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
TimeLineID tli, XLogSegNo segno, int segSize)
{
ssize_t readsz;
WALKeyCacheRec *keys = pg_tde_get_wal_cache_keys();
XLogRecPtr write_key_lsn;
XLogRecPtr data_start;
XLogRecPtr data_end;

#ifdef TDE_XLOG_DEBUG
elog(DEBUG1, "read from a WAL segment, size: %lu offset: %ld [%lX], seg: %X/%X",
count, offset, offset, LSN_FORMAT_ARGS(segno));
elog(DEBUG1, "read from a WAL segment, size: %lu offset: %ld [%lX], tli: %u, seg: %X/%X",
count, offset, offset, tli, LSN_FORMAT_ARGS(segno));
#endif

readsz = pg_pread(fd, buf, count, offset);

if (readsz <= 0)
return readsz;

TDEXLogCryptBuffer(buf, count, offset, tli, segno, segSize);

return readsz;
}

/*
* [De]Crypt buffer if needed based on provided segment offset, number and TLI
*/
void
TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
TimeLineID tli, XLogSegNo segno, int segSize)
{
WALKeyCacheRec *keys = pg_tde_get_wal_cache_keys();
XLogRecPtr write_key_lsn;
XLogRecPtr data_start;
XLogRecPtr data_end;
KeyTliLsn data_start_t = {.tli = tli};
KeyTliLsn data_end_t = {.tli = tli};

if (!keys)
{
/* cache is empty, try to read keys from disk */
Expand All @@ -317,11 +362,14 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
if (!XLogRecPtrIsInvalid(write_key_lsn))
{
WALKeyCacheRec *last_key = pg_tde_get_last_wal_key();
KeyTliLsn last_key_time = {.tli = last_key->start_tli, .lsn = last_key->start_lsn};
KeyTliLsn write_key_time = {.tli = TDEXLogGetEncKeyTli(), .lsn = write_key_lsn};

Assert(last_key);

/* write has generated a new key, need to fetch it */
if (last_key->start_lsn < write_key_lsn)
if (key_tli_lsn_cmp(last_key_time, write_key_time) == -1)
// if (last_key->start_lsn < write_key_lsn)
{
pg_tde_fetch_wal_keys(write_key_lsn);

Expand All @@ -331,18 +379,24 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
}

XLogSegNoOffsetToRecPtr(segno, offset, segSize, data_start);
XLogSegNoOffsetToRecPtr(segno, offset + readsz, segSize, data_end);
XLogSegNoOffsetToRecPtr(segno, offset + count, segSize, data_end);

data_start_t.lsn = data_start;
data_end_t.lsn = data_end;

/*
* TODO: this is highly inefficient. We should get rid of the linked list and
* search from the last key, as this is what the walsender is using.
*/
for (WALKeyCacheRec *curr_key = keys; curr_key != NULL; curr_key = curr_key->next)
{
KeyTliLsn key_start_t = {.lsn = curr_key->start_lsn, .tli = curr_key->start_tli};
KeyTliLsn key_end_t = {.lsn = curr_key->end_lsn, .tli = curr_key->end_tli};

#ifdef TDE_XLOG_DEBUG
elog(DEBUG1, "WAL key %X/%X-%X/%X, encrypted: %s",
LSN_FORMAT_ARGS(curr_key->start_lsn),
LSN_FORMAT_ARGS(curr_key->end_lsn),
elog(DEBUG1, "WAL key %u_%X/%X - %u_%X/%X, encrypted: %s",
curr_key->start_tli, LSN_FORMAT_ARGS(curr_key->start_lsn),
curr_key->end_tli, LSN_FORMAT_ARGS(curr_key->end_lsn),
curr_key->key.type == TDE_KEY_TYPE_WAL_ENCRYPTED ? "yes" : "no");
#endif

Expand All @@ -353,7 +407,7 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
* Check if the key's range overlaps with the buffer's and decrypt
* the part that does.
*/
if (data_start < curr_key->end_lsn && data_end > curr_key->start_lsn)
if (key_tli_lsn_cmp(data_start_t, key_end_t) == -1 && key_tli_lsn_cmp(data_end_t, key_start_t) == 1)
{
char iv_prefix[16];
off_t dec_off = XLogSegmentOffset(Max(data_start, curr_key->start_lsn), segSize);
Expand All @@ -368,22 +422,20 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
/* We have reached the end of the segment */
if (dec_end == 0)
{
dec_end = offset + readsz;
dec_end = offset + count;
}

dec_sz = dec_end - dec_off;

#ifdef TDE_XLOG_DEBUG
elog(DEBUG1, "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %X/%X",
dec_off, dec_off - offset, dec_sz, LSN_FORMAT_ARGS(curr_key->key->start_lsn));
elog(DEBUG1, "decrypt WAL, dec_off: %lu [buff_off %lu] tli %u, sz: %lu | key %u_%X/%X",
dec_off, dec_off - offset, tli, dec_sz, curr_key->key.tli, LSN_FORMAT_ARGS(curr_key->start_lsn));
#endif
pg_tde_stream_crypt(iv_prefix, dec_off, dec_buf, dec_sz, dec_buf,
&curr_key->key, &curr_key->crypt_ctx);
}
}
}

return readsz;
}

union u128cast
Expand Down
29 changes: 28 additions & 1 deletion contrib/pg_tde/src/include/access/pg_tde_tdemap.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,33 @@ typedef struct InternalKey
uint32 type;

XLogRecPtr start_lsn;
TimeLineID tli;
} InternalKey;

/*
 * A (timeline ID, LSN) pair. key_tli_lsn_cmp() orders these by tli first and
 * by lsn second.
 */
typedef struct KeyTliLsn
{
TimeLineID tli;
XLogRecPtr lsn;
} KeyTliLsn;

/*
 * Three-way comparison of two (timeline, LSN) positions.
 *
 * The timeline ID is the major sort key and the LSN breaks ties within the
 * same timeline.  Returns -1 if t1 sorts before t2, 1 if it sorts after, and
 * 0 when both fields are equal.
 */
static inline int
key_tli_lsn_cmp(KeyTliLsn t1, KeyTliLsn t2)
{
	/* Different timelines decide the result on their own. */
	if (t1.tli != t2.tli)
		return (t1.tli < t2.tli) ? -1 : 1;

	/* Same timeline: fall back to the LSN. */
	if (t1.lsn != t2.lsn)
		return (t1.lsn < t2.lsn) ? -1 : 1;

	return 0;
}

#define MAP_ENTRY_IV_SIZE 16
#define MAP_ENTRY_AEAD_TAG_SIZE 16

Expand Down Expand Up @@ -62,6 +87,8 @@ typedef struct WALKeyCacheRec
{
XLogRecPtr start_lsn;
XLogRecPtr end_lsn;
TimeLineID start_tli;
TimeLineID end_tli;

InternalKey key;
void *crypt_ctx;
Expand All @@ -73,7 +100,7 @@ extern InternalKey *pg_tde_read_last_wal_key(void);
extern WALKeyCacheRec *pg_tde_get_last_wal_key(void);
extern WALKeyCacheRec *pg_tde_fetch_wal_keys(XLogRecPtr start_lsn);
extern WALKeyCacheRec *pg_tde_get_wal_cache_keys(void);
extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path);
extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, TimeLineID tli, const char *keyfile_path);
extern void pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, TDEMapEntryType entry_type);

#define PG_TDE_MAP_FILENAME "%d_keys"
Expand Down
3 changes: 3 additions & 0 deletions contrib/pg_tde/src/include/access/pg_tde_xlog_smgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,7 @@ extern void TDEXLogShmemInit(void);
extern void TDEXLogSmgrInit(void);
extern void TDEXLogSmgrInitWrite(bool encrypt_xlog);

extern void TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
TimeLineID tli, XLogSegNo segno, int segSize);

#endif /* PG_TDE_XLOGSMGR_H */
3 changes: 2 additions & 1 deletion contrib/pg_tde/t/RewindTest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::RecursiveCopy;
use PostgreSQL::Test::Utils;
use Test::More;
use pgtde;

our @EXPORT = qw(
$node_primary
Expand Down Expand Up @@ -199,7 +200,7 @@ sub create_standby
$node_standby =
PostgreSQL::Test::Cluster->new(
'standby' . ($extra_name ? "_${extra_name}" : ''));
$node_primary->backup('my_backup');
PGTDE::backup($node_primary, 'my_backup');
$node_standby->init_from_backup($node_primary, 'my_backup');
my $connstr_primary = $node_primary->connstr();

Expand Down
13 changes: 13 additions & 0 deletions contrib/pg_tde/t/pgtde.pm
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,17 @@ sub compare_results
return compare($expected_filename_with_path, $out_filename_with_path);
}

# Take a base backup of $node under the name $backup_name.
#
# Before delegating to $node->backup(), the backup's target directory is
# created and the node's pg_tde directory is copied into it (presumably so
# the key files are already in place for the backup -- confirm against the
# pg_basebackup changes).  Any extra %params are forwarded unchanged to
# $node->backup().
sub backup
{
	my ($node, $backup_name, %params) = @_;

	my $target_dir = $node->backup_dir . "/$backup_name";

	# The result of mkdir is deliberately not checked, as in the original.
	mkdir $target_dir;

	my $tde_source = $node->data_dir . '/pg_tde';

	PostgreSQL::Test::RecursiveCopy::copypath($tde_source,
		"$target_dir/pg_tde");

	return $node->backup($backup_name, %params);
}

1;
Loading
Loading