Skip to content

Commit dedbed2

Browse files
committed
Merge branch '170-instance-id-label' into 'master'
Description (#170):
* add a label with InstanceID for retrieval service containers
* clean up service containers on retrieval failure

Closes #170

See merge request postgres-ai/database-lab!181
2 parents 5eb30e3 + c34d46d commit dedbed2

File tree

7 files changed

+135
-57
lines changed

7 files changed

+135
-57
lines changed

cmd/database-lab/main.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"gitlab.com/postgres-ai/database-lab/pkg/config"
2323
"gitlab.com/postgres-ai/database-lab/pkg/log"
2424
"gitlab.com/postgres-ai/database-lab/pkg/retrieval"
25+
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/cont"
2526
"gitlab.com/postgres-ai/database-lab/pkg/services/cloning"
2627
"gitlab.com/postgres-ai/database-lab/pkg/services/platform"
2728
"gitlab.com/postgres-ai/database-lab/pkg/services/provision"
@@ -99,6 +100,10 @@ func main() {
99100
}
100101

101102
if err := retrievalSvc.Run(ctx); err != nil {
103+
if cleanUpErr := cont.CleanUpServiceContainers(ctx, dockerCLI, cfg.Global.InstanceID); cleanUpErr != nil {
104+
log.Err("Failed to clean up service containers:", cleanUpErr)
105+
}
106+
102107
log.Fatal("Failed to run the data retrieval service:", err)
103108
}
104109

pkg/retrieval/engine/postgres/logical/dump.go

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/config"
2626
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/dbmarker"
2727
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools"
28+
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/cont"
2829
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/defaults"
2930
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/health"
3031
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/options"
@@ -225,7 +226,7 @@ func (d *DumpJob) Run(ctx context.Context) (err error) {
225226
return errors.Wrap(err, "failed to generate PostgreSQL password")
226227
}
227228

228-
cont, err := d.dockerClient.ContainerCreate(ctx, d.buildContainerConfig(pwd), hostConfig, &network.NetworkingConfig{},
229+
dumpCont, err := d.dockerClient.ContainerCreate(ctx, d.buildContainerConfig(pwd), hostConfig, &network.NetworkingConfig{},
229230
d.dumpContainerName(),
230231
)
231232
if err != nil {
@@ -234,21 +235,21 @@ func (d *DumpJob) Run(ctx context.Context) (err error) {
234235
return errors.Wrapf(err, "failed to create container %q", d.dumpContainerName())
235236
}
236237

237-
defer tools.RemoveContainer(ctx, d.dockerClient, cont.ID, tools.StopTimeout)
238+
defer tools.RemoveContainer(ctx, d.dockerClient, dumpCont.ID, cont.StopTimeout)
238239

239240
defer func() {
240241
if err != nil {
241242
tools.PrintContainerLogs(ctx, d.dockerClient, d.dumpContainerName())
242243
}
243244
}()
244245

245-
if err := d.dockerClient.ContainerStart(ctx, cont.ID, types.ContainerStartOptions{}); err != nil {
246+
if err := d.dockerClient.ContainerStart(ctx, dumpCont.ID, types.ContainerStartOptions{}); err != nil {
246247
return errors.Wrapf(err, "failed to start container %q", d.dumpContainerName())
247248
}
248249

249-
log.Msg(fmt.Sprintf("Running container: %s. ID: %v", d.dumpContainerName(), cont.ID))
250+
log.Msg(fmt.Sprintf("Running container: %s. ID: %v", d.dumpContainerName(), dumpCont.ID))
250251

251-
if err := tools.CheckContainerReadiness(ctx, d.dockerClient, cont.ID); err != nil {
252+
if err := tools.CheckContainerReadiness(ctx, d.dockerClient, dumpCont.ID); err != nil {
252253
return errors.Wrap(err, "failed to readiness check")
253254
}
254255

@@ -259,7 +260,7 @@ func (d *DumpJob) Run(ctx context.Context) (err error) {
259260
dumpCommand := d.buildLogicalDumpCommand()
260261
log.Msg("Running dump command", dumpCommand)
261262

262-
execCommand, err := d.dockerClient.ContainerExecCreate(ctx, cont.ID, types.ExecConfig{
263+
execCommand, err := d.dockerClient.ContainerExecCreate(ctx, dumpCont.ID, types.ExecConfig{
263264
AttachStdout: true,
264265
AttachStderr: true,
265266
Cmd: dumpCommand,
@@ -274,7 +275,7 @@ func (d *DumpJob) Run(ctx context.Context) (err error) {
274275
log.Msg("Partial dump will be run. Tables for dumping: ", strings.Join(d.Partial.Tables, ", "))
275276
}
276277

277-
if err := d.performDumpCommand(ctx, os.Stdout, cont.ID, execCommand.ID); err != nil {
278+
if err := d.performDumpCommand(ctx, os.Stdout, dumpCont.ID, execCommand.ID); err != nil {
278279
return errors.Wrap(err, "failed to dump a database")
279280
}
280281

@@ -283,7 +284,7 @@ func (d *DumpJob) Run(ctx context.Context) (err error) {
283284
return errors.Wrap(err, "failed to mark the created dump")
284285
}
285286

286-
if err := recalculateStats(ctx, d.dockerClient, cont.ID, buildAnalyzeCommand(Connection{
287+
if err := recalculateStats(ctx, d.dockerClient, dumpCont.ID, buildAnalyzeCommand(Connection{
287288
DBName: d.config.db.DBName,
288289
Username: defaults.Username,
289290
}, d.DumpOptions.ParallelJobs)); err != nil {
@@ -349,7 +350,10 @@ func (d *DumpJob) getEnvironmentVariables(password string) []string {
349350

350351
func (d *DumpJob) buildContainerConfig(password string) *container.Config {
351352
return &container.Config{
352-
Labels: map[string]string{tools.DBLabControlLabel: tools.DBLabDumpLabel},
353+
Labels: map[string]string{
354+
cont.DBLabControlLabel: cont.DBLabDumpLabel,
355+
cont.DBLabInstanceIDLabel: d.globalCfg.InstanceID,
356+
},
353357
Env: d.getEnvironmentVariables(password),
354358
Image: d.DockerImage,
355359
Healthcheck: health.GetConfig(),

pkg/retrieval/engine/postgres/logical/restore.go

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/config"
2525
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/dbmarker"
2626
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools"
27+
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/cont"
2728
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/defaults"
2829
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/health"
2930
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/options"
@@ -130,7 +131,7 @@ func (r *RestoreJob) Run(ctx context.Context) (err error) {
130131
return errors.Wrap(err, "failed to generate PostgreSQL password")
131132
}
132133

133-
cont, err := r.dockerClient.ContainerCreate(ctx,
134+
restoreCont, err := r.dockerClient.ContainerCreate(ctx,
134135
r.buildContainerConfig(pwd),
135136
hostConfig,
136137
&network.NetworkingConfig{},
@@ -140,28 +141,28 @@ func (r *RestoreJob) Run(ctx context.Context) (err error) {
140141
return errors.Wrapf(err, "failed to create container %q", r.restoreContainerName())
141142
}
142143

143-
defer tools.RemoveContainer(ctx, r.dockerClient, cont.ID, tools.StopTimeout)
144+
defer tools.RemoveContainer(ctx, r.dockerClient, restoreCont.ID, cont.StopTimeout)
144145

145146
defer func() {
146147
if err != nil {
147148
tools.PrintContainerLogs(ctx, r.dockerClient, r.restoreContainerName())
148149
}
149150
}()
150151

151-
if err := r.dockerClient.ContainerStart(ctx, cont.ID, types.ContainerStartOptions{}); err != nil {
152+
if err := r.dockerClient.ContainerStart(ctx, restoreCont.ID, types.ContainerStartOptions{}); err != nil {
152153
return errors.Wrapf(err, "failed to start container %q", r.restoreContainerName())
153154
}
154155

155-
log.Msg(fmt.Sprintf("Running container: %s. ID: %v", r.restoreContainerName(), cont.ID))
156+
log.Msg(fmt.Sprintf("Running container: %s. ID: %v", r.restoreContainerName(), restoreCont.ID))
156157

157-
if err := tools.CheckContainerReadiness(ctx, r.dockerClient, cont.ID); err != nil {
158+
if err := tools.CheckContainerReadiness(ctx, r.dockerClient, restoreCont.ID); err != nil {
158159
return errors.Wrap(err, "failed to readiness check")
159160
}
160161

161162
restoreCommand := r.buildLogicalRestoreCommand()
162163
log.Msg("Running restore command: ", restoreCommand)
163164

164-
execCommand, err := r.dockerClient.ContainerExecCreate(ctx, cont.ID, types.ExecConfig{
165+
execCommand, err := r.dockerClient.ContainerExecCreate(ctx, restoreCont.ID, types.ExecConfig{
165166
AttachStdout: true,
166167
AttachStderr: true,
167168
Tty: true,
@@ -180,15 +181,15 @@ func (r *RestoreJob) Run(ctx context.Context) (err error) {
180181
return errors.Wrap(err, "failed to run restore command")
181182
}
182183

183-
if err := tools.InspectCommandResponse(ctx, r.dockerClient, cont.ID, execCommand.ID); err != nil {
184+
if err := tools.InspectCommandResponse(ctx, r.dockerClient, restoreCont.ID, execCommand.ID); err != nil {
184185
return errors.Wrap(err, "failed to exec restore command")
185186
}
186187

187-
if err := r.markDatabase(ctx, cont.ID); err != nil {
188+
if err := r.markDatabase(ctx, restoreCont.ID); err != nil {
188189
return errors.Wrap(err, "failed to mark the database")
189190
}
190191

191-
if err := recalculateStats(ctx, r.dockerClient, cont.ID, buildAnalyzeCommand(Connection{
192+
if err := recalculateStats(ctx, r.dockerClient, restoreCont.ID, buildAnalyzeCommand(Connection{
192193
Username: defaults.Username,
193194
DBName: r.RestoreOptions.DBName,
194195
}, r.RestoreOptions.ParallelJobs)); err != nil {
@@ -202,7 +203,10 @@ func (r *RestoreJob) Run(ctx context.Context) (err error) {
202203

203204
func (r *RestoreJob) buildContainerConfig(password string) *container.Config {
204205
return &container.Config{
205-
Labels: map[string]string{tools.DBLabControlLabel: tools.DBLabRestoreLabel},
206+
Labels: map[string]string{
207+
cont.DBLabControlLabel: cont.DBLabRestoreLabel,
208+
cont.DBLabInstanceIDLabel: r.globalCfg.InstanceID,
209+
},
206210
Env: append(os.Environ(), []string{
207211
"PGDATA=" + r.globalCfg.DataDir(),
208212
"POSTGRES_PASSWORD=" + password,

pkg/retrieval/engine/postgres/physical/physical.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/config"
2929
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/dbmarker"
3030
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools"
31+
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/cont"
3132
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/defaults"
3233
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/fs"
3334
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/options"
@@ -141,12 +142,12 @@ func (r *RestoreJob) Run(ctx context.Context) (err error) {
141142
return nil
142143
}
143144

144-
contID, err := r.startContainer(ctx, r.restoreContainerName(), tools.DBLabRestoreLabel)
145+
contID, err := r.startContainer(ctx, r.restoreContainerName(), cont.DBLabRestoreLabel)
145146
if err != nil {
146147
return errors.Wrapf(err, "failed to create container: %s", r.restoreContainerName())
147148
}
148149

149-
defer tools.RemoveContainer(ctx, r.dockerClient, contID, tools.StopTimeout)
150+
defer tools.RemoveContainer(ctx, r.dockerClient, contID, cont.StopTimeout)
150151

151152
defer func() {
152153
if err != nil {
@@ -310,7 +311,7 @@ LOOP:
310311
}
311312

312313
func (r *RestoreJob) syncInstanceName() string {
313-
return tools.SyncInstanceContainerPrefix + r.globalCfg.InstanceID
314+
return cont.SyncInstanceContainerPrefix + r.globalCfg.InstanceID
314315
}
315316

316317
func (r *RestoreJob) runSyncInstance(ctx context.Context) error {
@@ -327,12 +328,12 @@ func (r *RestoreJob) runSyncInstance(ctx context.Context) error {
327328

328329
log.Msg("Removing non-running sync instance")
329330

330-
tools.RemoveContainer(ctx, r.dockerClient, syncContainer.ID, tools.StopTimeout)
331+
tools.RemoveContainer(ctx, r.dockerClient, syncContainer.ID, cont.StopTimeout)
331332
}
332333

333334
log.Msg("Starting sync instance: ", r.syncInstanceName())
334335

335-
syncInstanceID, err := r.startContainer(ctx, r.syncInstanceName(), tools.DBLabSyncLabel)
336+
syncInstanceID, err := r.startContainer(ctx, r.syncInstanceName(), cont.DBLabSyncLabel)
336337
if err != nil {
337338
return err
338339
}
@@ -383,9 +384,12 @@ func (r *RestoreJob) getEnvironmentVariables(password string) []string {
383384

384385
func (r *RestoreJob) buildContainerConfig(password, label string) *container.Config {
385386
return &container.Config{
386-
Labels: map[string]string{tools.DBLabControlLabel: label},
387-
Env: r.getEnvironmentVariables(password),
388-
Image: r.CopyOptions.DockerImage,
387+
Labels: map[string]string{
388+
cont.DBLabControlLabel: label,
389+
cont.DBLabInstanceIDLabel: r.globalCfg.InstanceID,
390+
},
391+
Env: r.getEnvironmentVariables(password),
392+
Image: r.CopyOptions.DockerImage,
389393
}
390394
}
391395

pkg/retrieval/engine/postgres/snapshot/physical.go

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/config"
3434
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/dbmarker"
3535
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools"
36+
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/cont"
3637
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/defaults"
3738
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/engine/postgres/tools/health"
3839
"gitlab.com/postgres-ai/database-lab/pkg/retrieval/options"
@@ -169,7 +170,7 @@ func (p *PhysicalInitial) validateConfig() error {
169170
}
170171

171172
func (p *PhysicalInitial) syncInstanceName() string {
172-
return tools.SyncInstanceContainerPrefix + p.globalCfg.InstanceID
173+
return cont.SyncInstanceContainerPrefix + p.globalCfg.InstanceID
173174
}
174175

175176
// Name returns a name of the job.
@@ -381,7 +382,7 @@ func (p *PhysicalInitial) promoteInstance(ctx context.Context, clonePath string)
381382
}
382383

383384
// Run promotion container.
384-
cont, err := p.dockerClient.ContainerCreate(ctx,
385+
promoteCont, err := p.dockerClient.ContainerCreate(ctx,
385386
p.buildContainerConfig(clonePath, promoteImage, pwd),
386387
hostConfig,
387388
&network.NetworkingConfig{},
@@ -392,30 +393,30 @@ func (p *PhysicalInitial) promoteInstance(ctx context.Context, clonePath string)
392393
return errors.Wrap(err, "failed to create container")
393394
}
394395

395-
defer tools.RemoveContainer(ctx, p.dockerClient, cont.ID, tools.StopTimeout)
396+
defer tools.RemoveContainer(ctx, p.dockerClient, promoteCont.ID, cont.StopTimeout)
396397

397398
defer func() {
398399
if err != nil {
399400
tools.PrintContainerLogs(ctx, p.dockerClient, p.promoteContainerName())
400401
}
401402
}()
402403

403-
if err := p.dockerClient.ContainerStart(ctx, cont.ID, types.ContainerStartOptions{}); err != nil {
404+
if err := p.dockerClient.ContainerStart(ctx, promoteCont.ID, types.ContainerStartOptions{}); err != nil {
404405
return errors.Wrap(err, "failed to start container")
405406
}
406407

407-
log.Msg(fmt.Sprintf("Running container: %s. ID: %v", p.promoteContainerName(), cont.ID))
408+
log.Msg(fmt.Sprintf("Running container: %s. ID: %v", p.promoteContainerName(), promoteCont.ID))
408409

409410
// Start PostgreSQL instance.
410-
if err := tools.RunPostgres(ctx, p.dockerClient, cont.ID, clonePath); err != nil {
411+
if err := tools.RunPostgres(ctx, p.dockerClient, promoteCont.ID, clonePath); err != nil {
411412
return errors.Wrap(err, "failed to start PostgreSQL instance")
412413
}
413414

414-
if err := tools.CheckContainerReadiness(ctx, p.dockerClient, cont.ID); err != nil {
415+
if err := tools.CheckContainerReadiness(ctx, p.dockerClient, promoteCont.ID); err != nil {
415416
return errors.Wrap(err, "failed to readiness check")
416417
}
417418

418-
shouldBePromoted, err := p.checkRecovery(ctx, cont.ID)
419+
shouldBePromoted, err := p.checkRecovery(ctx, promoteCont.ID)
419420
if err != nil {
420421
return errors.Wrap(err, "failed to read response of the exec command")
421422
}
@@ -424,7 +425,7 @@ func (p *PhysicalInitial) promoteInstance(ctx context.Context, clonePath string)
424425

425426
// Detect dataStateAt.
426427
if shouldBePromoted == "t" {
427-
extractedDataStateAt, err := p.extractDataStateAt(ctx, cont.ID)
428+
extractedDataStateAt, err := p.extractDataStateAt(ctx, promoteCont.ID)
428429
if err != nil {
429430
return errors.Wrap(err,
430431
`Failed to get data_state_at: PGDATA should be promoted, but pg_last_xact_replay_timestamp() returns empty result.
@@ -444,13 +445,13 @@ func (p *PhysicalInitial) promoteInstance(ctx context.Context, clonePath string)
444445
log.Msg("Data state at: ", p.dbMark.DataStateAt)
445446

446447
// Promote PGDATA.
447-
if err := p.runPromoteCommand(ctx, cont.ID, clonePath); err != nil {
448+
if err := p.runPromoteCommand(ctx, promoteCont.ID, clonePath); err != nil {
448449
return errors.Wrapf(err, "failed to promote PGDATA: %s", clonePath)
449450
}
450451
}
451452

452453
// Checkpoint.
453-
if err := p.checkpoint(ctx, cont.ID); err != nil {
454+
if err := p.checkpoint(ctx, promoteCont.ID); err != nil {
454455
return err
455456
}
456457

@@ -500,7 +501,10 @@ func (p *PhysicalInitial) adjustRecoveryConfiguration(pgVersion, clonePGDataDir
500501

501502
func (p *PhysicalInitial) buildContainerConfig(clonePath, promoteImage, password string) *container.Config {
502503
return &container.Config{
503-
Labels: map[string]string{tools.DBLabControlLabel: tools.DBLabPromoteLabel},
504+
Labels: map[string]string{
505+
cont.DBLabControlLabel: cont.DBLabPromoteLabel,
506+
cont.DBLabInstanceIDLabel: p.globalCfg.InstanceID,
507+
},
504508
Env: []string{
505509
"PGDATA=" + clonePath,
506510
"POSTGRES_PASSWORD=" + password,

0 commit comments

Comments
 (0)