Commit a243eaa

feat: improve config

- Reorder the blocks.
- Comment each param (except the retrieval section, it's on TODO); improve existing comments.
- Split the example config into separate configs corresponding to concrete use cases: logical dump/restore, RDS, physical, WAL-G.

1 parent f8b4c37 commit a243eaa

File tree: 5 files changed, +1048 −215 lines

Lines changed: 285 additions & 0 deletions

@@ -0,0 +1,285 @@
# Copy the following to: ./configs/config.yaml

# Database Lab API server. This API is used to work with clones
# (list them, create, delete, see how to connect to a clone).
# Normally, it is supposed to listen on 127.0.0.1:2345 (default)
# and to be running inside a Docker container,
# with port mapping, to allow users to connect from outside
# to port 2345 using the private or public IP address of the machine
# where the container is running. See https://postgres.ai/docs/database-lab/how-to-manage-database-lab
server:
  # The main token that is used to work with the Database Lab API.
  # Note that only one token is supported.
  # However, if the integration with the Postgres.ai Platform is configured
  # (see below, "platform: ..." configuration), then users may use
  # their personal tokens generated on the Platform. In this case,
  # it is recommended to keep "verificationToken" secret, known
  # only to the administrator of the Database Lab instance.
  verificationToken: "secret_token"

  # The host on which the Database Lab server accepts HTTP connections.
  # By default: "127.0.0.1".
  # Keep the default when running inside a Docker container.
  # Use an empty string to accept connections on all network interfaces.
  host: "127.0.0.1"

  # HTTP server port. Default: 2345.
  port: 2345
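  # A quick way to verify that the API server is reachable (a sketch; it
  # assumes the default host/port above, the example token, and that "curl"
  # is available on the machine):
  #
  #   curl --header "Verification-Token: secret_token" http://127.0.0.1:2345/status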

global:
  # Database engine. Currently, the only supported option: "postgres".
  engine: postgres

  # Docker image used to perform various tasks, such as:
  #   - initial data retrieval,
  #   - continuous data synchronization,
  #   - pre-processing.
  # Default: the "official" PostgreSQL image from Docker Hub.
  # Alternative: an extended image from the Postgres.ai team,
  # containing a number of popular extensions, including those
  # RDS provides. See https://postgres.ai/docs/database-lab/supported_databases
  dockerImage: "postgres:12-alpine"

  # Full path to the data directory. This directory must already exist
  # before launching the Database Lab instance. It may be empty if
  # data initialization is configured (see below).
  dataDir: "/var/lib/dblab/data"

  # Debugging, when enabled, makes the Database Lab logs more verbose
  # (not the PostgreSQL logs). Enable in the case of troubleshooting.
  debug: false

# Details of provisioning – where data is located,
# the thin cloning method, etc.
provision:
  # Provision mode. Default (and the only supported): "local".
  # Do not edit this.
  mode: "local"

  # Subdirectory where PGDATA is located, relative to the pool root directory.
  # It is a part of global.dataDir, *with* a leading slash.
  # If PGDATA is supposed to be in global.dataDir and this is the mount point,
  # then use "/" in pgDataSubdir.
  # Otherwise, use the final part (everything except the mount point), with a leading slash.
  # Example:
  #   dataDir (PostgreSQL data directory): /var/lib/dblab/data
  #   pgDataSubdir: /pgdata
  #   -- in this case, we assume that the mount point is: /var/lib/dblab
  # Note that, although it always begins with "/",
  # it is a relative path. Default: "/".
  pgDataSubdir: "/data"

  # Database username that will be used for Postgres management connections.
  # This user must exist.
  # The password is not needed – it will be set automatically.
  # Connections will be made via a Unix domain socket (local).
  pgMgmtUsername: "postgres"

# "Local" mode related parameters.
79+
local:
80+
# Thin-clone managing module used for thin cloning.
81+
# Available options: "zfs" (recommended), "lvm".
82+
thinCloneManager: "zfs"
83+
84+
# Name of your pool (in the case of ZFS) or volume group
85+
# with logic volume name (e.g. "dblab_vg/pg_lv", in the case of LVM).
86+
pool: "dblab_pool"
87+
88+
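    # For reference, a suitable ZFS pool can be created roughly like this
    # (an illustrative sketch only; the device name and the ZFS options are
    # assumptions – adjust them to your environment):
    #
    #   zpool create -O compression=on -O atime=off -O recordsize=8k \
    #     -m /var/lib/dblab dblab_pool /dev/nvme1n1
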
    # Pool of ports for Postgres clones. Ports will be allocated sequentially,
    # starting from the lowest value; e.g., with this range, the first clone
    # gets port 6000, the next one 6001, and so on. The "from" value must be
    # less than the "to" value.
    portPool:
      from: 6000
      to: 6100

    # Directory that will be used to mount clones. Subdirectories in this directory
    # will be used as mount points for clones. Subdirectory names will
    # correspond to ports. E.g., subdirectory "6000" for the clone running on port 6000.
    mountDir: /var/lib/dblab/clones

    # Unix domain socket directory used to establish local connections to cloned databases.
    unixSocketDir: /var/lib/dblab/sockets

    # Snapshots with this suffix will not be available for cloning.
    snapshotFilterSuffix: "_pre"

    # Database Lab provisions thin clones using Docker containers, so we need
    # to specify which Postgres Docker image is to be used when cloning.
    # The default is the extended Postgres image built on top of the official Postgres image
    # (see https://postgres.ai/docs/database-lab/supported_databases).
    # Any custom or official Docker image that runs Postgres with PGDATA located
    # in the "/var/lib/postgresql/pgdata" directory can be used. Our Dockerfile
    # (see https://gitlab.com/postgres-ai/custom-images/-/tree/master/extended)
    # is recommended if customization is needed.
    dockerImage: "postgresai/extended-postgres:12"

    # Use sudo for ZFS/LVM and Docker commands if the Database Lab server is
    # running outside a container. Keep it "false" (default) when running in a container.
    useSudo: false

# Data retrieval flow. This section defines both the initial retrieval and the rules
# to keep the data directory in sync with the source. Both are optional:
# you may already have the data directory, so neither initial retrieval nor
# synchronization is needed.
#
# Data retrieval can also be considered "thick" cloning. Once it's done, users
# can use "thin" cloning to get independent full-size clones of the database in
# seconds, for testing and development. Normally, retrieval (thick cloning) is
# a slow operation (1 TiB/h is a good speed). Optionally, the process of keeping
# the Database Lab data directory in sync with the source (being continuously
# updated) can be configured.
#
# There are two basic ways to organize data retrieval:
# - "logical": use dump/restore processes, obtaining a logical copy of the initial
#              database (such as a set of SQL commands), and then loading it to
#              the target Database Lab data directory. This is the only option
#              for managed cloud PostgreSQL services such as Amazon RDS. Physically,
#              the copy of the database created using this method differs from
#              the original one (data blocks are stored differently). However,
#              row counts are the same, as are the internal database statistics,
#              allowing various kinds of development and testing, including
#              running the EXPLAIN command to optimize SQL queries.
# - "physical": physically copy the data directory from the source (or from the
#               archive if a physical backup tool such as WAL-G, pgBackRest, or Barman
#               is used). This approach produces a copy of the original database
#               that is physically identical, including the existing bloat and the
#               location of data blocks. Not supported for managed cloud Postgres
#               services such as Amazon RDS.
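#
# In the simplest case, the "logical" way is conceptually similar to the
# following pair of commands (a sketch for orientation only, not how Database
# Lab invokes the tools internally; <source_host> is a placeholder):
#
#   pg_dump -h <source_host> -U postgres -d postgres -Fc -f /var/lib/dblab/db.dump
#   pg_restore -d test /var/lib/dblab/db.dump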
retrieval:
  stages:
    - initialize

  spec:
    # The initialize stage provides declarative initialization of the PostgreSQL
    # data directory used by the Database Lab Engine.
    # The stage must not contain physical and logical restore jobs simultaneously.
    initialize:
      jobs:
        # Dumps the PostgreSQL database from the provided source.
        - name: logical-dump
          options:
            # The dump file will be automatically created at this location and then used for restore.
            # Ensure that there is enough disk space.
            dumpLocation: "/var/lib/dblab/db.dump"

            # The Docker image containing the tools required to get a dump.
            dockerImage: "postgres:12-alpine"

            # Source of data.
            source:
              # Source types: "local", "remote", "rds"
              type: remote

              # Connection parameters of the database to be dumped.
              connection:
                # Database connection parameters.
                # Currently, only the password can be specified via an environment
                # variable (PGPASSWORD); everything else must be specified here.
                dbname: postgres
                host: 34.56.78.90
                port: 5432
                username: postgres

                # Connection password. The environment variable PGPASSWORD can be used
                # instead of this option. The environment variable has a higher priority.
                password: postgres

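            # Before running this job, connectivity to the source can be
            # sanity-checked with psql (a sketch; the host and credentials
            # above are example values):
            #
            #   PGPASSWORD=postgres psql -h 34.56.78.90 -p 5432 -U postgres -d postgres -c 'select 1'
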
            # Options for a partial dump.
            # partial:
            #   tables:
            #     - test

            # The number of parallel jobs to get a dump.
            # It's ignored if "restore" is present because "pg_dump | pg_restore" is always single-threaded.
            parallelJobs: 2

            # Options for direct restore to the Database Lab Engine instance.
            # Uncomment this if you prefer restoring from the dump on the fly. In this case,
            # you do not need to use the "logical-restore" job. Keep in mind that unlike "logical-restore",
            # this option does not support parallelization; it is always single-threaded (both for
            # dumping on the source and restoring on the destination end).
            # restore:
            #   # Restore data even if the Postgres directory (`global.dataDir`) is not empty.
            #   # Note the existing data might be overwritten.
            #   forceInit: false

        # Restores the PostgreSQL database from the provided dump. If you use this job,
        # do not use the "restore" option in the "logical-dump" job.
        - name: logical-restore
          options:
            dbname: "test"

            # The location of the archive file (or directory, for a directory-format archive) to be restored.
            dumpLocation: "/var/lib/dblab/db.dump"

            # The Docker image containing the tools required to restore.
            dockerImage: "postgres:12-alpine"

            # Restore data even if the Postgres directory (`global.dataDir`) is not empty.
            # Note the existing data might be overwritten.
            forceInit: false

            # Options for a partial dump.
            # partial:
            #   tables:
            #     - test

        - name: logical-snapshot
          options:
            # It is possible to define a pre-processing script. For example, "/tmp/scripts/custom.sh".
            # Default: empty string (no pre-processing defined).
            # This can be used for scrubbing/eliminating PII data, defining data masking, etc.
            preprocessingScript: ""

            # Adjust PostgreSQL configuration.
            configs:
              # In order to match production plans with Database Lab plans,
              # set the parameters related to Query Planning as on production.
              shared_buffers: 1GB
              # shared_preload_libraries – copy the value from the source.
              shared_preload_libraries: "pg_stat_statements"
              # work_mem and all the Query Planning parameters – copy the values from the source.
              # To do so, use this query:
              #   select format($$%s = '%s'$$, name, setting)
              #   from pg_settings
              #   where
              #     name ~ '(work_mem$|^enable_|_cost$|scan_size$|effective_cache_size|^jit)'
              #     or name ~ '(^geqo|default_statistics_target|constraint_exclusion|cursor_tuple_fraction)'
              #     or name ~ '(collapse_limit$|parallel|plan_cache_mode)';
              work_mem: "100MB"
              # ... put Query Planning parameters here
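              # The query above outputs lines in "name = 'value'" (postgresql.conf)
              # format; replace "=" with ":" to turn each line into a YAML pair for
              # this section. One way to run it on the source (a sketch; it assumes
              # the query has been saved to a file named query_planning_params.sql):
              #
              #   psql -h 34.56.78.90 -U postgres -d postgres -AtX -f query_planning_params.sql
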
cloning:
  # Deprecated field. Default: "base".
  mode: "base"

  # Host that will be specified in the database connection info for all clones.
  # Use a public IP address if database connections are allowed from outside.
  # This value is only used to inform users about how to connect to database clones.
  accessHost: "localhost"

  # Automatically delete clones after the specified number of minutes of inactivity.
  # 0 - disable automatic deletion.
  # Inactivity means:
  #   - no active sessions (queries being processed right now)
  #   - no recently logged queries in the query log
  maxIdleMinutes: 120

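# With the values above, a newly created clone is reachable along these lines
# (an illustrative sketch; the port comes from provision.local.portPool, and
# "dblab_user" is a placeholder for the credentials chosen at clone creation):
#
#   psql "host=localhost port=6000 user=dblab_user dbname=postgres"
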
# ### INTEGRATION ###

# Postgres.ai Platform integration (provides GUI) – extends the open source offering.
# Uncomment the following lines if you need GUI, personal tokens, audit logs, and more.
#
#platform:
#  # Platform API URL. To work with the Postgres.ai SaaS, keep it default
#  # ("https://postgres.ai/api/general").
#  url: "https://postgres.ai/api/general"
#
#  # Token for authorization in the Platform API. This token can be obtained on
#  # the Postgres.ai Console: https://postgres.ai/console/YOUR_ORG_NAME/tokens
#  # This token needs to be kept secret, known only to the administrator.
#  accessToken: "platform_access_token"
#
#  # Enable authorization with personal tokens of the organization's members.
#  # If false: all users must use the "verificationToken" value for any API request.
#  # If true: "verificationToken" is known only to the admin; users use their own tokens,
#  # and any token can be revoked without affecting others.
#  enablePersonalTokens: true
