Skip to content

Commit f42de9f

Browse files
chore!: delete old connection events from audit log (#18735)
### Breaking change (changelog note): >With new connection events appearing in the Connection Log, connection events older than 90 days will now be deleted from the Audit Log. If you require this legacy data, we recommend querying it from the REST API or making a backup of the database/these events before upgrading your Coder deployment. Please see the PR for details on what exactly will be deleted. Of note is that there are currently no plans to delete connection events from the Connection Log. ### Context This is the fifth PR for moving connection events out of the audit log. In previous PRs: - **New** connection logs have been routed to the `connection_logs` table. They will *not* appear in the audit log. - These new connection logs are served from the new `/api/v2/connectionlog` endpoint. In this PR: - We'll now clean existing connection events out of the audit log, if they are older than 90 days, We do this in batches of 1000, every 10 minutes. The criteria for deletion is simple: ``` WHERE ( action = 'connect' OR action = 'disconnect' OR action = 'open' OR action = 'close' ) AND "time" < @before_time::timestamp with time zone ``` where `@before_time` is currently configured to 90 days in the past. Future PRs: - Write documentation for the endpoint / feature
1 parent b5260d5 commit f42de9f

File tree

9 files changed

+237
-0
lines changed

9 files changed

+237
-0
lines changed

coderd/database/dbauthz/dbauthz.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1552,6 +1552,16 @@ func (q *querier) DeleteOAuth2ProviderAppTokensByAppAndUserID(ctx context.Contex
15521552
return q.db.DeleteOAuth2ProviderAppTokensByAppAndUserID(ctx, arg)
15531553
}
15541554

1555+
func (q *querier) DeleteOldAuditLogConnectionEvents(ctx context.Context, threshold database.DeleteOldAuditLogConnectionEventsParams) error {
1556+
// `ResourceSystem` is deprecated, but it doesn't make sense to add
1557+
// `policy.ActionDelete` to `ResourceAuditLog`, since this is the one and
1558+
// only time we'll be deleting from the audit log.
1559+
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceSystem); err != nil {
1560+
return err
1561+
}
1562+
return q.db.DeleteOldAuditLogConnectionEvents(ctx, threshold)
1563+
}
1564+
15551565
func (q *querier) DeleteOldNotificationMessages(ctx context.Context) error {
15561566
if err := q.authorizeContext(ctx, policy.ActionDelete, rbac.ResourceNotificationMessage); err != nil {
15571567
return err

coderd/database/dbauthz/dbauthz_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,10 @@ func (s *MethodTestSuite) TestAuditLogs() {
337337
_ = dbgen.AuditLog(s.T(), db, database.AuditLog{})
338338
check.Args(database.CountAuditLogsParams{}, emptyPreparedAuthorized{}).Asserts(rbac.ResourceAuditLog, policy.ActionRead)
339339
}))
340+
s.Run("DeleteOldAuditLogConnectionEvents", s.Subtest(func(db database.Store, check *expects) {
341+
_ = dbgen.AuditLog(s.T(), db, database.AuditLog{})
342+
check.Args(database.DeleteOldAuditLogConnectionEventsParams{}).Asserts(rbac.ResourceSystem, policy.ActionDelete)
343+
}))
340344
}
341345

342346
func (s *MethodTestSuite) TestConnectionLogs() {

coderd/database/dbmetrics/querymetrics.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbmock/dbmock.go

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbpurge/dbpurge.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ import (
1818
const (
1919
delay = 10 * time.Minute
2020
maxAgentLogAge = 7 * 24 * time.Hour
21+
// Connection events are now inserted into the `connection_logs` table.
22+
// We'll slowly remove old connection events from the `audit_logs` table,
23+
// but we won't touch the `connection_logs` table.
24+
maxAuditLogConnectionEventAge = 90 * 24 * time.Hour // 90 days
25+
auditLogConnectionEventBatchSize = 1000
2126
)
2227

2328
// New creates a new periodically purging database instance.
@@ -63,6 +68,14 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, clk quartz.
6368
return xerrors.Errorf("failed to delete old notification messages: %w", err)
6469
}
6570

71+
deleteOldAuditLogConnectionEventsBefore := start.Add(-maxAuditLogConnectionEventAge)
72+
if err := tx.DeleteOldAuditLogConnectionEvents(ctx, database.DeleteOldAuditLogConnectionEventsParams{
73+
BeforeTime: deleteOldAuditLogConnectionEventsBefore,
74+
LimitCount: auditLogConnectionEventBatchSize,
75+
}); err != nil {
76+
return xerrors.Errorf("failed to delete old audit log connection events: %w", err)
77+
}
78+
6679
logger.Debug(ctx, "purged old database entries", slog.F("duration", clk.Since(start)))
6780

6881
return nil

coderd/database/dbpurge/dbpurge_test.go

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,3 +490,148 @@ func containsProvisionerDaemon(daemons []database.ProvisionerDaemon, name string
490490
return d.Name == name
491491
})
492492
}
493+
494+
//nolint:paralleltest // It uses LockIDDBPurge.
495+
func TestDeleteOldAuditLogConnectionEvents(t *testing.T) {
496+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
497+
defer cancel()
498+
499+
clk := quartz.NewMock(t)
500+
now := dbtime.Now()
501+
afterThreshold := now.Add(-91 * 24 * time.Hour) // 91 days ago (older than 90 day threshold)
502+
beforeThreshold := now.Add(-30 * 24 * time.Hour) // 30 days ago (newer than 90 day threshold)
503+
closeBeforeThreshold := now.Add(-89 * 24 * time.Hour) // 89 days ago
504+
clk.Set(now).MustWait(ctx)
505+
506+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
507+
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
508+
user := dbgen.User(t, db, database.User{})
509+
org := dbgen.Organization(t, db, database.Organization{})
510+
511+
oldConnectLog := dbgen.AuditLog(t, db, database.AuditLog{
512+
UserID: user.ID,
513+
OrganizationID: org.ID,
514+
Time: afterThreshold,
515+
Action: database.AuditActionConnect,
516+
ResourceType: database.ResourceTypeWorkspace,
517+
})
518+
519+
oldDisconnectLog := dbgen.AuditLog(t, db, database.AuditLog{
520+
UserID: user.ID,
521+
OrganizationID: org.ID,
522+
Time: afterThreshold,
523+
Action: database.AuditActionDisconnect,
524+
ResourceType: database.ResourceTypeWorkspace,
525+
})
526+
527+
oldOpenLog := dbgen.AuditLog(t, db, database.AuditLog{
528+
UserID: user.ID,
529+
OrganizationID: org.ID,
530+
Time: afterThreshold,
531+
Action: database.AuditActionOpen,
532+
ResourceType: database.ResourceTypeWorkspace,
533+
})
534+
535+
oldCloseLog := dbgen.AuditLog(t, db, database.AuditLog{
536+
UserID: user.ID,
537+
OrganizationID: org.ID,
538+
Time: afterThreshold,
539+
Action: database.AuditActionClose,
540+
ResourceType: database.ResourceTypeWorkspace,
541+
})
542+
543+
recentConnectLog := dbgen.AuditLog(t, db, database.AuditLog{
544+
UserID: user.ID,
545+
OrganizationID: org.ID,
546+
Time: beforeThreshold,
547+
Action: database.AuditActionConnect,
548+
ResourceType: database.ResourceTypeWorkspace,
549+
})
550+
551+
oldNonConnectionLog := dbgen.AuditLog(t, db, database.AuditLog{
552+
UserID: user.ID,
553+
OrganizationID: org.ID,
554+
Time: afterThreshold,
555+
Action: database.AuditActionCreate,
556+
ResourceType: database.ResourceTypeWorkspace,
557+
})
558+
559+
nearThresholdConnectLog := dbgen.AuditLog(t, db, database.AuditLog{
560+
UserID: user.ID,
561+
OrganizationID: org.ID,
562+
Time: closeBeforeThreshold,
563+
Action: database.AuditActionConnect,
564+
ResourceType: database.ResourceTypeWorkspace,
565+
})
566+
567+
// Run the purge
568+
done := awaitDoTick(ctx, t, clk)
569+
closer := dbpurge.New(ctx, logger, db, clk)
570+
defer closer.Close()
571+
// Wait for tick
572+
testutil.TryReceive(ctx, t, done)
573+
574+
// Verify results by querying all audit logs
575+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{})
576+
require.NoError(t, err)
577+
578+
// Extract log IDs for comparison
579+
logIDs := make([]uuid.UUID, len(logs))
580+
for i, log := range logs {
581+
logIDs[i] = log.AuditLog.ID
582+
}
583+
584+
require.NotContains(t, logIDs, oldConnectLog.ID, "old connect log should be deleted")
585+
require.NotContains(t, logIDs, oldDisconnectLog.ID, "old disconnect log should be deleted")
586+
require.NotContains(t, logIDs, oldOpenLog.ID, "old open log should be deleted")
587+
require.NotContains(t, logIDs, oldCloseLog.ID, "old close log should be deleted")
588+
require.Contains(t, logIDs, recentConnectLog.ID, "recent connect log should be kept")
589+
require.Contains(t, logIDs, nearThresholdConnectLog.ID, "near threshold connect log should be kept")
590+
require.Contains(t, logIDs, oldNonConnectionLog.ID, "old non-connection log should be kept")
591+
}
592+
593+
func TestDeleteOldAuditLogConnectionEventsLimit(t *testing.T) {
594+
t.Parallel()
595+
596+
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
597+
defer cancel()
598+
599+
db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
600+
user := dbgen.User(t, db, database.User{})
601+
org := dbgen.Organization(t, db, database.Organization{})
602+
603+
now := dbtime.Now()
604+
threshold := now.Add(-90 * 24 * time.Hour)
605+
606+
for i := 0; i < 5; i++ {
607+
dbgen.AuditLog(t, db, database.AuditLog{
608+
UserID: user.ID,
609+
OrganizationID: org.ID,
610+
Time: threshold.Add(-time.Duration(i+1) * time.Hour),
611+
Action: database.AuditActionConnect,
612+
ResourceType: database.ResourceTypeWorkspace,
613+
})
614+
}
615+
616+
err := db.DeleteOldAuditLogConnectionEvents(ctx, database.DeleteOldAuditLogConnectionEventsParams{
617+
BeforeTime: threshold,
618+
LimitCount: 1,
619+
})
620+
require.NoError(t, err)
621+
622+
logs, err := db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{})
623+
require.NoError(t, err)
624+
625+
require.Len(t, logs, 4)
626+
627+
err = db.DeleteOldAuditLogConnectionEvents(ctx, database.DeleteOldAuditLogConnectionEventsParams{
628+
BeforeTime: threshold,
629+
LimitCount: 100,
630+
})
631+
require.NoError(t, err)
632+
633+
logs, err = db.GetAuditLogsOffset(ctx, database.GetAuditLogsOffsetParams{})
634+
require.NoError(t, err)
635+
636+
require.Len(t, logs, 0)
637+
}

coderd/database/querier.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries.sql.go

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries/auditlogs.sql

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,3 +237,19 @@ WHERE
237237
-- Authorize Filter clause will be injected below in CountAuthorizedAuditLogs
238238
-- @authorize_filter
239239
;
240+
241+
-- name: DeleteOldAuditLogConnectionEvents :exec
242+
DELETE FROM audit_logs
243+
WHERE id IN (
244+
SELECT id FROM audit_logs
245+
WHERE
246+
(
247+
action = 'connect'
248+
OR action = 'disconnect'
249+
OR action = 'open'
250+
OR action = 'close'
251+
)
252+
AND "time" < @before_time::timestamp with time zone
253+
ORDER BY "time" ASC
254+
LIMIT @limit_count
255+
);

0 commit comments

Comments
 (0)