revert to previous nightly gauntlet

hugodutka · hugodutka · commit fa170a4cf09e · 2025-07-08T12:42:32.000Z
diff --git a/.github/workflows/nightly-gauntlet.yaml b/.github/workflows/nightly-gauntlet.yaml
@@ -0,0 +1,183 @@
+# The nightly-gauntlet runs tests that are either too flaky or too slow to block
+# every PR.
+name: nightly-gauntlet
+on:
+  schedule:
+    # Every day at 4AM
+    - cron: "0 4 * * 1-5"
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  test-go-pg:
+    # make sure to adjust NUM_PARALLEL_PACKAGES and NUM_PARALLEL_TESTS below
+    # when changing runner sizes
+    runs-on: ${{ matrix.os == 'macos-latest' && github.repository_owner == 'coder' && 'depot-macos-latest' || matrix.os == 'windows-2022' && github.repository_owner == 'coder' && 'depot-windows-2022-16' || matrix.os }}
+    # This timeout must be greater than the timeout set by `go test` in
+    # `make test-postgres` to ensure we receive a trace of running
+    # goroutines. Setting this to the timeout +5m should work quite well
+    # even if some of the preceding steps are slow.
+    timeout-minutes: 25
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - macos-latest
+          - windows-2022
+    steps:
+      - name: Harden Runner
+        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0
+        with:
+          egress-policy: audit
+
+      # macOS indexes all new files in the background. Our Postgres tests
+      # create and destroy thousands of databases on disk, and Spotlight
+      # tries to index all of them, seriously slowing down the tests.
+      - name: Disable Spotlight Indexing
+        if: runner.os == 'macOS'
+        run: |
+          sudo mdutil -a -i off
+          sudo mdutil -X /
+          sudo launchctl bootout system /System/Library/LaunchDaemons/com.apple.metadata.mds.plist
+
+      # Set up RAM disks to speed up the rest of the job. This action is in
+      # a separate repository to allow its use before actions/checkout.
+      - name: Setup RAM Disks
+        if: runner.os == 'Windows'
+        uses: coder/setup-ramdisk-action@79dacfe70c47ad6d6c0dd7f45412368802641439
+
+      - name: Checkout
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 1
+
+      - name: Setup Go
+        uses: ./.github/actions/setup-go
+        with:
+          # Runners have Go baked-in and Go will automatically
+          # download the toolchain configured in go.mod, so we don't
+          # need to reinstall it. It's faster on Windows runners.
+          use-preinstalled-go: ${{ runner.os == 'Windows' }}
+          use-temp-cache-dirs: ${{ runner.os == 'Windows' }}
+
+      - name: Setup Terraform
+        uses: ./.github/actions/setup-tf
+
+      - name: Test with PostgreSQL Database
+        env:
+          POSTGRES_VERSION: "13"
+          TS_DEBUG_DISCO: "true"
+          LC_CTYPE: "en_US.UTF-8"
+          LC_ALL: "en_US.UTF-8"
+        shell: bash
+        run: |
+          if [ "${{ runner.os }}" == "Windows" ]; then
+            # Create a temp dir on the R: ramdisk drive for Windows. The default
+            # C: drive is extremely slow: https://github.com/actions/runner-images/issues/8755
+            mkdir -p "R:/temp/embedded-pg"
+            go run scripts/embedded-pg/main.go -path "R:/temp/embedded-pg"
+          fi
+          if [ "${{ runner.os }}" == "macOS" ]; then
+            # Postgres runs faster on a ramdisk on macOS too
+            mkdir -p /tmp/tmpfs
+            sudo mount_tmpfs -o noowners -s 8g /tmp/tmpfs
+            go run scripts/embedded-pg/main.go -path /tmp/tmpfs/embedded-pg
+          fi
+
+          # if macOS, install google-chrome for scaletests
+          # As another concern, should we really have this kind of external dependency
+          # requirement on standard CI?
+          if [ "${{ matrix.os }}" == "macos-latest" ]; then
+            brew install google-chrome
+          fi
+
+          # By default Go will use the number of logical CPUs, which
+          # is a fine default.
+          PARALLEL_FLAG=""
+
+          # macOS will output "The default interactive shell is now zsh"
+          # intermittently in CI...
+          if [ "${{ matrix.os }}" == "macos-latest" ]; then
+            touch ~/.bash_profile && echo "export BASH_SILENCE_DEPRECATION_WARNING=1" >> ~/.bash_profile
+          fi
+
+          # Golang's default for these 2 variables is the number of logical CPUs.
+          # Our Windows and Linux runners have 16 cores, so they match up there.
+          NUM_PARALLEL_PACKAGES=16
+          NUM_PARALLEL_TESTS=16
+          if [ "${{ runner.os }}" == "Windows" ]; then
+            # On Windows Postgres chokes up when we have 16x16=256 tests
+            # running in parallel, and dbtestutil.NewDB starts to take more than
+            # 10s to complete sometimes causing test timeouts. With 16x8=128 tests
+            # Postgres tends not to choke.
+            NUM_PARALLEL_PACKAGES=8
+          fi
+          if [ "${{ runner.os }}" == "macOS" ]; then
+            # Our macOS runners have 8 cores. We leave NUM_PARALLEL_TESTS at 16
+            # because the tests complete faster and Postgres doesn't choke. It seems
+            # that macOS's tmpfs is faster than the one on Windows.
+            NUM_PARALLEL_PACKAGES=8
+          fi
+
+          # We rerun failing tests to counteract flakiness coming from Postgres
+          # choking on macOS and Windows sometimes.
+          DB=ci gotestsum --rerun-fails=2 --rerun-fails-max-failures=1000 \
+            --format standard-quiet --packages "./..." \
+            -- -v -p $NUM_PARALLEL_PACKAGES -parallel=$NUM_PARALLEL_TESTS -count=1
+
+      - name: Upload test stats to Datadog
+        timeout-minutes: 1
+        continue-on-error: true
+        uses: ./.github/actions/upload-datadog
+        if: success() || failure()
+        with:
+          api-key: ${{ secrets.DATADOG_API_KEY }}
+
+  notify-slack-on-failure:
+    needs:
+      - test-go-pg
+    runs-on: ubuntu-latest
+    if: failure() && github.ref == 'refs/heads/main'
+
+    steps:
+      - name: Send Slack notification
+        run: |
+          curl -X POST -H 'Content-type: application/json' \
+          --data '{
+            "blocks": [
+              {
+                "type": "header",
+                "text": {
+                  "type": "plain_text",
+                  "text": "❌ Nightly gauntlet failed",
+                  "emoji": true
+                }
+              },
+              {
+                "type": "section",
+                "fields": [
+                  {
+                    "type": "mrkdwn",
+                    "text": "*Workflow:*\n${{ github.workflow }}"
+                  },
+                  {
+                    "type": "mrkdwn",
+                    "text": "*Committer:*\n${{ github.actor }}"
+                  },
+                  {
+                    "type": "mrkdwn",
+                    "text": "*Commit:*\n${{ github.sha }}"
+                  }
+                ]
+              },
+              {
+                "type": "section",
+                "text": {
+                  "type": "mrkdwn",
+                  "text": "*View failure:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|Click here>"
+                }
+              }
+            ]
+          }' ${{ secrets.CI_FAILURE_SLACK_WEBHOOK }}