Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/ci3.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ function check_cache {
"ci-release-pr"
)
# Check if CI_MODE is in cached_ci_modes
if [[ " ${cached_ci_modes[@]} " =~ " ${CI_MODE} " ]]; then
if [[ " ${cached_ci_modes[@]} " =~ " ${CI_MODE} " && "$GITHUB_RUN_ATTEMPT" -eq 1 ]]; then
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, elegant way to add a knob here

if cache_download "$cache_name" . 2>/dev/null && [ -f ".ci-success.txt" ]; then
echo "Cache hit in .github/ci3.sh! Previous run: $(cat ".ci-success.txt")"
exit 0
Expand Down
15 changes: 8 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ endef
# Collects the test commands from the given project
# Writes the full output to /tmp/test_cmds atomically.
# The test engine is expected to be running and it will read commands from this file.
# MAKEFILE_TARGET is exported so filter_test_cmds can inject it into the hash prefix for targeted rebuilds.
define test
$(call run_command,$(1),$(ROOT)/$(2),\
./bootstrap.sh test_cmds $(3) | $(ROOT)/ci3/filter_test_cmds | $(ROOT)/ci3/atomic_append /tmp/test_cmds)
export MAKEFILE_TARGET=$(1) && ./bootstrap.sh test_cmds $(3) | $(ROOT)/ci3/filter_test_cmds | $(ROOT)/ci3/atomic_append /tmp/test_cmds)
endef

#==============================================================================
Expand Down Expand Up @@ -193,16 +194,16 @@ bb-sol: bb-cpp-native
# Barretenberg Tests
#==============================================================================

bb-cpp-tests-native: bb-cpp-native
bb-cpp-native-tests: bb-cpp-native
$(call test,$@,barretenberg/cpp,native)

bb-cpp-tests-wasm-threads: bb-cpp-wasm-threads
bb-cpp-wasm-threads-tests: bb-cpp-wasm-threads
$(call test,$@,barretenberg/cpp,wasm_threads)

bb-cpp-tests-asan: bb-cpp-asan
bb-cpp-asan-tests: bb-cpp-asan
$(call test,$@,barretenberg/cpp,asan)

bb-cpp-tests-smt: bb-cpp-smt
bb-cpp-smt-tests: bb-cpp-smt
$(call test,$@,barretenberg/cpp,smt)

bb-acir-tests: bb-acir
Expand All @@ -220,9 +221,9 @@ bb-docs-tests: bb-docs
bb-bbup-tests: bb-bbup
$(call test,$@,barretenberg/bbup)

bb-tests: bb-cpp-tests-native bb-acir-tests bb-ts-tests bb-sol-tests bb-bbup-tests bb-docs-tests
bb-tests: bb-cpp-native-tests bb-acir-tests bb-ts-tests bb-sol-tests bb-bbup-tests bb-docs-tests

bb-full-tests: bb-cpp-tests-native bb-cpp-tests-wasm-threads bb-cpp-tests-asan bb-cpp-tests-smt bb-acir-tests bb-ts-tests bb-sol-tests bb-bbup-tests bb-docs-tests
bb-full-tests: bb-cpp-native-tests bb-cpp-wasm-threads-tests bb-cpp-asan-tests bb-cpp-smt-tests bb-acir-tests bb-ts-tests bb-sol-tests bb-bbup-tests bb-docs-tests

#==============================================================================
# Noir Projects
Expand Down
10 changes: 6 additions & 4 deletions barretenberg/cpp/format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@ function format_files {
}

if [ "$1" == "staged" ]; then
echo Formatting barretenberg staged files...
files=$(git diff-index --diff-filter=d --relative --cached --name-only HEAD | grep -e '\.\(cpp\|hpp\|tcc\)$')
format_files "$files"
if [ -n "$files" ]; then
echo Formatting barretenberg staged files...
format_files "$files"
echo "$files" | xargs -r git add
fi
elif [ "$1" == "changed" ]; then
echo Formatting barretenberg changed files...
files=$(git diff-index --diff-filter=d --relative --name-only HEAD | grep -e '\.\(cpp\|hpp\|tcc\)$')
format_files "$files"
if [ -n "$files" ]; then
echo Formatting barretenberg changed files...
format_files "$files"
fi
elif [ "$1" == "check" ]; then
files=$(find ./src -iname *.hpp -o -iname *.cpp -o -iname *.tcc | grep -v bb/deps)
echo "$files" | parallel -N10 clang-format-20 --dry-run --Werror
Expand Down
15 changes: 13 additions & 2 deletions bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -281,10 +281,11 @@ function build_and_test {
# Start the test engine.
rm -f $test_cmds_file
touch $test_cmds_file
# put it in it's own process group via background subshell, we can terminate on cleanup.
(color_prefix "test-engine" "denoise test_engine_start") &
# put it in its own process group so we can terminate it on cleanup.
setsid color_prefix "test-engine" "denoise test_engine_start" &
test_engine_pid=$!
test_engine_pgid=$(ps -o pgid= -p $test_engine_pid)
echo "Started test engine with $test_engine_pid in PGID $test_engine_pgid."

# Start the build.
if [ -z "$target" ]; then
Expand Down Expand Up @@ -590,6 +591,16 @@ case "$cmd" in
build_and_test
bench
;;
"ci-grind-test")
export CI=1
export USE_TEST_CACHE=0

full_cmd="${1:?full_cmd required}"
timeout="${2:-}"
commit="${3:-}"

grind_test "$full_cmd" "$timeout" "$commit"
;;

##########################################
# NETWORK DEPLOYMENTS WITH BENCHES/TESTS #
Expand Down
24 changes: 19 additions & 5 deletions ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@ function print_usage {
echo_cmd "full-no-test-cache" "Spin up an EC2 instance and run bootstrap ci-full-no-test-cache."
echo_cmd "docs" "Spin up an EC2 instance and run docs-only CI."
echo_cmd "barretenberg" "Spin up an EC2 instance and run barretenberg-only CI."
echo_cmd "grind" "Spin up multiple EC2 instances to run parallel full CI runs."
echo_cmd "merge-queue" "Spin up several EC2 instances to run the merge-queue jobs."
echo_cmd "grind" "Spin up EC2 instances to run parallel full CI runs."
echo_cmd "merge-queue" "Spin up EC2 instances to run the merge-queue jobs."
echo_cmd "grind-test" "Spin up an EC2 and grind a given test command."
echo_cmd "network-deploy" "Spin up an EC2 instance to deploy a network."
echo_cmd "network-scenarios" "Spin up EC2 instance(s) to run network scenario tests in parallel."
echo_cmd "network-scenarios" "Spin up EC2 instances to run network scenario tests in parallel."
echo_cmd "network-tests" "Spin up an EC2 instance to run tests on a network."
echo_cmd "network-bench" "Spin up an EC2 instance to run benchmarks on a network."
echo_cmd "network-teardown" "Spin up an EC2 instance to teardown a network deployment."
Expand Down Expand Up @@ -110,10 +111,23 @@ case "$cmd" in
parallel --jobs 10 --termseq 'TERM,10000' --tagstring '{= $_=~s/run (\w+).*/$1/; =}' --line-buffered --halt now,fail=1 ::: \
'run x1-full amd64 ci-full-no-test-cache' \
'run x2-full amd64 ci-full-no-test-cache' \
'run x3-full amd64 ci-full-no-test-cache' \
'run x4-full amd64 ci-full-no-test-cache' \
'run x3-full amd64 ci-full-no-test-cache-makefile' \
'run x4-full amd64 ci-full-no-test-cache-makefile' \
'run a1-fast arm64 ci-fast' | DUP=1 cache_log "Merge queue CI run" $RUN_ID
;;
grind-test)
full_cmd="$1"
timeout="${2:-}"
commit="${3:-}"
# Extract test command (strip rebuild hash prefix) and hash it
# Uses same hash as run_test_cmd's test_hash for consistency
test_cmd="${full_cmd#* }"
test_hash=$(hash_str_orig "$test_cmd")
export CI_DASHBOARD="deflake"
export JOB_ID="grind-test-$test_hash"
export INSTANCE_POSTFIX=$JOB_ID
bootstrap_ec2 "./bootstrap.sh ci-grind-test '$full_cmd' $timeout $commit" | DUP=1 cache_log "Grind test CI run" $RUN_ID
;;

##########################################
# NETWORK DEPLOYMENTS WITH BENCHES/TESTS #
Expand Down
1 change: 1 addition & 0 deletions ci3/aws_request_instance
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ for cpu in "${cpu_list[@]}"; do
aws_request_instance_type $name $instance_type $price $ami $state_dir
code=$?
[[ "$code" -eq 0 || "$code" -eq 143 || "$code" -eq 130 ]] && exit $code
echo "Instance request exited with code: $code"
done
done
exit 1
12 changes: 6 additions & 6 deletions ci3/aws_request_instance_type
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ echo "$launch_spec" > "$spec_path"
info="(name: $name) (type: $instance_type) (ami: $ami) (bid: $price)"

if [ "${NO_SPOT:-0}" -ne 1 ]; then
>&2 echo "Requesting $instance_type spot instance $info..."
echo "Requesting $instance_type spot instance $info..."
sir=$(aws ec2 request-spot-instances \
--spot-price "$price" \
--instance-count 1 \
Expand All @@ -56,7 +56,7 @@ if [ "${NO_SPOT:-0}" -ne 1 ]; then
--output text)
echo $sir > $sir_path

>&2 echo "Waiting for instance id for spot request: $sir..."
echo "Waiting for instance id for spot request: $sir..."
sleep 5
for i in {1..6}; do
iid=$(aws ec2 describe-spot-instance-requests \
Expand All @@ -66,7 +66,7 @@ if [ "${NO_SPOT:-0}" -ne 1 ]; then
[ -z "$iid" -o "$iid" == "None" ] || break

if [ $i -eq 6 ]; then
>&2 echo "Timeout waiting for spot request."
echo "Timeout waiting for spot request."
# Cancel spot request. We may still get allocated an instance if it's *just* happened.
aws ec2 cancel-spot-instance-requests --spot-instance-request-ids $sir > /dev/null
fi
Expand All @@ -78,7 +78,7 @@ fi

if [ -z "${iid:-}" -o "${iid:-}" == "None" ]; then
# Request on-demand instance.
>&2 echo "Requesting $instance_type on-demand instance $info..."
echo "Requesting $instance_type on-demand instance $info..."
iid=$(aws ec2 run-instances \
--cli-input-json file://$spec_path \
--query "Instances[*].[InstanceId]" \
Expand All @@ -104,13 +104,13 @@ while [ -z "${ip:-}" ]; do
done

# Wait till ssh port is open.
>&2 echo "Waiting for SSH at $ip..."
echo "Waiting for SSH at $ip..."
SECONDS=0
SSH_CONFIG_PATH=${SSH_CONFIG_PATH:-aws/build_instance_ssh_config}
[ "${NO_TERMINATE:-0}" -eq 1 ] && LIVE_CMD=true || LIVE_CMD="sudo shutdown -h +${AWS_SHUTDOWN_TIME:-60}"
while ! ssh -F $SSH_CONFIG_PATH -o ConnectTimeout=1 $ip $LIVE_CMD > /dev/null 2>&1; do
if (( SECONDS >= 60 )); then
>&2 echo "Timeout: SSH could not login to $ip within 60 seconds."
echo "Timeout: SSH could not login to $ip within 60 seconds."
exit 1
fi
sleep 1
Expand Down
18 changes: 16 additions & 2 deletions ci3/bootstrap_ec2
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,21 @@ container_script=$(
case \$code in
155) ;;
0) log_ci_run PASSED \$ci_log_id ;;
*) log_ci_run FAILED \$ci_log_id && merge_train_failure_slack_notify \$ci_log_id && release_canary_slack_notify \$ci_log_id ;;
*)
log_ci_run FAILED \$ci_log_id
merge_train_failure_slack_notify \$ci_log_id
release_canary_slack_notify \$ci_log_id
ci_failed_data=\$(jq -n \\
--arg status "failed" \\
--arg log_id "\$ci_log_id" \\
--arg ref_name "\${TARGET_BRANCH:-\$REF_NAME}" \\
--arg commit_hash "\$COMMIT_HASH" \\
--arg commit_author "\$COMMIT_AUTHOR" \\
--arg commit_msg "\$COMMIT_MSG" \\
--argjson exit_code "\$code" \\
'{status: \$status, log_id: \$log_id, ref_name: \$ref_name, commit_hash: \$commit_hash, commit_author: \$commit_author, commit_msg: \$commit_msg, exit_code: \$exit_code, timestamp: now | todate}')
redis_publish "ci:run:failed" "\$ci_failed_data"
;;
esac
exit \$code
EOF
Expand Down Expand Up @@ -317,7 +331,7 @@ function run {
-e AWS_TOKEN=\$aws_token \
-e NAMESPACE=${NAMESPACE:-} \
-e NETWORK=${NETWORK:-} \
--pids-limit=32768 \
--pids-limit=65536 \
--shm-size=2g \
aztecprotocol/devbox:3.0 bash -c $(printf '%q' "$container_script")
}
Expand Down
70 changes: 64 additions & 6 deletions ci3/dashboard/rk.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from flask import Flask, render_template_string, request, Response
from flask import Flask, render_template_string, request, Response, redirect
from flask_compress import Compress
from flask_httpauth import HTTPBasicAuth
import gzip
import json
import os
import re
import requests
import subprocess
import threading
import uuid
from ansi2html import Ansi2HTMLConverter
from pathlib import Path

Expand Down Expand Up @@ -127,13 +129,12 @@ def root() -> str:
f"\n"
f"Select a filter:\n"
f"\n{YELLOW}"
f"{hyperlink('/section/master?fail_list=failed_tests_master', 'master queue')}\n"
f"{hyperlink('/section/staging?fail_list=failed_tests_staging', 'staging queue')}\n"
f"{hyperlink('/section/next?fail_list=failed_tests_next', 'next queue')}\n"
f"{hyperlink('/section/next', 'next queue')}\n"
f"{hyperlink('/section/prs', 'prs')}\n"
f"{hyperlink('/section/releases', 'releases')}\n"
f"{hyperlink('/section/nightly', 'nightly')}\n"
f"{hyperlink('/section/network', 'network')}\n"
f"{hyperlink('/section/deflake', 'deflake')}\n"
f"{RESET}"
f"\n"
f"Benchmarks:\n"
Expand All @@ -150,12 +151,11 @@ def section_view(section: str) -> str:
limit = int(request.args.get('limit', 50))
filter_str = request.args.get('filter', default='', type=str)
filter_prop = request.args.get('filter_prop', default='', type=str)
fail_list = request.args.get('fail_list', default='', type=str)

lines = update_status(offset, filter_str, filter_prop)
lines += "\n"
lines += f"Last {limit} ci runs on {section}:\n\n"
lines += get_section_data(section, offset, limit, filter_str, filter_prop, fail_list)
lines += get_section_data(section, offset, limit, filter_str, filter_prop)
return lines

TEMPLATE = """
Expand Down Expand Up @@ -392,6 +392,64 @@ def get_breakdown(runtime, flow_name, sha):
return Response('{"error": "Breakdown not found"}', mimetype='application/json', status=404)


@app.route('/grind')
@auth.login_required
def trigger_grind():
    """Trigger a grind job for a flaky test.

    Query params:
      cmd    -- full test command to grind (required).
      commit -- git ref to grind against (default 'HEAD').
      time   -- grind duration (e.g. '10m'); absent => render the duration-selection page.
      run    -- pre-generated run id from the selection page (back-button protection).
    """
    from urllib.parse import urlencode as url_encode

    full_cmd = request.args.get('cmd')
    commit = request.args.get('commit', 'HEAD')
    grind_time = request.args.get('time')  # None = show selection page
    run_id = request.args.get('run')  # Pre-generated run_id from selection page

    if not full_cmd:
        return "Missing cmd parameter", 400

    # If run_id is provided and already has a log, redirect to it (back-button protection)
    if run_id and r.exists(run_id):
        return redirect(f'/{run_id}')

    # If no time selected, show selection page
    if not grind_time:
        # Generate one run_id for all time links on this page load
        page_run_id = uuid.uuid4().hex[:16]
        time_options = ['5m', '10m', '20m', '30m', '1h']
        time_links = []
        for t in time_options:
            url = f"/grind?{url_encode({'cmd': full_cmd, 'commit': commit, 'time': t, 'run': page_run_id})}"
            time_links.append(f"{YELLOW}{hyperlink(url, t)}{RESET}")

        page = (
            f"{BOLD}Grind Test{RESET}\n\n"
            f"Command: {full_cmd}\n\n"
            f"Select grind duration: "
            f"{' | '.join(time_links)}\n"
        )
        return render_template_string(TEMPLATE, value=ansi_to_html(page), filter_str='grind', follow='top')

    # Time selected - start the grind
    # Use run_id from URL, or generate new one if not provided
    if not run_id:
        run_id = uuid.uuid4().hex[:16]

    # Initialize the log key so redirect doesn't show "Key not found"
    r.setex(run_id, 86400, b'Starting grind...\n')

    # Start grind job in background.
    # Dashboard server needs local repo checkout at REPO_PATH.
    # SECURITY: cmd/time/commit are untrusted request input. Pass them as discrete
    # argv elements with no shell (previously they were interpolated into a
    # `bash -c` string, allowing shell command injection on the dashboard host).
    repo_path = os.environ.get('REPO_PATH')
    if repo_path:
        subprocess.Popen(
            ['./ci.sh', 'grind-test', full_cmd, grind_time, commit],
            cwd=repo_path,
            env={**os.environ, 'RUN_ID': run_id},
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True,
        )

    # Redirect to log view.
    return redirect(f'/{run_id}')

@app.route('/<key>')
@auth.login_required
def get_value(key):
Expand Down
4 changes: 1 addition & 3 deletions ci3/dashboard/rk_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,11 @@ def main():
help='Filter pattern (comma-separated)')
parser.add_argument('--filter-prop', '-p', type=str, default='',
help='Property to filter on (status,name,author,msg)')
parser.add_argument('--fail-list', type=str, default='', help='Redis key for failed tests list')

args = parser.parse_args()

set_base_url("http://ci.aztec-labs.com")
output = get_section_data(args.section, args.offset, args.limit,
args.filter_str, args.filter_prop, args.fail_list)
output = get_section_data(args.section, args.offset, args.limit, args.filter_str, args.filter_prop)
print(output, end='')

if __name__ == '__main__':
Expand Down
10 changes: 5 additions & 5 deletions ci3/dashboard/rk_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def get_list_as_string(key, limit=None):
else:
values = r.lrange(key, 0, limit - 1)
if not values:
value = "List is empty or key not found"
value = ""
else:
concatenated = []
for item in values:
Expand Down Expand Up @@ -93,8 +93,7 @@ def render(group: list) -> str:
return f"{date_time}: {links_str} {BOLD}{name}{RESET} {PURPLE}{author}{RESET}: {msg} {duration_str}{CLEAR_EOL}\n"

def get_section_data(section: str, offset: int = 0, limit: int = 100,
filter_str: str = '', filter_prop: str = '',
fail_list: str = '') -> str:
filter_str: str = '', filter_prop: str = '') -> str:
"""Core logic for fetching and rendering section data."""
lua_script_path = Path(__file__).parent / 'set-filter.lua'
with lua_script_path.open('r') as f:
Expand All @@ -113,8 +112,9 @@ def get_section_data(section: str, offset: int = 0, limit: int = 100,
group_sorted = sorted(group, key=lambda x: x.get('ts', x.get('timestamp', 0)))
lines += render(group_sorted)

if fail_list:
fail_lines = get_list_as_string("failed_tests_" + section, 100)
if fail_lines:
lines += "\n"
lines += f"Last 100 failed or flaked tests:\n\n"
lines += get_list_as_string(fail_list, 100)
lines += fail_lines
return lines
Loading
Loading