diff --git a/.github/ci3.sh b/.github/ci3.sh index 4a76ae0bacab..b06e1930222c 100755 --- a/.github/ci3.sh +++ b/.github/ci3.sh @@ -56,7 +56,7 @@ function check_cache { "ci-release-pr" ) # Check if CI_MODE is in cached_ci_modes - if [[ " ${cached_ci_modes[@]} " =~ " ${CI_MODE} " ]]; then + if [[ " ${cached_ci_modes[@]} " =~ " ${CI_MODE} " && "$GITHUB_RUN_ATTEMPT" -eq 1 ]]; then if cache_download "$cache_name" . 2>/dev/null && [ -f ".ci-success.txt" ]; then echo "Cache hit in .github/ci3.sh! Previous run: $(cat ".ci-success.txt")" exit 0 diff --git a/Makefile b/Makefile index ac873fd5e9b2..0114c579e4d2 100644 --- a/Makefile +++ b/Makefile @@ -37,9 +37,10 @@ endef # Collects the test commands from the given project # Writes the full output to /tmp/test_cmds atomically. # The test engine is expected to be running and it will read commands from this file. +# MAKEFILE_TARGET is exported so filter_test_cmds can inject it into the hash prefix for targeted rebuilds. define test $(call run_command,$(1),$(ROOT)/$(2),\ - ./bootstrap.sh test_cmds $(3) | $(ROOT)/ci3/filter_test_cmds | $(ROOT)/ci3/atomic_append /tmp/test_cmds) + export MAKEFILE_TARGET=$(1) && ./bootstrap.sh test_cmds $(3) | $(ROOT)/ci3/filter_test_cmds | $(ROOT)/ci3/atomic_append /tmp/test_cmds) endef #============================================================================== @@ -193,16 +194,16 @@ bb-sol: bb-cpp-native # Barretenberg Tests #============================================================================== -bb-cpp-tests-native: bb-cpp-native +bb-cpp-native-tests: bb-cpp-native $(call test,$@,barretenberg/cpp,native) -bb-cpp-tests-wasm-threads: bb-cpp-wasm-threads +bb-cpp-wasm-threads-tests: bb-cpp-wasm-threads $(call test,$@,barretenberg/cpp,wasm_threads) -bb-cpp-tests-asan: bb-cpp-asan +bb-cpp-asan-tests: bb-cpp-asan $(call test,$@,barretenberg/cpp,asan) -bb-cpp-tests-smt: bb-cpp-smt +bb-cpp-smt-tests: bb-cpp-smt $(call test,$@,barretenberg/cpp,smt) bb-acir-tests: bb-acir @@ -220,9 +221,9 @@ 
bb-docs-tests: bb-docs bb-bbup-tests: bb-bbup $(call test,$@,barretenberg/bbup) -bb-tests: bb-cpp-tests-native bb-acir-tests bb-ts-tests bb-sol-tests bb-bbup-tests bb-docs-tests +bb-tests: bb-cpp-native-tests bb-acir-tests bb-ts-tests bb-sol-tests bb-bbup-tests bb-docs-tests -bb-full-tests: bb-cpp-tests-native bb-cpp-tests-wasm-threads bb-cpp-tests-asan bb-cpp-tests-smt bb-acir-tests bb-ts-tests bb-sol-tests bb-bbup-tests bb-docs-tests +bb-full-tests: bb-cpp-native-tests bb-cpp-wasm-threads-tests bb-cpp-asan-tests bb-cpp-smt-tests bb-acir-tests bb-ts-tests bb-sol-tests bb-bbup-tests bb-docs-tests #============================================================================== # Noir Projects diff --git a/barretenberg/cpp/format.sh b/barretenberg/cpp/format.sh index 9083aeef7432..8ac9d9749ecd 100755 --- a/barretenberg/cpp/format.sh +++ b/barretenberg/cpp/format.sh @@ -6,16 +6,18 @@ function format_files { } if [ "$1" == "staged" ]; then - echo Formatting barretenberg staged files... files=$(git diff-index --diff-filter=d --relative --cached --name-only HEAD | grep -e '\.\(cpp\|hpp\|tcc\)$') - format_files "$files" if [ -n "$files" ]; then + echo Formatting barretenberg staged files... + format_files "$files" echo "$files" | xargs -r git add fi elif [ "$1" == "changed" ]; then - echo Formatting barretenberg changed files... files=$(git diff-index --diff-filter=d --relative --name-only HEAD | grep -e '\.\(cpp\|hpp\|tcc\)$') - format_files "$files" + if [ -n "$files" ]; then + echo Formatting barretenberg changed files... + format_files "$files" + fi elif [ "$1" == "check" ]; then files=$(find ./src -iname *.hpp -o -iname *.cpp -o -iname *.tcc | grep -v bb/deps) echo "$files" | parallel -N10 clang-format-20 --dry-run --Werror diff --git a/bootstrap.sh b/bootstrap.sh index 2dea77d81c27..f7df0443b01b 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -281,10 +281,11 @@ function build_and_test { # Start the test engine. 
rm -f $test_cmds_file touch $test_cmds_file - # put it in it's own process group via background subshell, we can terminate on cleanup. - (color_prefix "test-engine" "denoise test_engine_start") & + # put it in it's own process group, we can terminate on cleanup. + setsid color_prefix "test-engine" "denoise test_engine_start" & test_engine_pid=$! test_engine_pgid=$(ps -o pgid= -p $test_engine_pid) + echo "Started test engine with $test_engine_pid in PGID $test_engine_pgid." # Start the build. if [ -z "$target" ]; then @@ -590,6 +591,16 @@ case "$cmd" in build_and_test bench ;; + "ci-grind-test") + export CI=1 + export USE_TEST_CACHE=0 + + full_cmd="${1:?full_cmd required}" + timeout="${2:-}" + commit="${3:-}" + + grind_test "$full_cmd" "$timeout" "$commit" + ;; ########################################## # NETWORK DEPLOYMENTS WITH BENCHES/TESTS # diff --git a/ci.sh b/ci.sh index d285481fbbea..504d37e20093 100755 --- a/ci.sh +++ b/ci.sh @@ -24,10 +24,11 @@ function print_usage { echo_cmd "full-no-test-cache" "Spin up an EC2 instance and run bootstrap ci-full-no-test-cache." echo_cmd "docs" "Spin up an EC2 instance and run docs-only CI." echo_cmd "barretenberg" "Spin up an EC2 instance and run barretenberg-only CI." - echo_cmd "grind" "Spin up multiple EC2 instances to run parallel full CI runs." - echo_cmd "merge-queue" "Spin up several EC2 instances to run the merge-queue jobs." + echo_cmd "grind" "Spin up EC2 instances to run parallel full CI runs." + echo_cmd "merge-queue" "Spin up EC2 instances to run the merge-queue jobs." + echo_cmd "grind-test" "Spin up an EC2 and grind a given test command." echo_cmd "network-deploy" "Spin up an EC2 instance to deploy a network." - echo_cmd "network-scenarios" "Spin up EC2 instance(s) to run network scenario tests in parallel." + echo_cmd "network-scenarios" "Spin up EC2 instances to run network scenario tests in parallel." echo_cmd "network-tests" "Spin up an EC2 instance to run tests on a network." 
echo_cmd "network-bench" "Spin up an EC2 instance to run benchmarks on a network." echo_cmd "network-teardown" "Spin up an EC2 instance to teardown a network deployment." @@ -110,10 +111,23 @@ case "$cmd" in parallel --jobs 10 --termseq 'TERM,10000' --tagstring '{= $_=~s/run (\w+).*/$1/; =}' --line-buffered --halt now,fail=1 ::: \ 'run x1-full amd64 ci-full-no-test-cache' \ 'run x2-full amd64 ci-full-no-test-cache' \ - 'run x3-full amd64 ci-full-no-test-cache' \ - 'run x4-full amd64 ci-full-no-test-cache' \ + 'run x3-full amd64 ci-full-no-test-cache-makefile' \ + 'run x4-full amd64 ci-full-no-test-cache-makefile' \ 'run a1-fast arm64 ci-fast' | DUP=1 cache_log "Merge queue CI run" $RUN_ID ;; + grind-test) + full_cmd="$1" + timeout="${2:-}" + commit="${3:-}" + # Extract test command (strip rebuild hash prefix) and hash it + # Uses same hash as run_test_cmd's test_hash for consistency + test_cmd="${full_cmd#* }" + test_hash=$(hash_str_orig "$test_cmd") + export CI_DASHBOARD="deflake" + export JOB_ID="grind-test-$test_hash" + export INSTANCE_POSTFIX=$JOB_ID + bootstrap_ec2 "./bootstrap.sh ci-grind-test '$full_cmd' $timeout $commit" | DUP=1 cache_log "Grind test CI run" $RUN_ID + ;; ########################################## # NETWORK DEPLOYMENTS WITH BENCHES/TESTS # diff --git a/ci3/aws_request_instance b/ci3/aws_request_instance index 90ba319718a2..cd9cba8d0629 100755 --- a/ci3/aws_request_instance +++ b/ci3/aws_request_instance @@ -62,6 +62,7 @@ for cpu in "${cpu_list[@]}"; do aws_request_instance_type $name $instance_type $price $ami $state_dir code=$? 
[[ "$code" -eq 0 || "$code" -eq 143 || "$code" -eq 130 ]] && exit $code + echo "Instance request exited with code: $code" done done exit 1 diff --git a/ci3/aws_request_instance_type b/ci3/aws_request_instance_type index 7a05960e7fc3..6aa578a8954a 100755 --- a/ci3/aws_request_instance_type +++ b/ci3/aws_request_instance_type @@ -46,7 +46,7 @@ echo "$launch_spec" > "$spec_path" info="(name: $name) (type: $instance_type) (ami: $ami) (bid: $price)" if [ "${NO_SPOT:-0}" -ne 1 ]; then - >&2 echo "Requesting $instance_type spot instance $info..." + echo "Requesting $instance_type spot instance $info..." sir=$(aws ec2 request-spot-instances \ --spot-price "$price" \ --instance-count 1 \ @@ -56,7 +56,7 @@ if [ "${NO_SPOT:-0}" -ne 1 ]; then --output text) echo $sir > $sir_path - >&2 echo "Waiting for instance id for spot request: $sir..." + echo "Waiting for instance id for spot request: $sir..." sleep 5 for i in {1..6}; do iid=$(aws ec2 describe-spot-instance-requests \ @@ -66,7 +66,7 @@ if [ "${NO_SPOT:-0}" -ne 1 ]; then [ -z "$iid" -o "$iid" == "None" ] || break if [ $i -eq 6 ]; then - >&2 echo "Timeout waiting for spot request." + echo "Timeout waiting for spot request." # Cancel spot request. We may still get allocated an instance if it's *just* happened. aws ec2 cancel-spot-instance-requests --spot-instance-request-ids $sir > /dev/null fi @@ -78,7 +78,7 @@ fi if [ -z "${iid:-}" -o "${iid:-}" == "None" ]; then # Request on-demand instance. - >&2 echo "Requesting $instance_type on-demand instance $info..." + echo "Requesting $instance_type on-demand instance $info..." iid=$(aws ec2 run-instances \ --cli-input-json file://$spec_path \ --query "Instances[*].[InstanceId]" \ @@ -104,13 +104,13 @@ while [ -z "${ip:-}" ]; do done # Wait till ssh port is open. ->&2 echo "Waiting for SSH at $ip..." +echo "Waiting for SSH at $ip..." 
SECONDS=0 SSH_CONFIG_PATH=${SSH_CONFIG_PATH:-aws/build_instance_ssh_config} [ "${NO_TERMINATE:-0}" -eq 1 ] && LIVE_CMD=true || LIVE_CMD="sudo shutdown -h +${AWS_SHUTDOWN_TIME:-60}" while ! ssh -F $SSH_CONFIG_PATH -o ConnectTimeout=1 $ip $LIVE_CMD > /dev/null 2>&1; do if (( SECONDS >= 60 )); then - >&2 echo "Timeout: SSH could not login to $ip within 60 seconds." + echo "Timeout: SSH could not login to $ip within 60 seconds." exit 1 fi sleep 1 diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2 index a11ad744da5c..a24f0cfc177b 100755 --- a/ci3/bootstrap_ec2 +++ b/ci3/bootstrap_ec2 @@ -188,7 +188,21 @@ container_script=$( case \$code in 155) ;; 0) log_ci_run PASSED \$ci_log_id ;; - *) log_ci_run FAILED \$ci_log_id && merge_train_failure_slack_notify \$ci_log_id && release_canary_slack_notify \$ci_log_id ;; + *) + log_ci_run FAILED \$ci_log_id + merge_train_failure_slack_notify \$ci_log_id + release_canary_slack_notify \$ci_log_id + ci_failed_data=\$(jq -n \\ + --arg status "failed" \\ + --arg log_id "\$ci_log_id" \\ + --arg ref_name "\${TARGET_BRANCH:-\$REF_NAME}" \\ + --arg commit_hash "\$COMMIT_HASH" \\ + --arg commit_author "\$COMMIT_AUTHOR" \\ + --arg commit_msg "\$COMMIT_MSG" \\ + --argjson exit_code "\$code" \\ + '{status: \$status, log_id: \$log_id, ref_name: \$ref_name, commit_hash: \$commit_hash, commit_author: \$commit_author, commit_msg: \$commit_msg, exit_code: \$exit_code, timestamp: now | todate}') + redis_publish "ci:run:failed" "\$ci_failed_data" + ;; esac exit \$code EOF @@ -317,7 +331,7 @@ function run { -e AWS_TOKEN=\$aws_token \ -e NAMESPACE=${NAMESPACE:-} \ -e NETWORK=${NETWORK:-} \ - --pids-limit=32768 \ + --pids-limit=65536 \ --shm-size=2g \ aztecprotocol/devbox:3.0 bash -c $(printf '%q' "$container_script") } diff --git a/ci3/dashboard/rk.py b/ci3/dashboard/rk.py index 72c4e3bed69f..d1afd17628fe 100644 --- a/ci3/dashboard/rk.py +++ b/ci3/dashboard/rk.py @@ -1,4 +1,4 @@ -from flask import Flask, render_template_string, request, Response +from 
flask import Flask, render_template_string, request, Response, redirect from flask_compress import Compress from flask_httpauth import HTTPBasicAuth import gzip @@ -6,7 +6,9 @@ import os import re import requests +import subprocess import threading +import uuid from ansi2html import Ansi2HTMLConverter from pathlib import Path @@ -127,13 +129,12 @@ def root() -> str: f"\n" f"Select a filter:\n" f"\n{YELLOW}" - f"{hyperlink('/section/master?fail_list=failed_tests_master', 'master queue')}\n" - f"{hyperlink('/section/staging?fail_list=failed_tests_staging', 'staging queue')}\n" - f"{hyperlink('/section/next?fail_list=failed_tests_next', 'next queue')}\n" + f"{hyperlink('/section/next', 'next queue')}\n" f"{hyperlink('/section/prs', 'prs')}\n" f"{hyperlink('/section/releases', 'releases')}\n" f"{hyperlink('/section/nightly', 'nightly')}\n" f"{hyperlink('/section/network', 'network')}\n" + f"{hyperlink('/section/deflake', 'deflake')}\n" f"{RESET}" f"\n" f"Benchmarks:\n" @@ -150,12 +151,11 @@ def section_view(section: str) -> str: limit = int(request.args.get('limit', 50)) filter_str = request.args.get('filter', default='', type=str) filter_prop = request.args.get('filter_prop', default='', type=str) - fail_list = request.args.get('fail_list', default='', type=str) lines = update_status(offset, filter_str, filter_prop) lines += "\n" lines += f"Last {limit} ci runs on {section}:\n\n" - lines += get_section_data(section, offset, limit, filter_str, filter_prop, fail_list) + lines += get_section_data(section, offset, limit, filter_str, filter_prop) return lines TEMPLATE = """ @@ -392,6 +392,64 @@ def get_breakdown(runtime, flow_name, sha): return Response('{"error": "Breakdown not found"}', mimetype='application/json', status=404) +@app.route('/grind') +@auth.login_required +def trigger_grind(): + """Trigger a grind job for a flaky test.""" + from urllib.parse import urlencode as url_encode + + full_cmd = request.args.get('cmd') + commit = request.args.get('commit', 'HEAD') 
+ grind_time = request.args.get('time') # None = show selection page + run_id = request.args.get('run') # Pre-generated run_id from selection page + + if not full_cmd: + return "Missing cmd parameter", 400 + + # If run_id is provided and already has a log, redirect to it (back-button protection) + if run_id and r.exists(run_id): + return redirect(f'/{run_id}') + + # If no time selected, show selection page + if not grind_time: + # Generate one run_id for all time links on this page load + page_run_id = uuid.uuid4().hex[:16] + time_options = ['5m', '10m', '20m', '30m', '1h'] + time_links = [] + for t in time_options: + url = f"/grind?{url_encode({'cmd': full_cmd, 'commit': commit, 'time': t, 'run': page_run_id})}" + time_links.append(f"{YELLOW}{hyperlink(url, t)}{RESET}") + + page = ( + f"{BOLD}Grind Test{RESET}\n\n" + f"Command: {full_cmd}\n\n" + f"Select grind duration: " + f"{' | '.join(time_links)}\n" + ) + return render_template_string(TEMPLATE, value=ansi_to_html(page), filter_str='grind', follow='top') + + # Time selected - start the grind + # Use run_id from URL, or generate new one if not provided + if not run_id: + run_id = uuid.uuid4().hex[:16] + + # Initialize the log key so redirect doesn't show "Key not found" + r.setex(run_id, 86400, b'Starting grind...\n') + + # Start grind job in background + # Dashboard server needs local repo checkout at REPO_PATH + repo_path = os.environ.get('REPO_PATH') + if repo_path: + subprocess.Popen( + ['bash', '-c', f'cd {repo_path} && RUN_ID={run_id} ./ci.sh grind-test "{full_cmd}" {grind_time} {commit}'], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + start_new_session=True + ) + + # Redirect to log view. 
+ return redirect(f'/{run_id}') + @app.route('/') @auth.login_required def get_value(key): diff --git a/ci3/dashboard/rk_cli.py b/ci3/dashboard/rk_cli.py index 87e73a6bbefd..7c006b18f68a 100644 --- a/ci3/dashboard/rk_cli.py +++ b/ci3/dashboard/rk_cli.py @@ -13,13 +13,11 @@ def main(): help='Filter pattern (comma-separated)') parser.add_argument('--filter-prop', '-p', type=str, default='', help='Property to filter on (status,name,author,msg)') - parser.add_argument('--fail-list', type=str, default='', help='Redis key for failed tests list') args = parser.parse_args() set_base_url("http://ci.aztec-labs.com") - output = get_section_data(args.section, args.offset, args.limit, - args.filter_str, args.filter_prop, args.fail_list) + output = get_section_data(args.section, args.offset, args.limit, args.filter_str, args.filter_prop) print(output, end='') if __name__ == '__main__': diff --git a/ci3/dashboard/rk_core.py b/ci3/dashboard/rk_core.py index 1f4043187c7e..9d02503547a8 100644 --- a/ci3/dashboard/rk_core.py +++ b/ci3/dashboard/rk_core.py @@ -38,7 +38,7 @@ def get_list_as_string(key, limit=None): else: values = r.lrange(key, 0, limit - 1) if not values: - value = "List is empty or key not found" + value = "" else: concatenated = [] for item in values: @@ -93,8 +93,7 @@ def render(group: list) -> str: return f"{date_time}: {links_str} {BOLD}{name}{RESET} {PURPLE}{author}{RESET}: {msg} {duration_str}{CLEAR_EOL}\n" def get_section_data(section: str, offset: int = 0, limit: int = 100, - filter_str: str = '', filter_prop: str = '', - fail_list: str = '') -> str: + filter_str: str = '', filter_prop: str = '') -> str: """Core logic for fetching and rendering section data.""" lua_script_path = Path(__file__).parent / 'set-filter.lua' with lua_script_path.open('r') as f: @@ -113,8 +112,9 @@ def get_section_data(section: str, offset: int = 0, limit: int = 100, group_sorted = sorted(group, key=lambda x: x.get('ts', x.get('timestamp', 0))) lines += render(group_sorted) - if 
fail_list: + fail_lines = get_list_as_string("failed_tests_" + section, 100) + if fail_lines: lines += "\n" lines += f"Last 100 failed or flaked tests:\n\n" - lines += get_list_as_string(fail_list, 100) + lines += fail_lines return lines diff --git a/ci3/exec_test b/ci3/exec_test new file mode 100755 index 000000000000..78622dcc8c40 --- /dev/null +++ b/ci3/exec_test @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# Execute a test command with proper resource limits and logging. +# Usage: exec_test +# +# full_cmd format: [:VAR=val]... +# Output goes to stdout - caller handles redirection. +# +# Handles: +# - Variable extraction (CPUS, TIMEOUT, ISOLATE, etc.) +# - Log header (Command, Commit, Resources, etc.) +# - Test execution with taskset/docker_isolate +# - Timestamps on output +# +# Returns: exit code from test + +NO_CD=1 source $(git rev-parse --show-toplevel)/ci3/source + +full_cmd="$1" + +[ -z "$full_cmd" ] && exit 0 + +# Extract test parameters (hash_prefix, test_cmd, CPUS, TIMEOUT, MEM, etc.) +source $ci3/source_test_params + +# Compute test hash for history tracking +test_hash=$(hash_str_orig "$test_cmd") + +# Print log header +if [ -n "${PARENT_LOG_ID:-}" ]; then + echo -e "Parent Log: $(ci_term_link $PARENT_LOG_ID)" +fi +cat < >(add_timestamps) +else + [ "${ONLY_TERM_PARENT:-0}" -eq 1 ] && fg_arg="--foreground" + taskset -c $CPU_LIST timeout ${fg_arg:-} -v $TIMEOUT bash -c "$test_cmd" &> >(add_timestamps) +fi +code=$? +set -e + +exit $code diff --git a/ci3/filter_test_cmds b/ci3/filter_test_cmds index 9a0b985e6611..778ce22490ec 100755 --- a/ci3/filter_test_cmds +++ b/ci3/filter_test_cmds @@ -1,6 +1,17 @@ #!/usr/bin/env bash NO_CD=1 source $(git rev-parse --show-toplevel)/ci3/source +# Inject MAKEFILE_TARGET into hash prefix if set (strip -tests suffix). +# This allows bisect_flake and grind_test to do targeted builds. 
+function inject_makefile_target { + if [ -n "${MAKEFILE_TARGET:-}" ]; then + local mt="${MAKEFILE_TARGET%-tests}" + sed "s/^\([^ ]*\) /\1:MAKEFILE_TARGET=$mt /" + else + cat + fi +} + # Filters any test cmd that marked to skip in the .test_patterns.yml file. # Then filter through cache to remove previously successfully run tests. -grep -Ev -f <(yq e '.tests[] | select(.skip == true) | .regex' $root/.test_patterns.yml) | filter_cached_test_cmd +grep -Ev -f <(yq e '.tests[] | select(.skip == true) | .regex' $root/.test_patterns.yml) | inject_makefile_target | filter_cached_test_cmd diff --git a/ci3/grind_test b/ci3/grind_test new file mode 100755 index 000000000000..8169b7fc9053 --- /dev/null +++ b/ci3/grind_test @@ -0,0 +1,70 @@ +#!/usr/bin/env bash + +# Wrapping in {} ensures bash parses the entire script before executing it. +# Avoids nerfing self when checking out different commits. +{ + +NO_CD=1 source $(git rev-parse --show-toplevel)/ci3/source +source $ci3/source_redis + +full_cmd="$1" +timeout="${2:-10m}" +commit_tip=$(git rev-parse HEAD) +commit="${3:-$commit_tip}" +jobs=$(($(nproc) * 2)) + +# Ensure we return to the original commit on exit. +trap 'git checkout $commit_tip' EXIT + +# Extract test parameters (hash_prefix, test_cmd, CPUS, TIMEOUT, MAKEFILE_TARGET, etc.) +source $ci3/source_test_params + +function checkout { + git reset --hard $1 --quiet + denoise "git submodule update --init --recursive --depth 1 --jobs 8" + git checkout $commit_tip -- ci3 +} + +function build { + if [ -n "${MAKEFILE_TARGET:-}" ]; then + echo "Building target: $MAKEFILE_TARGET..." + make $MAKEFILE_TARGET + else + echo "Building (full bootstrap)..." + ./bootstrap.sh + fi +} + +function grind { + echo "Grinding for $timeout..." 
+ local par_cmd="CI=0 TRACK_TEST_FAIL=1 run_test_cmd '${hash_prefix}:NAME=test-{} ${test_cmd}'" + local joblog=$(mktemp) + + # Run for full timeout, logging all results (no --halt, collect all failure data) + local mem=$(total_mem_gb) + awk 'BEGIN {i=1; while (1) print i++}' | timeout $timeout parallel --memsuspend $((mem / 4))G --halt now,fail=10 --joblog "$joblog" -j $jobs --line-buffer "$par_cmd" || true + + # Check if any jobs failed (column 7 is exitval in joblog) + if awk 'NR>1 && $7!=0 {found=1} END {exit !found}' "$joblog"; then + rm -f "$joblog" + return 1 # Had failures + fi + rm -f "$joblog" + return 0 # No failures +} + +echo_header "grind test" +echo "Full cmd: $full_cmd" +echo "Make target: ${MAKEFILE_TARGET:-}" +echo "Grind for: $timeout" +echo "Commit: $commit" +echo "Ref name: $REF_NAME" +echo + +ulimit -n $(ulimit -Hn) + +checkout $commit +build +grind + +} diff --git a/ci3/memsuspend_limit b/ci3/memsuspend_limit index a453e190a365..d889f6f20c13 100755 --- a/ci3/memsuspend_limit +++ b/ci3/memsuspend_limit @@ -11,16 +11,8 @@ if [ -n "${MEMSUSPEND:-}" ]; then echo $MEMSUSPEND exit fi -os=$(uname -s) -# Default to 64GB memory -total_mem_gb=64 -if [[ "$os" == "Darwin" ]]; then - total_mem_bytes=$(sysctl -n hw.memsize) - total_mem_gb=$((total_mem_bytes / 1024 / 1024 / 1024)) -elif [[ "$os" == "Linux" ]]; then - total_mem_gb=$(free -g | awk '/^Mem:/ {print $2}') -fi +mem=$(total_mem_gb) # Max out at one fourth of 256GB (64GB memsuspend). -echo $(( total_mem_gb < 256 ? $((total_mem_gb / 4)) : 64 ))G +echo $(( mem < 256 ? $((mem / 4)) : 64 ))G diff --git a/ci3/run_test_cmd b/ci3/run_test_cmd index 77371ac2de3e..cacdc5ee708d 100755 --- a/ci3/run_test_cmd +++ b/ci3/run_test_cmd @@ -1,5 +1,9 @@ #!/usr/bin/env bash # Called by 'parallelize' to execute a given test cmd. 
+# This is a thin wrapper around exec_test that handles CI integration: +# - Cache checking +# - Redis publishing (started/failed/flaked events) +# - Flake detection and owner notification NO_CD=1 source $(git rev-parse --show-toplevel)/ci3/source source $ci3/source_redis source $ci3/source_cache @@ -16,34 +20,63 @@ cmd=$1 is_merge_queue=0 [[ "$REF_NAME" =~ ^gh-readonly-queue/ ]] && is_merge_queue=1 -# Ensure SCENARIO_TESTS is always defined -SCENARIO_TESTS=${SCENARIO_TESTS:-0} +# If enabled, update the test log every 5s. +live_logging=0 +# If enabled, publish test started/failed/flaked events to Redis channel. +publish=0 +# If enabled, save logs for passed tests. +pass_log=0 +# Allow a flaking tests to pass rather than fail. +allow_flakes=0 +# If enabled, track test result in history tracker. +track_test_history=0 +# If enabled, track failed test results to fail tracker. +track_test_fail=0 +# If enabled, notify slack on flakes. +slack_notify_flake=0 +# If enabled, notify slack on fails. +slack_notify_fail=0 + +if [ "$CI" -eq 1 ]; then + # CI overrides. + if [ "$CI_REDIS_AVAILABLE" -eq 1 ]; then + live_logging=1 + publish=1 + fi + pass_log=1 + allow_flakes=1 + track_test_fail=1 -# Extract the first token and export any variable assignments. -hash_part="${cmd%% *}" -if [[ "$hash_part" == *:* ]]; then - IFS=':' read -ra parts <<< "$hash_part" - # The first element is the actual hash; remaining elements are variable assignments. - for var_assignment in "${parts[@]:1}"; do - export "$var_assignment" - done + # Track the test in the history tracker if this is in merge queue or targeting a version branch. + if [[ "$is_merge_queue" -eq 1 || ("${TARGET_BRANCH:-}" =~ ^v[0-9]) ]]; then + track_test_history=1 + fi + + # Notify on flakes if in merge queue or backport-to-v2-staging. + [[ "$is_merge_queue" -eq 1 || "$REF_NAME" == "backport-to-v2-staging" ]] && slack_notify_flake=1 + + # Notify on fails for scenario tests. 
+ [[ ${SCENARIO_TESTS:-0} -eq 1 ]] && slack_notify_fail=1 fi -# Defaults, unless overridden above. -TIMEOUT=${TIMEOUT:-600s} -# The following are exported as they maybe used in the test command. -# We can schedule on all CPUs by default. -export CPU_LIST=${CPU_LIST:-"0-$(($(nproc)-1))"} -export CPUS=${CPUS:-2} -# TODO: Only currently enforced by docker. Investigate ulimit. -export MEM=${MEM:-$((CPUS * 4))g} - -# Remove the rebuild hash (first field) that is in front of the test command. -# Exported for use in yq. -export test_cmd="${cmd#* }" +# Env overrides. +live_logging=${LIVE_LOGGING:-$live_logging} +publish=${PUBLISH:-$publish} +pass_log=${PASS_LOG:-$pass_log} +allow_flakes=${ALLOW_FLAKES:-$allow_flakes} +track_test_history=${TRACK_TEST_HISTORY:-$track_test_history} +track_test_fail=${TRACK_TEST_FAIL:-$track_test_fail} +slack_notify_flake=${SLACK_NOTIFY_FLAKE:-$slack_notify_flake} +slack_notify_fail=${SLACK_NOTIFY_FAIL:-$slack_notify_fail} + +# The "key" is used for test caching (should re-run or not). It includes all test framework arguments. key=$(hash_str_orig "$cmd") -# For tracking a list of results for individual tests (excludes the rebuild hash). +# Extract the test command (without hash prefix) for display and tracking. +export test_cmd="${cmd#* }" + +# The "test hash" does not include framework arguments, only the test command itself. +# It is used for tracking test history. test_hash=$(hash_str_orig "$test_cmd") # We can skip the test if it's already been successfully run. @@ -57,9 +90,7 @@ if [ "${USE_TEST_CACHE:-0}" -eq 1 ]; then fi fi -# If the test has a verbose mode, we want it enabled. -export VERBOSE=1 - +# Exit handler to cleanup. function cleanup { if [ -n "${publish_pid:-}" ]; then kill $publish_pid &>/dev/null @@ -70,37 +101,13 @@ function cleanup { } trap cleanup EXIT +# Signal handler to forward SIGTERM/SIGINT to test process. 
function sig_handler { - # echo RTC kill $test_pid $cmd >/dev/tty; kill -TERM ${test_pid:-} &>/dev/null - # echo RTC waiting on $test_pid >/dev/tty; - # wait $test_pid - # echo RTC wait complete for $test_pid >/dev/tty; exit } trap sig_handler SIGTERM SIGINT -# Run the test, capturing output, with a timeout of 10m. -# We cannot use "output=$(timeout ...)" here as it stymies proper signal propagation. -# To ensure we can propagate SIGTERM to timeouts process group we use a temp file and forward the signal. -tmp_file=/tmp/$key -touch $tmp_file - -# Print test metadata header. -if [ -n "${PARENT_LOG_ID:-}" ]; then - echo -e "Parent Log: $(ci_term_link $PARENT_LOG_ID)" >>$tmp_file -fi -cat <>$tmp_file -Command: $cmd -Commit: https://github.com/AztecProtocol/aztec-packages/commit/$COMMIT_HASH -Env: REF_NAME=$REF_NAME CURRENT_VERSION=$CURRENT_VERSION CI_FULL=$CI_FULL -Date: $(date) -System: ARCH=$(arch) CPUS=$(nproc) MEM=$(free -h | awk '/^Mem:/{print $2}') HOSTNAME=$(hostname) -Resources: CPU_LIST=$CPU_LIST CPUS=$CPUS MEM=$MEM TIMEOUT=$TIMEOUT -History: http://ci.aztec-labs.com/list/history_$test_hash${TARGET_BRANCH:+_$TARGET_BRANCH} - -EOF - function publish_log { local expire=${1:-$CI_REDIS_EXPIRE} cat $tmp_file 2>/dev/null | redis_setexz $log_key $expire @@ -131,85 +138,103 @@ function live_publish_log { done } -if [ "$CI_REDIS_AVAILABLE" -eq 1 ]; then - log_key=$(uuid) - log_info=" ($(ci_term_link $log_key))" +# Create a new log key and ci link. +log_key=$(uuid) +log_info=" ($(ci_term_link $log_key))" +grind_link=$(term_link "/grind?cmd=$(urlencode "$cmd")" "grind") +# We append this after the PASSED/FAILED/FLAKED for test tracking. +track_line_postfix="${log_info:-} (${grind_link}): $test_cmd (${SECONDS}s) (${purple}$COMMIT_AUTHOR${reset}: $COMMIT_MSG)" + +# Create a temporary file for the test log. +tmp_file=/tmp/$key +touch $tmp_file - # Publish test started event to Redis channel. +# Publish test started event to Redis channel. 
+if [ "$publish" -eq 1 ]; then start_redis_data=$(jq -n \ --arg status "started" \ - --arg test_cmd "$test_cmd" \ + --arg test_cmd "$cmd" \ --arg log_id "$log_key" \ --arg log_url "http://ci.aztec-labs.com/$log_key" \ --arg ref_name "${TARGET_BRANCH:-$REF_NAME}" \ --arg commit_hash "$COMMIT_HASH" \ --arg commit_author "$COMMIT_AUTHOR" \ --arg commit_msg "$COMMIT_MSG" \ - --argjson is_scenario "$SCENARIO_TESTS" \ - '{status: $status, test_cmd: $test_cmd, log_id: $log_id, log_url: $log_url, ref_name: $ref_name, commit_hash: $commit_hash, commit_author: $commit_author, commit_msg: $commit_msg, is_scenario_test: ($is_scenario == 1), timestamp: now | todate}') + '{status: $status, test_cmd: $test_cmd, log_id: $log_id, log_url: $log_url, ref_name: $ref_name, commit_hash: $commit_hash, commit_author: $commit_author, commit_msg: $commit_msg, timestamp: now | todate}') redis_publish "ci:test:started" "$start_redis_data" +fi - if [ "$CI" -eq 1 ]; then - # If we're in CI, we want to publish the log live. - live_publish_log & - publish_pid=$! - fi +# Start live log publishing if enabled. +if [ "$live_logging" -eq 1 ]; then + live_publish_log & + publish_pid=$! fi -# Reset timer. -# Disable exit on error so we can capture code. -# Run the test. Bind it to the given or default range of CPUs. -# Timeout uses foreground so we only signal the test process, not the whole group (better cleanup control). -# Append timestamps. Use process substitution to avoid a subshell which interferes with signal processing. +# Reset timer and run the test in background (for prompt signal handling) using exec_test. SECONDS=0 set +e -if [ "${ISOLATE:-0}" -eq 1 ]; then - docker_isolate "timeout -v $TIMEOUT bash -c '$test_cmd'" &> >(add_timestamps >> $tmp_file) & -else - [ "${ONLY_TERM_PARENT:-0}" -eq 1 ] && fg_arg="--foreground" - taskset -c $CPU_LIST timeout ${fg_arg:-} -v $TIMEOUT bash -c "$test_cmd" &> >(add_timestamps >> $tmp_file) & -fi +$ci3/exec_test "$cmd" >> "$tmp_file" 2>&1 & test_pid=$! 
-# echo "RTC waiting on $test_pid" >/dev/tty wait $test_pid code=$? -# If the test received a SIGTERM or SIGINT, we don't want to track or print anything. +# If the test received a SIGTERM or SIGINT, we don't want to track or print anything, just exit. if [ "$code" -eq 143 ] || [ "$code" -eq 130 ]; then exit $code fi if [ "$CI_REDIS_AVAILABLE" -eq 1 ]; then # If the test succeeded and we're in CI, set success flag for test. This key is unique to the test. - # If the test succeeded and we're in CI, save the test log. - # If the test failed, regardless of CI state, save the test log. + # If the test succeeded and we're logging passes, save the test log. + # If the test failed, save the test log. if [ $code -eq 0 ]; then if [ "$CI" -eq 1 ]; then redis_cli SETEX $key 604800 $log_key &>/dev/null + fi + if [ "$pass_log" -eq 1 ]; then + # Publish final log. publish_log_final else + # Scrub the link we optimistically (for live logging) set earlier. log_info="" fi else - # Extend lifetime of failed test logs to 12 weeks. + # Publish final log, extending lifetime of failure to 12 weeks. publish_log_final $((60 * 60 * 24 * 7 * 12)) fi fi function track_test { - if [ "$CI" -eq 0 ]; then - return - fi - # For the next branch, we track tests in merge queues only. - # For version branches, we don't assume there is a merge queue. - if [ "$is_merge_queue" -eq 1 ] || [[ -n "${TARGET_BRANCH:-}" && "$TARGET_BRANCH" =~ ^v[0-9] ]]; then - local key=$1 - local line=$(pr_link "$2") - - redis_cli LPUSH $key "$(date "+%m-%d %H:%M:%S"): $(echo -e "$line")" &>/dev/null - # Keeps only the last 1000 lines. - redis_cli RTRIM $key -1000 -1 &>/dev/null - fi + local list_key=$1 + local line=$(pr_link "$2") + local max_lines=${3:-1000} + + redis_cli LPUSH $list_key "$(date "+%m-%d %H:%M:%S"): $(echo -e "$line")" &>/dev/null + # Keeps only the last max_lines lines. 
+ redis_cli LTRIM $list_key 0 $max_lines &>/dev/null +} + +function track_test_history { + track_test "history_${test_hash}${TARGET_BRANCH:+_$TARGET_BRANCH}" "$1" ${2:-} +} + +function track_test_failed { + track_test "failed_tests${CI_DASHBOARD:+_$CI_DASHBOARD}" "$1" 10000 +} + +function publish_redis { + local redis_data=$(jq -n \ + --arg status "$1" \ + --arg cmd "$cmd" \ + --arg log_key "$log_key" \ + --arg ref_name "$REF_NAME" \ + --arg commit_hash "$COMMIT_HASH" \ + --arg commit_author "$COMMIT_AUTHOR" \ + --arg commit_msg "$COMMIT_MSG" \ + --argjson code "$code" \ + --argjson duration "$SECONDS" \ + '{status: $status, cmd: $cmd, log_key: $log_key, ref_name: $ref_name, commit_hash: $commit_hash, commit_author: $commit_author, commit_msg: $commit_msg, exit_code: $code, duration_seconds: $duration, timestamp: now | todate}') + redis_publish "ci:test:$1" "$redis_data" } # Show PASSED and early out on success. @@ -217,8 +242,11 @@ function pass { local line="${green}PASSED${reset}${log_info:-}: $test_cmd (${SECONDS}s)" echo -e "$line" - line+=" (${purple}$COMMIT_AUTHOR${reset}: $COMMIT_MSG)" - track_test "history_${test_hash}${TARGET_BRANCH:+_$TARGET_BRANCH}" "$line" + if [ "$track_test_history" -eq 1 ]; then + local track_line="${green}PASSED${reset}${log_info:-} ${grind_link}: $test_cmd (${SECONDS}s) (${purple}$COMMIT_AUTHOR${reset}: $COMMIT_MSG)" + track_test_history "$track_line" + fi + exit 0 } @@ -232,37 +260,13 @@ function fail { echo -e "$line" fi - line+=" (${purple}$COMMIT_AUTHOR${reset}: $COMMIT_MSG)" - track_test "history_${test_hash}${TARGET_BRANCH:+_$TARGET_BRANCH}" "$line" - track_test "failed_tests${TARGET_BRANCH:+_$TARGET_BRANCH}" "$line" + local track_line="${red}FAILED${reset}${log_info:-} ${grind_link}: $test_cmd (${SECONDS}s) (code: $code) (${purple}$COMMIT_AUTHOR${reset}: $COMMIT_MSG)" + [ "$track_test_history" -eq 1 ] && track_test_history "$track_line" + [ "$track_test_fail" -eq 1 ] && track_test_failed "$track_line" + [ "$publish" -eq 1 ] 
&& publish_redis "failed" - # Publish failed status to Redis channel - local redis_data=$(jq -n \ - --arg status "failed" \ - --arg test_cmd "$test_cmd" \ - --arg log_url "${CI_REDIS_AVAILABLE:+http://ci.aztec-labs.com/$log_key}" \ - --arg ref_name "${TARGET_BRANCH:-$REF_NAME}" \ - --arg commit_hash "$COMMIT_HASH" \ - --arg commit_author "$COMMIT_AUTHOR" \ - --arg commit_msg "$COMMIT_MSG" \ - --argjson code "$code" \ - --argjson duration "$SECONDS" \ - --argjson is_scenario "$SCENARIO_TESTS" \ - '{status: $status, test_cmd: $test_cmd, log_url: $log_url, ref_name: $ref_name, commit_hash: $commit_hash, commit_author: $commit_author, commit_msg: $commit_msg, exit_code: $code, duration_seconds: $duration, is_scenario_test: ($is_scenario == 1), timestamp: now | todate}') - redis_publish "ci:test:failed" "$redis_data" - - # notify slack if scenario test failed - if [ "$SCENARIO_TESTS" -eq 1 ] && [ -n "${SLACK_BOT_TOKEN:-}" ]; then - read -r -d '' data </dev/null + if [ "$slack_notify_fail" -eq 1 ]; then + slack_notify "Test FAILED on *${TARGET_BRANCH:-$REF_NAME}*: \`$test_cmd\` http://ci.aztec-labs.com/$log_key" "#alerts-next-scenario" fi exit $code @@ -277,59 +281,33 @@ function flake { local line="${purple}FLAKED${reset}${log_info:-}: $test_cmd (${SECONDS}s) (code: $code)${group_suffix}" echo -e "$line" - line+=" (${purple}$COMMIT_AUTHOR${reset}: $COMMIT_MSG)" - track_test "history_${test_hash}${TARGET_BRANCH:+_$TARGET_BRANCH}" "$line" - track_test "failed_tests_${TARGET_BRANCH:-}" "$line" + local track_line="${purple}FLAKED${reset}${log_info:-} ${grind_link}: $test_cmd (${SECONDS}s) (code: $code)${group_suffix} (${purple}$COMMIT_AUTHOR${reset}: $COMMIT_MSG)" + [ "$track_test_history" -eq 1 ] && track_test_history "$track_line" + [ "$track_test_fail" -eq 1 ] && track_test_failed "$track_line" + [ "$publish" -eq 1 ] && publish_redis "flaked" # Save flake to buffer file (for PR comment generation) if [ -n "${FLAKES_FILE:-}" ]; then echo "$line" >> "$FLAKES_FILE" fi - # 
Publish flake status to Redis channel - local owners_json=$(echo "$owners" | jq -R -s 'split("\n") | map(select(length > 0))') - local redis_data=$(jq -n \ - --arg status "flaked" \ - --arg test_cmd "$test_cmd" \ - --arg log_url "http://ci.aztec-labs.com/$log_key" \ - --arg ref_name "${TARGET_BRANCH:-$REF_NAME}" \ - --arg commit_hash "$COMMIT_HASH" \ - --arg commit_author "$COMMIT_AUTHOR" \ - --arg commit_msg "$COMMIT_MSG" \ - --argjson code "$code" \ - --argjson duration "$SECONDS" \ - --argjson owners "$owners_json" \ - --arg flake_group_id "${flake_group_id:-}" \ - '{status: $status, test_cmd: $test_cmd, log_url: $log_url, ref_name: $ref_name, commit_hash: $commit_hash, commit_author: $commit_author, commit_msg: $commit_msg, exit_code: $code, duration_seconds: $duration, owners: $owners, flake_group_id: $flake_group_id, timestamp: now | todate}') - redis_publish "ci:test:flaked" "$redis_data" - - # Early out if no token or not in merge queue (unless on backport-to-v2-staging). - if [ -z "${SLACK_BOT_TOKEN:-}" ] || { [ "$is_merge_queue" -eq 0 ] && [ "$REF_NAME" != "backport-to-v2-staging" ]; }; then - return + if [ "$slack_notify_flake" -eq 1 ]; then + # Send slack message to owners. + local slack_uids="" + for uid in $owners; do + slack_uids+="<@$uid> " + done + slack_notify "${slack_uids% }: Test flaked on *${TARGET_BRANCH:-$REF_NAME}*: \`$test_cmd\` http://ci.aztec-labs.com/$log_key" fi - # Send slack message to owners. - local slack_uids="" - for uid in $owners; do - slack_uids+="<@$uid> " - done - data=$(jq -n \ - --arg channel "#aztec3-ci" \ - --arg text "${slack_uids% }: Test flaked on *${TARGET_BRANCH:-$REF_NAME}*: \`$test_cmd\` http://ci.aztec-labs.com/$log_key" \ - '{channel: $channel, text: $text}' - ) - curl -X POST https://slack.com/api/chat.postMessage \ - -H "Authorization: Bearer $SLACK_BOT_TOKEN" \ - -H "Content-type: application/json" \ - --data "$data" &>/dev/null exit 0 } # Test passed. [ $code -eq 0 ] && pass -# We're not in CI, fail. 
-[ "$CI" -eq 0 ] && fail +# If flakes are not allowed, fail the test. +[ "$allow_flakes" -eq 0 ] && fail # Get matching test entries test_entries=$(get_test_entry "$test_cmd" "$tmp_file") diff --git a/ci3/slack_notify b/ci3/slack_notify new file mode 100755 index 000000000000..8d82cf8e3feb --- /dev/null +++ b/ci3/slack_notify @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +NO_CD=1 source $(git rev-parse --show-toplevel)/ci3/source +source $ci3/source_redis + +if [ -z "${SLACK_BOT_TOKEN:-}" ]; then + exit 0 +fi + +channel=${2:-"#aztec3-ci"} +data=$(jq -n --arg channel "$channel" --arg text "$1" '{channel: $channel, text: $text}') + +curl -X POST https://slack.com/api/chat.postMessage \ + -H "Authorization: Bearer $SLACK_BOT_TOKEN" \ + -H "Content-type: application/json" \ + --data "$data" &>/dev/null diff --git a/ci3/source_redis b/ci3/source_redis index fc36a8b5886f..61cfacf14233 100644 --- a/ci3/source_redis +++ b/ci3/source_redis @@ -40,11 +40,11 @@ if [ "$CI_REDIS_AVAILABLE" -eq 0 ]; then export USE_TEST_CACHE=0 fi -# Will set log expiry to 2 weeks in CI, and 8 hours for local runs. +# Will set log expiry to 2 weeks in CI, and 2 days for local runs. if [ "${CI:-0}" -eq 1 ]; then CI_REDIS_EXPIRE=$((60 * 60 * 24 * 14)) else - CI_REDIS_EXPIRE=$((60 * 60 * 8)) + CI_REDIS_EXPIRE=$((60 * 60 * 24 * 2)) fi # Help function to avoid having to constantly specify args. 
diff --git a/ci3/source_stdlib b/ci3/source_stdlib
index 18ac41dc369d..49c7edcbb3be 100644
--- a/ci3/source_stdlib
+++ b/ci3/source_stdlib
@@ -48,4 +48,8 @@ function aws_get_meta_data {
   curl -fs -H "X-aws-ec2-metadata-token: $AWS_TOKEN" http://169.254.169.254/latest/meta-data/$1 || true
 }
 
-export -f hash_str hash_str_orig echo_stderr uuid get_num_cpus get_num_cpus_max
+function urlencode {
+  printf '%s' "$1" | python3 -c "import urllib.parse, sys; print(urllib.parse.quote(sys.stdin.read(), safe=''))"
+}
+
+export -f hash_str hash_str_orig echo_stderr uuid get_num_cpus get_num_cpus_max urlencode
diff --git a/ci3/source_test_params b/ci3/source_test_params
new file mode 100644
index 000000000000..f24808110a7a
--- /dev/null
+++ b/ci3/source_test_params
@@ -0,0 +1,30 @@
+# Source this to parse a full_cmd and extract test parameters.
+# Usage: full_cmd="<hash>[:VAR=val]... <test_cmd>" source $ci3/source_test_params
+#
+# Requires:
+# full_cmd - The full test command string
+#
+# Exports:
+# hash_prefix - First token (hash and embedded vars)
+# test_cmd - Everything after first space
+# CPUS, TIMEOUT, MEM, CPU_LIST, ISOLATE, MAKEFILE_TARGET, etc.
+
+# Parse full_cmd: extract hash prefix and test command
+hash_prefix="${full_cmd%% *}"
+test_cmd="${full_cmd#* }"
+
+# Extract variables from hash prefix (format: hash:CPUS=4:TIMEOUT=900s:MAKEFILE_TARGET=yarn-project)
+# This must happen before defaults are set so that e.g. MEM can use extracted CPUS.
+if [[ "$hash_prefix" == *:* ]]; then + IFS=':' read -ra parts <<< "$hash_prefix" + for var_assignment in "${parts[@]:1}"; do + export "$var_assignment" + done +fi + +# Apply defaults for unset variables +export TIMEOUT=${TIMEOUT:-600s} +export CPU_LIST=${CPU_LIST:-"0-$(($(nproc)-1))"} +export CPUS=${CPUS:-2} +export MEM=${MEM:-$((CPUS * 4))g} +# MAKEFILE_TARGET has no default (empty means full bootstrap) diff --git a/ci3/squash_args b/ci3/squash_args new file mode 100755 index 000000000000..84356b2f5cab --- /dev/null +++ b/ci3/squash_args @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +cmd=$1 +shift +squashed_args=$(printf '%q ' "$@") +exec $cmd "$squashed_args" diff --git a/ci3/total_mem_gb b/ci3/total_mem_gb new file mode 100755 index 000000000000..332c6099f830 --- /dev/null +++ b/ci3/total_mem_gb @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +set -eu + +os=$(uname -s) + +if [[ "$os" == "Darwin" ]]; then + total_mem_bytes=$(sysctl -n hw.memsize) + total_mem_gb=$((total_mem_bytes / 1024 / 1024 / 1024)) +elif [[ "$os" == "Linux" ]]; then + total_mem_gb=$(free -g | awk '/^Mem:/ {print $2}') +fi + +echo $total_mem_gb