Project

General

Profile

Bug #22098 » run_loop.sh

luke-gru (Luke Gruber), 06/09/2026 09:27 PM

 
#!/usr/bin/env bash
# Rerun the single-shot repro many times.
# Stop on the first crash (exit via signal) or hang (deadlock).
#
# Tunables, all overridable from the environment:
#   RUNS             number of attempts                            (default 100)
#   PER_RUN_TIMEOUT  seconds before a run is reported as hung      (default 20)
#   RUBY             interpreter to launch                         (default ruby)
#   REACTORS, FIBERS, PRODUCERS, ALLOC, MAXTIME
#                    exported to the child environment (defaults 8/32/16/400/5)
set -u

# Every path used later is relative to the script's directory, so refuse to
# continue from the wrong place if the cd fails.
cd "$(dirname "$0")" || exit 1

RUNS=${RUNS:-100}
PER_RUN_TIMEOUT=${PER_RUN_TIMEOUT:-20}
RUBY=${RUBY:-ruby}

# RUBYOPT is exported unchanged (empty when unset).
# NOTE(review): the original comment said "YJIT disabled", but nothing here
# adds or strips a JIT flag -- confirm YJIT is turned off by the caller/build.
export RUBYOPT="${RUBYOPT:-}"

# Tiny malloc limit -> frequent malloc-triggered GC
# (handle_malloc_failure -> garbage_collect), to coincide with the resume storm.
export RUBY_GC_MALLOC_LIMIT=${RUBY_GC_MALLOC_LIMIT:-16384}
export RUBY_GC_MALLOC_LIMIT_MAX=${RUBY_GC_MALLOC_LIMIT_MAX:-16384}

# Workload knobs handed to the child process through its environment.
export REACTORS=${REACTORS:-8}
export FIBERS=${FIBERS:-32}
export PRODUCERS=${PRODUCERS:-16}
export ALLOC=${ALLOC:-400}
export MAXTIME=${MAXTIME:-5}

# Banner: record exactly which interpreter and settings this session uses.
# "$RUBY" is quoted here to match how it is invoked for the actual runs.
echo "ruby: $("$RUBY" -v | head -n 1)"
echo "RUNS=$RUNS REACTORS=$REACTORS FIBERS=$FIBERS PRODUCERS=$PRODUCERS ALLOC=$ALLOC MAXTIME=$MAXTIME malloc_limit=$RUBY_GC_MALLOC_LIMIT timeout=${PER_RUN_TIMEOUT}s"

# Print all arguments as one line on stdout, joined by the first character
# of IFS. The text is passed as data, never as a printf format string.
log_line() {
  local message="$*"
  printf '%s\n' "$message"
}

# Main loop: launch the repro up to $RUNS times, one run at a time.
#   - exit status 0          -> log "survived" and move on to the next run
#   - still alive at timeout -> report DEADLOCK, leave the process attachable,
#                               then exit 1
#   - non-zero exit status   -> report CRASH, dump and save the log, exit 1
# Each run overwrites repro.run.log; a crashing run's log is copied to
# repro.crash.log.
log="repro.run.log"
for ((i = 1; i <= RUNS; i++)); do
  log_line "run $i/$RUNS: starting (timeout ${PER_RUN_TIMEOUT}s)..."
  start=$SECONDS

  "$RUBY" -Ilib -Iext/gvltools repro.rb >"$log" 2>&1 &
  pid=$!

  # Poll until the run exits on its own or the timeout elapses. Unlike a
  # kill-on-timeout watchdog, this leaves a hung process ALIVE so a debugger
  # can attach to it.
  # Liveness is the loop condition, so a run is only ever flagged as hung
  # while it is actually still running; a child that exits during the last
  # sleep before the deadline is not misreported as a deadlock.
  deadlocked=0
  while kill -0 "$pid" 2>/dev/null; do
    if ((SECONDS - start >= PER_RUN_TIMEOUT)); then
      deadlocked=1
      break
    fi
    sleep 0.5
  done

  if ((deadlocked)); then
    elapsed=$((SECONDS - start))
    log_line ""
    log_line "############################################################"
    log_line "### run $i/$RUNS: DEADLOCK after ${elapsed}s (>${PER_RUN_TIMEOUT}s)"
    log_line "### PID = $pid (left ALIVE -- attach your debugger now)"
    log_line "### lldb -p $pid -o 'thread backtrace all'"
    log_line "### sample $pid 5"
    log_line "############################################################"
    # Block here so the hung process stays attachable. When you're done
    # debugging, press Enter to SIGKILL it and stop the loop.
    # fd 3 is opened on the controlling terminal so the prompt still works
    # when stdin is redirected; if there is no tty the open fails quietly.
    if { exec 3</dev/tty; } 2>/dev/null; then
      read -r -p "Press Enter to SIGKILL pid $pid and exit... " _ <&3
      exec 3<&-
      # SIGKILL on purpose: the process is wedged and the user asked for it.
      kill -KILL "$pid" 2>/dev/null
    else
      log_line "(no controlling tty: leaving pid $pid running -- kill it yourself when done)"
    fi
    exit 1
  fi

  # The child has already exited; wait only collects its exit status.
  wait "$pid"
  code=$?
  elapsed=$((SECONDS - start))

  if ((code == 0)); then
    log_line "run $i/$RUNS: survived in ${elapsed}s"
    continue
  fi

  # Exited on its own with a non-zero status. The shell reports death by
  # signal N as 128+N (e.g. SIGSEGV=139), so only statuses above 128 carry a
  # signal number; anything else is an ordinary failing exit.
  if ((code > 128)); then
    detail="signal $((code - 128))"
  else
    detail="no signal"
  fi
  log_line "=== run $i/$RUNS: CRASH after ${elapsed}s, exit=$code ($detail) ==="
  log_line "----- repro.run.log -----"
  cat "$log"
  cp "$log" "repro.crash.log"
  log_line "----- saved to repro.crash.log -----"
  exit 1
done
log_line "No crash across $RUNS runs."
(2-2/2)