#!/usr/bin/env bash
#
# Rerun the single-shot repro many times.
# Stop on the first crash (exit via signal) or hang (deadlock).
#
# Tunables (environment variables; defaults in parentheses):
#   RUNS             number of attempts before giving up (100)
#   PER_RUN_TIMEOUT  seconds a single run may take before it counts as hung (20)
#   RUBY             interpreter used to run the repro (ruby)
#   REACTORS, FIBERS, PRODUCERS, ALLOC, MAXTIME
#                    exported to the child process; presumably read by
#                    repro.rb -- confirm there (8, 32, 16, 400, 5)

set -u

# All relative paths used by this script are resolved against the script's own
# directory, so refuse to continue if we cannot get there.
cd "$(dirname "$0")" || {
  printf 'cannot cd to script directory: %s\n' "$(dirname "$0")" >&2
  exit 1
}

RUNS=${RUNS:-100}
PER_RUN_TIMEOUT=${PER_RUN_TIMEOUT:-20}
RUBY=${RUBY:-ruby}

# Intended to run without YJIT: RUBYOPT is forwarded unchanged (exported empty
# when unset), so nothing here turns YJIT on. NOTE(review): a caller-supplied
# RUBYOPT can still enable it.
# Tiny malloc limit -> frequent malloc-triggered GC
# (handle_malloc_failure -> garbage_collect), to coincide with the resume storm.
export RUBYOPT="${RUBYOPT:-}"
export RUBY_GC_MALLOC_LIMIT=${RUBY_GC_MALLOC_LIMIT:-16384}
export RUBY_GC_MALLOC_LIMIT_MAX=${RUBY_GC_MALLOC_LIMIT_MAX:-16384}

# Workload knobs handed to the child through the environment.
export REACTORS=${REACTORS:-8}
export FIBERS=${FIBERS:-32}
export PRODUCERS=${PRODUCERS:-16}
export ALLOC=${ALLOC:-400}
export MAXTIME=${MAXTIME:-5}

# Banner: interpreter version (first line only) and the effective settings.
# "$RUBY" is quoted so that it is treated as a single command name, the same
# way it is invoked for the actual runs.
printf 'ruby: %s\n' "$("$RUBY" -v | head -n 1)"
printf '%s\n' "RUNS=$RUNS REACTORS=$REACTORS FIBERS=$FIBERS PRODUCERS=$PRODUCERS ALLOC=$ALLOC MAXTIME=$MAXTIME malloc_limit=$RUBY_GC_MALLOC_LIMIT timeout=${PER_RUN_TIMEOUT}s"

# Write all arguments, joined into one space-separated string, as a single
# line on stdout. The text is passed as printf data, never as the format.
log_line() {
  local message="$*"
  printf '%s\n' "$message"
}

# Describe a non-zero wait status for the crash report.
# Arguments: $1 - exit status as returned by `wait`
# Outputs:   "signal N" when the status encodes death by signal N (128+N),
#            otherwise "not a signal" (an ordinary non-zero exit).
_describe_exit() {
  local status=$1
  if (( status > 128 )); then
    printf 'signal %d' "$(( status - 128 ))"
  else
    printf 'not a signal'
  fi
}

# Output of the current run; overwritten on every iteration.
log="repro.run.log"

# Arithmetic loop instead of `seq`: RUNS=0 then means zero iterations on every
# platform (BSD/macOS seq counts DOWN when first > last).
for (( i = 1; i <= RUNS; i++ )); do
  log_line "run $i/$RUNS: starting (timeout ${PER_RUN_TIMEOUT}s)..."
  start=$SECONDS

  "$RUBY" -Ilib -Iext/gvltools repro.rb >"$log" 2>&1 &
  pid=$!

  # Poll until the run exits on its own or the timeout elapses. Unlike a
  # kill-on-timeout watchdog, this leaves a hung process ALIVE so a debugger
  # can attach to it.
  deadlocked=1
  while (( SECONDS - start < PER_RUN_TIMEOUT )); do
    if ! kill -0 "$pid" 2>/dev/null; then
      deadlocked=0
      break
    fi
    sleep 0.5
  done

  # The child may have exited during the last sleep, right before the deadline;
  # probe once more so that case is not misreported as a hang.
  if (( deadlocked )) && ! kill -0 "$pid" 2>/dev/null; then
    deadlocked=0
  fi

  if (( deadlocked )); then
    elapsed=$((SECONDS - start))
    log_line ""
    log_line "############################################################"
    log_line "### run $i/$RUNS: DEADLOCK after ${elapsed}s (>${PER_RUN_TIMEOUT}s)"
    log_line "### PID = $pid (left ALIVE -- attach your debugger now)"
    log_line "###   lldb -p $pid -o 'thread backtrace all'"
    log_line "###   sample $pid 5"
    log_line "############################################################"
    # Block here so the hung process stays attachable. When you're done
    # debugging, press Enter to SIGKILL it and stop the loop.
    # The prompt reads from /dev/tty (fd 3) so it works even when stdin is
    # redirected; without a controlling tty the process is simply left running.
    if { exec 3</dev/tty; } 2>/dev/null; then
      read -r -p "Press Enter to SIGKILL pid $pid and exit... " _ <&3
      exec 3<&-
      kill -KILL "$pid" 2>/dev/null
    else
      log_line "(no controlling tty: leaving pid $pid running -- kill it yourself when done)"
    fi
    exit 1
  fi

  # The child is gone; collect its exit status.
  wait "$pid"
  code=$?
  elapsed=$((SECONDS - start))

  if (( code == 0 )); then
    log_line "run $i/$RUNS: survived in ${elapsed}s"
    continue
  fi

  # Exited on its own with a non-zero status -> treated as a crash. Only a
  # status above 128 encodes a signal (e.g. SIGSEGV = 139 -> signal 11); a
  # plain non-zero exit is reported as such instead of as a negative signal.
  log_line "=== run $i/$RUNS: CRASH after ${elapsed}s, exit=$code ($(_describe_exit "$code")) ==="
  log_line "----- repro.run.log -----"
  cat "$log"
  # Keep a copy, because repro.run.log is overwritten by the next invocation.
  if cp -- "$log" "repro.crash.log"; then
    log_line "----- saved to repro.crash.log -----"
  else
    log_line "----- could not save repro.crash.log -----"
  fi
  exit 1
done

log_line "No crash across $RUNS runs."