Skip to content

Commit ac2a53c

Browse files
Merge pull request #483 from ruby/mvh-interleave
Optionally interleave benchmarks between base and experiment
2 parents 1939e61 + 159a495 commit ac2a53c

6 files changed

Lines changed: 222 additions & 34 deletions

File tree

lib/argument_parser.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class ArgumentParser
2424
:skip_zjit,
2525
:with_pre_init,
2626
:pvalue,
27+
:interleave,
2728
keyword_init: true
2829
)
2930

@@ -149,6 +150,10 @@ def parse(argv)
149150
args.pvalue = true
150151
end
151152

153+
opts.on("--interleave", "run benchmarks interleaved across executables to reduce thermal drift") do
154+
args.interleave = true
155+
end
156+
152157
opts.on("--graph", "generate a graph image of benchmark results") do
153158
args.graph = true
154159
end
@@ -230,6 +235,7 @@ def default_args
230235
excludes: [],
231236
rss: false,
232237
pvalue: false,
238+
interleave: false,
233239
graph: false,
234240
no_pinning: false,
235241
force_pinning: false,

lib/benchmark_runner/cli.rb

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,19 +40,43 @@ def run
4040
force_pinning: args.force_pinning
4141
)
4242

43-
# Benchmark with and without YJIT
43+
# Collect ruby version descriptions for all executables upfront
44+
args.executables.each do |name, executable|
45+
ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp
46+
end
47+
4448
bench_start_time = Time.now.to_f
4549
bench_data = {}
4650
bench_failures = {}
47-
args.executables.each do |name, executable|
48-
ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp
4951

50-
bench_data[name], failures = suite.run(
51-
ruby: executable,
52-
ruby_description: ruby_descriptions[name]
53-
)
54-
# Make it easier to query later.
55-
bench_failures[name] = failures unless failures.empty?
52+
if args.interleave
53+
args.executables.each_key { |name| bench_data[name] = {} }
54+
entries = suite.benchmarks
55+
56+
entries.each_with_index do |entry, idx|
57+
# Alternate executable order to cancel cache-warming bias
58+
exes = ruby_descriptions.keys
59+
exes = exes.reverse if idx.odd?
60+
61+
exes.each do |name|
62+
puts("Running benchmark \"#{entry.name}\" [#{name}] (#{idx+1}/#{entries.length})")
63+
result = suite.run_benchmark(entry, ruby: args.executables[name], ruby_description: ruby_descriptions[name])
64+
if result[:data]
65+
bench_data[name][entry.name] = result[:data]
66+
else
67+
bench_failures[name] ||= {}
68+
bench_failures[name][entry.name] = result[:failure]
69+
end
70+
end
71+
end
72+
else
73+
args.executables.each do |name, executable|
74+
bench_data[name], failures = suite.run(
75+
ruby: executable,
76+
ruby_description: ruby_descriptions[name]
77+
)
78+
bench_failures[name] = failures unless failures.empty?
79+
end
5680
end
5781

5882
bench_end_time = Time.now.to_f

lib/benchmark_suite.rb

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -33,41 +33,53 @@ def initialize(categories:, name_filters:, excludes: [], out_path:, harness:, ha
3333
@bench_dir = BENCHMARKS_DIR
3434
end
3535

36-
# Run all the benchmarks and record execution times
37-
# Returns [bench_data, bench_failures]
38-
def run(ruby:, ruby_description:)
39-
bench_data = {}
40-
bench_failures = {}
36+
# Discovered and filtered benchmark entries, memoized.
37+
def benchmarks
38+
@benchmarks ||= discover_benchmarks
39+
end
4140

42-
benchmark_entries = discover_benchmarks
41+
# Run a single benchmark entry on a single executable.
42+
# Returns { name:, data: } on success, { name:, failure: } on error.
43+
def run_benchmark(entry, ruby:, ruby_description:)
4344
env = benchmark_env(ruby)
4445
caller_json_path = ENV["RESULT_JSON_PATH"]
45-
46-
# Capture quiet setting before entering unbundled env (which clears ENV)
4746
quiet = ENV['BENCHMARK_QUIET'] == '1'
4847

49-
benchmark_entries.each_with_index do |entry, idx|
50-
puts("Running benchmark \"#{entry.name}\" (#{idx+1}/#{benchmark_entries.length})")
48+
result_json_path = caller_json_path || File.join(out_path, "temp#{Process.pid}.json")
49+
cmd_prefix = base_cmd(ruby_description, entry.name)
5150

52-
result_json_path = caller_json_path || File.join(out_path, "temp#{Process.pid}.json")
53-
cmd_prefix = base_cmd(ruby_description, entry.name)
54-
55-
# Clear project-level Bundler environment so benchmarks run in a clean context.
56-
# Benchmarks that need Bundler (e.g., railsbench) set up their own via use_gemfile.
57-
# This is important when running tests under `bundle exec rake test`.
58-
result = if defined?(Bundler)
59-
Bundler.with_unbundled_env do
60-
run_single_benchmark(entry.script_path, result_json_path, ruby, cmd_prefix, env, entry.name, quiet: quiet)
61-
end
62-
else
51+
# Clear project-level Bundler environment so benchmarks run in a clean context.
52+
# Benchmarks that need Bundler (e.g., railsbench) set up their own via use_gemfile.
53+
result = if defined?(Bundler)
54+
Bundler.with_unbundled_env do
6355
run_single_benchmark(entry.script_path, result_json_path, ruby, cmd_prefix, env, entry.name, quiet: quiet)
6456
end
57+
else
58+
run_single_benchmark(entry.script_path, result_json_path, ruby, cmd_prefix, env, entry.name, quiet: quiet)
59+
end
60+
61+
if result[:success]
62+
{ name: entry.name, data: process_benchmark_result(result_json_path, result[:command], delete_file: !caller_json_path) }
63+
else
64+
FileUtils.rm_f(result_json_path) unless caller_json_path
65+
{ name: entry.name, failure: result[:status].exitstatus }
66+
end
67+
end
6568

66-
if result[:success]
67-
bench_data[entry.name] = process_benchmark_result(result_json_path, result[:command], delete_file: !caller_json_path)
69+
# Run all the benchmarks and record execution times.
70+
# Returns [bench_data, bench_failures]
71+
def run(ruby:, ruby_description:)
72+
bench_data = {}
73+
bench_failures = {}
74+
75+
benchmarks.each_with_index do |entry, idx|
76+
puts("Running benchmark \"#{entry.name}\" (#{idx+1}/#{benchmarks.length})")
77+
78+
result = run_benchmark(entry, ruby: ruby, ruby_description: ruby_description)
79+
if result[:data]
80+
bench_data[entry.name] = result[:data]
6881
else
69-
bench_failures[entry.name] = result[:status].exitstatus
70-
FileUtils.rm_f(result_json_path) unless caller_json_path
82+
bench_failures[entry.name] = result[:failure]
7183
end
7284
end
7385

@@ -174,6 +186,11 @@ def benchmark_harness_for(benchmark_name)
174186
end
175187

176188
def benchmark_env(ruby)
189+
@benchmark_env_cache ||= {}
190+
@benchmark_env_cache[ruby] ||= compute_benchmark_env(ruby)
191+
end
192+
193+
def compute_benchmark_env(ruby)
177194
# When the Ruby running this script is not the first Ruby in PATH, shell commands
178195
# like `bundle install` in a child process will not use the Ruby being benchmarked.
179196
# It overrides PATH to guarantee the commands of the benchmarked Ruby will be used.

test/argument_parser_test.rb

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def setup_mock_ruby(path)
5050
assert_equal [], args.name_filters
5151
assert_equal false, args.rss
5252
assert_equal false, args.pvalue
53+
assert_equal false, args.interleave
5354
assert_equal false, args.graph
5455
assert_equal false, args.no_pinning
5556
assert_equal false, args.turbo
@@ -438,6 +439,15 @@ def setup_mock_ruby(path)
438439
end
439440
end
440441

442+
describe '--interleave option' do
443+
it 'sets interleave flag' do
444+
parser = ArgumentParser.new
445+
args = parser.parse(['--interleave'])
446+
447+
assert_equal true, args.interleave
448+
end
449+
end
450+
441451
describe '--graph option' do
442452
it 'sets graph flag' do
443453
parser = ArgumentParser.new

test/benchmark_runner_cli_test.rb

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def create_args(overrides = {})
4848
name_filters: [],
4949
excludes: [],
5050
rss: false,
51+
interleave: false,
5152
graph: false,
5253
no_pinning: true,
5354
turbo: true,
@@ -318,6 +319,37 @@ def create_args(overrides = {})
318319
end
319320
end
320321

322+
it 'runs benchmarks interleaved when --interleave is set' do
323+
Dir.mktmpdir do |tmpdir|
324+
args = create_args(
325+
name_filters: ['fib', 'respond_to'],
326+
out_path: tmpdir,
327+
interleave: true
328+
)
329+
330+
cli = BenchmarkRunner::CLI.new(args)
331+
output = capture_io { cli.run }.join
332+
333+
# Progress output should include executable names in brackets
334+
assert_match(/\[.+\]/, output, "Interleaved output should include executable name in brackets")
335+
assert_match(/Total time spent benchmarking:/, output)
336+
337+
# Verify output files were created with data from all executables
338+
json_files = Dir.glob(File.join(tmpdir, "*.json"))
339+
assert_equal 1, json_files.size
340+
341+
json_data = JSON.parse(File.read(json_files.first))
342+
raw_data = json_data['raw_data']
343+
344+
# All executables should have results
345+
args.executables.each_key do |name|
346+
assert raw_data.key?(name), "Expected raw_data to contain '#{name}'"
347+
assert raw_data[name].key?('fib'), "Expected '#{name}' to have 'fib' results"
348+
assert raw_data[name].key?('respond_to'), "Expected '#{name}' to have 'respond_to' results"
349+
end
350+
end
351+
end
352+
321353
it 'creates output directory if it does not exist' do
322354
Dir.mktmpdir do |parent_tmpdir|
323355
nested_dir = File.join(parent_tmpdir, 'nested', 'output', 'dir')

test/benchmark_suite_test.rb

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,105 @@
519519
end
520520
end
521521

522+
describe '#benchmarks' do
523+
it 'returns discovered benchmark entries' do
524+
suite = BenchmarkSuite.new(
525+
categories: [],
526+
name_filters: ['simple'],
527+
out_path: @out_path,
528+
harness: 'harness',
529+
no_pinning: true
530+
)
531+
532+
entries = suite.benchmarks
533+
assert_instance_of Array, entries
534+
assert_equal 1, entries.length
535+
assert_equal 'simple', entries.first.name
536+
end
537+
538+
it 'memoizes the result' do
539+
suite = BenchmarkSuite.new(
540+
categories: [],
541+
name_filters: ['simple'],
542+
out_path: @out_path,
543+
harness: 'harness',
544+
no_pinning: true
545+
)
546+
547+
assert_same suite.benchmarks, suite.benchmarks
548+
end
549+
end
550+
551+
describe '#run_benchmark' do
552+
it 'returns data hash on success' do
553+
suite = BenchmarkSuite.new(
554+
categories: [],
555+
name_filters: ['simple'],
556+
out_path: @out_path,
557+
harness: 'harness',
558+
no_pinning: true
559+
)
560+
561+
entry = suite.benchmarks.first
562+
result = nil
563+
capture_io do
564+
result = suite.run_benchmark(entry, ruby: [RbConfig.ruby], ruby_description: 'ruby 3.2.0')
565+
end
566+
567+
assert_equal 'simple', result[:name]
568+
assert_instance_of Hash, result[:data]
569+
assert_includes result[:data], 'warmup'
570+
assert_includes result[:data], 'bench'
571+
assert_includes result[:data], 'rss'
572+
assert_nil result[:failure]
573+
end
574+
575+
it 'returns failure hash on error' do
576+
File.write('benchmarks/failing.rb', "exit(1)\n")
577+
578+
suite = BenchmarkSuite.new(
579+
categories: [],
580+
name_filters: ['failing'],
581+
out_path: @out_path,
582+
harness: 'harness',
583+
no_pinning: true
584+
)
585+
586+
entry = suite.benchmarks.first
587+
result = nil
588+
capture_io do
589+
result = suite.run_benchmark(entry, ruby: [RbConfig.ruby], ruby_description: 'ruby 3.2.0')
590+
end
591+
592+
assert_equal 'failing', result[:name]
593+
assert_nil result[:data]
594+
assert_equal 1, result[:failure]
595+
end
596+
597+
it 'produces same data as run for the same benchmark' do
598+
suite = BenchmarkSuite.new(
599+
categories: [],
600+
name_filters: ['simple'],
601+
out_path: @out_path,
602+
harness: 'harness',
603+
no_pinning: true
604+
)
605+
606+
entry = suite.benchmarks.first
607+
single_result = nil
608+
capture_io do
609+
single_result = suite.run_benchmark(entry, ruby: [RbConfig.ruby], ruby_description: 'ruby 3.2.0')
610+
end
611+
612+
run_data = nil
613+
capture_io do
614+
run_data, _ = suite.run(ruby: [RbConfig.ruby], ruby_description: 'ruby 3.2.0')
615+
end
616+
617+
assert_equal run_data['simple'].keys.sort, single_result[:data].keys.sort
618+
end
619+
end
620+
522621
describe 'integration with BenchmarkFilter' do
523622
it 'uses BenchmarkFilter to match benchmarks' do
524623
# Create benchmarks with different categories

0 commit comments

Comments (0)