Skip to content

Commit dd12edb

Browse files
Merge pull request #492 from ruby/mvh-fix-pvalue-display
Fix pvalue display when --pvalue is not being passed.
2 parents 54c9ebf + 5d3d769 commit dd12edb

File tree

5 files changed

+62
-7
lines changed

5 files changed

+62
-7
lines changed

lib/benchmark_runner.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def write_csv(output_path, ruby_descriptions, table)
4848
end
4949

5050
# Build output text string with metadata, table, and legend
51-
def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false)
51+
def build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: false, include_gc: false, include_pvalue: false)
5252
base_name, *other_names = ruby_descriptions.keys
5353

5454
output_str = +""
@@ -73,7 +73,9 @@ def build_output_text(ruby_descriptions, table, format, bench_failures, include_
7373
output_str << "- sweep #{base_name}/#{name}: ratio of GC sweeping time. Higher is better for #{name}. Above 1 represents faster sweeping.\n"
7474
end
7575
end
76-
output_str << "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)\n"
76+
if include_pvalue
77+
output_str << "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)\n"
78+
end
7779
end
7880

7981
output_str

lib/benchmark_runner/cli.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def run
109109
BenchmarkRunner.write_csv(output_path, ruby_descriptions, table)
110110

111111
# Save the output in a text file that we can easily refer to
112-
output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?)
112+
output_str = BenchmarkRunner.build_output_text(ruby_descriptions, table, format, bench_failures, include_rss: args.rss, include_gc: builder.include_gc?, include_pvalue: args.pvalue)
113113
out_txt_path = output_path + ".txt"
114114
File.open(out_txt_path, "w") { |f| f.write output_str }
115115

lib/results_table_builder.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
174174
row.concat(ratio_1sts)
175175

176176
other_ts.each do |other_t|
177-
pval = Stats.welch_p_value(base_t, other_t)
177+
pval = @include_pvalue ? Stats.welch_p_value(base_t, other_t) : nil
178178
row << format_ratio(mean(base_t) / mean(other_t), pval)
179179
if @include_pvalue
180180
row << format_p_value(pval)
@@ -207,7 +207,7 @@ def gc_ratio(base, other)
207207
mean(other) == 0.0
208208
return "N/A"
209209
end
210-
pval = Stats.welch_p_value(base, other)
210+
pval = @include_pvalue ? Stats.welch_p_value(base, other) : nil
211211
format_ratio(mean(base) / mean(other), pval)
212212
end
213213

test/benchmark_runner_test.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,25 @@
387387
assert_includes result, 'Legend:'
388388
assert_includes result, '- ruby-yjit 1st itr: ratio of ruby-base/ruby-yjit time for the first benchmarking iteration.'
389389
assert_includes result, '- ruby-base/ruby-yjit: ratio of ruby-base/ruby-yjit time. Higher is better for ruby-yjit. Above 1 represents a speedup.'
390+
refute_includes result, "p < 0.001"
391+
end
392+
393+
it 'includes p-value legend when include_pvalue is true' do
394+
ruby_descriptions = {
395+
'ruby-base' => 'ruby 3.3.0',
396+
'ruby-yjit' => 'ruby 3.3.0 +YJIT'
397+
}
398+
table = [
399+
['bench', 'ruby-base (ms)', 'stddev (%)', 'ruby-yjit (ms)', 'stddev (%)'],
400+
['fib', '100.0', '5.0', '50.0', '3.0']
401+
]
402+
format = ['%s', '%.1f', '%.1f', '%.1f', '%.1f']
403+
bench_failures = {}
404+
405+
result = BenchmarkRunner.build_output_text(
406+
ruby_descriptions, table, format, bench_failures, include_pvalue: true
407+
)
408+
390409
assert_includes result, "- ***: p < 0.001, **: p < 0.01, *: p < 0.05 (Welch's t-test)"
391410
end
392411

test/results_table_builder_test.rb

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@
425425
assert_equal '', table[1].last
426426
end
427427

428-
it 'always shows significance symbol but omits verbose columns without --pvalue' do
428+
it 'omits significance symbols and p-value columns without --pvalue' do
429429
executable_names = ['ruby', 'ruby-yjit']
430430
bench_data = {
431431
'ruby' => {
@@ -452,7 +452,41 @@
452452
table, _format = builder.build
453453
refute_includes table[0], 'p-value'
454454
refute_includes table[0], 'sig'
455-
assert_match(/\(\*{1,3}\)$/, table[1].last)
455+
ratio_cell = table[1].last
456+
refute_match(/\*/, ratio_cell)
457+
assert_match(/\A\d+\.\d+\s*\z/, ratio_cell)
458+
end
459+
460+
it 'shows significance symbols and p-value columns with --pvalue' do
461+
executable_names = ['ruby', 'ruby-yjit']
462+
bench_data = {
463+
'ruby' => {
464+
'fib' => {
465+
'warmup' => [0.1],
466+
'bench' => [0.100, 0.101, 0.099],
467+
'rss' => 1024 * 1024 * 10
468+
}
469+
},
470+
'ruby-yjit' => {
471+
'fib' => {
472+
'warmup' => [0.05],
473+
'bench' => [0.050, 0.051, 0.049],
474+
'rss' => 1024 * 1024 * 12
475+
}
476+
}
477+
}
478+
479+
builder = ResultsTableBuilder.new(
480+
executable_names: executable_names,
481+
bench_data: bench_data,
482+
include_pvalue: true
483+
)
484+
485+
table, _format = builder.build
486+
assert_includes table[0], 'p-value'
487+
assert_includes table[0], 'sig'
488+
ratio_col_idx = table[0].index('ruby/ruby-yjit')
489+
assert_match(/\(\*{1,3}\)/, table[1][ratio_col_idx])
456490
end
457491

458492
it 'handles only headline benchmarks' do

0 commit comments

Comments
 (0)