Commit bf5d9da

committed

feat(a/b): save A/B fail results

Save A/B test failures to a file so that they can be analyzed offline. Signed-off-by: Egor Lazarchuk <yegorlz@amazon.co.uk>

1 parent f2ef491 commit bf5d9da (Copy full SHA for bf5d9da)

File tree

1 file changed

+16

-3

lines changed

tools
- ab_test.py

1 file changed

+16

-3

lines changed

`‎tools/ab_test.py`

Lines changed: 16 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -277,6 +277,7 @@ def analyze_data(`
`277`	`277`	`)`
`278`	`278`
`279`	`279`	`messages = []`
	`280`	`+ fails = []`
`280`	`281`	`for dimension_set, metric, result, unit in failures:`
`281`	`282`	`# Sanity check as described above`
`282`	`283`	`if abs(statistics.mean(relative_changes_by_metric[metric])) <= noise_threshold:`
`@@ -291,18 +292,30 @@ def analyze_data(`
`291`	`292`	`old_mean = statistics.mean(processed_emf_a[dimension_set][metric][0])`
`292`	`293`	`new_mean = statistics.mean(processed_emf_b[dimension_set][metric][0])`
`293`	`294`
	`295`	`+ change_unit = format_with_reduced_unit(result.statistic, unit)`
	`296`	`+ change_p = result.statistic / old_mean`
	`297`	`+ old_unit = format_with_reduced_unit(old_mean, unit)`
	`298`	`+ new_unit = format_with_reduced_unit(new_mean, unit)`
	`299`	`+`
	`300`	`+ fail = dict(dimension_set)`
	`301`	`+ fail["diff"] = change_p`
	`302`	`+ fails.append(fail)`
	`303`	`+`
`294`	`304`	`msg = (`
`295`	`305`	`f"\033[0;32m[Firecracker A/B-Test Runner]\033[0m A/B-testing shows a change of "`
`296`		`- f"{format_with_reduced_unit(result.statistic, unit)}, or {result.statistic/old_mean:.2%}, "`
`297`		`- f"(from {format_with_reduced_unit(old_mean, unit)} to {format_with_reduced_unit(new_mean, unit)}) "`
	`306`	`+ f"{change_unit}, or {change_p:.2%}, "`
	`307`	`+ f"(from {old_unit} to {new_unit}) "`
`298`	`308`	`f"for metric \033[1m{metric}\033[0m with \033[0;31m\033[1mp={result.pvalue}\033[0m. "`
`299`	`309`	`f"This means that observing a change of this magnitude or worse, assuming that performance "`
`300`	`310`	`f"characteristics did not change across the tested commits, has a probability of {result.pvalue:.2%}. "`
`301`	`311`	`f"Tested Dimensions:\n{json.dumps(dict(dimension_set), indent=2, sort_keys=True)}"`
`302`	`312`	`)`
`303`	`313`	`messages.append(msg)`
`304`	`314`
`305`		`- assert not messages, "\n" + "\n".join(messages)`
	`315`	`+ if messages:`
	`316`	`+ with open("test_results/ab.json", "w") as f:`
	`317`	`+ json.dump({"fails": fails}, f, indent=2, sort_keys=True)`
	`318`	`+ assert False, "\n" + "\n".join(messages)`
`306`	`319`	`print("No regressions detected!")`
`307`	`320`
`308`	`321`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit bf5d9da

File tree

1 file changed

1 file changed

`‎tools/ab_test.py`

0 commit comments