Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit afd72c3

Browse files
feat(a/b): add support scripts for A/B visualization
Add scripts for combining and plotting results of A/B runs. Move all A/B related scripts to the tools/ab directory. Signed-off-by: Egor Lazarchuk <yegorlz@amazon.co.uk>
1 parent bf5d9da commit afd72c3

File tree

4 files changed

+186
-6
lines changed

4 files changed

+186
-6
lines changed

‎tools/ab_test.py renamed to ‎tools/ab/ab_test.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,8 @@
3434
# pylint:disable=wrong-import-position
3535
from framework.ab_test import binary_ab_test, check_regression
3636
from framework.properties import global_props
37-
from host_tools.metrics import (
38-
emit_raw_emf,
39-
format_with_reduced_unit,
40-
get_metrics_logger,
41-
)
37+
from host_tools.metrics import (emit_raw_emf, format_with_reduced_unit,
38+
get_metrics_logger)
4239

4340
# Performance tests that are known to be unstable and exhibit variances of up to 60% of the mean
4441
IGNORED = [

‎tools/ab/combine.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import argparse
2+
import json
3+
import os
4+
from pathlib import Path
5+
6+
parser = argparse.ArgumentParser(
7+
description="Combine A/B test fails into groups per test type"
8+
)
9+
parser.add_argument(
10+
"path",
11+
help="Path to the directory with failed A/B runs",
12+
type=Path,
13+
)
14+
args = parser.parse_args()
15+
16+
BLOCK = "test_block_performance"
17+
NET_THROUGHPUT = "test_network_throughput"
18+
NET_LATENCY = "test_network_latency"
19+
20+
block_data = []
21+
net_data = []
22+
net_lat_data = []
23+
for d in os.walk(args.path):
24+
if "ab.json" in d[-1]:
25+
path = d[0] + "/ab.json"
26+
print(path)
27+
with open(path, "r+") as f:
28+
lines = f.read()
29+
j = '{"data":' + lines + "}"
30+
data = json.loads(j)
31+
for e in data["data"]:
32+
match e["performance_test"]:
33+
                case BLOCK:
34+
block_data.append(e)
35+
case NET_THROUGHPUT:
36+
net_data.append(e)
37+
case NET_LATENCY:
38+
net_lat_data.append(e)
39+
40+
with open(f"{NET_LATENCY}.json", "w") as f:
41+
json.dump({"results": net_lat_data}, f, indent=2, sort_keys=True)
42+
with open(f"{NET_THROUGHPUT}.json", "w") as f:
43+
json.dump({"results": net_data}, f, indent=2, sort_keys=True)
44+
with open(f"{BLOCK}.json", "w") as f:
45+
json.dump({"fails": block_data}, f, indent=2, sort_keys=True)

‎tools/ab/plot.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import argparse
2+
import json
3+
import os
4+
from enum import Enum
5+
6+
import matplotlib.pyplot as plt
7+
import numpy as np
8+
9+
plt.style.use("dark_background")
10+
11+
12+
def clamp(min_v, max_v, v):
13+
return max(min_v, min(max_v, v))
14+
15+
16+
def lerp(color_a, color_b, t):
17+
return (
18+
clamp(0.0, 1.0, abs(color_a[0] * (1.0 - t) + color_b[0] * t)),
19+
clamp(0.0, 1.0, abs(color_a[1] * (1.0 - t) + color_b[1] * t)),
20+
clamp(0.0, 1.0, abs(color_a[2] * (1.0 - t) + color_b[2] * t)),
21+
)
22+
23+
24+
GREY = (0.5, 0.5, 0.5)
25+
GREEN = (0.1, 0.8, 0.1)
26+
RED = (0.8, 0.0, 0.1)
27+
28+
POSITIVE_COLOR = GREEN
29+
NEGATIVE_COLOR = RED
30+
31+
32+
class DataType(Enum):
33+
Block = "block"
34+
Net = "net"
35+
NetLatency = "net_latency"
36+
37+
38+
parser = argparse.ArgumentParser(description="Plot results of A/B test")
39+
parser.add_argument("path", type=str)
40+
args = parser.parse_args()
41+
42+
paths = [f"{args.path}/{f}" for f in os.listdir(args.path)]
43+
for path in paths:
44+
print(f"processing: {path}")
45+
with open(path) as f:
46+
fails = json.load(f)["fails"]
47+
48+
if not fails:
49+
print(f"skipping {path}. No data present")
50+
continue
51+
52+
instances = set()
53+
host_kernels = set()
54+
aggregated = {}
55+
56+
match fails[0]["performance_test"]:
57+
case "test_block_performance":
58+
data_type = DataType.Block
59+
case "test_network_tcp_throughput":
60+
data_type = DataType.Net
61+
case "test_network_latency":
62+
data_type = DataType.NetLatency
63+
case _:
64+
print("unknown data type. skipping")
65+
continue
66+
67+
for fail in fails:
68+
instances.add(fail["instance"])
69+
host_kernels.add(fail["host_kernel"])
70+
71+
if data_type == DataType.Block:
72+
tag = (
73+
fail["instance"],
74+
fail["host_kernel"],
75+
fail["guest_kernel"],
76+
fail["fio_mode"],
77+
fail["vcpus"],
78+
fail["io_engine"],
79+
)
80+
elif data_type == DataType.Net:
81+
tag = (
82+
fail["instance"],
83+
fail["host_kernel"],
84+
fail["guest_kernel"],
85+
fail["mode"],
86+
fail["vcpus"],
87+
)
88+
elif data_type == DataType.NetLatency:
89+
tag = (
90+
fail["instance"],
91+
fail["host_kernel"],
92+
fail["guest_kernel"],
93+
)
94+
POSITIVE_COLOR = RED
95+
NEGATIVE_COLOR = GREEN
96+
97+
if tag not in aggregated:
98+
aggregated[tag] = []
99+
aggregated[tag].append(fail["diff"])
100+
101+
for instance in sorted(instances):
102+
fig, ax = plt.subplots(len(host_kernels), figsize=(16, 11))
103+
if len(host_kernels) == 1:
104+
ax = [ax]
105+
fig.tight_layout(pad=8.0)
106+
107+
for i, host_kernel in enumerate(sorted(host_kernels)):
108+
data = []
109+
for key, value in aggregated.items():
110+
if key[0] == instance and key[1] == host_kernel:
111+
label = "\n".join(key[2:])
112+
values = np.array(value)
113+
mean = np.mean(values)
114+
std = np.std(values)
115+
data.append((label, mean, std))
116+
data.sort()
117+
labels = np.array([t[0] for t in data])
118+
means = np.array([t[1] for t in data])
119+
errors = np.array([t[2] for t in data])
120+
colors = [
121+
(
122+
lerp(GREY, POSITIVE_COLOR, t)
123+
if 0.0 < t
124+
else lerp(GREY, NEGATIVE_COLOR, -t)
125+
)
126+
for t in [m / 100.0 for m in means]
127+
]
128+
129+
bar = ax[i].bar(labels, means, yerr=errors, color=colors, ecolor="white")
130+
bar_labels = [f"{m:.2f} / {s:.2f}" for (m, s) in zip(means, errors)]
131+
ax[i].bar_label(bar, labels=bar_labels)
132+
ax[i].set_ylabel("Percentage of change: mean / std")
133+
ax[i].grid(color="grey", linestyle="-.", linewidth=0.5, alpha=0.5)
134+
ax[i].set_title(
135+
f"{data_type}\nInstance: {instance}\nHost kernel: {host_kernel}",
136+
)
137+
138+
plt.savefig(f"{args.path}/{data_type}_{instance}.png")

‎tools/devtool

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -760,7 +760,7 @@ cmd_test() {
760760
test_script="./tools/test.sh"
761761

762762
if [ $do_ab_test -eq 1 ]; then
763-
test_script="./tools/ab_test.py"
763+
test_script="./tools/ab/ab_test.py"
764764
fi
765765

766766
# Testing (running Firecracker via the jailer) needs root access,

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /