#!/usr/bin/env python3
import argparse
import csv
import json
import math
from pathlib import Path
def parse_args(argv=None):
    """Parse the command-line options of the benchmark plotting script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is what a plain
            ``parse_args()`` call has always done. Passing a list allows the
            parser to be driven programmatically.

    Returns:
        argparse.Namespace with the attributes ``figure``, ``history``,
        ``ks_per_feature``, ``data_shift``, ``metrics_history``,
        ``filtered_metrics``, ``out`` and ``engine``.
    """
    parser = argparse.ArgumentParser(description="Plot benchmark metrics from benchmark_history.csv")
    # Input files default to the "results" directory next to this script.
    results_dir = Path(__file__).resolve().parent / "results"

    parser.add_argument(
        "--figure",
        choices=["panel", "summary"],
        default="panel",
        help="Figure type: panel (paper-style multi-panel) or summary (seed robustness only).",
    )

    # (flag, default file name, help text) for every input-file option; kept in
    # the original declaration order so --help output is unchanged.
    input_options = (
        ("--history", "benchmark_history.csv", "Path to benchmark_history.csv"),
        ("--ks-per-feature", "ks_per_feature.csv", "Path to ks_per_feature.csv"),
        ("--data-shift", "data_shift_stats.csv", "Path to data_shift_stats.csv"),
        ("--metrics-history", "metrics_history.csv", "Path to metrics_history.csv"),
        ("--filtered-metrics", "filtered_metrics.json", "Path to filtered_metrics.json (optional)."),
    )
    for flag, filename, help_text in input_options:
        parser.add_argument(flag, default=str(results_dir / filename), help=help_text)

    parser.add_argument(
        "--out",
        default="",
        help="Output SVG path (default depends on --figure).",
    )
    parser.add_argument(
        "--engine",
        choices=["auto", "matplotlib", "svg"],
        default="auto",
        help="Plotting engine: auto prefers matplotlib if available; otherwise uses pure-SVG.",
    )
    return parser.parse_args(argv)
def mean_std(vals):
    """Return ``(mean, sample standard deviation)`` of *vals*.

    The standard deviation uses the ``n - 1`` denominator and is reported as
    ``0.0`` for a single value. An empty sequence raises ZeroDivisionError.
    """
    count = len(vals)
    mean = sum(vals) / count
    if count == 1:
        return mean, 0.0
    squared_dev = sum((x - mean) * (x - mean) for x in vals)
    return mean, math.sqrt(squared_dev / (count - 1))
def svg_escape(s):
    """Return ``str(s)`` with XML special characters replaced by entities.

    Escapes ``&``, ``<``, ``>``, ``"`` and ``'`` so the result is safe both as
    SVG text content and inside single- or double-quoted attribute values.

    Args:
        s: Any object; it is converted with ``str()`` first.

    Returns:
        The escaped string.
    """
    return (
        str(s)
        # The ampersand must be handled first, otherwise the "&" introduced by
        # the other entities would be escaped a second time.
        .replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace('"', "&quot;")
        .replace("'", "&apos;")
    )
def clamp(v, lo, hi):
    """Limit *v* to the closed interval ``[lo, hi]``.

    Returns *lo* when ``v < lo``, *hi* when ``v > hi``, and *v* itself
    otherwise (the lower bound is checked first).
    """
    return lo if v < lo else (hi if v > hi else v)
def lerp(a, b, t):
    """Linearly interpolate from *a* (at ``t == 0``) to *b* (at ``t == 1``)."""
    span = b - a
    return a + span * t
def hex_to_rgb(h):
    """Convert a six-digit hex colour (leading ``#`` optional) to an ``(r, g, b)`` int tuple."""
    digits = h.lstrip("#")
    # Each channel is one two-character slice: red, green, blue.
    return tuple(int(digits[start:start + 2], 16) for start in (0, 2, 4))
def rgb_to_hex(r, g, b):
    """Format three colour channels as a lowercase ``#rrggbb`` string.

    Each channel is clamped to ``[0, 255]`` and truncated to an int first.
    """
    channels = [int(clamp(component, 0, 255)) for component in (r, g, b)]
    return "#" + "".join("{:02x}".format(channel) for channel in channels)
def diverging_color(v, vmin=-2.0, vmax=2.0, cold="#2563eb", hot="#ef4444", mid="#ffffff"):
    """Map *v* onto a diverging colour scale and return it as ``#rrggbb``.

    *v* is clamped to ``[vmin, vmax]``. Non-negative values blend from *mid*
    toward *hot* in proportion to ``v / vmax``; negative values blend from
    *mid* toward *cold* in proportion to ``v / vmin``. A zero bound on the
    relevant side yields the *mid* colour.
    """
    v = clamp(v, vmin, vmax)
    if v >= 0:
        endpoint = hot
        t = 0.0 if vmax == 0 else v / vmax
    else:
        endpoint = cold
        t = 0.0 if vmin == 0 else (-v) / (-vmin)
    start_rgb = hex_to_rgb(mid)
    end_rgb = hex_to_rgb(endpoint)
    # Interpolate the three channels with the same blend factor.
    return rgb_to_hex(*(lerp(c0, c1, t) for c0, c1 in zip(start_rgb, end_rgb)))
def plot_matplotlib(rows, seeds, metrics, out_path):
    """Render the per-seed summary figure with matplotlib and save it as SVG.

    One horizontal strip is drawn per metric: every run is a dot at its metric
    value, a vertical line marks the mean and a shaded band spans
    mean ± sample standard deviation.

    Args:
        rows: List of dicts, one per run, holding a numeric value for every
            metric key.
        seeds: Sequence of seed labels, parallel to *rows*.
        metrics: Sequence of ``(row_key, panel_title)`` pairs.
        out_path: ``pathlib.Path`` of the SVG to write; parent directories are
            created as needed.

    Raises:
        ImportError: if matplotlib is not installed.
        ValueError: if *seeds* and *rows* differ in length (strict ``zip``).
    """
    # Imported lazily so callers can fall back to another engine when
    # matplotlib is unavailable.
    import matplotlib.pyplot as plt

    try:
        plt.style.use("seaborn-v0_8-whitegrid")
    except Exception:
        # Best effort: this style name does not exist in every matplotlib
        # release; the default style is an acceptable substitute.
        pass

    point_color = "#3b82f6"
    band_color = "#ef4444"
    grid_color = "#e5e7eb"
    axis_color = "#111827"
    # Small vertical offsets so points with similar values do not overlap.
    jitter = [-0.08, 0.0, 0.08]

    # Report the real number of distinct seeds instead of a hard-coded "3".
    n_seeds = len(set(seeds))
    seed_label = "{n} seed{plural}".format(n=n_seeds, plural="" if n_seeds == 1 else "s")

    fig, axes = plt.subplots(nrows=len(metrics), ncols=1, figsize=(8.6, 4.6), sharex=False)
    try:
        if len(metrics) == 1:
            # plt.subplots returns a bare Axes (not an array) for a single row.
            axes = [axes]
        for ax, (key, title) in zip(axes, metrics, strict=True):
            vals = [r[key] for r in rows]
            m, s = mean_std(vals)
            # The x-range covers all points and the ±1 std band, plus 20% padding.
            vmin = min(vals + [m - s])
            vmax = max(vals + [m + s])
            if vmax == vmin:
                vmax = vmin + 1.0
            vr = vmax - vmin
            vmin -= 0.20 * vr
            vmax += 0.20 * vr
            ys = [jitter[i % len(jitter)] for i in range(len(vals))]
            ax.axvspan(m - s, m + s, color=band_color, alpha=0.10, linewidth=0)
            ax.axvline(m, color=band_color, linewidth=2.2)
            ax.scatter(vals, ys, s=46, color=point_color, edgecolors="white", linewidths=1.0, zorder=3)
            for x, y, seed in zip(vals, ys, seeds, strict=True):
                ax.annotate(
                    str(seed),
                    (x, y),
                    textcoords="offset points",
                    xytext=(0, 10),
                    ha="center",
                    va="bottom",
                    fontsize=8,
                    color=axis_color,
                )
            ax.set_title(title, loc="left", fontsize=11, color=axis_color, pad=8)
            ax.set_yticks([])
            ax.set_ylim(-0.35, 0.35)
            ax.set_xlim(vmin, vmax)
            ax.grid(True, axis="x", color=grid_color)
            ax.grid(False, axis="y")
            ax.text(
                0.99,
                0.80,
                "mean={m:.4f} ± {s:.4f}".format(m=m, s=s),
                transform=ax.transAxes,
                ha="right",
                va="center",
                fontsize=9,
                color="#374151",
            )
        fig.suptitle(
            "Benchmark Metrics ({label}) · lower is better".format(label=seed_label),
            fontsize=12,
            color=axis_color,
            y=0.98,
        )
        fig.tight_layout(rect=(0, 0, 1, 0.95))
        out_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_path, format="svg")
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
def plot_svg(rows, seeds, metrics, out_path):
    """Write the seed-summary figure to *out_path* as hand-assembled SVG text.

    As visible here, the body only sets up layout and colour constants,
    appends a single empty string to ``parts`` and writes the joined result,
    so the output file is empty and *rows* / *seeds* are never read.

    NOTE(review): the string literal appended to ``parts`` looks like it lost
    its SVG markup (and probably the drawing statements that used the
    constants below) during extraction — confirm against the original file.

    Raises ZeroDivisionError when *metrics* is empty (division by
    ``len(metrics)`` when computing ``row_h``).
    """
    # Canvas size and paddings (left, right, top, bottom).
    W, H = 980, 440
    pad_l, pad_r, pad_t, pad_b = 200, 30, 74, 36
    row_gap = 52
    # Height of one metric row after removing paddings and inter-row gaps.
    row_h = (H - pad_t - pad_b - row_gap * (len(metrics) - 1)) / len(metrics)
    # Colour constants; none of them is referenced in the body as shown.
    bg = "#ffffff"
    axis = "#2b2b2b"
    grid = "#e9e9e9"
    band = "#d62728"
    band_fill = "#d62728"
    point = "#1f77b4"
    text = "#111111"
    subtle = "#666666"
    parts = []
    parts.append(
        "")
    # Create the output directory if needed, then write the SVG as UTF-8.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text("\n".join(parts), encoding="utf-8")
def read_csv_rows(path):
    """Load a UTF-8 CSV file as a list of dicts keyed by the header row.

    Returns an empty list when *path* does not exist.
    """
    csv_path = Path(path)
    if csv_path.exists():
        with csv_path.open("r", encoding="utf-8", newline="") as handle:
            return list(csv.DictReader(handle))
    return []
def read_json(path):
    """Parse the UTF-8 JSON file at *path*; return ``None`` when it does not exist."""
    json_path = Path(path)
    if json_path.exists():
        with json_path.open("r", encoding="utf-8") as handle:
            return json.loads(handle.read())
    return None
def parse_float(s):
    """Convert *s* to ``float``, mapping missing-value markers to ``None``.

    ``None``, the empty string, and the case-insensitive tokens ``"none"`` and
    ``"nan"`` (after stripping whitespace) all yield ``None``. Any other text
    is passed to ``float()``, which raises ValueError if it is not numeric.
    """
    if s is None:
        return None
    token = str(s).strip()
    return None if token.lower() in ("", "none", "nan") else float(token)
def zscores(vals):
    """Standardise *vals* using the population standard deviation.

    Returns ``[]`` for empty input and all zeros when every value is equal.
    """
    if not vals:
        return []
    count = len(vals)
    mean = sum(vals) / count
    deviations = [x - mean for x in vals]
    # Population variance: denominator is n, not n - 1.
    spread = math.sqrt(sum(d * d for d in deviations) / count)
    if spread == 0:
        return [0.0] * count
    return [d / spread for d in deviations]
def panel_svg(bh_rows, ks_rows, shift_rows, hist_rows, filtered_metrics, out_path):
    """Write the multi-panel overview figure to *out_path* as hand-assembled SVG.

    As visible here, the body defines layout constants and element-building
    helpers, appends one empty string to ``parts`` and writes the joined
    result; none of the data arguments is read.

    NOTE(review): every SVG template string in this function is empty, which
    looks like markup lost during extraction (together with the statements
    that drew the panels) — confirm against the original file.
    """
    # Canvas size; two columns and two rows of panels separated by ``gap``.
    W, H = 1400, 900
    margin = 42
    gap = 26
    panel_w = (W - margin * 2 - gap) / 2
    panel_h = (H - margin * 2 - gap) / 2
    # Colour palette.
    bg = "#ffffff"
    ink = "#111827"
    subtle = "#6b7280"
    border = "#e5e7eb"
    grid = "#eef2f7"
    blue = "#3b82f6"
    red = "#ef4444"
    green = "#10b981"
    def panel_rect(x, y):
        # Formats a panel-sized rectangle at (x, y); with the empty template
        # shown here the result is always "".
        return "".format(
            x=x, y=y, w=panel_w, h=panel_h, b=border
        )
    def text(x, y, s, size=12, anchor="start", color=ink, weight="normal"):
        # The visible template is just "{t}", so only the escaped text is
        # returned and the position/style arguments have no effect.
        return "{t}".format(
            x=x, y=y, a=anchor, fs=size, c=color, w=weight, t=svg_escape(s)
        )
    def line(x1, y1, x2, y2, color=border, width=1.0, dash=None, opacity=1.0, cap="round"):
        # Optional attributes are collected in ``extra`` using single-quoted
        # attribute syntax.
        extra = ""
        if dash:
            extra += " stroke-dasharray='{d}'".format(d=dash)
        if opacity != 1.0:
            extra += " stroke-opacity='{o}'".format(o=opacity)
        # Empty template: the result is always "".
        return "".format(
            x1=x1, y1=y1, x2=x2, y2=y2, c=color, w=width, cap=cap, extra=extra
        )
    def round_box(x, y, w, h, fill="#ffffff", stroke=border, sw=1.2, rx=12):
        # Empty template: the result is always "".
        return "".format(
            x=x, y=y, w=w, h=h, rx=rx, f=fill, s=stroke, sw=sw
        )
    def arrow(x1, y1, x2, y2, color=ink, width=1.8):
        # Direction of the segment from (x1, y1) to (x2, y2).
        ang = math.atan2(y2 - y1, x2 - x1)
        ah = 10.0
        aw = 5.0
        # (hx, hy) lies ``ah`` units back from the tip along the segment.
        hx = x2 - ah * math.cos(ang)
        hy = y2 - ah * math.sin(ang)
        # (px, py) is a perpendicular offset of length ``aw``; p1 and p2 are
        # the two points on either side of (hx, hy).
        px = aw * math.sin(ang)
        py = -aw * math.cos(ang)
        p1x, p1y = hx + px, hy + py
        p2x, p2y = hx - px, hy - py
        # Both template fragments are empty: the result is always "".
        return (
            ""
            ""
        ).format(x1=x1, y1=y1, x2=x2, y2=y2, c=color, w=width, p1x=p1x, p1y=p1y, p2x=p2x, p2y=p2y)
    parts = []
    parts.append(
        "")
    # Create the output directory if needed, then write the SVG as UTF-8.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text("\n".join(parts), encoding="utf-8")
def _panel_pipeline_diagram(ax):
    """Draw panel A: a static box-and-arrow diagram of the generation pipeline."""
    import matplotlib.patches as patches

    ax.set_title("A Typed Hybrid Generation", loc="left", fontsize=12, fontweight="bold")
    ax.axis("off")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    box_y = 0.55
    box_w = 0.18
    box_h = 0.16
    arrow_color = "#6b7280"
    x_positions = [0.06, 0.30, 0.54, 0.78]
    labels = ["HAI windows\n(L=96)", "Typed\ndecomposition", "Hybrid\ngenerator", "Synthetic\nwindows"]
    for x, lbl in zip(x_positions, labels, strict=True):
        ax.add_patch(
            patches.FancyBboxPatch(
                (x, box_y),
                box_w,
                box_h,
                boxstyle="round,pad=0.02,rounding_size=0.02",
                facecolor="#f8fafc",
                edgecolor="#e5e7eb",
            )
        )
        ax.text(x + box_w / 2, box_y + box_h / 2, lbl, ha="center", va="center", fontsize=10, fontweight="bold")
    # One arrow between each pair of neighbouring boxes.
    for left, right in zip(x_positions, x_positions[1:]):
        ax.annotate(
            "",
            xy=(right, box_y + box_h / 2),
            xytext=(left + box_w, box_y + box_h / 2),
            arrowprops=dict(arrowstyle="-|>", lw=1.4, color=arrow_color),
        )
    # Detail boxes hang below the third box ("Hybrid generator").
    hx = x_positions[2]
    hy = 0.20
    ax.text(hx, hy + 0.27, "Type-aware routes", fontsize=9, color=arrow_color, fontweight="bold")
    inner = [
        ("Trend (det.)", "#e0f2fe", "#3b82f6"),
        ("Residual (DDPM)", "#fee2e2", "#ef4444"),
        ("Discrete head", "#dcfce7", "#10b981"),
    ]
    for k, (name, fc, ec) in enumerate(inner):
        y = hy + 0.18 - k * 0.11
        ax.add_patch(
            patches.FancyBboxPatch(
                (hx, y),
                box_w,
                0.08,
                boxstyle="round,pad=0.02,rounding_size=0.02",
                facecolor=fc,
                edgecolor=ec,
                lw=1.2,
            )
        )
        ax.text(hx + 0.01, y + 0.04, name, ha="left", va="center", fontsize=9, fontweight="bold")
    ax.annotate(
        "",
        xy=(hx + box_w / 2, hy + 0.20),
        xytext=(hx + box_w / 2, box_y),
        arrowprops=dict(arrowstyle="-|>", lw=1.2, color=arrow_color),
    )
    ax.text(
        0.06,
        0.06,
        "Metrics align with types: KS (continuous), JSD (discrete), lag-1 (temporal).",
        fontsize=9,
        color=arrow_color,
    )


def _panel_ks_bars(ax, ks_rows, filtered_metrics):
    """Draw panel B: horizontal bars for the 14 features with the largest KS value."""
    ax.set_title("B Feature-Level Distribution Fidelity", loc="left", fontsize=12, fontweight="bold")
    parsed = []
    for r in ks_rows:
        # Parse each KS value once; rows without a usable value are skipped.
        ks = parse_float(r.get("ks"))
        if ks is None:
            continue
        parsed.append(
            {
                "feature": r.get("feature", ""),
                "ks": ks,
                "gen_frac_at_min": parse_float(r.get("gen_frac_at_min")) or 0.0,
                "gen_frac_at_max": parse_float(r.get("gen_frac_at_max")) or 0.0,
            }
        )
    parsed.sort(key=lambda x: x["ks"], reverse=True)
    # Reverse so the worst feature ends up at the top of the bar chart.
    top = parsed[:14][::-1]
    feats = [r["feature"] for r in top]
    vals = [r["ks"] for r in top]
    # A feature counts as collapsed when at least 98% of generated values sit
    # on its minimum or on its maximum.
    collapsed = [(r["gen_frac_at_min"] >= 0.98) or (r["gen_frac_at_max"] >= 0.98) for r in top]
    colors = ["#fb7185" if c else "#0ea5e9" for c in collapsed]
    ax.barh(feats, vals, color=colors)
    ax.set_xlabel("KS (lower is better)")
    ax.set_xlim(0, 1.0)
    if isinstance(filtered_metrics, dict) and filtered_metrics.get("dropped_features"):
        dropped = ", ".join(d.get("feature", "") for d in filtered_metrics["dropped_features"] if d.get("feature"))
        if dropped:
            ax.text(
                0.99,
                0.02,
                "dropped: {d}".format(d=dropped),
                transform=ax.transAxes,
                ha="right",
                va="bottom",
                fontsize=9,
                color="#6b7280",
            )


def _panel_shift_heatmap(fig, ax, shift_rows):
    """Draw panel C: heatmap of per-file feature-mean z-scores, or a placeholder."""
    ax.set_title("C Dataset Shift Across Training Files", loc="left", fontsize=12, fontweight="bold")
    cols = list(shift_rows[0].keys()) if shift_rows else []
    wanted = ["mean_P1_FT01", "mean_P1_LIT01", "mean_P1_PIT01", "mean_P2_CO_rpm", "mean_P3_LIT01", "mean_P4_ST_PT01"]
    # Every wanted name already starts with "mean_", so a direct membership
    # test selects the same columns as filtering on that prefix first, and it
    # cannot fail on a non-string key such as csv.DictReader's None restkey.
    feats = [c for c in wanted if c in cols]
    files = [r.get("file", "") for r in shift_rows]
    # One list per feature; unparsable or missing cells count as 0.0.
    per_feature = [[parse_float(r.get(c)) or 0.0 for r in shift_rows] for c in feats]
    if per_feature and files and feats:
        # z-score each feature across files, then transpose to files x features.
        Z = list(zip(*[zscores(col) for col in per_feature], strict=True))
        im = ax.imshow(Z, aspect="auto", cmap="coolwarm", vmin=-2, vmax=2)
        ax.set_yticks(range(len(files)))
        ax.set_yticklabels(files)
        ax.set_xticks(range(len(feats)))
        ax.set_xticklabels([f.replace("mean_", "") for f in feats], rotation=25, ha="right")
        ax.set_ylabel("Train file")
        ax.set_xlabel("Feature mean z-score")
        fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
    else:
        ax.axis("off")
        ax.text(0.5, 0.5, "missing data_shift_stats.csv", ha="center", va="center", fontsize=11, color="#6b7280")


def _panel_seed_robustness(ax, bh_rows):
    """Draw panel D: per-metric strip showing every run, the mean and a ±1 std band."""
    import matplotlib.patches as patches

    ax.set_title("D Robustness Across Seeds", loc="left", fontsize=12, fontweight="bold")
    ax.axis("off")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    metrics = [("avg_ks", "KS (cont.)"), ("avg_jsd", "JSD (disc.)"), ("avg_lag1_diff", "Abs Δ lag-1")]
    bh_rows = sorted(bh_rows, key=lambda r: r.get("seed", 0))
    # Horizontal extent (axes coordinates) reserved for the value strip.
    x0 = 0.42
    x1 = 0.96
    jit = [-0.03, 0.0, 0.03]
    for mi, (k, title) in enumerate(metrics):
        vals = [r.get(k) for r in bh_rows if r.get(k) is not None]
        if not vals:
            continue
        m, s = mean_std(vals)
        y = 0.78 - mi * 0.22
        ax.text(0.04, y, title, fontsize=10, fontweight="bold", va="center")
        # The value range covers all points and the ±1 std band, plus 15% padding.
        vmin = min(vals + [m - s])
        vmax = max(vals + [m + s])
        if vmax == vmin:
            vmax = vmin + 1.0
        vr = vmax - vmin
        vmin -= 0.15 * vr
        vmax += 0.15 * vr

        def to_x(v):
            # Map a metric value into the [x0, x1] strip.
            return x0 + (v - vmin) * (x1 - x0) / (vmax - vmin)

        ax.add_patch(
            patches.FancyBboxPatch(
                (to_x(m - s), y - 0.03),
                max(0.002, to_x(m + s) - to_x(m - s)),
                0.06,
                boxstyle="round,pad=0.01,rounding_size=0.02",
                facecolor="#ef4444",
                alpha=0.12,
                edgecolor="none",
            )
        )
        ax.plot([to_x(m), to_x(m)], [y - 0.05, y + 0.05], color="#ef4444", lw=2.2)
        for i, v in enumerate(vals):
            ax.scatter(
                [to_x(v)],
                [y + jit[i % len(jit)]],
                s=40,
                color="#3b82f6",
                edgecolor="white",
                linewidth=0.9,
                zorder=3,
            )
        ax.text(0.96, y + 0.07, "mean={m:.4f}±{s:.4f}".format(m=m, s=s), fontsize=9, color="#6b7280", ha="right")


def panel_matplotlib(bh_rows, ks_rows, shift_rows, hist_rows, filtered_metrics, out_path):
    """Render the 2x2 benchmark overview figure with matplotlib and save it as SVG.

    Panels: A pipeline diagram, B per-feature KS bars, C dataset-shift
    heatmap, D robustness across seeds.

    Args:
        bh_rows: List of dicts with ``seed``, ``avg_ks``, ``avg_jsd`` and
            ``avg_lag1_diff`` (numeric values) for panel D.
        ks_rows: List of CSV row dicts with ``feature``, ``ks``,
            ``gen_frac_at_min`` and ``gen_frac_at_max`` for panel B.
        shift_rows: List of CSV row dicts with ``file`` and ``mean_*`` columns
            for panel C; may be empty, in which case a placeholder is shown.
        hist_rows: Accepted for signature parity with the SVG renderer; not
            used by this function.
        filtered_metrics: Optional dict; its ``dropped_features`` list is
            summarised in panel B when present.
        out_path: ``pathlib.Path`` of the SVG to write; parent directories are
            created as needed.

    Raises:
        ImportError: if matplotlib is not installed.
    """
    # Imported lazily so callers can fall back to another engine when
    # matplotlib is unavailable.
    import matplotlib.pyplot as plt

    try:
        plt.style.use("seaborn-v0_8-whitegrid")
    except Exception:
        # Best effort: this style name does not exist in every matplotlib
        # release; the default style is an acceptable substitute.
        pass

    fig = plt.figure(figsize=(13.6, 8.6))
    try:
        gs = fig.add_gridspec(2, 2, wspace=0.18, hspace=0.22)
        ax_a = fig.add_subplot(gs[0, 0])
        ax_b = fig.add_subplot(gs[0, 1])
        ax_c = fig.add_subplot(gs[1, 0])
        ax_d = fig.add_subplot(gs[1, 1])
        fig.suptitle("Benchmark Overview (HAI Security Dataset)", fontsize=16, y=0.98)

        _panel_pipeline_diagram(ax_a)
        _panel_ks_bars(ax_b, ks_rows, filtered_metrics)
        _panel_shift_heatmap(fig, ax_c, shift_rows)
        _panel_seed_robustness(ax_d, bh_rows)

        fig.tight_layout(rect=(0, 0, 1, 0.96))
        out_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_path, format="svg")
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
def _render_with_fallback(engine, primary, fallback, out_path):
    """Render with matplotlib when allowed, falling back to pure SVG under "auto".

    Args:
        engine: One of "auto", "matplotlib" or "svg".
        primary: Zero-argument callable running the matplotlib renderer.
        fallback: Zero-argument callable running the pure-SVG renderer.
        out_path: Output path, reported after a successful render.

    Raises:
        Exception: whatever *primary* raised, when *engine* is "matplotlib".
    """
    import sys

    if engine in {"auto", "matplotlib"}:
        try:
            primary()
        except Exception as exc:
            if engine == "matplotlib":
                raise
            # "auto" still falls back, but the reason is reported instead of
            # being silently discarded (it may be a real rendering bug rather
            # than a missing matplotlib install).
            print(
                "matplotlib rendering failed ({kind}: {msg}); falling back to pure-SVG".format(
                    kind=type(exc).__name__, msg=exc
                ),
                file=sys.stderr,
            )
        else:
            print("saved", out_path)
            return
    fallback()
    print("saved", out_path)


def main():
    """Command-line entry point: load the benchmark history and render the figure.

    Reads the CSV named by ``--history`` (columns ``run_name``, ``seed``,
    ``avg_ks``, ``avg_jsd``, ``avg_lag1_diff``), sorts the runs by seed and
    renders either the summary or the panel figure with the selected engine.

    Raises:
        SystemExit: if the history file is missing or contains no rows.
    """
    args = parse_args()
    hist_path = Path(args.history)
    if not hist_path.exists():
        raise SystemExit("missing history file: %s" % hist_path)
    with hist_path.open("r", encoding="utf-8", newline="") as f:
        rows = [
            {
                "run_name": r["run_name"],
                "seed": int(r["seed"]),
                "avg_ks": float(r["avg_ks"]),
                "avg_jsd": float(r["avg_jsd"]),
                "avg_lag1_diff": float(r["avg_lag1_diff"]),
            }
            for r in csv.DictReader(f)
        ]
    if not rows:
        raise SystemExit("no rows in history file: %s" % hist_path)
    rows.sort(key=lambda x: x["seed"])
    seeds = [str(r["seed"]) for r in rows]
    metrics = [
        ("avg_ks", "KS (continuous)"),
        ("avg_jsd", "JSD (discrete)"),
        ("avg_lag1_diff", "Abs Δ lag-1 autocorr"),
    ]

    if args.out:
        out_path = Path(args.out)
    else:
        # Default output lives in a "figures" directory beside this script's
        # parent directory; the file name depends on the figure type.
        default_name = "benchmark_panel.svg" if args.figure == "panel" else "benchmark_metrics.svg"
        out_path = Path(__file__).resolve().parent.parent / "figures" / default_name

    if args.figure == "summary":
        _render_with_fallback(
            args.engine,
            lambda: plot_matplotlib(rows, seeds, metrics, out_path),
            lambda: plot_svg(rows, seeds, metrics, out_path),
            out_path,
        )
        return

    # The panel figure needs the auxiliary inputs; each loader tolerates a
    # missing file.
    ks_rows = read_csv_rows(args.ks_per_feature)
    shift_rows = read_csv_rows(args.data_shift)
    mh_rows = read_csv_rows(args.metrics_history)
    fm = read_json(args.filtered_metrics)
    bh_rows = [
        {"seed": r["seed"], "avg_ks": r["avg_ks"], "avg_jsd": r["avg_jsd"], "avg_lag1_diff": r["avg_lag1_diff"]}
        for r in rows
    ]
    _render_with_fallback(
        args.engine,
        lambda: panel_matplotlib(bh_rows, ks_rows, shift_rows, mh_rows, fm, out_path),
        lambda: panel_svg(bh_rows, ks_rows, shift_rows, mh_rows, fm, out_path),
        out_path,
    )
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()