Add comprehensive evaluation and ablation runner

This commit is contained in:
MZ YANG
2026-03-25 22:20:43 +08:00
parent f1afd4bf38
commit 957b010ea1
8 changed files with 1730 additions and 30 deletions

View File

@@ -0,0 +1,772 @@
#!/usr/bin/env python3
"""Comprehensive evaluation suite for generated ICS feature sequences."""
from __future__ import annotations
import argparse
import json
import math
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Sequence, Tuple
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from eval_utils import (
best_binary_f1,
binary_auroc,
binary_average_precision,
binary_f1_at_threshold,
build_flat_window_vectors,
build_histogram_embeddings,
compute_average_psd,
compute_corr_matrix,
duplicate_rate,
exact_match_rate,
infer_test_paths,
load_json,
load_split_columns,
load_stats_vectors,
load_vocab,
load_windows_from_paths,
nearest_neighbor_distance_stats,
one_nn_two_sample_accuracy,
psd_distance_stats,
rbf_mmd,
resolve_reference_paths,
sample_indices,
set_random_seed,
split_process_groups,
standardize_cont_windows,
)
from platform_utils import resolve_device
from window_models import MLPAutoencoder, MLPClassifier, MLPRegressor
def parse_args():
    """Parse command-line options for the comprehensive evaluation run."""
    base_dir = Path(__file__).resolve().parent
    results_dir = base_dir / "results"
    parser = argparse.ArgumentParser(description="Comprehensive evaluation for generated.csv.")
    # Input/output file locations.
    parser.add_argument("--generated", default=str(results_dir / "generated.csv"))
    parser.add_argument("--reference", default=str(base_dir / "config.json"))
    parser.add_argument("--config", default=str(base_dir / "config.json"))
    parser.add_argument("--split", default=str(base_dir / "feature_split.json"))
    parser.add_argument("--stats", default=str(results_dir / "cont_stats.json"))
    parser.add_argument("--vocab", default=str(results_dir / "disc_vocab.json"))
    parser.add_argument("--out", default=str(results_dir / "comprehensive_eval.json"))
    # Windowing controls; 0 means "derive from config" / "non-overlapping".
    parser.add_argument("--seq-len", type=int, default=0)
    parser.add_argument("--stride", type=int, default=0, help="0 means non-overlapping windows")
    parser.add_argument("--max-train-windows", type=int, default=1024)
    parser.add_argument("--max-generated-windows", type=int, default=1024)
    parser.add_argument("--max-test-windows", type=int, default=1024)
    parser.add_argument("--max-rows-per-file", type=int, default=0)
    # Shared training knobs for the probe networks.
    parser.add_argument("--device", default="auto", help="cpu, cuda, or auto")
    parser.add_argument("--seed", type=int, default=1337)
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--classifier-epochs", type=int, default=12)
    parser.add_argument("--predictor-epochs", type=int, default=12)
    parser.add_argument("--detector-epochs", type=int, default=16)
    parser.add_argument("--hidden-dim", type=int, default=256)
    parser.add_argument("--detector-threshold-quantile", type=float, default=0.995)
    return parser.parse_args()
def flatten_rows(windows: np.ndarray) -> np.ndarray:
    """Collapse (num_windows, seq_len, num_features) into per-timestep rows."""
    if windows.size == 0:
        return np.zeros((0, 0), dtype=np.float32)
    num_features = windows.shape[-1]
    return windows.reshape(-1, num_features)
def lagged_corr_from_windows(windows: np.ndarray, lag: int = 1) -> np.ndarray:
if windows.shape[0] == 0 or windows.shape[1] <= lag:
return np.zeros((windows.shape[-1], windows.shape[-1]), dtype=np.float32)
x = windows[:, :-lag, :].reshape(-1, windows.shape[-1])
y = windows[:, lag:, :].reshape(-1, windows.shape[-1])
x = x - x.mean(axis=0, keepdims=True)
y = y - y.mean(axis=0, keepdims=True)
cov = x.T @ y / max(x.shape[0] - 1, 1)
std_x = np.sqrt(np.maximum((x ** 2).sum(axis=0) / max(x.shape[0] - 1, 1), 1e-8))
std_y = np.sqrt(np.maximum((y ** 2).sum(axis=0) / max(y.shape[0] - 1, 1), 1e-8))
denom = np.outer(std_x, std_y)
corr = cov / np.maximum(denom, 1e-8)
return np.nan_to_num(corr, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32)
def mean_abs_matrix_diff(a: np.ndarray, b: np.ndarray) -> float:
    """Mean elementwise absolute difference; NaN when either matrix is empty."""
    if not (a.size and b.size):
        return float("nan")
    return float(np.mean(np.abs(a - b)))
def fro_matrix_diff(a: np.ndarray, b: np.ndarray) -> float:
    """Frobenius norm of (a - b); NaN when either matrix is empty."""
    if not (a.size and b.size):
        return float("nan")
    return float(np.linalg.norm(a - b))
def safe_mean(values: Iterable[float]) -> float:
    """Arithmetic mean ignoring None and NaN entries; NaN when nothing survives."""
    kept: List[float] = []
    for v in values:
        if v is None:
            continue
        fv = float(v)
        if math.isnan(fv):
            continue
        kept.append(fv)
    if not kept:
        return float("nan")
    return float(sum(kept) / len(kept))
def safe_median(values: Sequence[float]) -> float:
    """Median of a sequence; NaN for an empty input."""
    if not values:
        return float("nan")
    ordered = sorted(float(v) for v in values)
    n = len(ordered)
    mid = n // 2
    if n % 2:
        return float(ordered[mid])
    # Even count: average the two middle elements.
    return float((ordered[mid - 1] + ordered[mid]) / 2.0)
def dwell_and_steps(series: Sequence[float]) -> Dict[str, float]:
    """Summarize run lengths (dwells) and jump magnitudes of a stepwise signal.

    A "change" is any transition to a different value; a "dwell" is the length
    of a constant run; a "step" is the absolute jump size at each change.
    """
    if not series:
        nan = float("nan")
        return {
            "num_changes": nan,
            "mean_dwell": nan,
            "median_dwell": nan,
            "mean_step": nan,
            "median_step": nan,
        }
    dwells: List[float] = []
    steps: List[float] = []
    level = float(series[0])
    run_len = 1
    for raw in series[1:]:
        nxt = float(raw)
        if nxt == level:
            run_len += 1
        else:
            dwells.append(float(run_len))
            steps.append(abs(nxt - level))
            level = nxt
            run_len = 1
    dwells.append(float(run_len))  # close the final run
    return {
        "num_changes": float(len(steps)),
        "mean_dwell": safe_mean(dwells),
        "median_dwell": safe_median(dwells),
        "mean_step": safe_mean(steps),
        "median_step": safe_median(steps),
    }
def controller_stats(series: Sequence[float], vmin: float, vmax: float) -> Dict[str, float]:
    """Saturation ratio, change rate, and median step size for a controller output.

    A sample counts as saturated when it lies within 1% of the [vmin, vmax]
    range of either bound.
    """
    if not series:
        nan = float("nan")
        return {"saturation_ratio": nan, "change_rate": nan, "step_median": nan}
    span = vmax - vmin
    tol = 0.01 * span if span > 0 else 0.0
    saturated = sum(1 for v in series if v <= vmin + tol or v >= vmax - tol)
    steps: List[float] = []
    num_changes = 0
    previous = float(series[0])
    for raw in series[1:]:
        current = float(raw)
        if current != previous:
            num_changes += 1
            steps.append(abs(current - previous))
            previous = current
    return {
        "saturation_ratio": float(saturated / len(series)),
        "change_rate": float(num_changes / max(len(series) - 1, 1)),
        "step_median": safe_median(steps),
    }
def actuator_stats(series: Sequence[float]) -> Dict[str, float]:
    """Discreteness statistics for an actuator signal.

    Values are rounded to two decimals before counting, so near-identical
    readings collapse into the same level.
    """
    if not series:
        nan = float("nan")
        return {
            "unique_ratio": nan,
            "top1_mass": nan,
            "top3_mass": nan,
            "median_dwell": nan,
        }
    rounded = [round(float(v), 2) for v in series]
    tally: Dict[float, int] = {}
    for v in rounded:
        tally[v] = tally.get(v, 0) + 1
    ranked = sorted(tally.values(), reverse=True)
    # Run-length encode to get dwell times per level.
    dwells: List[float] = []
    run_val = rounded[0]
    run_len = 1
    for v in rounded[1:]:
        if v == run_val:
            run_len += 1
        else:
            dwells.append(float(run_len))
            run_val = v
            run_len = 1
    dwells.append(float(run_len))
    total = len(rounded)
    return {
        "unique_ratio": float(len(tally) / total),
        "top1_mass": float(ranked[0] / total) if ranked else float("nan"),
        "top3_mass": float(sum(ranked[:3]) / total) if ranked else float("nan"),
        "median_dwell": safe_median(dwells),
    }
def pv_stats(series: Sequence[float]) -> Dict[str, float]:
    """Nearest-rank quantiles and upper/lower spread ratio for a process variable."""
    if not series:
        nan = float("nan")
        return {"q05": nan, "q50": nan, "q95": nan, "tail_ratio": nan}
    ordered = sorted(float(v) for v in series)
    last = len(ordered) - 1

    def quantile(prob: float) -> float:
        # Nearest-rank position, clamped into [0, last].
        pos = int(round(prob * last))
        return float(ordered[min(max(pos, 0), last)])

    q05 = quantile(0.05)
    q50 = quantile(0.5)
    q95 = quantile(0.95)
    lower_spread = q50 - q05
    ratio = (q95 - q50) / lower_spread if lower_spread != 0 else float("nan")
    return {"q05": q05, "q50": q50, "q95": q95, "tail_ratio": float(ratio)}
def aux_stats(series: Sequence[float]) -> Dict[str, float]:
    """Mean, sample standard deviation, and lag-1 autocorrelation of a signal."""
    if not series:
        nan = float("nan")
        return {"mean": nan, "std": nan, "lag1": nan}
    arr = np.asarray(series, dtype=np.float32)
    result: Dict[str, float] = {
        "mean": float(arr.mean()),
        "std": float(arr.std(ddof=1)) if arr.size > 1 else 0.0,
    }
    if arr.size < 2:
        result["lag1"] = float("nan")
    else:
        # Pearson correlation between the series and its one-step shift.
        lead = arr[:-1] - arr[:-1].mean()
        trail = arr[1:] - arr[1:].mean()
        denom = max(float(np.linalg.norm(lead) * np.linalg.norm(trail)), 1e-8)
        result["lag1"] = float(np.dot(lead, trail) / denom)
    return result
def metric_differences(generated: Dict[str, Dict[str, float]], reference: Dict[str, Dict[str, float]]) -> Dict[str, float]:
    """Average |generated - reference| per metric name across shared features.

    Metrics missing from the reference, or NaN on either side, are skipped.
    """
    per_metric: Dict[str, List[float]] = {}
    for feature, gen_metrics in generated.items():
        ref_metrics = reference.get(feature, {})
        for name, gen_value in gen_metrics.items():
            ref_value = ref_metrics.get(name)
            if ref_value is None:
                continue
            g = float(gen_value)
            r = float(ref_value)
            if math.isnan(g) or math.isnan(r):
                continue
            per_metric.setdefault(name, []).append(abs(g - r))
    return {f"mean_abs_diff_{name}": safe_mean(diffs) for name, diffs in per_metric.items()}
def summarize_type_metrics(
    feature_names: Sequence[str],
    gen_rows: np.ndarray,
    real_rows: np.ndarray,
    features: Sequence[str],
    stat_fn,
    use_real_bounds: bool = False,
) -> Dict:
    """Apply stat_fn per feature to generated and real rows and aggregate gaps.

    When use_real_bounds is True, stat_fn is called as stat_fn(series, vmin,
    vmax) with bounds taken from the real data for that feature.
    """
    name_to_col = {name: col for col, name in enumerate(feature_names)}
    generated: Dict[str, Dict[str, float]] = {}
    reference: Dict[str, Dict[str, float]] = {}
    for feature in features:
        col = name_to_col.get(feature)
        if col is None:
            continue  # feature not present in this dataset's columns
        gen_series = gen_rows[:, col].astype(float).tolist()
        real_series = real_rows[:, col].astype(float).tolist()
        if use_real_bounds:
            if real_rows.size:
                vmin = float(np.min(real_rows[:, col]))
                vmax = float(np.max(real_rows[:, col]))
            else:
                vmin = 0.0
                vmax = 0.0
            generated[feature] = stat_fn(gen_series, vmin, vmax)
            reference[feature] = stat_fn(real_series, vmin, vmax)
        else:
            generated[feature] = stat_fn(gen_series)
            reference[feature] = stat_fn(real_series)
    return {
        "features": list(features),
        "generated": generated,
        "reference": reference,
        "aggregates": metric_differences(generated, reference),
    }
def make_loader(x: np.ndarray, y: Optional[np.ndarray], batch_size: int, shuffle: bool) -> DataLoader:
    """Wrap numpy features (and optional targets) in a float32 DataLoader."""
    tensors = [torch.tensor(x, dtype=torch.float32)]
    if y is not None:
        tensors.append(torch.tensor(y, dtype=torch.float32))
    return DataLoader(TensorDataset(*tensors), batch_size=batch_size, shuffle=shuffle)
def split_train_val(
    x: np.ndarray,
    y: np.ndarray,
    seed: int,
    val_ratio: float = 0.2,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Seeded random train/validation split of paired arrays.

    At least one row always lands in train; if nothing is left for validation,
    the training rows double as the validation set.
    """
    rng = np.random.default_rng(seed)
    order = np.arange(x.shape[0])
    rng.shuffle(order)
    cut = max(1, int(round(x.shape[0] * (1.0 - val_ratio))))
    train_idx = order[:cut]
    val_idx = order[cut:] if cut < order.size else order[:0]
    if val_idx.size == 0:
        val_idx = train_idx  # fall back: validate on the training rows
    return x[train_idx], y[train_idx], x[val_idx], y[val_idx]
def train_classifier(
    train_x: np.ndarray,
    train_y: np.ndarray,
    val_x: np.ndarray,
    val_y: np.ndarray,
    device: str,
    hidden_dim: int,
    batch_size: int,
    epochs: int,
    seed: int,
) -> Dict[str, float]:
    """Train a binary MLP classifier and report validation metrics.

    Returns accuracy, balanced accuracy, and AUROC on (val_x, val_y). When the
    training data is degenerate (fewer than two rows or a single class) no
    classifier can be fit, so all metrics come back NaN.
    """
    if train_x.shape[0] < 2 or len(np.unique(train_y)) < 2:
        return {"accuracy": float("nan"), "balanced_accuracy": float("nan"), "auroc": float("nan")}
    torch.manual_seed(seed)  # makes weight init and loader shuffling reproducible
    model = MLPClassifier(train_x.shape[1], hidden_dim=hidden_dim).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.BCEWithLogitsLoss()
    loader = make_loader(train_x, train_y.reshape(-1, 1), batch_size=batch_size, shuffle=True)
    model.train()
    for _ in range(epochs):
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device).view(-1)
            logits = model(batch_x)
            loss = loss_fn(logits, batch_y)
            opt.zero_grad()
            loss.backward()
            opt.step()
    model.eval()
    with torch.no_grad():
        logits = model(torch.tensor(val_x, dtype=torch.float32, device=device)).cpu().numpy()
    probs = 1.0 / (1.0 + np.exp(-logits))  # sigmoid: logits -> probabilities
    pred = (probs >= 0.5).astype(np.int64)
    y_true = val_y.astype(np.int64)
    accuracy = float((pred == y_true).mean())
    # Confusion-matrix counts for balanced accuracy.
    tp = ((pred == 1) & (y_true == 1)).sum()
    tn = ((pred == 0) & (y_true == 0)).sum()
    fp = ((pred == 1) & (y_true == 0)).sum()
    fn = ((pred == 0) & (y_true == 1)).sum()
    tpr = tp / max(tp + fn, 1)  # max(..., 1) guards against an empty class
    tnr = tn / max(tn + fp, 1)
    return {
        "accuracy": accuracy,
        "balanced_accuracy": float(0.5 * (tpr + tnr)),
        "auroc": binary_auroc(y_true, probs),
    }
def train_regressor(
    train_x: np.ndarray,
    train_y: np.ndarray,
    eval_x: np.ndarray,
    eval_y: np.ndarray,
    device: str,
    hidden_dim: int,
    batch_size: int,
    epochs: int,
    seed: int,
) -> Dict[str, float]:
    """Train an MLP regressor and report RMSE/MAE on the evaluation split.

    Returns NaN metrics when either the training or evaluation split is empty.
    """
    if train_x.shape[0] == 0 or eval_x.shape[0] == 0:
        return {"rmse": float("nan"), "mae": float("nan")}
    torch.manual_seed(seed)  # reproducible init + shuffling
    model = MLPRegressor(train_x.shape[1], train_y.shape[1], hidden_dim=hidden_dim).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.MSELoss()
    loader = make_loader(train_x, train_y, batch_size=batch_size, shuffle=True)
    model.train()
    for _ in range(epochs):
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            pred = model(batch_x)
            loss = loss_fn(pred, batch_y)
            opt.zero_grad()
            loss.backward()
            opt.step()
    model.eval()
    with torch.no_grad():
        pred = model(torch.tensor(eval_x, dtype=torch.float32, device=device)).cpu().numpy()
    diff = pred - eval_y
    return {
        "rmse": float(np.sqrt(np.mean(diff ** 2))),
        "mae": float(np.mean(np.abs(diff))),
    }
def train_autoencoder(
    train_x: np.ndarray,
    eval_x: np.ndarray,
    eval_labels: np.ndarray,
    device: str,
    hidden_dim: int,
    batch_size: int,
    epochs: int,
    seed: int,
    threshold_quantile: float,
) -> Dict[str, float]:
    """Train a reconstruction autoencoder as an anomaly detector and score eval rows.

    The anomaly score for a row is its mean squared reconstruction error. The
    decision threshold is the given quantile of the training-set scores. Returns
    AUROC/AUPRC over eval_labels, F1/precision/recall at that threshold, and the
    best achievable F1 with its threshold. NaN metrics when a split is empty.
    """
    if train_x.shape[0] == 0 or eval_x.shape[0] == 0:
        return {
            "auroc": float("nan"),
            "auprc": float("nan"),
            "threshold": float("nan"),
            "f1": float("nan"),
            "best_f1": float("nan"),
        }
    torch.manual_seed(seed)
    latent_dim = max(32, hidden_dim // 4)  # bottleneck scales with capacity, floor 32
    model = MLPAutoencoder(train_x.shape[1], hidden_dim=hidden_dim, latent_dim=latent_dim).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.MSELoss()
    loader = make_loader(train_x, None, batch_size=batch_size, shuffle=True)
    model.train()
    for _ in range(epochs):
        for (batch_x,) in loader:
            batch_x = batch_x.to(device)
            recon = model(batch_x)
            loss = loss_fn(recon, batch_x)
            opt.zero_grad()
            loss.backward()
            opt.step()
    model.eval()
    with torch.no_grad():
        train_tensor = torch.tensor(train_x, dtype=torch.float32, device=device)
        train_recon = model(train_tensor).cpu().numpy()
        eval_tensor = torch.tensor(eval_x, dtype=torch.float32, device=device)
        eval_recon = model(eval_tensor).cpu().numpy()
    # Per-row reconstruction error is the anomaly score.
    train_scores = np.mean((train_recon - train_x) ** 2, axis=1)
    eval_scores = np.mean((eval_recon - eval_x) ** 2, axis=1)
    threshold = float(np.quantile(train_scores, threshold_quantile))
    f1_stats = binary_f1_at_threshold(eval_labels, eval_scores, threshold)
    best_stats = best_binary_f1(eval_labels, eval_scores)
    return {
        "auroc": binary_auroc(eval_labels, eval_scores),
        "auprc": binary_average_precision(eval_labels, eval_scores),
        "threshold": threshold,
        "f1": f1_stats["f1"],
        "precision": f1_stats["precision"],
        "recall": f1_stats["recall"],
        "best_f1": best_stats["f1"],
        "best_f1_threshold": best_stats["threshold"],
    }
def bootstrap_to_size(array: np.ndarray, target_size: int, seed: int) -> np.ndarray:
    """Upsample rows with replacement to target_size; no-op if already big enough."""
    n_rows = array.shape[0]
    if n_rows == 0 or n_rows >= target_size:
        return array
    rng = np.random.default_rng(seed)
    picks = rng.choice(n_rows, size=target_size, replace=True)
    return array[picks]
def build_predictive_pairs(cont_windows: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Turn each window into (flattened history, final timestep) regression pairs.

    Returns empty (0, 0) arrays when there are no windows or windows are too
    short to split into history and target.
    """
    if cont_windows.shape[0] == 0 or cont_windows.shape[1] < 2:
        return np.zeros((0, 0), dtype=np.float32), np.zeros((0, 0), dtype=np.float32)
    num_windows = cont_windows.shape[0]
    history = cont_windows[:, :-1, :].reshape(num_windows, -1).astype(np.float32)
    target = cont_windows[:, -1, :].astype(np.float32)
    return history, target
def main():
    """Run the full evaluation pipeline and write a JSON report.

    Stages: load config/split/vocab/stats, window the train/generated/test
    data, build window embeddings, then compute two-sample tests,
    diversity/privacy metrics, coupling and frequency distances, per-type
    feature statistics, predictive consistency, and anomaly-detection utility.
    """
    args = parse_args()
    set_random_seed(args.seed)
    device = resolve_device(args.device, verbose=False)
    cfg = {}
    if args.config and Path(args.config).exists():
        cfg = load_json(args.config)
    # CLI values win; otherwise fall back to config keys, then a default of 96.
    seq_len = int(args.seq_len or cfg.get("sample_seq_len", cfg.get("seq_len", 96)))
    stride = int(args.stride or seq_len)  # stride 0 -> non-overlapping windows
    max_rows_per_file = args.max_rows_per_file if args.max_rows_per_file > 0 else None
    _, cont_cols, disc_cols, label_cols = load_split_columns(args.split)
    train_paths = resolve_reference_paths(args.reference)
    test_paths = infer_test_paths(args.reference)
    vocab, vocab_sizes = load_vocab(args.vocab, disc_cols)
    mean_vec, std_vec = load_stats_vectors(args.stats, cont_cols)
    # Window real training data, generated data, and labeled test data.
    train_cont, train_disc, _ = load_windows_from_paths(
        train_paths,
        cont_cols,
        disc_cols,
        seq_len=seq_len,
        vocab=vocab,
        label_cols=None,
        stride=stride,
        max_windows=args.max_train_windows,
        max_rows_per_file=max_rows_per_file,
    )
    gen_cont, gen_disc, _ = load_windows_from_paths(
        [args.generated],
        cont_cols,
        disc_cols,
        seq_len=seq_len,
        vocab=vocab,
        label_cols=None,
        stride=seq_len,
        max_windows=args.max_generated_windows,
        max_rows_per_file=max_rows_per_file,
    )
    test_cont, test_disc, test_labels = load_windows_from_paths(
        test_paths,
        cont_cols,
        disc_cols,
        seq_len=seq_len,
        vocab=vocab,
        label_cols=label_cols,
        stride=stride,
        max_windows=args.max_test_windows,
        max_rows_per_file=max_rows_per_file,
    )
    if gen_cont.shape[0] == 0:
        raise SystemExit("generated.csv did not contain enough rows for one evaluation window")
    # Without labeled test data, carve a pseudo-test split (all labeled normal)
    # off the tail of the training windows.
    if test_labels is None or test_labels.size == 0:
        split_at = max(1, int(round(train_cont.shape[0] * 0.8)))
        test_cont = train_cont[split_at:]
        test_disc = train_disc[split_at:]
        test_labels = np.zeros(test_cont.shape[0], dtype=np.int64)
        train_cont = train_cont[:split_at]
        train_disc = train_disc[:split_at]
    normal_test_mask = test_labels == 0
    normal_test_cont = test_cont[normal_test_mask] if normal_test_mask.any() else test_cont
    normal_test_disc = test_disc[normal_test_mask] if normal_test_mask.any() else test_disc
    # Window embeddings: flat standardized vectors and histogram summaries.
    flat_train = build_flat_window_vectors(train_cont, train_disc, mean_vec, std_vec, vocab_sizes)
    flat_gen = build_flat_window_vectors(gen_cont, gen_disc, mean_vec, std_vec, vocab_sizes)
    flat_test = build_flat_window_vectors(test_cont, test_disc, mean_vec, std_vec, vocab_sizes)
    hist_train = build_histogram_embeddings(train_cont, train_disc, mean_vec, std_vec, vocab_sizes)
    hist_gen = build_histogram_embeddings(gen_cont, gen_disc, mean_vec, std_vec, vocab_sizes)
    hist_test = build_histogram_embeddings(test_cont, test_disc, mean_vec, std_vec, vocab_sizes)
    hist_normal_test = build_histogram_embeddings(normal_test_cont, normal_test_disc, mean_vec, std_vec, vocab_sizes)
    cont_train_flat = standardize_cont_windows(train_cont, mean_vec, std_vec).reshape(train_cont.shape[0], -1)
    cont_gen_flat = standardize_cont_windows(gen_cont, mean_vec, std_vec).reshape(gen_cont.shape[0], -1)
    train_rows = flatten_rows(train_cont)
    gen_rows = flatten_rows(gen_cont)
    # Balance real vs generated sample counts before the two-sample tests.
    balanced = min(hist_train.shape[0], hist_gen.shape[0])
    idx_real = sample_indices(hist_train.shape[0], balanced, args.seed)
    idx_gen = sample_indices(hist_gen.shape[0], balanced, args.seed + 1)
    discrim_x = np.concatenate([flat_train[idx_real], flat_gen[idx_gen]], axis=0) if balanced > 0 else np.zeros((0, flat_train.shape[1]), dtype=np.float32)
    # Labels: 0 = real window, 1 = generated window.
    discrim_y = np.concatenate(
        [np.zeros(balanced, dtype=np.int64), np.ones(balanced, dtype=np.int64)],
        axis=0,
    ) if balanced > 0 else np.zeros((0,), dtype=np.int64)
    # Discriminative two-sample test: can a classifier tell real from generated?
    if discrim_x.shape[0] > 0:
        x_train, y_train, x_val, y_val = split_train_val(discrim_x, discrim_y, seed=args.seed)
        discriminative = train_classifier(
            x_train,
            y_train,
            x_val,
            y_val,
            device=device,
            hidden_dim=args.hidden_dim,
            batch_size=args.batch_size,
            epochs=args.classifier_epochs,
            seed=args.seed,
        )
    else:
        discriminative = {"accuracy": float("nan"), "balanced_accuracy": float("nan"), "auroc": float("nan")}
    # Kernel MMD on both continuous-window and histogram embeddings.
    mmd_cont, gamma_cont = rbf_mmd(cont_train_flat[idx_real] if balanced > 0 else cont_train_flat, cont_gen_flat[idx_gen] if balanced > 0 else cont_gen_flat)
    mmd_hist, gamma_hist = rbf_mmd(hist_train[idx_real] if balanced > 0 else hist_train, hist_gen[idx_gen] if balanced > 0 else hist_gen)
    # Diversity / privacy: compare generated-to-train NN distances against a
    # real holdout baseline to flag memorization.
    holdout_base = hist_normal_test if hist_normal_test.shape[0] > 0 else hist_test
    nn_gen = nearest_neighbor_distance_stats(hist_gen, hist_train)
    nn_holdout = nearest_neighbor_distance_stats(holdout_base, hist_train)
    diversity = {
        "duplicate_rate": duplicate_rate(flat_gen),
        "exact_match_rate_to_train": exact_match_rate(flat_gen, flat_train),
        "nn_gen_to_train_mean": nn_gen["mean"],
        "nn_holdout_to_train_mean": nn_holdout["mean"],
        "memorization_ratio": float(nn_gen["mean"] / max(nn_holdout["mean"], 1e-8)) if not math.isnan(nn_gen["mean"]) and not math.isnan(nn_holdout["mean"]) else float("nan"),
        "one_nn_two_sample_accuracy": one_nn_two_sample_accuracy(holdout_base, hist_gen),
    }
    # Cross-feature coupling: instantaneous and lag-1 correlation structure.
    corr_real = compute_corr_matrix(train_rows)
    corr_gen = compute_corr_matrix(gen_rows)
    lag_corr_real = lagged_corr_from_windows(train_cont)
    lag_corr_gen = lagged_corr_from_windows(gen_cont)
    coupling = {
        "corr_mean_abs_diff": mean_abs_matrix_diff(corr_real, corr_gen),
        "corr_frobenius": fro_matrix_diff(corr_real, corr_gen),
        "lag1_corr_mean_abs_diff": mean_abs_matrix_diff(lag_corr_real, lag_corr_gen),
        "lag1_corr_frobenius": fro_matrix_diff(lag_corr_real, lag_corr_gen),
        "by_process": {},
    }
    # Same coupling diffs restricted to each process's feature sub-block.
    process_groups = split_process_groups(cont_cols)
    for process_name, indices in process_groups.items():
        real_block = corr_real[np.ix_(indices, indices)]
        gen_block = corr_gen[np.ix_(indices, indices)]
        real_lag_block = lag_corr_real[np.ix_(indices, indices)]
        gen_lag_block = lag_corr_gen[np.ix_(indices, indices)]
        coupling["by_process"][process_name] = {
            "corr_mean_abs_diff": mean_abs_matrix_diff(real_block, gen_block),
            "corr_frobenius": fro_matrix_diff(real_block, gen_block),
            "lag1_corr_mean_abs_diff": mean_abs_matrix_diff(real_lag_block, gen_lag_block),
            "lag1_corr_frobenius": fro_matrix_diff(real_lag_block, gen_lag_block),
        }
    frequency = psd_distance_stats(compute_average_psd(train_cont), compute_average_psd(gen_cont))
    # Per-type feature statistics (feature groups come from the config).
    cfg_types = {
        "type1": list(cfg.get("type1_features", []) or []),
        "type2": list(cfg.get("type2_features", []) or []),
        "type3": list(cfg.get("type3_features", []) or []),
        "type4": list(cfg.get("type4_features", []) or []),
        "type5": list(cfg.get("type5_features", []) or []),
        "type6": list(cfg.get("type6_features", []) or []),
    }
    type_metrics = {
        "type1_program": summarize_type_metrics(cont_cols, gen_rows, train_rows, cfg_types["type1"], dwell_and_steps),
        "type2_controller": summarize_type_metrics(cont_cols, gen_rows, train_rows, cfg_types["type2"], controller_stats, use_real_bounds=True),
        "type3_actuator": summarize_type_metrics(cont_cols, gen_rows, train_rows, cfg_types["type3"], actuator_stats),
        "type4_pv": summarize_type_metrics(cont_cols, gen_rows, train_rows, cfg_types["type4"], pv_stats),
        "type5_program_proxy": summarize_type_metrics(cont_cols, gen_rows, train_rows, cfg_types["type5"], dwell_and_steps),
        "type6_aux": summarize_type_metrics(cont_cols, gen_rows, train_rows, cfg_types["type6"], aux_stats),
    }
    # Predictive consistency: train next-step regressors on real vs synthetic
    # vs combined data; all evaluate on the same real normal-test pairs.
    pred_train_x, pred_train_y = build_predictive_pairs(standardize_cont_windows(train_cont, mean_vec, std_vec))
    pred_gen_x, pred_gen_y = build_predictive_pairs(standardize_cont_windows(gen_cont, mean_vec, std_vec))
    pred_eval_x, pred_eval_y = build_predictive_pairs(standardize_cont_windows(normal_test_cont, mean_vec, std_vec))
    predictive = {
        "real_only": train_regressor(
            pred_train_x,
            pred_train_y,
            pred_eval_x,
            pred_eval_y,
            device=device,
            hidden_dim=args.hidden_dim,
            batch_size=args.batch_size,
            epochs=args.predictor_epochs,
            seed=args.seed,
        ),
        "synthetic_only": train_regressor(
            pred_gen_x,
            pred_gen_y,
            pred_eval_x,
            pred_eval_y,
            device=device,
            hidden_dim=args.hidden_dim,
            batch_size=args.batch_size,
            epochs=args.predictor_epochs,
            seed=args.seed + 1,
        ),
        "real_plus_synthetic": train_regressor(
            np.concatenate([pred_train_x, bootstrap_to_size(pred_gen_x, pred_train_x.shape[0], args.seed + 2)], axis=0) if pred_train_x.size and pred_gen_x.size else pred_train_x,
            np.concatenate([pred_train_y, bootstrap_to_size(pred_gen_y, pred_train_y.shape[0], args.seed + 2)], axis=0) if pred_train_y.size and pred_gen_y.size else pred_train_y,
            pred_eval_x,
            pred_eval_y,
            device=device,
            hidden_dim=args.hidden_dim,
            batch_size=args.batch_size,
            epochs=args.predictor_epochs,
            seed=args.seed + 2,
        ),
    }
    predictive["rmse_ratio_synth_to_real"] = (
        float(predictive["synthetic_only"]["rmse"] / max(predictive["real_only"]["rmse"], 1e-8))
        if not math.isnan(predictive["synthetic_only"]["rmse"]) and not math.isnan(predictive["real_only"]["rmse"])
        else float("nan")
    )
    # Anomaly-detection utility: autoencoder detectors trained on real,
    # synthetic (bootstrapped to comparable size), and combined windows.
    target_size = max(flat_train.shape[0], min(512, flat_test.shape[0])) if flat_train.shape[0] > 0 else flat_gen.shape[0]
    utility = {
        "real_only": train_autoencoder(
            flat_train,
            flat_test,
            test_labels.astype(np.int64),
            device=device,
            hidden_dim=args.hidden_dim,
            batch_size=args.batch_size,
            epochs=args.detector_epochs,
            seed=args.seed,
            threshold_quantile=args.detector_threshold_quantile,
        ),
        "synthetic_only": train_autoencoder(
            bootstrap_to_size(flat_gen, target_size, args.seed + 3),
            flat_test,
            test_labels.astype(np.int64),
            device=device,
            hidden_dim=args.hidden_dim,
            batch_size=args.batch_size,
            epochs=args.detector_epochs,
            seed=args.seed + 3,
            threshold_quantile=args.detector_threshold_quantile,
        ),
        "real_plus_synthetic": train_autoencoder(
            np.concatenate([flat_train, bootstrap_to_size(flat_gen, flat_train.shape[0], args.seed + 4)], axis=0) if flat_train.size and flat_gen.size else flat_train,
            flat_test,
            test_labels.astype(np.int64),
            device=device,
            hidden_dim=args.hidden_dim,
            batch_size=args.batch_size,
            epochs=args.detector_epochs,
            seed=args.seed + 4,
            threshold_quantile=args.detector_threshold_quantile,
        ),
    }
    # Pull in any previously computed basic eval metrics sitting next to the
    # generated file, matching the post-processed naming when applicable.
    eval_name = "eval_post.json" if Path(args.generated).name.startswith("generated_post") else "eval.json"
    eval_candidate = Path(args.generated).with_name(eval_name)
    basic_eval = load_json(eval_candidate) if eval_candidate.exists() else {}
    out = {
        "generated_path": str(Path(args.generated).resolve()),
        "reference_paths": train_paths,
        "test_paths": test_paths,
        "seq_len": seq_len,
        "stride": stride,
        "counts": {
            "train_windows": int(train_cont.shape[0]),
            "generated_windows": int(gen_cont.shape[0]),
            "test_windows": int(test_cont.shape[0]),
            "test_anomalous_windows": int(test_labels.sum()),
            "test_normal_windows": int(normal_test_cont.shape[0]),
        },
        "basic_eval": {
            "avg_ks": basic_eval.get("avg_ks"),
            "avg_jsd": basic_eval.get("avg_jsd"),
            "avg_lag1_diff": basic_eval.get("avg_lag1_diff"),
        },
        "two_sample": {
            "continuous_mmd_rbf": mmd_cont,
            "continuous_mmd_gamma": gamma_cont,
            "histogram_mmd_rbf": mmd_hist,
            "histogram_mmd_gamma": gamma_hist,
            "discriminative_accuracy": discriminative["accuracy"],
            "discriminative_balanced_accuracy": discriminative["balanced_accuracy"],
            "discriminative_auroc": discriminative["auroc"],
        },
        "diversity_privacy": diversity,
        "coupling": coupling,
        "frequency": frequency,
        "type_metrics": type_metrics,
        "predictive_consistency": predictive,
        "anomaly_utility": utility,
    }
    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(out, indent=2), encoding="utf-8")
    # Console summary of the headline numbers.
    print("wrote", out_path)
    print("generated_windows", gen_cont.shape[0])
    print("continuous_mmd_rbf", mmd_cont)
    print("discriminative_accuracy", discriminative["accuracy"])
    print("utility_real_plus_synthetic_auprc", utility["real_plus_synthetic"]["auprc"])
if __name__ == "__main__":
    main()