update

2026-02-04 03:53:17 +08:00
parent 2072351c0d
commit 10c0721ee1
6 changed files with 1134 additions and 104 deletions
--- a/example/export_samples.py
+++ b/example/export_samples.py
@@ -73,6 +73,13 @@ def parse_args():
    return parser.parse_args()
 def load_torch_state(path: str, device: str):
    """Load the object stored at ``path`` with tensors mapped to ``device``.

    ``torch.load`` is first called with ``weights_only=True``. If that call
    raises ``TypeError`` (presumably a torch build whose ``torch.load`` does
    not accept the keyword), the load is retried without it.
    """
    shared_kwargs = {"map_location": device}
    try:
        state = torch.load(path, weights_only=True, **shared_kwargs)
    except TypeError:
        # NOTE(review): this fallback does not restrict unpickling; only use
        # it on checkpoints from a trusted source.
        state = torch.load(path, **shared_kwargs)
    return state
 # 使用 platform_utils 中的 resolve_device 函数
@@ -193,9 +200,9 @@ def main():
    ).to(device)
    if args.use_ema and os.path.exists(args.model_path.replace("model.pt", "model_ema.pt")):
        ema_path = args.model_path.replace("model.pt", "model_ema.pt")
-        model.load_state_dict(torch.load(ema_path, map_location=device, weights_only=True))
+        model.load_state_dict(load_torch_state(ema_path, device))
    else:
-        model.load_state_dict(torch.load(args.model_path, map_location=device, weights_only=True))
+        model.load_state_dict(load_torch_state(args.model_path, device))
    model.eval()
    temporal_model = None
@@ -221,7 +228,7 @@ def main():
        temporal_path = Path(args.model_path).with_name("temporal.pt")
        if not temporal_path.exists():
            raise SystemExit(f"missing temporal model file: {temporal_path}")
-        temporal_model.load_state_dict(torch.load(temporal_path, map_location=device, weights_only=True))
+        temporal_model.load_state_dict(load_torch_state(str(temporal_path), device))
        temporal_model.eval()
    betas = cosine_beta_schedule(args.timesteps).to(device)
--- a/example/run_all.py
+++ b/example/run_all.py
@@ -2,7 +2,9 @@
 """One-command pipeline runner with config-driven paths."""
 import argparse
 import csv
 import json
 import math
 import subprocess
 import sys
 from pathlib import Path
@@ -23,6 +25,13 @@ def parse_args():
    parser = argparse.ArgumentParser(description="Run full pipeline end-to-end.")
    base_dir = Path(__file__).resolve().parent
    parser.add_argument("--config", default=str(base_dir / "config.json"))
    parser.add_argument("--configs", default="", help="Comma-separated configs or globs for batch runs")
    parser.add_argument("--seeds", default="", help="Comma-separated seeds for batch runs")
    parser.add_argument("--repeat", type=int, default=1, help="Repeat each config with different seeds")
    parser.add_argument("--runs-root", default="", help="Root directory for per-run artifacts (batch)")
    parser.add_argument("--benchmark-history", default="", help="CSV path for batch history output")
    parser.add_argument("--benchmark-summary", default="", help="CSV path for batch summary output")
    parser.add_argument("--name-prefix", default="", help="Prefix for batch run directory names")
    parser.add_argument("--device", default="auto", help="cpu, cuda, or auto")
    parser.add_argument("--reference", default="", help="override reference glob (train*.csv.gz)")
    parser.add_argument("--skip-prepare", action="store_true")
@@ -57,21 +66,209 @@ def resolve_config_path(base_dir: Path, cfg_arg: str) -> Path:
    raise SystemExit(f"config not found: {cfg_arg}\ntried:\n{tried}")
 def resolve_like(base: Path, value: str) -> str:
    """Turn a config path value into a string path relative to ``base``.

    Returns ``""`` for an empty value and the value unchanged (as a string)
    when it is already absolute. Relative values are joined onto ``base``;
    the result is resolved unless the value contains a glob character
    (``*``, ``?`` or ``[``), in which case the joined pattern is returned
    as-is.
    """
    if not value:
        return ""
    candidate = Path(value)
    if candidate.is_absolute():
        return str(candidate)
    joined = base / candidate
    text = str(value)
    has_wildcard = "*" in text or "?" in text or "[" in text
    # Patterns are left unresolved so the wildcard text is passed on untouched.
    return str(joined) if has_wildcard else str(joined.resolve())
 def expand_config_args(base_dir: Path, arg: str):
    """Expand a comma-separated list of config paths and glob patterns.

    Args:
        base_dir: Directory used as the first search root for relative
            patterns; its parent is used as the second root.
        arg: Comma-separated tokens. Empty tokens are ignored. A token
            containing ``*``, ``?`` or ``[`` is treated as a glob pattern;
            any other token is passed to ``resolve_config_path``.

    Returns:
        A list of resolved ``Path`` objects in first-seen order, with
        duplicates (by resolved path) removed. Empty list when ``arg`` is
        empty.

    Raises:
        SystemExit: If a glob token matches no file. This now applies to
            absolute patterns as well; previously an absolute pattern with
            no matches was silently skipped, which could leave the caller
            with an empty config list.

    Only the final path component is expanded as a pattern (the parent
    directory is globbed with the pattern's name).
    """
    if not arg:
        return []
    repo_dir = base_dir.parent
    wildcard_chars = ("*", "?", "[")
    found = []
    for token in (piece.strip() for piece in arg.split(",")):
        if not token:
            continue
        if not any(ch in token for ch in wildcard_chars):
            found.append(resolve_config_path(base_dir, token))
            continue
        pattern = Path(token)
        if pattern.is_absolute():
            roots = [pattern]
        else:
            # Search order: next to this script, repo root, then the CWD.
            roots = [base_dir / pattern, repo_dir / pattern, pattern]
        for root in roots:
            matches = sorted(root.parent.glob(root.name))
            if matches:
                found.extend(matches)
                break
        else:
            raise SystemExit(f"no configs matched glob: {token}")
    seen = set()
    unique = []
    for path in found:
        resolved = str(Path(path).resolve())
        if resolved not in seen:
            seen.add(resolved)
            unique.append(Path(resolved))
    return unique
 def parse_seeds(arg: str):
    """Parse a comma-separated seed string into a list of ints.

    Whitespace around each item is stripped and empty items are skipped.
    An empty ``arg`` yields an empty list. ``int`` raises ``ValueError``
    for an item that is not an integer literal.
    """
    if not arg:
        return []
    stripped = (raw.strip() for raw in arg.split(","))
    return [int(piece) for piece in stripped if piece]
 def compute_summary(history_path: Path, out_path: Path):
    """Aggregate a per-run history CSV into one summary row per config.

    Reads ``history_path`` with ``csv.DictReader``, groups rows by their
    ``config`` column (missing/empty becomes ``""``), and writes
    ``out_path`` with, for each config: the number of runs, the mean and
    sample standard deviation (``n - 1`` denominator, ``0.0`` for a single
    value) of ``avg_ks``, ``avg_jsd`` and ``avg_lag1_diff``, and the
    ``run_name`` / value of the run with the lowest ``avg_ks``.

    Cells that are missing, empty, unparsable, or non-finite (``nan``,
    ``inf``) are ignored for the statistics and for best-run selection.
    Without the non-finite filter a single ``nan`` row would turn the mean
    into ``nan`` and, when it came first, be reported as the best run,
    because ``x < nan`` is always false.

    Does nothing when ``history_path`` does not exist or has no data rows.
    Parent directories of ``out_path`` are created as needed.
    """
    if not history_path.exists():
        return
    with history_path.open("r", encoding="utf-8", newline="") as f:
        rows = list(csv.DictReader(f))
    if not rows:
        return
    def to_float(v):
        # None (short row), "" and non-numeric text all mean "no value".
        try:
            x = float(v)
        except (TypeError, ValueError):
            return None
        return x if math.isfinite(x) else None
    def mean_std(vals):
        xs = [x for x in vals if x is not None]
        if not xs:
            return None, None
        mu = sum(xs) / len(xs)
        if len(xs) <= 1:
            return mu, 0.0
        var = sum((x - mu) ** 2 for x in xs) / (len(xs) - 1)
        return mu, math.sqrt(var)
    grouped = {}
    for r in rows:
        grouped.setdefault(r.get("config", "") or "", []).append(r)
    metrics = ("avg_ks", "avg_jsd", "avg_lag1_diff")
    fieldnames = [
        "config",
        "n_runs",
        "avg_ks_mean",
        "avg_ks_std",
        "avg_jsd_mean",
        "avg_jsd_std",
        "avg_lag1_diff_mean",
        "avg_lag1_diff_std",
        "best_run_name",
        "best_avg_ks",
    ]
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for cfg, rs in sorted(grouped.items(), key=lambda kv: kv[0]):
            record = {"config": cfg, "n_runs": len(rs)}
            for metric in metrics:
                mu, sd = mean_std([to_float(r.get(metric)) for r in rs])
                record[f"{metric}_mean"] = mu
                record[f"{metric}_std"] = sd
            # Lowest avg_ks wins; min() keeps the first run on ties.
            scored = [(to_float(r.get("avg_ks")), r.get("run_name", "")) for r in rs]
            scored = [pair for pair in scored if pair[0] is not None]
            best = min(scored, key=lambda pair: pair[0]) if scored else None
            record["best_run_name"] = "" if best is None else best[1]
            record["best_avg_ks"] = None if best is None else best[0]
            writer.writerow(record)
 def main():
    args = parse_args()
    base_dir = Path(__file__).resolve().parent
-    config_path = resolve_config_path(base_dir, args.config)
+    seed_list = parse_seeds(args.seeds)
    config_paths = expand_config_args(base_dir, args.configs) if args.configs else [resolve_config_path(base_dir, args.config)]
    batch_mode = bool(args.configs or args.seeds or (args.repeat and args.repeat > 1) or args.runs_root or args.benchmark_history or args.benchmark_summary)
    if not args.skip_prepare:
        run([sys.executable, str(base_dir / "prepare_data.py")])
    runs_root = Path(args.runs_root) if args.runs_root else (base_dir / "results" / "runs")
    history_out = Path(args.benchmark_history) if args.benchmark_history else (base_dir / "results" / "benchmark_history.csv")
    summary_out = Path(args.benchmark_summary) if args.benchmark_summary else (base_dir / "results" / "benchmark_summary.csv")
    for config_path in config_paths:
        cfg_base = config_path.parent
        with open(config_path, "r", encoding="utf-8") as f:
            cfg = json.load(f)
        timesteps = cfg.get("timesteps", 200)
        seq_len = cfg.get("sample_seq_len", cfg.get("seq_len", 64))
        batch_size = cfg.get("sample_batch_size", cfg.get("batch_size", 2))
        clip_k = cfg.get("clip_k", 5.0)
-    if not args.skip_prepare:
+        seeds = seed_list
-        run([sys.executable, str(base_dir / "prepare_data.py")])
+        if not seeds:
            base_seed = int(cfg.get("seed", 1337))
            if args.repeat and args.repeat > 1:
                seeds = [base_seed + i for i in range(int(args.repeat))]
            else:
                seeds = [base_seed]
        for seed in seeds:
            run_dir = base_dir / "results" if not batch_mode else (runs_root / f"{args.name_prefix}{config_path.stem}__seed{seed}")
            run_dir.mkdir(parents=True, exist_ok=True)
            data_path = resolve_like(cfg_base, str(cfg.get("data_path", "")))
            data_glob = resolve_like(cfg_base, str(cfg.get("data_glob", "")))
            split_path = resolve_like(cfg_base, str(cfg.get("split_path", ""))) or str(base_dir / "feature_split.json")
            stats_path = resolve_like(cfg_base, str(cfg.get("stats_path", ""))) or str(base_dir / "results" / "cont_stats.json")
            vocab_path = resolve_like(cfg_base, str(cfg.get("vocab_path", ""))) or str(base_dir / "results" / "disc_vocab.json")
            ref = args.reference or cfg.get("data_glob") or cfg.get("data_path") or ""
            ref = resolve_like(cfg_base, str(ref)) if ref else ""
            if not args.skip_train:
-        run([sys.executable, str(base_dir / "train.py"), "--config", str(config_path), "--device", args.device])
+                run(
                    [
                        sys.executable,
                        str(base_dir / "train.py"),
                        "--config",
                        str(config_path),
                        "--device",
                        args.device,
                        "--out-dir",
                        str(run_dir),
                        "--seed",
                        str(seed),
                    ]
                )
            config_used = run_dir / "config_used.json"
            cfg_for_steps = config_used if config_used.exists() else config_path
            if not args.skip_export:
                run(
                    [
@@ -81,7 +278,21 @@ def main():
                        "--device",
                        args.device,
                        "--config",
-                str(config_path),
+                        str(cfg_for_steps),
                        "--data-path",
                        str(data_path),
                        "--data-glob",
                        str(data_glob),
                        "--split-path",
                        str(split_path),
                        "--stats-path",
                        str(stats_path),
                        "--vocab-path",
                        str(vocab_path),
                        "--model-path",
                        str(run_dir / "model.pt"),
                        "--out",
                        str(run_dir / "generated.csv"),
                        "--timesteps",
                        str(timesteps),
                        "--seq-len",
@@ -93,22 +304,66 @@ def main():
                        "--use-ema",
                    ]
                )
-    ref = args.reference or cfg.get("data_glob") or cfg.get("data_path") or ""
+
            if not args.skip_eval:
                cmd = [
                    sys.executable,
                    str(base_dir / "evaluate_generated.py"),
                    "--generated",
                    str(run_dir / "generated.csv"),
                    "--split",
                    str(split_path),
                    "--stats",
                    str(stats_path),
                    "--vocab",
                    str(vocab_path),
                    "--out",
                    str(run_dir / "eval.json"),
                ]
                if ref:
-            run([sys.executable, str(base_dir / "evaluate_generated.py"), "--reference", str(ref)])
+                    cmd += ["--reference", str(ref)]
                run(cmd)
                if batch_mode:
                    run(
                        [
                            sys.executable,
                            str(base_dir / "summary_metrics.py"),
                            "--eval",
                            str(run_dir / "eval.json"),
                            "--history",
                            str(history_out),
                            "--run-name",
                            run_dir.name,
                            "--config",
                            str(config_path),
                            "--seed",
                            str(seed),
                        ]
                    )
                else:
-            run([sys.executable, str(base_dir / "evaluate_generated.py")])
+                    run(
-        run([sys.executable, str(base_dir / "summary_metrics.py")])
+                        [
                            sys.executable,
                            str(base_dir / "summary_metrics.py"),
                            "--eval",
                            str(run_dir / "eval.json"),
                            "--history",
                            str(base_dir / "results" / "metrics_history.csv"),
                        ]
                    )
            if not args.skip_postprocess:
                cmd = [
                    sys.executable,
                    str(base_dir / "postprocess_types.py"),
                    "--generated",
-            str(base_dir / "results" / "generated.csv"),
+                    str(run_dir / "generated.csv"),
                    "--config",
-            str(config_path),
+                    str(cfg_for_steps),
                    "--out",
                    str(run_dir / "generated_post.csv"),
                    "--seed",
                    str(seed),
                ]
                if ref:
                    cmd += ["--reference", str(ref)]
@@ -119,9 +374,15 @@ def main():
                    sys.executable,
                    str(base_dir / "evaluate_generated.py"),
                    "--generated",
-            str(base_dir / "results" / "generated_post.csv"),
+                    str(run_dir / "generated_post.csv"),
                    "--split",
                    str(split_path),
                    "--stats",
                    str(stats_path),
                    "--vocab",
                    str(vocab_path),
                    "--out",
-            "results/eval_post.json",
+                    str(run_dir / "eval_post.json"),
                ]
                if ref:
                    cmd += ["--reference", str(ref)]
@@ -129,14 +390,109 @@ def main():
            if not args.skip_diagnostics:
                if ref:
-            run([sys.executable, str(base_dir / "diagnose_ks.py"), "--generated", str(base_dir / "results" / "generated_post.csv"), "--reference", str(ref)])
+                    run(
-        run([sys.executable, str(base_dir / "filtered_metrics.py"), "--eval", str(base_dir / "results" / "eval_post.json")])
+                        [
-        run([sys.executable, str(base_dir / "ranked_ks.py"), "--eval", str(base_dir / "results" / "eval_post.json")])
+                            sys.executable,
-        run([sys.executable, str(base_dir / "program_stats.py"), "--config", str(config_path), "--reference", str(ref or config_path)])
+                            str(base_dir / "diagnose_ks.py"),
-        run([sys.executable, str(base_dir / "controller_stats.py"), "--config", str(config_path), "--reference", str(ref or config_path)])
+                            "--generated",
-        run([sys.executable, str(base_dir / "actuator_stats.py"), "--config", str(config_path), "--reference", str(ref or config_path)])
+                            str(run_dir / "generated_post.csv"),
-        run([sys.executable, str(base_dir / "pv_stats.py"), "--config", str(config_path), "--reference", str(ref or config_path)])
+                            "--reference",
-        run([sys.executable, str(base_dir / "aux_stats.py"), "--config", str(config_path), "--reference", str(ref or config_path)])
+                            str(ref),
                        ]
                    )
                run(
                    [
                        sys.executable,
                        str(base_dir / "filtered_metrics.py"),
                        "--eval",
                        str(run_dir / "eval_post.json"),
                        "--out",
                        str(run_dir / "filtered_metrics.json"),
                    ]
                )
                run(
                    [
                        sys.executable,
                        str(base_dir / "ranked_ks.py"),
                        "--eval",
                        str(run_dir / "eval_post.json"),
                        "--out",
                        str(run_dir / "ranked_ks.csv"),
                    ]
                )
                run(
                    [
                        sys.executable,
                        str(base_dir / "program_stats.py"),
                        "--generated",
                        str(run_dir / "generated_post.csv"),
                        "--config",
                        str(cfg_for_steps),
                        "--reference",
                        str(cfg_for_steps),
                        "--out",
                        str(run_dir / "program_stats.json"),
                    ]
                )
                run(
                    [
                        sys.executable,
                        str(base_dir / "controller_stats.py"),
                        "--generated",
                        str(run_dir / "generated_post.csv"),
                        "--config",
                        str(cfg_for_steps),
                        "--reference",
                        str(cfg_for_steps),
                        "--out",
                        str(run_dir / "controller_stats.json"),
                    ]
                )
                run(
                    [
                        sys.executable,
                        str(base_dir / "actuator_stats.py"),
                        "--generated",
                        str(run_dir / "generated_post.csv"),
                        "--config",
                        str(cfg_for_steps),
                        "--reference",
                        str(cfg_for_steps),
                        "--out",
                        str(run_dir / "actuator_stats.json"),
                    ]
                )
                run(
                    [
                        sys.executable,
                        str(base_dir / "pv_stats.py"),
                        "--generated",
                        str(run_dir / "generated_post.csv"),
                        "--config",
                        str(cfg_for_steps),
                        "--reference",
                        str(cfg_for_steps),
                        "--out",
                        str(run_dir / "pv_stats.json"),
                    ]
                )
                run(
                    [
                        sys.executable,
                        str(base_dir / "aux_stats.py"),
                        "--generated",
                        str(run_dir / "generated_post.csv"),
                        "--config",
                        str(cfg_for_steps),
                        "--reference",
                        str(cfg_for_steps),
                        "--out",
                        str(run_dir / "aux_stats.json"),
                    ]
                )
    if batch_mode:
        compute_summary(history_out, summary_out)
 if __name__ == "__main__":
--- a/example/sample.py
+++ b/example/sample.py
@@ -29,6 +29,13 @@ BATCH_SIZE = 2
 CLIP_K = 5.0
 def load_torch_state(path: str, device: str):
    """Load the object stored at ``path`` with tensors mapped to ``device``.

    ``torch.load`` is first called with ``weights_only=True``. If that call
    raises ``TypeError`` (presumably a torch build whose ``torch.load`` does
    not accept the keyword), the load is retried without it.
    """
    shared_kwargs = {"map_location": device}
    try:
        state = torch.load(path, weights_only=True, **shared_kwargs)
    except TypeError:
        # NOTE(review): this fallback does not restrict unpickling; only use
        # it on checkpoints from a trusted source.
        state = torch.load(path, **shared_kwargs)
    return state
 def load_vocab():
    """Read the JSON file at ``VOCAB_PATH`` and return its ``"vocab"`` entry."""
    with open(str(VOCAB_PATH), "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload["vocab"]
@@ -110,7 +117,7 @@ def main():
        eps_scale=eps_scale,
    ).to(DEVICE)
    if MODEL_PATH.exists():
-        model.load_state_dict(torch.load(str(MODEL_PATH), map_location=DEVICE, weights_only=True))
+        model.load_state_dict(load_torch_state(str(MODEL_PATH), DEVICE))
    model.eval()
    temporal_model = None
@@ -136,7 +143,7 @@ def main():
        temporal_path = BASE_DIR / "results" / "temporal.pt"
        if not temporal_path.exists():
            raise SystemExit(f"missing temporal model file: {temporal_path}")
-        temporal_model.load_state_dict(torch.load(str(temporal_path), map_location=DEVICE, weights_only=True))
+        temporal_model.load_state_dict(load_torch_state(str(temporal_path), DEVICE))
        temporal_model.eval()
    betas = cosine_beta_schedule(timesteps).to(DEVICE)
--- a/example/summary_metrics.py
+++ b/example/summary_metrics.py
@@ -1,41 +1,62 @@
 #!/usr/bin/env python3
-"""Print average metrics from eval.json and compare with previous run."""
+"""Print average metrics from eval.json and append to a history CSV."""
 import argparse
 import csv
 import json
 from datetime import datetime
 from pathlib import Path
 from typing import Optional
 def mean(values):
    """Return the arithmetic mean of ``values``, or ``None`` if it is empty."""
    if not values:
        return None
    return sum(values) / len(values)
-def parse_last_row(history_path: Path):
+def parse_args():
    base_dir = Path(__file__).resolve().parent
    parser = argparse.ArgumentParser(description="Summarize eval.json into a history CSV.")
    parser.add_argument("--eval", dest="eval_path", default=str(base_dir / "results" / "eval.json"))
    parser.add_argument("--history", default=str(base_dir / "results" / "metrics_history.csv"))
    parser.add_argument("--run-name", default="")
    parser.add_argument("--config", default="")
    parser.add_argument("--seed", type=int, default=-1)
    return parser.parse_args()
 def read_last_row(history_path: Path) -> Optional[dict]:
    if not history_path.exists():
        return None
-    rows = history_path.read_text(encoding="utf-8").strip().splitlines()
+    with history_path.open("r", encoding="utf-8", newline="") as f:
-    if len(rows) < 2:
+        reader = csv.DictReader(f)
        rows = list(reader)
    if not rows:
        return None
-    for line in reversed(rows[1:]):
+    last = rows[-1]
-        parts = line.split(",")
+    for key in ["avg_ks", "avg_jsd", "avg_lag1_diff"]:
-        if len(parts) < 4:
+        if key in last and last[key] not in [None, ""]:
            continue
            try:
-            return {
+                last[key] = float(last[key])
                "avg_ks": float(parts[1]),
                "avg_jsd": float(parts[2]),
                "avg_lag1_diff": float(parts[3]),
            }
            except Exception:
-            continue
+                last[key] = None
-    return None
+    return last
 def ensure_header(history_path: Path, fieldnames):
    """Create ``history_path`` with a CSV header row if it has none yet.

    Args:
        history_path: CSV file that rows will later be appended to.
        fieldnames: Column names written as the header row.

    A path that already exists with content is left untouched. A missing
    file is created (including parent directories). An existing zero-byte
    file also gets the header: otherwise the first appended data row would
    be read back as the header by ``csv.DictReader``.
    """
    is_empty_file = history_path.is_file() and history_path.stat().st_size == 0
    if history_path.exists() and not is_empty_file:
        return
    history_path.parent.mkdir(parents=True, exist_ok=True)
    with history_path.open("w", encoding="utf-8", newline="") as f:
        csv.DictWriter(f, fieldnames=fieldnames).writeheader()
 def main():
-    base_dir = Path(__file__).resolve().parent
+    args = parse_args()
-    eval_path = base_dir / "results" / "eval.json"
+    eval_path = Path(args.eval_path)
    if not eval_path.exists():
        raise SystemExit(f"missing eval.json: {eval_path}")
    history_path = Path(args.history)
    obj = json.loads(eval_path.read_text(encoding="utf-8"))
    ks = list(obj.get("continuous_ks", {}).values())
@@ -46,22 +67,48 @@ def main():
    avg_jsd = mean(jsd)
    avg_lag1 = mean(lag)
-    history_path = base_dir / "results" / "metrics_history.csv"
+    obj["avg_ks"] = avg_ks
-    prev = parse_last_row(history_path)
+    obj["avg_jsd"] = avg_jsd
    obj["avg_lag1_diff"] = avg_lag1
    eval_path.write_text(json.dumps(obj, indent=2), encoding="utf-8")
-    if not history_path.exists():
+    prev = read_last_row(history_path)
-        history_path.write_text("timestamp,avg_ks,avg_jsd,avg_lag1_diff\n", encoding="utf-8")
+
-    with history_path.open("a", encoding="utf-8") as f:
+    fieldnames = ["timestamp", "avg_ks", "avg_jsd", "avg_lag1_diff"]
-        f.write(f"{datetime.utcnow().isoformat()},{avg_ks},{avg_jsd},{avg_lag1}\n")
+    extended = any([args.run_name, args.config, args.seed >= 0])
    if extended:
        fieldnames = ["timestamp", "run_name", "config", "seed", "avg_ks", "avg_jsd", "avg_lag1_diff"]
    ensure_header(history_path, fieldnames)
    row = {
        "timestamp": datetime.utcnow().isoformat(),
        "avg_ks": avg_ks,
        "avg_jsd": avg_jsd,
        "avg_lag1_diff": avg_lag1,
    }
    if extended:
        row["run_name"] = args.run_name
        row["config"] = args.config
        row["seed"] = args.seed if args.seed >= 0 else ""
    with history_path.open("a", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writerow(row)
    print("avg_ks", avg_ks)
    print("avg_jsd", avg_jsd)
    print("avg_lag1_diff", avg_lag1)
    if prev is not None:
-        print("delta_avg_ks", avg_ks - prev["avg_ks"])
+        pks = prev.get("avg_ks")
-        print("delta_avg_jsd", avg_jsd - prev["avg_jsd"])
+        pjsd = prev.get("avg_jsd")
-        print("delta_avg_lag1_diff", avg_lag1 - prev["avg_lag1_diff"])
+        plag = prev.get("avg_lag1_diff")
        if pks is not None:
            print("delta_avg_ks", avg_ks - pks)
        if pjsd is not None:
            print("delta_avg_jsd", avg_jsd - pjsd)
        if plag is not None:
            print("delta_avg_lag1_diff", avg_lag1 - plag)
 if __name__ == "__main__":
--- a/example/train.py
+++ b/example/train.py
@@ -108,6 +108,8 @@ def parse_args():
    parser = argparse.ArgumentParser(description="Train hybrid diffusion on HAI.")
    parser.add_argument("--config", default=None, help="Path to JSON config.")
    parser.add_argument("--device", default="auto", help="cpu, cuda, or auto")
    parser.add_argument("--out-dir", default=None, help="Override output directory")
    parser.add_argument("--seed", type=int, default=None, help="Override random seed")
    return parser.parse_args()
@@ -168,6 +170,14 @@ def main():
    # 优先使用命令行传入的device参数
    if args.device != "auto":
        config["device"] = args.device
    if args.out_dir:
        out_dir = Path(args.out_dir)
        if not out_dir.is_absolute():
            base = Path(args.config).resolve().parent if args.config else BASE_DIR
            out_dir = resolve_path(base, out_dir)
        config["out_dir"] = str(out_dir)
    if args.seed is not None:
        config["seed"] = int(args.seed)
    set_seed(int(config["seed"]))
--- a/paper.md
+++ b/paper.md
@@ -0,0 +1,603 @@
 Introduction
 Industrial control systems (ICS) form the backbone of modern critical infrastructure, which includes power grids, water treatment, manufacturing, and transportation, among others. These systems monitor, regulate, and automate the physical processes through sensors, actuators, programmable logic controllers (PLCs), and monitoring software. Unlike conventional IT systems, ICS operate in real time, closely coupled with physical processes and safety‑critical constraints, using heterogeneous and legacy communication protocols such as Modbus/TCP and DNP3 that were not originally designed with robust security in mind. This architectural complexity and operational criticality make ICS high‑impact targets for cyber attacks, where disruptions can result in physical damage, environmental harm, and even loss of life. Recent reviews of ICS security highlight the expanding attack surface due to increased connectivity, legacy systems’ vulnerabilities, and the inadequacy of traditional security controls in capturing the nuances of ICS networks and protocols [1, 2].
 工业控制系统（ICS）构成了现代关键基础设施的支柱，这些基础设施包括电网、水处理、制造业和交通运输等，通过传感器、执行器、可编程逻辑控制器（PLC）和监控软件对物理过程进行监测、调节和自动化控制。与传统的信息技术（IT）系统不同，ICS实时运行，与物理过程和安全关键约束紧密耦合，使用诸如Modbus/TCP和DNP3等异构和遗留通信协议，而这些协议最初设计时并未充分考虑强大的安全性。这种架构的复杂性和操作的关键性使ICS成为网络攻击的高影响目标，攻击造成的中断可能导致物理损坏、环境危害，甚至人员伤亡。最近对ICS安全的评估强调，由于连接性增加、遗留系统的漏洞以及传统安全控制措施在捕捉ICS网络和协议细微差别方面的不足，攻击面正在不断扩大[1, 2]。
 While machine learning (ML) techniques have shown promise for anomaly detection and automated cybersecurity within ICS, they rely heavily on labeled datasets that capture both benign operations and diverse attack patterns. In practice, real ICS traffic data — especially attack‑triggered captures — are scarce due to confidentiality, safety, and legal restrictions, and available public ICS datasets are few, limited in scope, or fail to reflect current threat modalities. For instance, the HAI Security Dataset provides operational telemetry and anomaly flags from a realistic control system setup for research purposes, but must be carefully preprocessed to derive protocol‑relevant features for ML tasks [3].  Data scarcity directly undermines model generalization, evaluation reproducibility, and the robustness of intrusion detection research, especially when training or testing ML models on realistic ICS behavior remains confined to small or outdated collections of examples [4]. 
 虽然机器学习（ML）技术在工业控制系统（ICS）的异常检测和自动化网络安全方面展现出了潜力，但它们严重依赖于能够捕捉正常操作和各种攻击模式的标记数据集。实际上，由于保密、安全和法律限制，真实的ICS流量数据（尤其是攻击触发的捕获数据）非常稀缺，而可用的公共ICS数据集数量少、范围有限，或者无法反映当前的威胁模式。例如，HAI安全数据集提供了来自真实控制系统设置的操作遥测数据和异常标记，用于研究目的，但必须进行仔细的预处理，才能提取与协议相关的特征，用于ML任务[3]。数据稀缺直接影响模型的泛化能力、评估的可重复性以及入侵检测研究的稳健性，尤其是当对ML模型进行训练或测试时，对真实ICS行为的研究仍然局限于少量或过时的示例集合[4]。
 Synthetic data generation offers a practical pathway to mitigate these challenges. By programmatically generating feature‑level sequences that mimic the statistical and temporal structure of real ICS telemetry, researchers can augment scarce training sets, standardize benchmarking, and preserve operational confidentiality. Relative to raw packet captures, feature‑level synthesis abstracts critical protocol semantics and statistical patterns without exposing sensitive fields, making it more compatible with safety constraints and compliance requirements in ICS environments. Modern generative modeling — including diffusion models — has advanced significantly in producing high‑fidelity synthetic data across domains. Diffusion approaches, such as denoising diffusion probabilistic models, learn to transform noise into coherent structured samples and have been successfully applied to tabular or time series data synthesis with better stability and data coverage compared to adversarial methods [5, 6]. 
 合成数据生成提供了一条切实可行的途径来缓解这些挑战。通过以编程方式生成模仿真实 ICS 遥测数据的统计和时间结构的特征级序列，研究人员可以扩充稀缺的训练集、规范基准测试，并保护操作机密性。相对于原始数据包捕获，特征级合成在不暴露敏感字段的情况下提取关键协议语义和统计模式，使其更符合 ICS 环境中的安全约束和合规要求。包括扩散模型在内的现代生成式建模在跨领域生成高保真合成数据方面取得了显著进展。扩散方法，如去噪扩散概率模型，学习将噪声转换为连贯的结构化样本，与对抗性方法相比，已成功应用于表格或时间序列数据合成，具有更好的稳定性和数据覆盖范围 [5, 6]。
 Despite these advances, most existing work either focuses on packet‑level generation [7] or is limited to generic tabular data [5], rather than domain‑specific control sequence synthesis tailored for ICS protocols where temporal coherence, multi‑channel dependencies, and discrete protocol legality are jointly required. This gap motivates our focus on protocol feature‑level generation for ICS — synthesizing sequences of protocol‑relevant fields conditioned on their temporal and cross‑channel structure. In this work, we formulate a hybrid modeling pipeline that decouples long‑horizon trends and local statistical detail while preserving discrete semantics of protocol tokens. By combining causal Transformers with diffusion‑based refiners, and enforcing deterministic validity constraints during sampling, our framework generates semantically coherent, temporally consistent, and distributionally faithful ICS feature sequences. We evaluate features derived from the HAI Security Dataset and demonstrate that our approach produces high‑quality synthetic sequences suitable for downstream augmentation, benchmarking, and integration into packet‑construction workflows that respect realistic ICS constraints.
 尽管取得了这些进展，但大多数现有工作要么专注于数据包级生成 [7]，要么仅限于通用表格数据 [5]，而不是针对ICS协议量身定制的特定领域控制序列合成，因为ICS协议需要同时满足时间连贯性、多通道依赖性和离散协议合法性。这一差距促使我们专注于ICS的协议特征级生成——基于其时间和跨通道结构合成与协议相关字段的序列。在这项工作中，我们设计了一个混合建模流程，在保留协议令牌离散语义的同时，将长期趋势与局部统计细节解耦。通过将因果Transformer与基于扩散的细化器相结合，并在采样过程中实施确定性有效性约束，我们的框架生成了语义连贯、时间一致且分布忠实的ICS特征序列。我们评估了从HAI安全数据集导出的特征，并证明我们的方法生成的高质量合成序列适用于下游增强、基准测试，以及集成到尊重现实ICS约束的数据包构建工作流程中。
 References for Introduction
 [1] Machine learning in industrial control system (ICS) security: current landscape, opportunities and challenges https://dl.acm.org/doi/abs/10.1007/s10844-022-00753-1
@article{10.1007/s10844-022-00753-1,
 author = {Koay, Abigail M. Y. and Ko, Ryan K. L and Hettema, Hinne and Radke, Kenneth},
 title = {Machine learning in industrial control system (ICS) security: current landscape, opportunities and challenges},
 year = {2022},
 issue_date = {Apr 2023},
 publisher = {Kluwer Academic Publishers},
 address = {USA},
 volume = {60},
 number = {2},
 issn = {0925-9902},
 url = {https://doi.org/10.1007/s10844-022-00753-1},
 doi = {10.1007/s10844-022-00753-1},
 abstract = {The advent of Industry 4.0 has led to a rapid increase in cyber attacks on industrial systems and processes, particularly on Industrial Control Systems (ICS). These systems are increasingly becoming prime targets for cyber criminals and nation-states looking to extort large ransoms or cause disruptions due to their ability to cause devastating impact whenever they cease working or malfunction. Although myriads of cyber attack detection systems have been proposed and developed, these detection systems still face many challenges that are typically not found in traditional detection systems. Motivated by the need to better understand these challenges to improve current approaches, this paper aims to (1) understand the current vulnerability landscape in ICS, (2) survey current advancements of Machine Learning (ML) based methods with respect to the usage of ML base classifiers (3) provide insights to benefits and limitations of recent advancement with respect to two performance vectors; detection accuracy and attack variety. Based on our findings, we present key open challenges which will represent exciting research opportunities for the research community.},
 journal = {J. Intell. Inf. Syst.},
 month = oct,
 pages = {377–405},
 numpages = {29},
 keywords = {Operational technology, Cyber security, Dataset, Industrial control systems, Machine learning, Critical infrastructure}
 }
 [2] Securing Industrial Control Systems: Components, Cyber Threats, and Machine Learning-Driven Defense Strategies https://www.mdpi.com/1424-8220/23/21/8840
@ARTICLE{Nankya2023-gp,
  title     = "Securing industrial Control Systems: Components, cyber threats,
               and machine learning-driven defense strategies",
  author    = "Nankya, Mary and Chataut, Robin and Akl, Robert",
  abstract  = "Industrial Control Systems (ICS), which include Supervisory
               Control and Data Acquisition (SCADA) systems, Distributed
               Control Systems (DCS), and Programmable Logic Controllers (PLC),
               play a crucial role in managing and regulating industrial
               processes. However, ensuring the security of these systems is of
               utmost importance due to the potentially severe consequences of
               cyber attacks. This article presents an overview of ICS
               security, covering its components, protocols, industrial
               applications, and performance aspects. It also highlights the
               typical threats and vulnerabilities faced by these systems.
               Moreover, the article identifies key factors that influence the
               design decisions concerning control, communication, reliability,
               and redundancy properties of ICS, as these are critical in
               determining the security needs of the system. The article
               outlines existing security countermeasures, including network
               segmentation, access control, patch management, and security
               monitoring. Furthermore, the article explores the integration of
               machine learning techniques to enhance the cybersecurity of ICS.
               Machine learning offers several advantages, such as anomaly
               detection, threat intelligence analysis, and predictive
               maintenance. However, combining machine learning with other
               security measures is essential to establish a comprehensive
               defense strategy for ICS. The article also addresses the
               challenges associated with existing measures and provides
               recommendations for improving ICS security. This paper becomes a
               valuable reference for researchers aiming to make meaningful
               contributions within the constantly evolving ICS domain by
               providing an in-depth examination of the present state,
               challenges, and potential future advancements.",
  journal   = "Sensors (Basel)",
  publisher = "MDPI AG",
  volume    =  23,
  number    =  21,
  pages     = "8840",
  month     =  oct,
  year      =  2023,
  keywords  = "SCADA; anomaly detection; artificial intelligence; attacks;
               cyber defense; cyber threats; industrial control systems;
               security; vulnerabilities",
  copyright = "https://creativecommons.org/licenses/by/4.0/",
  language  = "en"
 }
 [3] HAI Security Dataset https://www.kaggle.com/datasets/icsdataset/hai-security-dataset
@misc{shin_lee_choi_yun_min_kim_2023,
        title={HAI Security Dataset},
        url={https://www.kaggle.com/dsv/5821622},
        DOI={10.34740/KAGGLE/DSV/5821622},
        publisher={Kaggle},
        author={Shin, Hyeok-Ki and Lee, Woomyo and Choi, Seungoh and Yun, Jeong-Han and Min, Byung Gil and Kim, HyoungChun},
        year={2023}
 }
 [4] Intrusion Detection in Industrial Control Systems Using Transfer Learning Guided by Reinforcement Learning https://doi.org/10.3390/info16100910
@Article{info16100910,
 AUTHOR = {Ali, Jokha and Ali, Saqib and Al Balushi, Taiseera and Nadir, Zia},
 TITLE = {Intrusion Detection in Industrial Control Systems Using Transfer Learning Guided by Reinforcement Learning},
 JOURNAL = {Information},
 VOLUME = {16},
 YEAR = {2025},
 NUMBER = {10},
 ARTICLE-NUMBER = {910},
 URL = {https://www.mdpi.com/2078-2489/16/10/910},
 ISSN = {2078-2489},
 ABSTRACT = {Securing Industrial Control Systems (ICSs) is critical, but it is made challenging by the constant evolution of cyber threats and the scarcity of labeled attack data in these specialized environments. Standard intrusion detection systems (IDSs) often fail to adapt when transferred to new networks with limited data. To address this, this paper introduces an adaptive intrusion detection framework that combines a hybrid Convolutional Neural Network and Long Short-Term Memory (CNN-LSTM) model with a novel transfer learning strategy. We employ a Reinforcement Learning (RL) agent to intelligently guide the fine-tuning process, which allows the IDS to dynamically adjust its parameters such as layer freezing and learning rates in real-time based on performance feedback. We evaluated our system in a realistic data-scarce scenario using only 50 labeled training samples. Our RL-Guided model achieved a final F1-score of 0.9825, significantly outperforming a standard neural fine-tuning model (0.861) and a target baseline model (0.759). Analysis of the RL agent’s behavior confirmed that it learned a balanced and effective policy for adapting the model to the target domain. We conclude that the proposed RL-guided approach creates a highly accurate and adaptive IDS that overcomes the limitations of static transfer learning methods. This dynamic fine-tuning strategy is a powerful and promising direction for building resilient cybersecurity defenses for critical infrastructure.},
 DOI = {10.3390/info16100910}
 }
 [5] TabDDPM: Modelling Tabular Data with Diffusion Models https://arxiv.org/abs/2209.15421
@InProceedings{pmlr-v202-kotelnikov23a,
  title =          {{T}ab{DDPM}: Modelling Tabular Data with Diffusion Models},
  author =       {Kotelnikov, Akim and Baranchuk, Dmitry and Rubachev, Ivan and Babenko, Artem},
  booktitle =          {Proceedings of the 40th International Conference on Machine Learning},
  pages =          {17564--17579},
  year =          {2023},
  editor =          {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan},
  volume =          {202},
  series =          {Proceedings of Machine Learning Research},
  month =          {23--29 Jul},
  publisher =    {PMLR},
  pdf =          {https://proceedings.mlr.press/v202/kotelnikov23a/kotelnikov23a.pdf},
  url =          {https://proceedings.mlr.press/v202/kotelnikov23a.html},
  abstract =          {Denoising diffusion probabilistic models are becoming the leading generative modeling paradigm for many important data modalities. Being the most prevalent in the computer vision community, diffusion models have recently gained some attention in other domains, including speech, NLP, and graph-like data. In this work, we investigate if the framework of diffusion models can be advantageous for general tabular problems, where data points are typically represented by vectors of heterogeneous features. The inherent heterogeneity of tabular data makes it quite challenging for accurate modeling since the individual features can be of a completely different nature, i.e., some of them can be continuous and some can be discrete. To address such data types, we introduce TabDDPM — a diffusion model that can be universally applied to any tabular dataset and handles any feature types. We extensively evaluate TabDDPM on a wide set of benchmarks and demonstrate its superiority over existing GAN/VAE alternatives, which is consistent with the advantage of diffusion models in other fields.}
 }
 [6] Autoregressive Denoising Diffusion Models for Multivariate Probabilistic Time Series Forecasting https://arxiv.org/abs/2101.12072
@misc{rasul2021autoregressivedenoisingdiffusionmodels,
      title={Autoregressive Denoising Diffusion Models for Multivariate Probabilistic Time Series Forecasting}, 
      author={Kashif Rasul and Calvin Seward and Ingmar Schuster and Roland Vollgraf},
      year={2021},
      eprint={2101.12072},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2101.12072}, 
 }
 [7] NetDiffusion: Network Data Augmentation Through Protocol-Constrained Traffic Generation. https://arxiv.org/abs/2310.08543
@misc{jiang2023netdiffusionnetworkdataaugmentation,
      title={NetDiffusion: Network Data Augmentation Through Protocol-Constrained Traffic Generation}, 
      author={Xi Jiang and Shinan Liu and Aaron Gember-Jacobson and Arjun Nitin Bhagoji and Paul Schmitt and Francesco Bronzino and Nick Feamster},
      year={2023},
      eprint={2310.08543},
      archivePrefix={arXiv},
      primaryClass={cs.NI},
      url={https://arxiv.org/abs/2310.08543}, 
 }
 Related Work
 Early work on generating "realistic" network data mostly remained at the packet/flow header level, relying either on replay or on statistical synthesis from single-point observations. Swing, a closed-loop, network-responsive traffic generator, extracts user/application/network distributions from single-point observations to reproduce burstiness and correlation across multiple time scales [1]. Subsequently, a series of works advanced header synthesis to learning-based generation: the WGAN-based method added explicit verification of protocol field consistency for NetFlow/IPFIX data [2], NetShare reformulated header modeling as flow-level time series and improved fidelity and scalability through domain-specific encoding and parallel fine-tuning [3], and DoppelGANger preserved the long-range structure and downstream rank-ordering consistency of networked time series by decoupling attributes from sequences [4]. However, in industrial control system (ICS) scenarios, raw PCAPs are usually not shareable; public testbeds (such as SWaT and WADI) mostly provide process/monitoring telemetry and protocol interactions for security assessment, and the resulting public datasets emphasize operational variables rather than packet-level traces [5, 6]. This makes "protocol- and semantics-aware synthesis at the feature/telemetry level" both more feasible and more necessary in practice: we are more concerned with reproducing high-level distributions and multi-scale temporal patterns according to operational semantics and physical constraints, without relying on the original packets. From this perspective, the generation paradigm naturally shifts from "packet syntax reproduction" to "modeling of high-level spatio-temporal distributions and uncertainties", which requires stable training, strong distribution fitting, and interpretable uncertainty characterization.
 早期面向“真实感”网络数据的生成大多停留在分组/流头部层面，要么重放、要么在单点观测的前提下做统计合成。Swing 以闭环、网络响应的方式，从单点观测中抽取用户/应用/网络分布，从而在多时间尺度上重现突发性与相关性[1]；随后的一系列工作把对头部的合成推进到学习型生成：基于 WGAN 的方法在 NetFlow/IPFIX 上加入协议字段一致性的显式校验[2]，NetShare 将头部建模重构为流级时间序列并用领域编码与并行微调提升保真与可扩展性[3]，DoppelGANger 则通过将属性与序列解耦，保留网络化时间序列的长程结构与下游排序一致性[4]。然而在工业控制系统（ICS）场景，原始 PCAP 通常不可分享，公开测试床（如 SWaT、WADI）更多提供过程/监控遥测与协议交互，用于安全评估，但公开数据集中强调运行变量而非报文级踪迹[5,6]。这使得“特征/遥测层级的、协议与语义感知的合成”在实践上更具可行性与必要性：我们更关心在不依赖原始报文的前提下，按操作语义与物理约束重现高层分布与多尺度时序模式。沿着这一视角，生成范式也自然从“报文语法复现”转向“高层时空分布与不确定性的建模”，需要稳定训练、强分布拟合与可解释的不确定性刻画。
 Diffusion models are a good fit for this setting: DDPM achieves high-quality sampling and stable optimization through an efficient ε-parameterization and a weighted variational objective [7], while the SDE perspective unifies score-based models and diffusion models, providing likelihood evaluation via the probability flow ODE as well as predictor–corrector sampling strategies [8]. For time series, TimeGrad replaces a restricted output distribution with conditional denoising, capturing high-dimensional correlations at each step [9]; CSDI explicitly performs conditional diffusion and uses two-dimensional attention to simultaneously leverage temporal and cross-feature dependencies, making it suitable for conditional generation and missing-value imputation [10]; for more general spatio-temporal structures, DiffSTG generalizes diffusion to spatio-temporal graphs, combining TCN/GCN with a denoising U-Net to improve CRPS and inference efficiency in a non-autoregressive manner [11], and PriSTI further enhances conditional features and geographical relationships, maintaining robustness under high missing rates and sensor failures [12]; for long sequences and continuous domains, DiffWave verifies that diffusion can match the quality of strong vocoders under fast non-autoregressive synthesis [13]; and studies on cellular communication traffic show that diffusion can recover spatio-temporal patterns and provide uncertainty characterization at the urban scale [14]. Taken together, these results point to one conclusion: when the research focus is on "telemetry/high-level features" rather than raw packets, diffusion models provide stable and fine-grained distribution fitting and uncertainty quantification, which is exactly what ICS telemetry synthesis requires. At the same time, delegating all structure to a single "monolithic" diffusion model is not advisable: the long-range temporal skeleton and the fine-grained marginal distributions are often in tension during optimization, which calls for explicit decoupling in the model design.
 扩散模型在这一路径上表现出良好的契合度：DDPM 通过高效的 ε 参数化与加权变分目标实现高质量采样与稳定优化[7]，SDE 视角把 score-based 与扩散统一起来，提供了基于概率流 ODE 的似然评估与预测—校正采样策略[8]。面向时间序列，TimeGrad 用条件去噪替代受限的输出分布，在每一步捕获高维相关性[9]；CSDI 显式地做条件扩散并用二维注意力同时利用时间与跨特征依赖，适合条件化与填补缺失[10]；在更广义的时空结构上，DiffSTG 将扩散推广到时空图，结合 TCN/GCN 与去噪 U-Net 以非自回归方式提升 CRPS 与推理效率[11]，PriSTI 进一步增强条件特征与地理关系，在高缺失率与传感失效下保持鲁棒性[12]；在长序列与连续域，DiffWave 验证了扩散在非自回归快速合成下也能匹配强声码器的质量[13]；蜂窝通信流量的研究表明，扩散可以在城市尺度恢复时空模式并提供不确定性刻画[14]。这些结果整体指向一个结论：当研究重点在“遥测/高层特征”而非原始报文时，扩散模型提供了稳定、精细的分布拟合与不确定性量化，正契合 ICS 遥测合成的需求。与此同时，直接把所有结构交给一个“单体扩散”并不可取：长程时序骨架与细粒度边际分布往往存在优化张力，需要在建模上显式解耦。
 Turning to the mechanistic complexity of ICS: its channel types are inherently mixed, comprising both continuous process trajectories and discrete supervisory/status variables, and discrete channels must remain "legal" under operational constraints. The aforementioned progress in time series diffusion has mainly occurred in continuous spaces, but discrete diffusion has also developed systematic methods: D3PM improves sampling quality and likelihood through absorbing/masking and structured transitions in discrete state spaces [15], subsequent masked diffusion provides stable reconstruction on categorical data in a more simplified form [4], multinomial diffusion directly defines diffusion on a finite vocabulary through mechanisms such as argmax flows [20], and Diffusion-LM demonstrates an effective path to controllable text generation by imposing gradient-based constraints in a continuous latent space [16]. From the perspective of protocols and finite-state machines, coverage-guided fuzz testing emphasizes the importance of "sequence legality and state coverage" [17–19], echoing the concept of "legality by construction" in discrete diffusion: absorbing/masking diffusion is preferred on discrete channels, supplemented by type-aware conditioning and sampling constraints, to avoid the semantic invalidity and marginal distortion caused by post hoc thresholding.
 进一步看 ICS 的机制复杂性：其通道类型天然是混合的，既含连续的过程轨迹，也含离散的监督/状态变量，且离散通道必须在操作约束下“合法”。前述时间序列扩散的进展主要发生在连续空间，但离散扩散同样已形成系统化方法：D3PM 在离散状态空间中以吸收/掩蔽和结构化转移提升采样质量与似然[15]，后续的掩蔽扩散以更简化的形式在类别数据上提供稳定重构[4]，多项分布（Multinomial）扩散通过 argmax flows 等机制把扩散直接定义在有限词表上[20]，而 Diffusion-LM 则展示了在连续潜空间施加梯度约束以实现可控文本生成的有效路径[16]。结合协议与状态机视角，覆盖引导的模糊测试强调“序列合法性与状态覆盖”的关键性[17–19]，与离散扩散的“按构造合法”理念相呼应：在离散通道上优先采用吸收/掩蔽式扩散并辅以类型感知的条件化与采样约束，避免事后阈值化导致的语义无效与边际失真。
 From the perspective of high-level synthesis, temporal structure is equally indispensable: ICS control often involves delay effects, phased operating conditions, and cross-channel coupling, requiring models to capture low-frequency, long-range dependencies while also overlaying multi-modal fine-grained fluctuations on top of them. The Transformer family has provided ample evidence in long-sequence time series tasks: Transformer-XL breaks through the fixed-length context limitation with a reusable memory mechanism and significantly strengthens the modeling of long-range dependencies [21]; Informer uses ProbSparse attention and efficient decoding to balance horizon length and efficiency in long-sequence prediction [22]; Autoformer robustly models long-term seasonality and trends through autocorrelation and decomposition mechanisms [23]; FEDformer further improves long-horizon prediction performance through frequency-domain enhancement and decomposition [24]; and PatchTST improves the stability and generalization of long-sequence multivariate prediction through patch-based local representations and channel-independent modeling [25]. Combined with our earlier positioning of diffusion, this chain of evidence points to a natural division of labor: attention-based sequence models first extract stable low-frequency trends/conditions (the long-range skeleton), and diffusion then focuses on marginals and fine details in the residual space; meanwhile, discrete masking/absorbing diffusion is applied to supervisory/mode variables to ensure vocabulary legality by construction. This design not only inherits the advantages of time series diffusion in distribution fitting and uncertainty characterization [9–14], but also stabilizes the macroscopic temporal structure through the long-range attention of the Transformer, yielding a practical, integrated generation pipeline for the mixed types and multi-scale dynamics of ICS.
 从高层合成的角度，时序结构同样不可或缺：ICS 控制往往伴随延迟效应、阶段性工况与跨通道耦合，需要模型既能刻画低频、长程依赖，又能在其上叠加多模态的细粒度波动。Transformer 系列在长序列时间序列任务上已提供了充分证据：Transformer‑XL 通过可复用的记忆机制突破固定长度上下文限制、显著增强长程依赖表达[21]；Informer 在长序列预测中用 ProbSparse 注意力与高效解码兼顾跨度与效率[22]；Autoformer 通过自相关与分解机制稳健建模长期季节与趋势[23]；FEDformer 在频域增强与分解上进一步提升长周期预测的表现[24]；PatchTST 则以局部补丁化的表示和通道独立建模提升长序列多变量预测的稳定性与泛化[25]。结合我们在前文对扩散的定位，这一证据链指向一种自然的分工：用注意力型序列模型先抽取稳定的低频趋势/条件（长程骨架），再让扩散在残差空间聚焦于边际与细节；与此同时，针对监督/模式类变量采用离散掩蔽/吸收扩散按构造保证词表合法性。该设计既继承了时间序列扩散在分布拟合与不确定性刻画上的优势[9–14]，又借助 Transformer 的长距注意力稳固了宏观时序支撑，使得在 ICS 的混合类型与多尺度动态下形成可操作的一体化生成管线。
 References for Related Work
 [1] Realistic and responsive network traffic generation https://dl.acm.org/doi/10.1145/1159913.1159928
@article{10.1145/1151659.1159928,
 author = {Vishwanath, Kashi Venkatesh and Vahdat, Amin},
 title = {Realistic and responsive network traffic generation},
 year = {2006},
 issue_date = {October 2006},
 publisher = {Association for Computing Machinery},
 address = {New York, NY, USA},
 volume = {36},
 number = {4},
 issn = {0146-4833},
 url = {https://doi.org/10.1145/1151659.1159928},
 doi = {10.1145/1151659.1159928},
 abstract = {This paper presents Swing, a closed-loop, network-responsive traffic generator that accurately captures the packet interactions of a range of applications using a simple structural model. Starting from observed traffic at a single point in the network, Swing automatically extracts distributions for user, application, and network behavior. It then generates live traffic corresponding to the underlying models in a network emulation environment running commodity network protocol stacks. We find that the generated traces are statistically similar to the original traces. Further, to the best of our knowledge, we are the first to reproduce burstiness in traffic across a range of timescales using a model applicable to a variety of network settings. An initial sensitivity analysis reveals the importance of capturing and recreating user, application, and network characteristics to accurately reproduce such burstiness. Finally, we explore Swing's ability to vary user characteristics, application properties, and wide-area network conditions to project traffic characteristics into alternate scenarios.},
 journal = {SIGCOMM Comput. Commun. Rev.},
 month = aug,
 pages = {111–122},
 numpages = {12},
 keywords = {burstiness, energy plot, generator, internet, modeling, structural model, traffic, wavelets}
 }
@inproceedings{10.1145/1159913.1159928,
 author = {Vishwanath, Kashi Venkatesh and Vahdat, Amin},
 title = {Realistic and responsive network traffic generation},
 year = {2006},
 isbn = {1595933085},
 publisher = {Association for Computing Machinery},
 address = {New York, NY, USA},
 url = {https://doi.org/10.1145/1159913.1159928},
 doi = {10.1145/1159913.1159928},
 abstract = {This paper presents Swing, a closed-loop, network-responsive traffic generator that accurately captures the packet interactions of a range of applications using a simple structural model. Starting from observed traffic at a single point in the network, Swing automatically extracts distributions for user, application, and network behavior. It then generates live traffic corresponding to the underlying models in a network emulation environment running commodity network protocol stacks. We find that the generated traces are statistically similar to the original traces. Further, to the best of our knowledge, we are the first to reproduce burstiness in traffic across a range of timescales using a model applicable to a variety of network settings. An initial sensitivity analysis reveals the importance of capturing and recreating user, application, and network characteristics to accurately reproduce such burstiness. Finally, we explore Swing's ability to vary user characteristics, application properties, and wide-area network conditions to project traffic characteristics into alternate scenarios.},
 booktitle = {Proceedings of the 2006 Conference on Applications, Technologies, Architectures, and Protocols for Computer Communications},
 pages = {111–122},
 numpages = {12},
 keywords = {burstiness, energy plot, generator, internet, modeling, structural model, traffic, wavelets},
 location = {Pisa, Italy},
 series = {SIGCOMM '06}
 }
 [2] Flow-based network traffic generation using Generative Adversarial Networks https://arxiv.org/abs/1810.07795
@article{Ring_2019,
   title={Flow-based network traffic generation using Generative Adversarial Networks},
   volume={82},
   ISSN={0167-4048},
   url={http://dx.doi.org/10.1016/j.cose.2018.12.012},
   DOI={10.1016/j.cose.2018.12.012},
   journal={Computers \& Security},
   publisher={Elsevier BV},
   author={Ring, Markus and Schlör, Daniel and Landes, Dieter and Hotho, Andreas},
   year={2019},
   month=may, pages={156–172} }
 [3] Practical GAN-based synthetic IP header trace generation using NetShare https://dl.acm.org/doi/abs/10.1145/3544216.3544251?download=true
@inproceedings{10.1145/3544216.3544251,
 author = {Yin, Yucheng and Lin, Zinan and Jin, Minhao and Fanti, Giulia and Sekar, Vyas},
 title = {Practical GAN-based synthetic IP header trace generation using NetShare},
 year = {2022},
 isbn = {9781450394208},
 publisher = {Association for Computing Machinery},
 address = {New York, NY, USA},
 url = {https://doi.org/10.1145/3544216.3544251},
 doi = {10.1145/3544216.3544251},
 abstract = {We explore the feasibility of using Generative Adversarial Networks (GANs) to automatically learn generative models to generate synthetic packet- and flow header traces for networking tasks (e.g., telemetry, anomaly detection, provisioning). We identify key fidelity, scalability, and privacy challenges and tradeoffs in existing GAN-based approaches. By synthesizing domain-specific insights with recent advances in machine learning and privacy, we identify design choices to tackle these challenges. Building on these insights, we develop an end-to-end framework, NetShare. We evaluate NetShare on six diverse packet header traces and find that: (1) across all distributional metrics and traces, it achieves 46\% more accuracy than baselines and (2) it meets users' requirements of downstream tasks in evaluating accuracy and rank ordering of candidate approaches.},
 booktitle = {Proceedings of the ACM SIGCOMM 2022 Conference},
 pages = {458–472},
 numpages = {15},
 keywords = {synthetic data generation, privacy, network packets, network flows, generative adversarial networks},
 location = {Amsterdam, Netherlands},
 series = {SIGCOMM '22}
 }
 [4] Using GANs for Sharing Networked Time Series Data: Challenges, Initial Promise, and Open Questions https://arxiv.org/abs/1909.13403
@inproceedings{Lin_2020, series={IMC ’20},
   title={Using GANs for Sharing Networked Time Series Data: Challenges, Initial Promise, and Open Questions},
   url={http://dx.doi.org/10.1145/3419394.3423643},
   DOI={10.1145/3419394.3423643},
   booktitle={Proceedings of the ACM Internet Measurement Conference},
   publisher={ACM},
   author={Lin, Zinan and Jain, Alankar and Wang, Chen and Fanti, Giulia and Sekar, Vyas},
   year={2020},
   month=oct, pages={464–483},
   collection={IMC ’20} }
 [5] SWaT: a water treatment testbed for research and training on ICS security https://ieeexplore.ieee.org/document/7469060
@INPROCEEDINGS{7469060,
  author={Mathur, Aditya P. and Tippenhauer, Nils Ole},
  booktitle={2016 International Workshop on Cyber-physical Systems for Smart Water Networks (CySWater)}, 
  title={SWaT: a water treatment testbed for research and training on ICS security}, 
  year={2016},
  volume={},
  number={},
  pages={31-36},
  keywords={Sensors;Actuators;Feeds;Process control;Chemicals;Chemical sensors;Security;Cyber Physical Systems;Industrial Control Systems;Cyber Attacks;Cyber Defense;Water Testbed},
  doi={10.1109/CySWater.2016.7469060}}
 [6] WADI: a water distribution testbed for research in the design of secure cyber physical systems https://www.researchgate.net/publication/315849116_WADI_a_water_distribution_testbed_for_research_in_the_design_of_secure_cyber_physical_systems
@inproceedings{10.1145/3055366.3055375,
 author = {Ahmed, Chuadhry Mujeeb and Palleti, Venkata Reddy and Mathur, Aditya P.},
 title = {WADI: a water distribution testbed for research in the design of secure cyber physical systems},
 year = {2017},
 isbn = {9781450349758},
 publisher = {Association for Computing Machinery},
 address = {New York, NY, USA},
 url = {https://doi.org/10.1145/3055366.3055375},
 doi = {10.1145/3055366.3055375},
 abstract = {The architecture of a water distribution testbed (WADI), and on-going research in the design of secure water distribution system is presented. WADI consists of three stages controlled by Programmable Logic Controllers (PLCs) and two stages controlled via Remote Terminal Units (RTUs). Each PLC and RTU uses sensors to estimate the system state and the actuators to effect control. WADI is currently used to (a) conduct security analysis for water distribution networks, (b) experimentally assess detection mechanisms for potential cyber and physical attacks, and (c) understand how the impact of an attack on one CPS could cascade to other connected CPSs. The cascading effects of attacks can be studied in WADI through its connection to two other testbeds, namely for water treatment and power generation and distribution.},
 booktitle = {Proceedings of the 3rd International Workshop on Cyber-Physical Systems for Smart Water Networks},
 pages = {25–28},
 numpages = {4},
 keywords = {attack detection, cyber physical systems, cyber security, industrial control systems, water distribution testbed},
 location = {Pittsburgh, Pennsylvania},
 series = {CySWATER '17}
 }
 [7] Denoising Diffusion Probabilistic Models https://arxiv.org/abs/2006.11239
@inproceedings{NEURIPS2020_4c5bcfec,
 author = {Ho, Jonathan and Jain, Ajay and Abbeel, Pieter},
 booktitle = {Advances in Neural Information Processing Systems},
 editor = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
 pages = {6840--6851},
 publisher = {Curran Associates, Inc.},
 title = {Denoising Diffusion Probabilistic Models},
 url = {https://proceedings.neurips.cc/paper_files/paper/2020/file/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf},
 volume = {33},
 year = {2020}
 }
 [8] Score-Based Generative Modeling through Stochastic Differential Equations https://arxiv.org/abs/2011.13456
@misc{song2021scorebasedgenerativemodelingstochastic,
      title={Score-Based Generative Modeling through Stochastic Differential Equations}, 
      author={Yang Song and Jascha Sohl-Dickstein and Diederik P. Kingma and Abhishek Kumar and Stefano Ermon and Ben Poole},
      year={2021},
      eprint={2011.13456},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2011.13456}, 
 }
 [9] Autoregressive Denoising Diffusion Models for Multivariate Probabilistic Time Series Forecasting https://arxiv.org/abs/2101.12072
@misc{rasul2021autoregressivedenoisingdiffusionmodels,
      title={Autoregressive Denoising Diffusion Models for Multivariate Probabilistic Time Series Forecasting}, 
      author={Kashif Rasul and Calvin Seward and Ingmar Schuster and Roland Vollgraf},
      year={2021},
      eprint={2101.12072},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2101.12072}, 
 }
 [10] CSDI: Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation https://arxiv.org/abs/2107.03502
@misc{tashiro2021csdiconditionalscorebaseddiffusion,
      title={CSDI: Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation}, 
      author={Yusuke Tashiro and Jiaming Song and Yang Song and Stefano Ermon},
      year={2021},
      eprint={2107.03502},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2107.03502}, 
 }
 [11] DiffSTG: Probabilistic Spatio-Temporal Graph Forecasting with Denoising Diffusion Models https://arxiv.org/abs/2301.13629
@misc{wen2024diffstgprobabilisticspatiotemporalgraph,
      title={DiffSTG: Probabilistic Spatio-Temporal Graph Forecasting with Denoising Diffusion Models}, 
      author={Haomin Wen and Youfang Lin and Yutong Xia and Huaiyu Wan and Qingsong Wen and Roger Zimmermann and Yuxuan Liang},
      year={2024},
      eprint={2301.13629},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2301.13629}, 
 }
 [12] PriSTI: A Conditional Diffusion Framework for Spatiotemporal Imputation https://arxiv.org/abs/2302.09746
@misc{liu2023pristiconditionaldiffusionframework,
      title={PriSTI: A Conditional Diffusion Framework for Spatiotemporal Imputation}, 
      author={Mingzhe Liu and Han Huang and Hao Feng and Leilei Sun and Bowen Du and Yanjie Fu},
      year={2023},
      eprint={2302.09746},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2302.09746}, 
 }
 [13] DiffWave: A Versatile Diffusion Model for Audio Synthesis https://arxiv.org/abs/2009.09761
@misc{kong2021diffwaveversatilediffusionmodel,
      title={DiffWave: A Versatile Diffusion Model for Audio Synthesis}, 
      author={Zhifeng Kong and Wei Ping and Jiaji Huang and Kexin Zhao and Bryan Catanzaro},
      year={2021},
      eprint={2009.09761},
      archivePrefix={arXiv},
      primaryClass={eess.AS},
      url={https://arxiv.org/abs/2009.09761}, 
 }
 [14] Spatio-Temporal Diffusion Model for Cellular Traffic Generation https://ieeexplore.ieee.org/document/11087622
@ARTICLE{11087622,
  author={Liu, Xiaosi and Xu, Xiaowen and Liu, Zhidan and Li, Zhenjiang and Wu, Kaishun},
  journal={IEEE Transactions on Mobile Computing}, 
  title={Spatio-Temporal Diffusion Model for Cellular Traffic Generation}, 
  year={2026},
  volume={25},
  number={1},
  pages={257-271},
  keywords={Base stations;Diffusion models;Data models;Uncertainty;Predictive models;Generative adversarial networks;Knowledge graphs;Mobile computing;Telecommunication traffic;Semantics;Cellular traffic;data generation;diffusion model;spatio-temporal graph},
  doi={10.1109/TMC.2025.3591183}}
 [15] Structured Denoising Diffusion Models in Discrete State-Spaces https://arxiv.org/abs/2107.03006
@misc{austin2023structureddenoisingdiffusionmodels,
      title={Structured Denoising Diffusion Models in Discrete State-Spaces}, 
      author={Jacob Austin and Daniel D. Johnson and Jonathan Ho and Daniel Tarlow and Rianne van den Berg},
      year={2023},
      eprint={2107.03006},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2107.03006}, 
 }
 [16] Diffusion-LM Improves Controllable Text Generation https://arxiv.org/abs/2205.14217
@misc{li2022diffusionlmimprovescontrollabletext,
      title={Diffusion-LM Improves Controllable Text Generation}, 
      author={Xiang Lisa Li and John Thickstun and Ishaan Gulrajani and Percy Liang and Tatsunori B. Hashimoto},
      year={2022},
      eprint={2205.14217},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2205.14217}, 
 }
 [17] AFLNet Five Years Later: On Coverage-Guided Protocol Fuzzing https://arxiv.org/html/2412.20324v1
@misc{meng2025aflnetyearslatercoverageguided,
      title={AFLNet Five Years Later: On Coverage-Guided Protocol Fuzzing}, 
      author={Ruijie Meng and Van-Thuan Pham and Marcel Böhme and Abhik Roychoudhury},
      year={2025},
      eprint={2412.20324},
      archivePrefix={arXiv},
      primaryClass={cs.SE},
      url={https://arxiv.org/abs/2412.20324}, 
 }
 [18] Learn&Fuzz: Machine Learning for Input Fuzzing https://arxiv.org/abs/1701.07232
@misc{godefroid2017learnfuzzmachinelearninginput,
      title={Learn&Fuzz: Machine Learning for Input Fuzzing}, 
      author={Patrice Godefroid and Hila Peleg and Rishabh Singh},
      year={2017},
      eprint={1701.07232},
      archivePrefix={arXiv},
      primaryClass={cs.AI},
      url={https://arxiv.org/abs/1701.07232}, 
 }
 [19] NEUZZ: Efficient Fuzzing with Neural Program Smoothing https://arxiv.org/abs/1807.05620
@misc{she2019neuzzefficientfuzzingneural,
      title={NEUZZ: Efficient Fuzzing with Neural Program Smoothing}, 
      author={Dongdong She and Kexin Pei and Dave Epstein and Junfeng Yang and Baishakhi Ray and Suman Jana},
      year={2019},
      eprint={1807.05620},
      archivePrefix={arXiv},
      primaryClass={cs.CR},
      url={https://arxiv.org/abs/1807.05620}, 
 }
 [20] Argmax Flows and Multinomial Diffusion: Learning Categorical Distributions https://arxiv.org/abs/2102.05379 
@misc{hoogeboom2021argmaxflowsmultinomialdiffusion,
      title={Argmax Flows and Multinomial Diffusion: Learning Categorical Distributions}, 
      author={Emiel Hoogeboom and Didrik Nielsen and Priyank Jaini and Patrick Forré and Max Welling},
      year={2021},
      eprint={2102.05379},
      archivePrefix={arXiv},
      primaryClass={stat.ML},
      url={https://arxiv.org/abs/2102.05379}, 
 }
 [21] Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context https://arxiv.org/abs/1901.02860
@misc{dai2019transformerxlattentivelanguagemodels,
      title={Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context}, 
      author={Zihang Dai and Zhilin Yang and Yiming Yang and Jaime Carbonell and Quoc V. Le and Ruslan Salakhutdinov},
      year={2019},
      eprint={1901.02860},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/1901.02860}, 
 }
 [22] Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting https://arxiv.org/abs/2012.07436
@misc{zhou2021informerefficienttransformerlong,
      title={Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting}, 
      author={Haoyi Zhou and Shanghang Zhang and Jieqi Peng and Shuai Zhang and Jianxin Li and Hui Xiong and Wancai Zhang},
      year={2021},
      eprint={2012.07436},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2012.07436}, 
 }
 [23] Autoformer: Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting  https://arxiv.org/abs/2106.13008
@misc{wu2022autoformerdecompositiontransformersautocorrelation,
      title={Autoformer: Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting}, 
      author={Haixu Wu and Jiehui Xu and Jianmin Wang and Mingsheng Long},
      year={2022},
      eprint={2106.13008},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2106.13008}, 
 }
 [24] FEDformer: Frequency Enhanced Decomposed Transformer for Long-term Series Forecasting https://arxiv.org/abs/2201.12740
@misc{zhou2022fedformerfrequencyenhanceddecomposed,
      title={FEDformer: Frequency Enhanced Decomposed Transformer for Long-term Series Forecasting}, 
      author={Tian Zhou and Ziqing Ma and Qingsong Wen and Xue Wang and Liang Sun and Rong Jin},
      year={2022},
      eprint={2201.12740},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2201.12740}, 
 }
 [25] A Note on Extremal Sombor Indices of Trees with a Given Degree Sequence https://arxiv.org/abs/2211.11920
@article{2023,
   title={A Note on Extremal Sombor Indices of Trees with a Given Degree Sequence},
   volume={90},
   ISSN={0340-6253},
   url={http://dx.doi.org/10.46793/match.90-1.197D},
   DOI={10.46793/match.90-1.197d},
   number={1},
   journal={Match Communications in Mathematical and in Computer Chemistry},
   publisher={University Library in Kragujevac},
   author={Damjanović, Ivan and Milošević, Marko and Stevanović, Dragan},
   year={2023},
   pages={197–202} }
 Methodology
 Industrial control system (ICS) telemetry is intrinsically mixed-type and mechanistically heterogeneous: continuous process trajectories (e.g., sensor and actuator signals) coexist with discrete supervisory states (e.g., modes, alarms, interlocks), and the underlying generating mechanisms range from physical inertia to program-driven step logic. This heterogeneity is not cosmetic—it directly affects what “realistic” synthesis means, because a generator must jointly satisfy (i) temporal coherence, (ii) distributional fidelity, and (iii) discrete semantic validity (i.e., every discrete output must belong to its legal vocabulary by construction). These properties are emphasized broadly in operational-technology security guidance and ICS engineering practice, where state logic and physical dynamics are tightly coupled. [12]
 We formalize each training instance as a fixed-length window of length $$L$$, consisting of (i) continuous channels $$X\in\mathbb{R}^{L\times d_c}$$ and (ii) discrete channels $$Y=\{{y^{(j)}_{1:L}}\}_{j=1}^{d_d}$$, where each discrete variable $$y^{(j)}_t\in\mathcal{V}_j$$ belongs to a finite vocabulary $$\mathcal{V}_j$$. Our objective is to learn a generator that produces synthetic $$(\hat{X},\hat{Y})$$ that are simultaneously coherent and distributionally faithful, while also ensuring $$\hat{y}^{(j)}_t\in\mathcal{V}_j$$ for all $$j, t$$ by construction (rather than via post-hoc rounding or thresholding).
 A key empirical and methodological tension in ICS synthesis is that temporal realism and marginal/distributional realism can compete when optimized monolithically: sequence models trained primarily for regression often over-smooth heavy tails and intermittent bursts, while purely distribution-matching objectives can erode long-range structure. Diffusion models provide a principled route to rich distribution modeling through iterative denoising, but they do not, by themselves, resolve (i) the need for a stable low-frequency temporal scaffold, nor (ii) the discrete legality constraints for supervisory variables. [2,8] Recent time-series diffusion work further suggests that separating coarse structure from stochastic refinement can be an effective inductive bias for long-horizon realism. [6,7]
 [图片]
 **PLACEHOLDER_ONLY_DO_NOT_USE_IN_REAL_PAPER**
 Motivated by these considerations, we propose Mask-DDPM, organized in the following order:
 1. Transformer trend module: learns the dominant temporal backbone of continuous dynamics via attention-based sequence modeling [1].
 2. Residual DDPM for continuous variables: models distributional detail as stochastic residual structure conditioned on the learned trend [2, 6].
 3. Masked diffusion for discrete variables: generates discrete ICS states with an absorbing/masking corruption process and categorical reconstruction [3,4].
 4.  Type-aware decomposition: a type-aware factorization and routing layer that assigns variables to the most appropriate modeling mechanism and enforces deterministic constraints where warranted.
 This ordering is intentional. The trend module establishes a macro-temporal scaffold; residual diffusion then concentrates capacity on micro-structure and marginal fidelity; masked diffusion provides a native mechanism for discrete legality; and the type-aware layer operationalizes the observation that not all ICS variables should be modeled with the same stochastic mechanism. Importantly, while diffusion-based generation for ICS telemetry has begun to emerge, existing approaches remain limited and typically emphasize continuous synthesis or augmentation; in contrast, our pipeline integrates (i) a Transformer-conditioned residual diffusion backbone, (ii) a discrete masked-diffusion branch, and (iii) explicit type-aware routing for heterogeneous variable mechanisms within a single coherent generator. [10,11]
 ---
 Transformer trend module for continuous dynamics
 We instantiate the temporal backbone as a causal Transformer trend extractor, leveraging self-attention’s ability to represent long-range dependencies and cross-channel interactions without recurrence. [1] Compared with recurrent trend extractors (e.g., GRU-style backbones), a Transformer trend module offers a direct mechanism to model delayed effects and multivariate coupling—common in ICS, where control actions may influence downstream sensors with nontrivial lags and regime-dependent propagation. [1,12] Crucially, in our design the Transformer is not asked to be the entire generator; instead, it serves a deliberately restricted role: providing a stable, temporally coherent conditioning signal that later stochastic components refine.
 For continuous channels $$X$$, we posit an additive decomposition 
 $$X = S + R$$ , 
 where $$S\in\mathbb{R}^{L\times d_c}$$ is a smooth trend capturing predictable temporal evolution, and $$R\in\mathbb{R}^{L\times d_c}$$ is a residual capturing distributional detail (e.g., bursts, heavy tails, local fluctuations) that is difficult to represent robustly with a purely regression-based temporal objective. This separation reflects an explicit division of labor: the trend module prioritizes temporal coherence, while diffusion (introduced next) targets distributional realism at the residual level—a strategy aligned with “predict-then-refine” perspectives in time-series diffusion modeling. [6,7]
 We parameterize the trend $$S$$ using a causal Transformer $$f_\phi$$. With teacher forcing, we train $$f_\phi$$ to predict the next-step trend from past observations:
 $$\hat{S}_{t+1} = f_\phi(X_{1:t}), \qquad t=1,\dots,L-1,$$
 using the mean-squared error objective
 $$\mathcal{L}_{trend}(\phi) = \frac{1}{(L-1)d_c}\sum_{t=1}^{L-1}\left\| \hat{S}_{t+1} - X_{t+1}\right\|_2^2$$
 At inference, we roll out the Transformer autoregressively to obtain $$\hat{S}$$ , and then define the residual target for diffusion as $$R = X - \hat{S}$$. This setup intentionally “locks in” a coherent low-frequency scaffold before any stochastic refinement is applied, thereby reducing the burden on downstream diffusion modules to simultaneously learn both long-range structure and marginal detail. In this sense, our use of Transformers is distinctive: it is a conditioning-first temporal backbone designed to stabilize mixed-type diffusion synthesis in ICS, rather than an end-to-end monolithic generator. [1,6,10]
 DDPM for continuous residual generation
 We model the residual $$R$$ with a denoising diffusion probabilistic model (DDPM) conditioned on the trend $$\hat{S}$$. [2] Diffusion models learn complex data distributions by inverting a tractable noising process through iterative denoising, and have proven effective at capturing multimodality and heavy-tailed structure that is often attenuated by purely regression-based sequence models. [2,8] Conditioning the diffusion model on $$\hat{S}$$ is central: it prevents the denoiser from re-learning the low-frequency scaffold and focuses capacity on residual micro-structure, mirroring the broader principle that diffusion excels as a distributional corrector when a reasonable coarse structure is available. [6,7]
 Let $$K$$ denote the number of diffusion steps, with a noise schedule $$\{\beta_k\}_{k=1}^K$$, $$\alpha_k = 1-\beta_k$$, and $$\bar{\alpha}_k=\prod_{i=1}^k \alpha_i$$ . The forward corruption process is:
 $$q(r_k\mid r_0)=\mathcal{N}\left(\sqrt{\bar{\alpha}_k}r_0,\ (1-\bar{\alpha}_k)\mathbf{I}\right)$$
 equivalently,
 $$r_k = \sqrt{\bar{\alpha}_k}r_0 + \sqrt{1-\bar{\alpha}_k}\epsilon,\qquad \epsilon\sim\mathcal{N}(0,\mathbf{I})$$
 The learned reverse process is parameterized as
 $$p_{\theta}(r_{k-1}\mid r_k,\hat{S})=\mathcal{N}\left(\mu_{\theta}(r_k,k,\hat{S}),\ \Sigma_{\theta}(k)\right)$$
 where $$\mu_\theta$$ is implemented by a Transformer denoiser that consumes (i) the noised residual $$r_k$$, (ii) a timestep embedding for $$k$$, and (iii) conditioning features derived from $$\hat{S}$$. This denoiser architecture is consistent with the growing use of attention-based denoisers for long-context time-series diffusion, while our key methodological emphasis is the trend-conditioned residual factorization as the object of diffusion learning. [2,7]
 We train the denoiser using the standard DDPM $$\epsilon$$-prediction objective:
 $$\mathcal{L}_{\text{cont}}(\theta)
 =
 \mathbb{E}_{k,r_0,\epsilon}
 \left[
 \left \|
 \epsilon - \epsilon_{\theta}(r_k,k,\hat{S})
 \right \|_2^2
 \right]$$
 Because diffusion optimization can exhibit timestep imbalance (i.e., some timesteps dominate gradients), we optionally apply an SNR-based reweighting consistent with Min-SNR training:
 $$\mathcal{L}^{\text{snr}}_{\text{cont}}(\theta)
 =
 \mathbb{E}_{k,r_0,\epsilon}
 \left[
 w(k)\left\|
 \epsilon - \epsilon_{\theta}(r_k,k,\hat{S})
 \right\|_2^2
 \right],
 \qquad
 w(k)=\frac{\mathrm{SNR}_k}{\mathrm{SNR}_k+\gamma}$$
 where $$\mathrm{SNR}_k=\bar{\alpha}_k/(1-\bar{\alpha}_k)$$ and $$\gamma>0$$ is a cap parameter. [5]
 After sampling $$\hat{R}$$ by reverse diffusion, we reconstruct the continuous output as
 $$\hat{X} = \hat{S} + \hat{R}$$ . 
 Overall, the DDPM component serves as a distributional corrector on top of a temporally coherent backbone, which is particularly suited to ICS where low-frequency dynamics are strong and persistent but fine-scale variability (including bursts and regime-conditioned noise) remains important for realism. Relative to prior ICS diffusion efforts that primarily focus on continuous augmentation, our formulation elevates trend-conditioned residual diffusion as a modular mechanism for disentangling temporal structure from distributional refinement. [10,11]
 Masked diffusion for discrete ICS variables
 Discrete ICS variables must remain categorical, making Gaussian diffusion inappropriate for supervisory states and mode-like channels. While one can attempt continuous relaxations or post-hoc discretization, such strategies risk producing semantically invalid intermediate states (e.g., “in-between” modes) and can distort the discrete marginal distribution. Discrete-state diffusion provides a principled alternative by defining a valid corruption process directly on categorical variables. [3,4] In the ICS setting, this is not a secondary detail: supervisory tags often encode control logic boundaries (modes, alarms, interlocks) that must remain within a finite vocabulary to preserve semantic correctness. [12]
 We therefore adopt masked (absorbing) diffusion for discrete channels, where corruption replaces tokens with a special $$\texttt{[MASK]}$$ symbol according to a schedule. [4] For each variable $$j$$, define a masking schedule $$\{m_k\}_{k=1}^K$$ (with $$m_k\in[0,1]$$) increasing in $$k$$. The forward corruption process is
 $$q(y^{(j)}_k \mid y^{(j)}_0)=
 \begin{cases}
 y^{(j)}_0, & \text{with probability } 1-m_k,\\
 \texttt{[MASK]}, & \text{with probability } m_k,
 \end{cases}$$
 applied independently across $$j$$ and $$t$$. Let $$\mathcal{M}$$ denote the set of masked positions at step $$k$$. The denoiser $$h_{\psi}$$ predicts a categorical distribution over $$\mathcal{V}_j$$ for each masked token, conditioned on (i) the corrupted discrete sequence, (ii) the diffusion step $$k$$, and (iii) continuous context. Concretely, we condition on $$\hat{S}$$ and $$\hat{X}$$ to couple supervisory reconstruction to the underlying continuous dynamics:
 $$p_{\psi}\left(y^{(j)}_0 \mid y_k, k, \hat{S}, \hat{X}\right)
 = h_{\psi}(y_k,k,\hat{S},\hat{X}).$$
 This conditioning choice is motivated by the fact that many discrete ICS states are not standalone; they are functions of regimes, thresholds, and procedural phases that manifest in continuous channels. [12]
 Training uses a categorical denoising objective:
 $$\mathcal{L}_{\text{disc}}(\psi)
 =
 \mathbb{E}_{k}
 \left[
 \frac{1}{|\mathcal{M}|}\sum_{(j,t)\in\mathcal{M}}
 \mathrm{CE}\left(
 h_\psi\left(y_k,k,\hat{S},\hat{X}\right)_{j,t},\ y^{(j)}_{0,t}
 \right)
 \right]$$
 where $$\mathrm{CE}(\cdot,\cdot)$$ is cross-entropy. At sampling time, we initialize all discrete tokens as $$\texttt{[MASK]}$$ and iteratively unmask them using the learned conditionals, ensuring that every output token lies in its legal vocabulary by construction. This discrete branch is a key differentiator of our pipeline: unlike typical continuous-only diffusion augmentation in ICS, we integrate masked diffusion as a first-class mechanism for supervisory-variable legality within the same end-to-end synthesis workflow. [4,10]
 Type-aware decomposition as factorization and routing layer
 Even with a trend-conditioned residual DDPM and a discrete masked-diffusion branch, a single uniform modeling treatment can remain suboptimal because ICS variables are generated by qualitatively different mechanisms. For example, program-driven setpoints exhibit step-and-dwell dynamics; controller outputs follow control laws conditioned on process feedback; actuator positions may show saturation and dwell; and some “derived tags” are deterministic functions of other channels. Treating all channels as if they were exchangeable stochastic processes can misallocate model capacity and induce systematic error concentration on a small subset of mechanistically distinct variables. [12]
 We therefore introduce a type-aware decomposition that formalizes this heterogeneity as a routing and constraint layer. Let $$\tau(i)\in\{1,\dots,6\}$$ assign each variable $$i$$ to a type class. The type assignment can be initialized from domain semantics (tag metadata, value domains, and engineering meaning), and subsequently refined via an error-attribution workflow described in the Benchmark section. Importantly, this refinement does not change the core diffusion backbone; it changes which mechanism is responsible for which variable, thereby aligning inductive bias with variable-generating mechanism while preserving overall coherence.
 We use the following taxonomy:
 - Type 1 (program-driven / setpoint-like): externally commanded, step-and-dwell variables. These variables can be treated as exogenous drivers (conditioning signals) or routed to specialized change-point / dwell-time models, rather than being forced into a smooth denoiser that may over-regularize step structure.
 - Type 2 (controller outputs): continuous variables tightly coupled to feedback loops; these benefit from conditional modeling where the conditioning includes relevant process variables and commanded setpoints.
 - Type 3 (actuator states/positions): often exhibit saturation, dwell, and rate limits; these may require stateful dynamics beyond generic residual diffusion, motivating either specialized conditional modules or additional inductive constraints.
 - Type 4 (process variables): inertia-dominated continuous dynamics; these are the primary beneficiaries of the Transformer trend + residual DDPM pipeline. 
 - Type 5 (derived/deterministic variables): algebraic or rule-based functions of other variables; we enforce deterministic reconstruction $$\hat{x}^{(i)} = g_i(\hat{X},\hat{Y})$$ rather than learning a stochastic generator, improving logical consistency and sample efficiency.
 - Type 6 (auxiliary/low-impact variables): weakly coupled or sparse signals; we allow simplified modeling (e.g., calibrated marginals or lightweight temporal models) to avoid allocating diffusion capacity where it is not warranted.
 Type-aware decomposition improves synthesis quality through three mechanisms. First, it improves capacity allocation by preventing a small set of mechanistically atypical variables from dominating gradients and distorting the learned distribution for the majority class (typically Type 4). Second, it enables constraint enforcement by deterministically reconstructing Type 5 variables, preventing logically inconsistent samples that purely learned generators can produce. Third, it improves mechanism alignment by attaching inductive biases consistent with step/dwell or saturation behaviors where generic denoisers may implicitly favor smoothness.
 From a novelty standpoint, this layer is not merely an engineering “patch”; it is an explicit methodological statement that ICS synthesis benefits from typed factorization—a principle that has analogues in mixed-type generative modeling more broadly, but that remains underexplored in diffusion-based ICS telemetry synthesis. [9,10,12]
 Joint optimization and end-to-end sampling
 We train the model in a staged manner consistent with the above factorization, which improves optimization stability and encourages each component to specialize in its intended role. Specifically: (i) we train the trend Transformer $$f_{\phi}$$ to obtain $$\hat{S}$$; (ii) we compute residual targets $$R=X-\hat{S}$$ for the continuous variables routed to residual diffusion; (iii) we train the residual DDPM $$p_{\theta}(R\mid \hat{S})$$ and masked diffusion model $$p_{\psi}(Y\mid \text{masked}(Y), \hat{S}, \hat{X})$$; and (iv) we apply type-aware routing and deterministic reconstruction during sampling. This staged strategy is aligned with the design goal of separating temporal scaffolding from distributional refinement, and it mirrors the broader intuition in time-series diffusion that decoupling coarse structure and stochastic detail can mitigate “structure vs. realism” conflicts. [6,7]
 A simple combined objective is
 $$\mathcal{L} = \lambda\mathcal{L}_{\text{cont}} + (1-\lambda)\mathcal{L}_{\text{disc}}$$, 
 with $$\lambda\in[0,1]$$ controlling the balance between continuous and discrete learning. Type-aware routing determines which channels contribute to which loss and which are excluded in favor of deterministic reconstruction. In practice, this routing acts as a principled guardrail against negative transfer across variable mechanisms: channels that are best handled deterministically (Type 5) or by specialized drivers (Type 1/3, depending on configuration) are prevented from forcing the diffusion models into statistically incoherent compromises.
 At inference time, generation follows the same structured order: (i) trend $$\hat{S}$$ via the Transformer, (ii) residual $$\hat{R}$$ via DDPM, (iii) discrete $$\hat{Y}$$ via masked diffusion, and (iv) type-aware assembly with deterministic reconstruction for routed variables. This pipeline produces $$(\hat{X},\hat{Y})$$ that are temporally coherent by construction (through $$\hat{S}$$), distributionally expressive (through $$\hat{R}$$ denoising), and discretely valid (through masked diffusion), while explicitly accounting for heterogeneous variable-generating mechanisms through type-aware routing. In combination, these choices constitute our central methodological contribution: a unified Transformer + mixed diffusion generator for ICS telemetry, augmented by typed factorization to align model capacity with domain mechanism. [2,4,10,12]
 References for Methodology Part
 [1] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., Kaiser, Ł., & Polosukhin, I. Attention Is All You Need. Advances in Neural Information Processing Systems (NeurIPS), 30, 2017.
 🔗 https://arxiv.org/abs/1706.03762 | https://proceedings.neurips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html
 [2] Ho, J., Jain, A., & Abbeel, P. Denoising Diffusion Probabilistic Models. Advances in Neural Information Processing Systems (NeurIPS), 33, 2020.
 🔗 https://arxiv.org/abs/2006.11239 | https://proceedings.neurips.cc/paper/2020/file/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf
 [3] Austin, J., Johnson, D. D., Ho, J., Tarlow, D., & van den Berg, R. Structured Denoising Diffusion Models in Discrete State-Spaces. Advances in Neural Information Processing Systems (NeurIPS), 34, 2021.
 🔗 https://arxiv.org/abs/2107.03006 | https://proceedings.neurips.cc/paper/2021/hash/958c530554f78bcd8e97125b70e6973d-Abstract.html
 [4] Shi, J., Han, K., Wang, Z., Doucet, A., & Titsias, M. K. Simplified and Generalized Masked Diffusion for Discrete Data. arXiv preprint arXiv:2406.04329, 2024.
 🔗 https://arxiv.org/abs/2406.04329
 [5] Hang, T., Gu, S., Li, C., Bao, J., Chen, D., Hu, H., Geng, X., & Guo, B. Efficient Diffusion Training via Min-SNR Weighting Strategy. IEEE/CVF International Conference on Computer Vision (ICCV), pp. 7407–7417, 2023.
 🔗 https://arxiv.org/abs/2303.09556 | https://openaccess.thecvf.com/content/ICCV2023/html/Hang_Efficient_Diffusion_Training_via_Min-SNR_Weighting_Strategy_ICCV_2023_paper.html
 [6] Kollovieh, M., Ansari, A. F., Bohlke-Schneider, M., Zschiegner, J., Wang, H., & Wang, Y. Predict, Refine, Synthesize: Self-Guiding Diffusion Models for Probabilistic Time Series Forecasting. Advances in Neural Information Processing Systems (NeurIPS), 36, 2023.
 🔗 https://arxiv.org/abs/2307.11494 | https://proceedings.neurips.cc/paper_files/paper/2023/hash/5a1a10c2c2c9b9af1514687bc24b8f3d-Abstract-Conference.html
 [7] Sikder, M. F., Ramachandranpillai, R., & Heintz, F. TransFusion: Generating Long, High Fidelity Time Series using Diffusion Models with Transformers. arXiv preprint arXiv:2307.12667, 2023.
 🔗 https://arxiv.org/abs/2307.12667
 [8] Song, Y., Sohl-Dickstein, J., Kingma, D. P., Kumar, A., Ermon, S., & Poole, B. Score-Based Generative Modeling through Stochastic Differential Equations. International Conference on Learning Representations (ICLR), 2021.
 🔗 https://arxiv.org/abs/2011.13456 | https://openreview.net/forum?id=PxTIG12RRHS
 [9] Shi, J., Xu, M., Hua, H., Zhang, H., Ermon, S., & Leskovec, J. TabDiff: a Mixed-type Diffusion Model for Tabular Data Generation. International Conference on Learning Representations (ICLR), 2025.
 🔗 https://arxiv.org/abs/2410.20626 | https://openreview.net/forum?id=swvURjrt8z
 Note: First author is Juntong Shi (not Zhang); title uses "Mixed-type" (v3+ of arXiv preprint)
 [10] Yuan, Y., Sha, Y., Zhao, W., & Zhang, K. CTU-DDPM: Generating Industrial Control System Time-Series Data with a CNN-Transformer Hybrid Diffusion Model. Proceedings of the 2025 International Symposium on Artificial Intelligence and Computational Social Sciences (ACM AICSS '25), pp. 123–132, 2025. DOI:10.1145/3776759.3776845.
 🔗 https://dl.acm.org/doi/10.1145/3776759.3776845
 Note: Correct title does not contain "Conditional Transformer U-net"; authors include Yusong Yuan and Yun Sha
 [11] Sha, Y., Yuan, Y., Wu, Y., & Zhao, H. DDPM Fusing Mamba and Adaptive Attention: An Augmentation Method for Industrial Control Systems Anomaly Data. SSRN Electronic Journal, posted January 10, 2026. SSRN ID: 6055903. DOI:10.2139/ssrn.6055903.
 🔗 https://papers.ssrn.com/sol3/papers.cfm?abstract_id=6055903
 Note: This is a preprint (not peer-reviewed); SSRN entry exists with Jan 10, 2026 posting date
 [12] Stouffer, K., Lightman, S., Pillitteri, L., Abrams, M., Hahn, A., & Smith, J. Guide to Operational Technology (OT) Security (NIST Special Publication 800-82 Rev. 3). National Institute of Standards and Technology, September 2023.
 🔗 https://csrc.nist.gov/pubs/sp/800/82/r3/final
 Benchmark
 Future works
 Conclusion