优化6个类,现在ks降低到0.28,史称3.0版本

This commit is contained in:
2026-01-28 20:10:42 +08:00
parent 59697c0640
commit 39eede92f6
28 changed files with 3317 additions and 225 deletions

102
example/pv_stats.py Normal file
View File

@@ -0,0 +1,102 @@
#!/usr/bin/env python3
"""Stats for PV sensors (Type 4) with heavy tails / regime changes."""
import argparse
import csv
import gzip
import json
import math
from pathlib import Path
from typing import Dict, List, Optional, Sequence
def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of the PV stats script.

    Default paths are built from the directory that contains this file, so
    they do not depend on the current working directory.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``; an explicit list allows the parser to be
            driven programmatically, e.g. from tests.

    Returns:
        Namespace with the string options ``generated``, ``reference``,
        ``features``, ``config`` and ``out``, plus the integer ``max_rows``.
    """
    base_dir = Path(__file__).resolve().parent
    results_dir = base_dir / "results"
    default_config = str(base_dir / "config.json")

    parser = argparse.ArgumentParser(description="PV stats.")
    parser.add_argument("--generated", default=str(results_dir / "generated.csv"))
    # --reference and --config default to the same config.json path.
    parser.add_argument("--reference", default=default_config)
    parser.add_argument("--features", default="", help="comma-separated list")
    parser.add_argument("--config", default=default_config)
    parser.add_argument("--out", default=str(results_dir / "pv_stats.json"))
    parser.add_argument("--max-rows", type=int, default=200000)
    return parser.parse_args(argv)
def resolve_reference_glob(ref_arg: str) -> str:
    """Translate the ``--reference`` argument into a path or glob string.

    A value that does not end in ``.json`` is returned as a plain path
    string. A ``.json`` value is read as a config file: its ``data_glob``
    entry (or, failing that, ``data_path``) is joined onto the config file's
    directory. If the joined string contains ``*`` or ``?`` it is returned
    untouched; otherwise it is resolved to an absolute path first.

    Raises:
        SystemExit: the config holds neither ``data_glob`` nor ``data_path``
            (or both are empty).
    """
    source = Path(ref_arg)
    if source.suffix != ".json":
        return str(source)

    settings = json.loads(source.read_text(encoding="utf-8"))
    pattern = settings.get("data_glob") or settings.get("data_path") or ""
    if not pattern:
        raise SystemExit("reference config has no data_glob/data_path")

    candidate = source.parent / pattern
    as_text = str(candidate)
    # Wildcard patterns are kept verbatim; only concrete paths are resolved.
    has_wildcard = any(ch in as_text for ch in "*?")
    return as_text if has_wildcard else str(candidate.resolve())
def read_series(path: Path, cols: List[str], max_rows: int) -> Dict[str, List[float]]:
    """Read the requested numeric columns from a CSV file, best effort.

    Files whose name ends in ``.gz`` are opened through ``gzip``; everything
    else is opened as plain text. Cells that cannot be turned into a float
    are skipped individually, so the returned lists may differ in length.

    Args:
        path: CSV file with a header row.
        cols: Column names to collect.
        max_rows: Stop after this many data rows; a value <= 0 reads the
            whole file.

    Returns:
        Mapping from each requested column name to its parsed values, in
        file order. A column that is missing from the file maps to ``[]``.
    """
    series: Dict[str, List[float]] = {name: [] for name in cols}
    opener = gzip.open if str(path).endswith(".gz") else open
    with opener(path, "rt", newline="") as fh:
        for row_number, row in enumerate(csv.DictReader(fh), start=1):
            for name in cols:
                try:
                    value = float(row[name])
                except (KeyError, TypeError, ValueError):
                    # KeyError: column not in the header.
                    # TypeError: short row, DictReader filled the cell with None.
                    # ValueError: cell text is not a number.
                    # Anything else is a real bug and is allowed to propagate.
                    continue
                series[name].append(value)
            if max_rows > 0 and row_number >= max_rows:
                break
    return series
def quantile_stats(series: List[float]) -> Dict[str, Optional[float]]:
    """Summarise a series by its 5%, 50% and 95% quantiles and a tail ratio.

    Quantiles are picked by nearest rank from the sorted values (no
    interpolation). ``tail_ratio`` is ``(q95 - q50) / (q50 - q05)``, i.e. the
    width of the upper tail relative to the lower one.

    NaN and infinite values are ignored: a NaN makes the sort order
    undefined, which would corrupt every quantile, and infinities can turn
    the ratio into NaN.

    Args:
        series: Numeric samples; may be empty.

    Returns:
        Dict with keys ``q05``, ``q50``, ``q95`` and ``tail_ratio``. All four
        are ``None`` when no finite sample is available; ``tail_ratio`` alone
        is ``None`` when ``q50 == q05`` (the ratio is undefined).
    """
    xs = sorted(v for v in series if math.isfinite(v))
    if not xs:
        return {"q05": None, "q50": None, "q95": None, "tail_ratio": None}
    last = len(xs) - 1

    def q(p):
        # Nearest-rank index, clamped to the valid range.
        idx = int(round(p * last))
        return xs[max(0, min(last, idx))]

    q05 = q(0.05)
    q50 = q(0.5)
    q95 = q(0.95)
    lower_width = q50 - q05
    tail_ratio = (q95 - q50) / lower_width if lower_width != 0 else None
    return {"q05": q05, "q50": q50, "q95": q95, "tail_ratio": tail_ratio}
def main():
    """Compute quantile stats for generated and reference data and save them.

    Steps:
      1. Take the feature list from ``--features``; if that is empty and the
         ``--config`` file exists, fall back to its ``type4_features`` entry.
      2. Read those columns from the generated CSV.
      3. Expand the reference location into files and pool their columns
         (``--max-rows`` is applied to each file separately).
      4. Write per-feature quantile stats for both sides as JSON to ``--out``.

    Raises:
        SystemExit: no features could be determined, or no reference file
            matched.
    """
    args = parse_args()

    requested = (part.strip() for part in args.features.split(","))
    features = [name for name in requested if name]
    config_path = Path(args.config)
    if not features and config_path.exists():
        config = json.loads(config_path.read_text(encoding="utf-8"))
        features = config.get("type4_features", []) or []
    if not features:
        raise SystemExit("no features specified for pv_stats")

    generated = read_series(Path(args.generated), features, args.max_rows)

    ref_glob = resolve_reference_glob(args.reference)
    pattern = Path(ref_glob)
    # Only the final path component is used as the glob pattern.
    reference_files = sorted(pattern.parent.glob(pattern.name))
    if not reference_files:
        raise SystemExit(f"no reference files matched: {ref_glob}")

    reference = {name: [] for name in features}
    for ref_file in reference_files:
        chunk = read_series(ref_file, features, args.max_rows)
        for name in features:
            reference[name].extend(chunk[name])

    report = {
        "features": features,
        "generated": {name: quantile_stats(generated[name]) for name in features},
        "reference": {name: quantile_stats(reference[name]) for name in features},
    }

    destination = Path(args.out)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(json.dumps(report, indent=2), encoding="utf-8")
    print("wrote", destination)
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()