forked from manbo/internal-docs
新版pdf
This commit is contained in:
@@ -16,7 +16,7 @@
|
|||||||
\citation{Lin_2020}
|
\citation{Lin_2020}
|
||||||
\citation{7469060,10.1145/3055366.3055375}
|
\citation{7469060,10.1145/3055366.3055375}
|
||||||
\citation{NEURIPS2020_4c5bcfec}
|
\citation{NEURIPS2020_4c5bcfec}
|
||||||
\citation{song2021scorebasedgenerativemodelingstochastic}
|
\citation{song2021score}
|
||||||
\citation{rasul2021autoregressivedenoisingdiffusionmodels}
|
\citation{rasul2021autoregressivedenoisingdiffusionmodels}
|
||||||
\citation{tashiro2021csdiconditionalscorebaseddiffusion}
|
\citation{tashiro2021csdiconditionalscorebaseddiffusion}
|
||||||
\citation{wen2024diffstgprobabilisticspatiotemporalgraph}
|
\citation{wen2024diffstgprobabilisticspatiotemporalgraph}
|
||||||
@@ -147,18 +147,17 @@
|
|||||||
\bibcite{shi2025tabdiff}{32}
|
\bibcite{shi2025tabdiff}{32}
|
||||||
\bibcite{shin}{33}
|
\bibcite{shin}{33}
|
||||||
\bibcite{sikder2023transfusion}{34}
|
\bibcite{sikder2023transfusion}{34}
|
||||||
\bibcite{song2021scorebasedgenerativemodelingstochastic}{35}
|
\bibcite{song2021score}{35}
|
||||||
\bibcite{song2021score}{36}
|
\bibcite{stenger2024survey}{36}
|
||||||
\bibcite{stenger2024survey}{37}
|
\bibcite{tashiro2021csdiconditionalscorebaseddiffusion}{37}
|
||||||
\bibcite{tashiro2021csdiconditionalscorebaseddiffusion}{38}
|
\bibcite{vaswani2017attention}{38}
|
||||||
\bibcite{vaswani2017attention}{39}
|
\bibcite{10.1145/1151659.1159928}{39}
|
||||||
\bibcite{10.1145/1151659.1159928}{40}
|
\bibcite{wen2024diffstgprobabilisticspatiotemporalgraph}{40}
|
||||||
\bibcite{wen2024diffstgprobabilisticspatiotemporalgraph}{41}
|
\bibcite{wu2022autoformerdecompositiontransformersautocorrelation}{41}
|
||||||
\bibcite{wu2022autoformerdecompositiontransformersautocorrelation}{42}
|
\bibcite{yang2001interlock}{42}
|
||||||
\bibcite{yang2001interlock}{43}
|
\bibcite{10.1145/3544216.3544251}{43}
|
||||||
\bibcite{10.1145/3544216.3544251}{44}
|
\bibcite{yoon2019timegan}{44}
|
||||||
\bibcite{yoon2019timegan}{45}
|
\bibcite{yuan2025ctu}{45}
|
||||||
\bibcite{yuan2025ctu}{46}
|
\bibcite{zhou2021informerefficienttransformerlong}{46}
|
||||||
\bibcite{zhou2021informerefficienttransformerlong}{47}
|
\bibcite{zhou2022fedformerfrequencyenhanceddecomposed}{47}
|
||||||
\bibcite{zhou2022fedformerfrequencyenhanceddecomposed}{48}
|
|
||||||
\gdef \@abspage@last{21}
|
\gdef \@abspage@last{21}
|
||||||
|
|||||||
@@ -228,11 +228,6 @@ Sikder, M.F., Ramachandranpillai, R., Heintz, F.: Transfusion: Generating long,
|
|||||||
\doi{https://doi.org/10.1016/j.mlwa.2025.100652},
|
\doi{https://doi.org/10.1016/j.mlwa.2025.100652},
|
||||||
\url{https://www.sciencedirect.com/science/article/pii/S2666827025000350}
|
\url{https://www.sciencedirect.com/science/article/pii/S2666827025000350}
|
||||||
|
|
||||||
\bibitem{song2021scorebasedgenerativemodelingstochastic}
|
|
||||||
Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.:
|
|
||||||
Score-based generative modeling through stochastic differential equations
|
|
||||||
(2021), \url{https://arxiv.org/abs/2011.13456}
|
|
||||||
|
|
||||||
\bibitem{song2021score}
|
\bibitem{song2021score}
|
||||||
Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.:
|
Song, Y., Sohl-Dickstein, J., Kingma, D.P., Kumar, A., Ermon, S., Poole, B.:
|
||||||
Score-based generative modeling through stochastic differential equations
|
Score-based generative modeling through stochastic differential equations
|
||||||
|
|||||||
@@ -5,44 +5,44 @@ Reallocating 'name_of_file' (item size: 1) to 9 items.
|
|||||||
The style file: splncs04.bst
|
The style file: splncs04.bst
|
||||||
Reallocating 'name_of_file' (item size: 1) to 11 items.
|
Reallocating 'name_of_file' (item size: 1) to 11 items.
|
||||||
Database file #1: references.bib
|
Database file #1: references.bib
|
||||||
You've used 48 entries,
|
You've used 47 entries,
|
||||||
2850 wiz_defined-function locations,
|
2850 wiz_defined-function locations,
|
||||||
923 strings with 18565 characters,
|
922 strings with 18519 characters,
|
||||||
and the built_in function-call counts, 36804 in all, are:
|
and the built_in function-call counts, 36282 in all, are:
|
||||||
= -- 2972
|
= -- 2938
|
||||||
> -- 1473
|
> -- 1441
|
||||||
< -- 64
|
< -- 64
|
||||||
+ -- 575
|
+ -- 562
|
||||||
- -- 526
|
- -- 514
|
||||||
* -- 2581
|
* -- 2543
|
||||||
:= -- 4611
|
:= -- 4540
|
||||||
add.period$ -- 115
|
add.period$ -- 115
|
||||||
call.type$ -- 48
|
call.type$ -- 47
|
||||||
change.case$ -- 419
|
change.case$ -- 410
|
||||||
chr.to.int$ -- 0
|
chr.to.int$ -- 0
|
||||||
cite$ -- 48
|
cite$ -- 47
|
||||||
duplicate$ -- 3146
|
duplicate$ -- 3107
|
||||||
empty$ -- 3007
|
empty$ -- 2964
|
||||||
format.name$ -- 589
|
format.name$ -- 576
|
||||||
if$ -- 8031
|
if$ -- 7925
|
||||||
int.to.chr$ -- 0
|
int.to.chr$ -- 0
|
||||||
int.to.str$ -- 48
|
int.to.str$ -- 47
|
||||||
missing$ -- 693
|
missing$ -- 683
|
||||||
newline$ -- 150
|
newline$ -- 147
|
||||||
num.names$ -- 130
|
num.names$ -- 128
|
||||||
pop$ -- 1246
|
pop$ -- 1219
|
||||||
preamble$ -- 1
|
preamble$ -- 1
|
||||||
purify$ -- 314
|
purify$ -- 306
|
||||||
quote$ -- 0
|
quote$ -- 0
|
||||||
skip$ -- 960
|
skip$ -- 951
|
||||||
stack$ -- 0
|
stack$ -- 0
|
||||||
substring$ -- 1960
|
substring$ -- 1947
|
||||||
swap$ -- 1955
|
swap$ -- 1934
|
||||||
text.length$ -- 64
|
text.length$ -- 64
|
||||||
text.prefix$ -- 0
|
text.prefix$ -- 0
|
||||||
top$ -- 0
|
top$ -- 0
|
||||||
type$ -- 192
|
type$ -- 188
|
||||||
warning$ -- 0
|
warning$ -- 0
|
||||||
while$ -- 241
|
while$ -- 238
|
||||||
width$ -- 50
|
width$ -- 49
|
||||||
write$ -- 595
|
write$ -- 587
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
This is pdfTeX, Version 3.141592653-2.6-1.40.28 (MiKTeX 25.12) (preloaded format=pdflatex 2026.4.14) 20 APR 2026 23:46
|
This is pdfTeX, Version 3.141592653-2.6-1.40.28 (MiKTeX 25.12) (preloaded format=pdflatex 2026.4.14) 21 APR 2026 00:08
|
||||||
entering extended mode
|
entering extended mode
|
||||||
restricted \write18 enabled.
|
restricted \write18 enabled.
|
||||||
%&-line parsing enabled.
|
%&-line parsing enabled.
|
||||||
@@ -706,13 +706,13 @@ lmtt/m/n/9 https : / / www .
|
|||||||
[]
|
[]
|
||||||
|
|
||||||
[19]
|
[19]
|
||||||
Underfull \hbox (badness 2229) in paragraph at lines 263--267
|
Underfull \hbox (badness 2229) in paragraph at lines 258--262
|
||||||
\T1/lmr/m/n/9 er-a-tion. SIG-COMM Com-put. Com-mun. Rev. \T1/lmr/bx/n/9 36\T1/l
|
\T1/lmr/m/n/9 er-a-tion. SIG-COMM Com-put. Com-mun. Rev. \T1/lmr/bx/n/9 36\T1/l
|
||||||
mr/m/n/9 (4), 111^^U122 (Aug 2006).
|
mr/m/n/9 (4), 111^^U122 (Aug 2006).
|
||||||
[]
|
[]
|
||||||
|
|
||||||
|
|
||||||
Underfull \hbox (badness 10000) in paragraph at lines 263--267
|
Underfull \hbox (badness 10000) in paragraph at lines 258--262
|
||||||
\T1/lmr/m/n/9 https://doi.org/10.1145/1151659.1159928, $\T1/lmtt/m/n/9 https :
|
\T1/lmr/m/n/9 https://doi.org/10.1145/1151659.1159928, $\T1/lmtt/m/n/9 https :
|
||||||
/ / doi . org / 10 . 1145 / 1151659 .
|
/ / doi . org / 10 . 1145 / 1151659 .
|
||||||
[]
|
[]
|
||||||
@@ -724,10 +724,10 @@ L3 programming layer <2025-12-24>
|
|||||||
***********
|
***********
|
||||||
)
|
)
|
||||||
Here is how much of TeX's memory you used:
|
Here is how much of TeX's memory you used:
|
||||||
6343 strings out of 467871
|
6342 strings out of 467871
|
||||||
97437 string characters out of 5435199
|
97389 string characters out of 5435199
|
||||||
552261 words of memory out of 5000000
|
552257 words of memory out of 5000000
|
||||||
35107 multiletter control sequences out of 15000+600000
|
35106 multiletter control sequences out of 15000+600000
|
||||||
706871 words of font info for 99 fonts, out of 8000000 for 9000
|
706871 words of font info for 99 fonts, out of 8000000 for 9000
|
||||||
1141 hyphenation exceptions out of 8191
|
1141 hyphenation exceptions out of 8191
|
||||||
57i,9n,65p,2477b,352s stack positions out of 10000i,1000n,20000p,200000b,200000s
|
57i,9n,65p,2477b,352s stack positions out of 10000i,1000n,20000p,200000b,200000s
|
||||||
@@ -744,7 +744,7 @@ type1/public/lm/lmri9.pfb><D:/MikTex/fonts/type1/public/lm/lmsy10.pfb><D:/MikTe
|
|||||||
x/fonts/type1/public/lm/lmsy7.pfb><D:/MikTex/fonts/type1/public/lm/lmsy9.pfb><D
|
x/fonts/type1/public/lm/lmsy7.pfb><D:/MikTex/fonts/type1/public/lm/lmsy9.pfb><D
|
||||||
:/MikTex/fonts/type1/public/lm/lmtt10.pfb><D:/MikTex/fonts/type1/public/lm/lmtt
|
:/MikTex/fonts/type1/public/lm/lmtt10.pfb><D:/MikTex/fonts/type1/public/lm/lmtt
|
||||||
9.pfb><D:/MikTex/fonts/type1/public/amsfonts/symbols/msbm10.pfb>
|
9.pfb><D:/MikTex/fonts/type1/public/amsfonts/symbols/msbm10.pfb>
|
||||||
Output written on main.pdf (21 pages, 1116672 bytes).
|
Output written on main.pdf (21 pages, 1116645 bytes).
|
||||||
PDF statistics:
|
PDF statistics:
|
||||||
317 PDF objects out of 1000 (max. 8388607)
|
317 PDF objects out of 1000 (max. 8388607)
|
||||||
0 named destinations out of 1000 (max. 500000)
|
0 named destinations out of 1000 (max. 500000)
|
||||||
|
|||||||
Binary file not shown.
@@ -51,7 +51,7 @@ Despite these advances, most existing work either focuses on packet-level genera
|
|||||||
\label{sec:related}
|
\label{sec:related}
|
||||||
Early generation of network data oriented towards ``realism'' mostly remained at the packet/flow header level, either through replay or statistical synthesis based on single-point observations. Swing, in a closed-loop, network-responsive manner, extracts user/application/network distributions from single-point observations to reproduce burstiness and correlation across multiple time scales \citep{10.1145/1151659.1159928}. Subsequently, a series of works advanced header synthesis to learning-based generation: the WGAN-based method added explicit verification of protocol field consistency to NetFlow/IPFIX \citep{Ring_2019}, NetShare reformulated header modeling as flow-level time series and improved fidelity and scalability through domain encoding and parallel fine-tuning \citep{10.1145/3544216.3544251}, and DoppelGANger preserved the long-range structure and downstream sorting consistency of networked time series by decoupling attributes from sequences \citep{Lin_2020}. However, in industrial control system (ICS) scenarios, the original PCAP is usually not shareable, and public testbeds (such as SWaT and WADI) mostly provide process/monitoring telemetry and protocol interactions for security assessment, so that public datasets emphasize operational variables rather than packet-level traces \citep{7469060,10.1145/3055366.3055375}. This makes ``synthesis at the feature/telemetry level, aware of protocol and semantics'' more feasible and necessary in practice: we are more concerned with reproducing high-level distributions and multi-scale temporal patterns according to operational semantics and physical constraints without relying on the original packets. From this perspective, the generation paradigm naturally shifts from ``packet syntax reproduction'' to ``modeling of high-level spatio-temporal distributions and uncertainties'', requiring stable training, strong distribution fitting, and interpretable uncertainty characterization.
|
Early generation of network data oriented towards ``realism'' mostly remained at the packet/flow header level, either through replay or statistical synthesis based on single-point observations. Swing, in a closed-loop, network-responsive manner, extracts user/application/network distributions from single-point observations to reproduce burstiness and correlation across multiple time scales \citep{10.1145/1151659.1159928}. Subsequently, a series of works advanced header synthesis to learning-based generation: the WGAN-based method added explicit verification of protocol field consistency to NetFlow/IPFIX \citep{Ring_2019}, NetShare reformulated header modeling as flow-level time series and improved fidelity and scalability through domain encoding and parallel fine-tuning \citep{10.1145/3544216.3544251}, and DoppelGANger preserved the long-range structure and downstream sorting consistency of networked time series by decoupling attributes from sequences \citep{Lin_2020}. However, in industrial control system (ICS) scenarios, the original PCAP is usually not shareable, and public testbeds (such as SWaT and WADI) mostly provide process/monitoring telemetry and protocol interactions for security assessment, so that public datasets emphasize operational variables rather than packet-level traces \citep{7469060,10.1145/3055366.3055375}. This makes ``synthesis at the feature/telemetry level, aware of protocol and semantics'' more feasible and necessary in practice: we are more concerned with reproducing high-level distributions and multi-scale temporal patterns according to operational semantics and physical constraints without relying on the original packets. From this perspective, the generation paradigm naturally shifts from ``packet syntax reproduction'' to ``modeling of high-level spatio-temporal distributions and uncertainties'', requiring stable training, strong distribution fitting, and interpretable uncertainty characterization.
|
||||||
|
|
||||||
Diffusion models are a good fit along this path: DDPM achieves high-quality sampling and stable optimization through efficient $\epsilon$ parameterization and weighted variational objectives \citep{NEURIPS2020_4c5bcfec}; the SDE perspective unifies score-based and diffusion models, providing predictor--corrector sampling strategies and likelihood evaluation based on probability flow ODEs \citep{song2021scorebasedgenerativemodelingstochastic}. For time series, TimeGrad replaces the constrained output distribution with conditional denoising, capturing high-dimensional correlations at each step \citep{rasul2021autoregressivedenoisingdiffusionmodels}; CSDI explicitly performs conditional diffusion and uses two-dimensional attention to simultaneously leverage temporal and cross-feature dependencies, suitable for conditioning and filling in missing values \citep{tashiro2021csdiconditionalscorebaseddiffusion}; in a more general spatio-temporal structure, DiffSTG generalizes diffusion to spatio-temporal graphs, combining TCN/GCN with denoising U-Net to improve CRPS and inference efficiency in a non-autoregressive manner \citep{wen2024diffstgprobabilisticspatiotemporalgraph}, and PriSTI further enhances conditional features and geographical relationships, maintaining robustness under high missing rates and sensor failures \citep{liu2023pristiconditionaldiffusionframework}; in long sequences and continuous domains, DiffWave verifies that diffusion can also match the quality of strong vocoders under non-autoregressive fast synthesis \citep{kong2021diffwaveversatilediffusionmodel}; studies on cellular communication traffic show that diffusion can recover spatio-temporal patterns and provide uncertainty characterization at the urban scale \citep{11087622}.
Taken together, these results point to one conclusion: when the research focus is on ``telemetry/high-level features'' rather than raw messages, diffusion models provide stable and fine-grained distribution fitting and uncertainty quantification, which is exactly in line with the requirements of ICS telemetry synthesis. Meanwhile, directly entrusting all structures to a ``monolithic diffusion'' is not advisable: long-range temporal skeletons and fine-grained marginal distributions are often in tension during optimization, requiring explicit decoupling in modeling.
|
Diffusion models are a good fit along this path: DDPM achieves high-quality sampling and stable optimization through efficient $\epsilon$ parameterization and weighted variational objectives \citep{NEURIPS2020_4c5bcfec}; the SDE perspective unifies score-based and diffusion models, providing predictor--corrector sampling strategies and likelihood evaluation based on probability flow ODEs \citep{song2021score}. For time series, TimeGrad replaces the constrained output distribution with conditional denoising, capturing high-dimensional correlations at each step \citep{rasul2021autoregressivedenoisingdiffusionmodels}; CSDI explicitly performs conditional diffusion and uses two-dimensional attention to simultaneously leverage temporal and cross-feature dependencies, suitable for conditioning and filling in missing values \citep{tashiro2021csdiconditionalscorebaseddiffusion}; in a more general spatio-temporal structure, DiffSTG generalizes diffusion to spatio-temporal graphs, combining TCN/GCN with denoising U-Net to improve CRPS and inference efficiency in a non-autoregressive manner \citep{wen2024diffstgprobabilisticspatiotemporalgraph}, and PriSTI further enhances conditional features and geographical relationships, maintaining robustness under high missing rates and sensor failures \citep{liu2023pristiconditionaldiffusionframework}; in long sequences and continuous domains, DiffWave verifies that diffusion can also match the quality of strong vocoders under non-autoregressive fast synthesis \citep{kong2021diffwaveversatilediffusionmodel}; studies on cellular communication traffic show that diffusion can recover spatio-temporal patterns and provide uncertainty characterization at the urban scale \citep{11087622}.
Taken together, these results point to one conclusion: when the research focus is on ``telemetry/high-level features'' rather than raw messages, diffusion models provide stable and fine-grained distribution fitting and uncertainty quantification, which is exactly in line with the requirements of ICS telemetry synthesis. Meanwhile, directly entrusting all structures to a ``monolithic diffusion'' is not advisable: long-range temporal skeletons and fine-grained marginal distributions are often in tension during optimization, requiring explicit decoupling in modeling.
|
||||||
|
|
||||||
Looking further into the mechanistic complexity of ICS, its channel types are inherently mixed, containing both continuous process trajectories and discrete supervision/status variables, and discrete channels must be ``legal'' under operational constraints. The aforementioned progress in time series diffusion has mainly occurred in continuous spaces, but discrete diffusion has also developed systematic methods: D3PM improves sampling quality and likelihood through absorbing/masking and structured transitions in discrete state spaces \citep{austin2023structureddenoisingdiffusionmodels}, subsequent masked diffusion provides stable reconstruction on categorical data in a more simplified form \citep{Lin_2020}, multinomial diffusion directly defines diffusion on a finite vocabulary through mechanisms such as argmax flows \citep{hoogeboom2021argmaxflowsmultinomialdiffusion}, and Diffusion-LM demonstrates an effective path for controllable text generation by imposing gradient constraints in continuous latent spaces \citep{li2022diffusionlmimprovescontrollabletext}. From the perspectives of protocols and finite-state machines, coverage-guided fuzz testing emphasizes the criticality of ``sequence legality and state coverage'' \citep{meng2025aflnetyearslatercoverageguided,godefroid2017learnfuzzmachinelearninginput,she2019neuzzefficientfuzzingneural}, echoing the concept of ``legality by construction'' in discrete diffusion: preferentially adopting absorbing/masking diffusion on discrete channels, supplemented by type-aware conditioning and sampling constraints, to avoid semantic invalidity and marginal distortion caused by post hoc thresholding.
|
Looking further into the mechanistic complexity of ICS, its channel types are inherently mixed, containing both continuous process trajectories and discrete supervision/status variables, and discrete channels must be ``legal'' under operational constraints. The aforementioned progress in time series diffusion has mainly occurred in continuous spaces, but discrete diffusion has also developed systematic methods: D3PM improves sampling quality and likelihood through absorbing/masking and structured transitions in discrete state spaces \citep{austin2023structureddenoisingdiffusionmodels}, subsequent masked diffusion provides stable reconstruction on categorical data in a more simplified form \citep{Lin_2020}, multinomial diffusion directly defines diffusion on a finite vocabulary through mechanisms such as argmax flows \citep{hoogeboom2021argmaxflowsmultinomialdiffusion}, and Diffusion-LM demonstrates an effective path for controllable text generation by imposing gradient constraints in continuous latent spaces \citep{li2022diffusionlmimprovescontrollabletext}. From the perspectives of protocols and finite-state machines, coverage-guided fuzz testing emphasizes the criticality of ``sequence legality and state coverage'' \citep{meng2025aflnetyearslatercoverageguided,godefroid2017learnfuzzmachinelearninginput,she2019neuzzefficientfuzzingneural}, echoing the concept of ``legality by construction'' in discrete diffusion: preferentially adopting absorbing/masking diffusion on discrete channels, supplemented by type-aware conditioning and sampling constraints, to avoid semantic invalidity and marginal distortion caused by post hoc thresholding.
|
||||||
|
|
||||||
|
|||||||
@@ -648,16 +648,6 @@ keywords = {burstiness, energy plot, generator, internet, modeling, structural m
|
|||||||
year = {2020}
|
year = {2020}
|
||||||
}
|
}
|
||||||
|
|
||||||
@misc{song2021scorebasedgenerativemodelingstochastic,
|
|
||||||
title={Score-Based Generative Modeling through Stochastic Differential Equations},
|
|
||||||
author={Yang Song and Jascha Sohl-Dickstein and Diederik P. Kingma and Abhishek Kumar and Stefano Ermon and Ben Poole},
|
|
||||||
year={2021},
|
|
||||||
eprint={2011.13456},
|
|
||||||
archivePrefix={arXiv},
|
|
||||||
primaryClass={cs.LG},
|
|
||||||
url={https://arxiv.org/abs/2011.13456},
|
|
||||||
}
|
|
||||||
|
|
||||||
@inproceedings{nie2023patchtst,
|
@inproceedings{nie2023patchtst,
|
||||||
title={A Time Series is Worth 64 Words: Long-term Forecasting with Transformers},
|
title={A Time Series is Worth 64 Words: Long-term Forecasting with Transformers},
|
||||||
author={Nie, Yuqi and Nguyen, Nam H. and Sinthong, Phanwadee and Kalagnanam, Jayant},
|
author={Nie, Yuqi and Nguyen, Nam H. and Sinthong, Phanwadee and Kalagnanam, Jayant},
|
||||||
|
|||||||
Reference in New Issue
Block a user