Intro and Related Work Completed

- The reference for the HAI dataset still has problems.
2026-02-04 19:39:36 +08:00
parent 81625b5c4e
commit 272e159df1
2 changed files with 222 additions and 83 deletions
--- a/arxiv-style/references.bib
+++ b/arxiv-style/references.bib
@@ -1,3 +1,4 @@
+% References for the Methodology section
@inproceedings{vaswani2017attention,
  title={Attention Is All You Need},
  author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
@@ -116,6 +117,141 @@
  url={https://csrc.nist.gov/pubs/sp/800/82/r3/final}
 }

+% References for the Introduction section
+@article{10.1007/s10844-022-00753-1,
+author = {Koay, Abigail M. Y. and Ko, Ryan K. L. and Hettema, Hinne and Radke, Kenneth},
+title = {Machine learning in industrial control system ({ICS}) security: current landscape, opportunities and challenges},
+year = {2022},
+issue_date = {Apr 2023},
+publisher = {Kluwer Academic Publishers},
+address = {USA},
+volume = {60},
+number = {2},
+issn = {0925-9902},
+url = {https://doi.org/10.1007/s10844-022-00753-1},
+doi = {10.1007/s10844-022-00753-1},
+abstract = {The advent of Industry 4.0 has led to a rapid increase in cyber attacks on industrial systems and processes, particularly on Industrial Control Systems (ICS). These systems are increasingly becoming prime targets for cyber criminals and nation-states looking to extort large ransoms or cause disruptions due to their ability to cause devastating impact whenever they cease working or malfunction. Although myriads of cyber attack detection systems have been proposed and developed, these detection systems still face many challenges that are typically not found in traditional detection systems. Motivated by the need to better understand these challenges to improve current approaches, this paper aims to (1) understand the current vulnerability landscape in ICS, (2) survey current advancements of Machine Learning (ML) based methods with respect to the usage of ML base classifiers (3) provide insights to benefits and limitations of recent advancement with respect to two performance vectors; detection accuracy and attack variety. Based on our findings, we present key open challenges which will represent exciting research opportunities for the research community.},
+journal = {Journal of Intelligent Information Systems},
+month = oct,
+pages = {377--405},
+numpages = {29},
+keywords = {Operational technology, Cyber security, Dataset, Industrial control systems, Machine learning, Critical infrastructure}
+}
+
+@ARTICLE{Nankya2023-gp,
+  title     = "Securing Industrial Control Systems: Components, Cyber Threats,
+               and Machine Learning-Driven Defense Strategies",
+  author    = "Nankya, Mary and Chataut, Robin and Akl, Robert",
+  abstract  = "Industrial Control Systems (ICS), which include Supervisory
+               Control and Data Acquisition (SCADA) systems, Distributed
+               Control Systems (DCS), and Programmable Logic Controllers (PLC),
+               play a crucial role in managing and regulating industrial
+               processes. However, ensuring the security of these systems is of
+               utmost importance due to the potentially severe consequences of
+               cyber attacks. This article presents an overview of ICS
+               security, covering its components, protocols, industrial
+               applications, and performance aspects. It also highlights the
+               typical threats and vulnerabilities faced by these systems.
+               Moreover, the article identifies key factors that influence the
+               design decisions concerning control, communication, reliability,
+               and redundancy properties of ICS, as these are critical in
+               determining the security needs of the system. The article
+               outlines existing security countermeasures, including network
+               segmentation, access control, patch management, and security
+               monitoring. Furthermore, the article explores the integration of
+               machine learning techniques to enhance the cybersecurity of ICS.
+               Machine learning offers several advantages, such as anomaly
+               detection, threat intelligence analysis, and predictive
+               maintenance. However, combining machine learning with other
+               security measures is essential to establish a comprehensive
+               defense strategy for ICS. The article also addresses the
+               challenges associated with existing measures and provides
+               recommendations for improving ICS security. This paper becomes a
+               valuable reference for researchers aiming to make meaningful
+               contributions within the constantly evolving ICS domain by
+               providing an in-depth examination of the present state,
+               challenges, and potential future advancements.",
+  journal   = "Sensors",
+  publisher = "MDPI AG",
+  volume    =  23,
+  number    =  21,
+  pages     = "8840",
+  month     =  oct,
+  year      =  2023,
+  doi       = "10.3390/s23218840",
+  keywords  = "SCADA; anomaly detection; artificial intelligence; attacks;
+               cyber defense; cyber threats; industrial control systems;
+               security; vulnerabilities",
+  copyright = "https://creativecommons.org/licenses/by/4.0/",
+  language  = "en"
+}
+
+@misc{shin,
+        title={{HAI} Security Dataset},
+        author={Shin, Hyeok-Ki and Lee, Woomyo and Choi, Seungoh and Yun, Jeong-Han and Min, Byung Gil and Kim, HyoungChun},
+        year={2023},
+        publisher={Kaggle},
+        url={https://www.kaggle.com/dsv/5821622},
+        doi={10.34740/KAGGLE/DSV/5821622}
+}
+
+
+@article{info16100910,
+author = {Ali, Jokha and Ali, Saqib and Al Balushi, Taiseera and Nadir, Zia},
+title = {Intrusion Detection in Industrial Control Systems Using Transfer Learning Guided by Reinforcement Learning},
+journal = {Information},
+volume = {16},
+year = {2025},
+number = {10},
+pages = {910},
+article-number = {910},
+url = {https://www.mdpi.com/2078-2489/16/10/910},
+issn = {2078-2489},
+abstract = {Securing Industrial Control Systems (ICSs) is critical, but it is made challenging by the constant evolution of cyber threats and the scarcity of labeled attack data in these specialized environments. Standard intrusion detection systems (IDSs) often fail to adapt when transferred to new networks with limited data. To address this, this paper introduces an adaptive intrusion detection framework that combines a hybrid Convolutional Neural Network and Long Short-Term Memory (CNN-LSTM) model with a novel transfer learning strategy. We employ a Reinforcement Learning (RL) agent to intelligently guide the fine-tuning process, which allows the IDS to dynamically adjust its parameters such as layer freezing and learning rates in real-time based on performance feedback. We evaluated our system in a realistic data-scarce scenario using only 50 labeled training samples. Our RL-Guided model achieved a final F1-score of 0.9825, significantly outperforming a standard neural fine-tuning model (0.861) and a target baseline model (0.759). Analysis of the RL agent’s behavior confirmed that it learned a balanced and effective policy for adapting the model to the target domain. We conclude that the proposed RL-guided approach creates a highly accurate and adaptive IDS that overcomes the limitations of static transfer learning methods. This dynamic fine-tuning strategy is a powerful and promising direction for building resilient cybersecurity defenses for critical infrastructure.},
+doi = {10.3390/info16100910}
+}
+
+@InProceedings{pmlr-v202-kotelnikov23a,
+  title     = {{TabDDPM}: Modelling Tabular Data with Diffusion Models},
+  author    = {Kotelnikov, Akim and Baranchuk, Dmitry and Rubachev, Ivan and Babenko, Artem},
+  booktitle = {Proceedings of the 40th International Conference on Machine Learning},
+  pages     = {17564--17579},
+  year      = {2023},
+  editor    = {Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan},
+  volume    = {202},
+  series    = {Proceedings of Machine Learning Research},
+  month     = jul,
+  publisher = {PMLR},
+  pdf       = {https://proceedings.mlr.press/v202/kotelnikov23a/kotelnikov23a.pdf},
+  url       = {https://proceedings.mlr.press/v202/kotelnikov23a.html},
+  abstract  = {Denoising diffusion probabilistic models are becoming the leading generative modeling paradigm for many important data modalities. Being the most prevalent in the computer vision community, diffusion models have recently gained some attention in other domains, including speech, NLP, and graph-like data. In this work, we investigate if the framework of diffusion models can be advantageous for general tabular problems, where data points are typically represented by vectors of heterogeneous features. The inherent heterogeneity of tabular data makes it quite challenging for accurate modeling since the individual features can be of a completely different nature, i.e., some of them can be continuous and some can be discrete. To address such data types, we introduce TabDDPM — a diffusion model that can be universally applied to any tabular dataset and handles any feature types. We extensively evaluate TabDDPM on a wide set of benchmarks and demonstrate its superiority over existing GAN/VAE alternatives, which is consistent with the advantage of diffusion models in other fields.}
+}
+
+@misc{rasul2021autoregressivedenoisingdiffusionmodels,
+      author        = {Rasul, Kashif and Seward, Calvin and Schuster, Ingmar and Vollgraf, Roland},
+      title         = {Autoregressive Denoising Diffusion Models for Multivariate Probabilistic Time Series Forecasting},
+      year          = {2021},
+      eprint        = {2101.12072},
+      archivePrefix = {arXiv},
+      primaryClass  = {cs.LG},
+      url           = {https://arxiv.org/abs/2101.12072},
+}
+
+@misc{jiang2023netdiffusionnetworkdataaugmentation,
+      author        = {Jiang, Xi and Liu, Shinan and Gember-Jacobson, Aaron and Bhagoji, Arjun Nitin and Schmitt, Paul and Bronzino, Francesco and Feamster, Nick},
+      title         = {NetDiffusion: Network Data Augmentation Through Protocol-Constrained Traffic Generation},
+      year          = {2023},
+      eprint        = {2310.08543},
+      archivePrefix = {arXiv},
+      primaryClass  = {cs.NI},
+      url           = {https://arxiv.org/abs/2310.08543},
+}
+
+% References for the Related Work section
@article{10.1145/1151659.1159928,
 author = {Vishwanath, Kashi Venkatesh and Vahdat, Amin},
 title = {Realistic and responsive network traffic generation},
@@ -164,47 +300,50 @@ series = {SIGCOMM '06}
   publisher={Elsevier BV},
   author={Ring, Markus and Schlör, Daniel and Landes, Dieter and Hotho, Andreas},
   year={2019},
-   month=may, pages={156–172} }
+   month=may, pages={156–172}
+}

-   @inproceedings{10.1145/3544216.3544251,
-   author = {Yin, Yucheng and Lin, Zinan and Jin, Minhao and Fanti, Giulia and Sekar, Vyas},
-   title = {Practical GAN-based synthetic IP header trace generation using NetShare},
-   year = {2022},
-   isbn = {9781450394208},
-   publisher = {Association for Computing Machinery},
-   address = {New York, NY, USA},
-   url = {https://doi.org/10.1145/3544216.3544251},
-   doi = {10.1145/3544216.3544251},
-   abstract = {We explore the feasibility of using Generative Adversarial Networks (GANs) to automatically learn generative models to generate synthetic packet- and flow header traces for networking tasks (e.g., telemetry, anomaly detection, provisioning). We identify key fidelity, scalability, and privacy challenges and tradeoffs in existing GAN-based approaches. By synthesizing domain-specific insights with recent advances in machine learning and privacy, we identify design choices to tackle these challenges. Building on these insights, we develop an end-to-end framework, NetShare. We evaluate NetShare on six diverse packet header traces and find that: (1) across all distributional metrics and traces, it achieves 46\% more accuracy than baselines and (2) it meets users' requirements of downstream tasks in evaluating accuracy and rank ordering of candidate approaches.},
-   booktitle = {Proceedings of the ACM SIGCOMM 2022 Conference},
-   pages = {458–472},
-   numpages = {15},
-   keywords = {synthetic data generation, privacy, network packets, network flows, generative adversarial networks},
-   location = {Amsterdam, Netherlands},
-   series = {SIGCOMM '22}
-   }
+@inproceedings{10.1145/3544216.3544251,
+author = {Yin, Yucheng and Lin, Zinan and Jin, Minhao and Fanti, Giulia and Sekar, Vyas},
+title = {Practical {GAN}-based synthetic {IP} header trace generation using {NetShare}},
+year = {2022},
+isbn = {9781450394208},
+publisher = {Association for Computing Machinery},
+address = {New York, NY, USA},
+url = {https://doi.org/10.1145/3544216.3544251},
+doi = {10.1145/3544216.3544251},
+abstract = {We explore the feasibility of using Generative Adversarial Networks (GANs) to automatically learn generative models to generate synthetic packet- and flow header traces for networking tasks (e.g., telemetry, anomaly detection, provisioning). We identify key fidelity, scalability, and privacy challenges and tradeoffs in existing GAN-based approaches. By synthesizing domain-specific insights with recent advances in machine learning and privacy, we identify design choices to tackle these challenges. Building on these insights, we develop an end-to-end framework, NetShare. We evaluate NetShare on six diverse packet header traces and find that: (1) across all distributional metrics and traces, it achieves 46\% more accuracy than baselines and (2) it meets users' requirements of downstream tasks in evaluating accuracy and rank ordering of candidate approaches.},
+booktitle = {Proceedings of the ACM SIGCOMM 2022 Conference},
+pages = {458--472},
+numpages = {15},
+keywords = {synthetic data generation, privacy, network packets, network flows, generative adversarial networks},
+location = {Amsterdam, Netherlands},
+series = {SIGCOMM '22}
+}

@inproceedings{Lin_2020, series={IMC ’20},
-    title={Using GANs for Sharing Networked Time Series Data: Challenges, Initial Promise, and Open Questions},
-    url={http://dx.doi.org/10.1145/3419394.3423643},
-    DOI={10.1145/3419394.3423643},
-    booktitle={Proceedings of the ACM Internet Measurement Conference},
-    publisher={ACM},
-    author={Lin, Zinan and Jain, Alankar and Wang, Chen and Fanti, Giulia and Sekar, Vyas},
-    year={2020},
-    month=oct, pages={464–483},
-    collection={IMC ’20} }
+   title={Using {GANs} for Sharing Networked Time Series Data: Challenges, Initial Promise, and Open Questions},
+   url={http://dx.doi.org/10.1145/3419394.3423643},
+   DOI={10.1145/3419394.3423643},
+   booktitle={Proceedings of the ACM Internet Measurement Conference},
+   publisher={ACM},
+   author={Lin, Zinan and Jain, Alankar and Wang, Chen and Fanti, Giulia and Sekar, Vyas},
+   year={2020},
+   month=oct, pages={464--483},
+   collection={IMC ’20}
+}

@INPROCEEDINGS{7469060,
-    author={Mathur, Aditya P. and Tippenhauer, Nils Ole},
-    booktitle={2016 International Workshop on Cyber-physical Systems for Smart Water Networks (CySWater)},
-    title={SWaT: a water treatment testbed for research and training on ICS security},
-    year={2016},
-    volume={},
-    number={},
-    pages={31-36},
-    keywords={Sensors;Actuators;Feeds;Process control;Chemicals;Chemical sensors;Security;Cyber Physical Systems;Industrial Control Systems;Cyber Attacks;Cyber Defense;Water Testbed},
-    doi={10.1109/CySWater.2016.7469060}}
+  author={Mathur, Aditya P. and Tippenhauer, Nils Ole},
+  booktitle={2016 International Workshop on Cyber-physical Systems for Smart Water Networks (CySWater)},
+  title={{SWaT}: a water treatment testbed for research and training on {ICS} security},
+  year={2016},
+  pages={31--36},
+  keywords={Sensors;Actuators;Feeds;Process control;Chemicals;Chemical sensors;Security;Cyber Physical Systems;Industrial Control Systems;Cyber Attacks;Cyber Defense;Water Testbed},
+  doi={10.1109/CySWater.2016.7469060}
+}

@inproceedings{10.1145/3055366.3055375,
 author = {Ahmed, Chuadhry Mujeeb and Palleti, Venkata Reddy and Mathur, Aditya P.},
@@ -225,15 +364,15 @@ series = {CySWATER '17}
 }

@inproceedings{NEURIPS2020_4c5bcfec,
-author = {Ho, Jonathan and Jain, Ajay and Abbeel, Pieter},
-booktitle = {Advances in Neural Information Processing Systems},
-editor = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
-pages = {6840--6851},
-publisher = {Curran Associates, Inc.},
-title = {Denoising Diffusion Probabilistic Models},
-url = {https://proceedings.neurips.cc/paper_files/paper/2020/file/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf},
-volume = {33},
-year = {2020}
+ author    = {Ho, Jonathan and Jain, Ajay and Abbeel, Pieter},
+ title     = {Denoising Diffusion Probabilistic Models},
+ booktitle = {Advances in Neural Information Processing Systems},
+ editor    = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M.F. and Lin, H.},
+ volume    = {33},
+ pages     = {6840--6851},
+ publisher = {Curran Associates, Inc.},
+ year      = {2020},
+ url       = {https://proceedings.neurips.cc/paper_files/paper/2020/file/4c5bcfec8584af0d967f1ab10179ca4b-Paper.pdf}
 }

@misc{song2021scorebasedgenerativemodelingstochastic,
@@ -246,16 +385,6 @@ year = {2020}
      url={https://arxiv.org/abs/2011.13456},
 }

-@misc{rasul2021autoregressivedenoisingdiffusionmodels,
-      title={Autoregressive Denoising Diffusion Models for Multivariate Probabilistic Time Series Forecasting},
-      author={Kashif Rasul and Calvin Seward and Ingmar Schuster and Roland Vollgraf},
-      year={2021},
-      eprint={2101.12072},
-      archivePrefix={arXiv},
-      primaryClass={cs.LG},
-      url={https://arxiv.org/abs/2101.12072},
-}
-
@misc{tashiro2021csdiconditionalscorebaseddiffusion,
      title={CSDI Conditional Score-based Diffusion Models for Probabilistic Time Series Imputation},
      author={Yusuke Tashiro and Jiaming Song and Yang Song and Stefano Ermon},
@@ -305,7 +434,8 @@ year = {2020}
  number={1},
  pages={257-271},
  keywords={Base stations;Diffusion models;Data models;Uncertainty;Predictive models;Generative adversarial networks;Knowledge graphs;Mobile computing;Telecommunication traffic;Semantics;Cellular traffic;data generation;diffusion model;spatio-temporal graph},
-  doi={10.1109/TMC.2025.3591183}}
+  doi={10.1109/TMC.2025.3591183}
+}

@misc{austin2023structureddenoisingdiffusionmodels,
    title={Structured Denoising Diffusion Models in Discrete State-Spaces},
@@ -317,6 +447,16 @@ year = {2020}
    url={https://arxiv.org/abs/2107.03006},
 }

+@misc{hoogeboom2021argmaxflowsmultinomialdiffusion,
+      author        = {Hoogeboom, Emiel and Nielsen, Didrik and Jaini, Priyank and Forré, Patrick and Welling, Max},
+      title         = {Argmax Flows and Multinomial Diffusion: Learning Categorical Distributions},
+      year          = {2021},
+      eprint        = {2102.05379},
+      archivePrefix = {arXiv},
+      primaryClass  = {stat.ML},
+      url           = {https://arxiv.org/abs/2102.05379},
+}
+
@misc{li2022diffusionlmimprovescontrollabletext,
      title={Diffusion-LM Improves Controllable Text Generation},
      author={Xiang Lisa Li and John Thickstun and Ishaan Gulrajani and Percy Liang and Tatsunori B. Hashimoto},
@@ -357,16 +497,6 @@ year = {2020}
      url={https://arxiv.org/abs/1807.05620},
 }

-@misc{hoogeboom2021argmaxflowsmultinomialdiffusion,
-      title={Argmax Flows and Multinomial Diffusion: Learning Categorical Distributions},
-      author={Emiel Hoogeboom and Didrik Nielsen and Priyank Jaini and Patrick Forré and Max Welling},
-      year={2021},
-      eprint={2102.05379},
-      archivePrefix={arXiv},
-      primaryClass={stat.ML},
-      url={https://arxiv.org/abs/2102.05379},
-}
-
@misc{dai2019transformerxlattentivelanguagemodels,
      title={Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context},
      author={Zihang Dai and Zhilin Yang and Yiming Yang and Jaime Carbonell and Quoc V. Le and Ruslan Salakhutdinov},
@@ -418,4 +548,7 @@ year = {2020}
   publisher={University Library in Kragujevac},
   author={Damjanović, Ivan and Milošević, Marko and Stevanović, Dragan},
   year={2023},
-   pages={197–202} }
+   pages={197–202}
+}
+
+% References for the Benchmark section