From c1c36cdf56dbe7778bd8b6751e31d51216bf979e Mon Sep 17 00:00:00 2001 From: DaZuo0122 <1085701449@qq.com> Date: Thu, 15 Jan 2026 19:07:26 +0800 Subject: [PATCH] Implemented checker as described in internal-docs/notes4coding/checker_design.md --- .cargo/config.toml | 2 + .gitignore | 1 + Cargo.lock | 371 +++++++++++++++++++++++ Cargo.toml | 11 + README.md | 69 +++++ docs/api.md | 188 ++++++++++++ docs/design.md | 357 ++++++++++++++++++++++ docs/examples/modbus.json | 16 + docs/examples/report.json | 31 ++ docs/examples/trace.meta.jsonl | 1 + src/config.rs | 59 ++++ src/decode.rs | 76 +++++ src/main.rs | 110 +++++++ src/mbap.rs | 45 +++ src/meta.rs | 75 +++++ src/modbus_desc.rs | 120 ++++++++ src/report.rs | 46 +++ src/state.rs | 59 ++++ src/validate.rs | 535 +++++++++++++++++++++++++++++++++ 19 files changed, 2172 insertions(+) create mode 100644 .cargo/config.toml create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 docs/api.md create mode 100644 docs/design.md create mode 100644 docs/examples/modbus.json create mode 100644 docs/examples/report.json create mode 100644 docs/examples/trace.meta.jsonl create mode 100644 src/config.rs create mode 100644 src/decode.rs create mode 100644 src/main.rs create mode 100644 src/mbap.rs create mode 100644 src/meta.rs create mode 100644 src/modbus_desc.rs create mode 100644 src/report.rs create mode 100644 src/state.rs create mode 100644 src/validate.rs diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..9de3f80 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustflags = ["-L", "C:/npcap-sdk-1.15/Lib/x64"] diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..76d228a --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,371 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cc" +version = "1.2.52" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "checker" +version = "0.1.0" +dependencies = [ + "anyhow", + "pcap", + "pnet_packet", + "serde", + "serde_json", +] + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41" + +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "libc" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "no-std-net" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65" + +[[package]] +name = "pcap" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2eecc2ddc671ec563b5b39f846556aade68a65d1afb14d8fe6b30b0457d75" +dependencies = [ + "bitflags", + "errno", + "libc", + "libloading", + "pkg-config", + "regex", + "windows-sys", +] + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "pnet_base" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffc190d4067df16af3aba49b3b74c469e611cad6314676eaf1157f31aa0fb2f7" +dependencies = [ + "no-std-net", +] + +[[package]] +name = "pnet_macros" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13325ac86ee1a80a480b0bc8e3d30c25d133616112bb16e86f712dcf8a71c863" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn", +] + +[[package]] +name = "pnet_macros_support" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed67a952585d509dd0003049b1fc56b982ac665c8299b124b90ea2bdb3134ab" +dependencies = [ + "pnet_base", +] + +[[package]] +name = "pnet_packet" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c96ebadfab635fcc23036ba30a7d33a80c39e8461b8bd7dc7bb186acb96560f" +dependencies = [ + "glob", + "pnet_base", + "pnet_macros", + "pnet_macros_support", +] + +[[package]] +name = "proc-macro2" +version = "1.0.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + +[[package]] +name = "zmij" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd8f3f50b848df28f887acb68e41201b5aea6bc8a8dacc00fb40635ff9a72fea" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..5eb3131 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "checker" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1" +pcap = "2" +pnet_packet = "0.35" +serde = { version = "1", features = ["derive"] } +serde_json = "1" diff --git a/README.md b/README.md new file mode 100644 index 0000000..b214956 --- /dev/null +++ b/README.md @@ -0,0 
+1,69 @@ +# Checker + +Modbus/TCP traffic checker that validates generated PCAP/PCAPNG traces against +Modbus rules, request/response pairing, and optional expected fields supplied +via JSONL sidecar metadata. + +## What It Does + +- Parses Ethernet/RAW PCAP packets, extracts TCP payloads +- Validates MBAP header and basic Modbus/TCP invariants +- Parses PDU fields using descriptor JSON (request/response) +- Tracks outstanding requests and flags unmatched responses +- Compares observed values with optional expected fields in JSONL +- Emits a JSON report with findings and a summary + +## Build + +```bash +cargo build +``` + +## Run + +```bash +cargo run -- \ + --pcap trace.pcapng \ + --meta trace.meta.jsonl \ + --config modbus.json \ + --report report.json \ + --port 502 \ + --mode mvp +``` + +Sample CLI with the example files: + +```bash +cargo run -- \ + --pcap trace.pcapng \ + --meta docs/examples/trace.meta.jsonl \ + --config docs/examples/modbus.json \ + --report report.json +``` + +## CLI Options + +- `--pcap <path>`: PCAP or PCAPNG input file +- `--meta <path>`: JSONL sidecar metadata (1 line per packet) +- `--config <path>`: Modbus descriptor JSON +- `--report <path>`: Report JSON output (default: `report.json`) +- `--port <port>`: Modbus/TCP port (default: `502`) +- `--mode mvp|strict`: Validation mode (default: `mvp`) +- `--fail-fast`: Stop on first fatal error + +## Files and Formats + +See `docs/api.md` for the full schema of: +- `trace.meta.jsonl` lines +- Modbus descriptor JSON +- `report.json` output + +Example files live in `docs/examples/`: +- `docs/examples/trace.meta.jsonl` +- `docs/examples/modbus.json` +- `docs/examples/report.json` + +## Notes + +- This checker assumes one Modbus ADU per TCP payload. +- TCP reassembly and checksum validation are not implemented. diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..478a79f --- /dev/null +++ b/docs/api.md @@ -0,0 +1,188 @@ +# API Documentation + +This document describes the input/output formats and the checker program API. + +## CLI API + +Command: + +```bash +checker --pcap <path> --meta <path> --config <path> \ + --report <path> [--port 502] [--mode mvp|strict] [--fail-fast] +``` + +Exit behavior: + +- Returns a non-zero exit code only on process-level errors (I/O, parse failures). +- Validation findings are written to the report file. + +## JSONL Sidecar (`trace.meta.jsonl`) + +Each line corresponds to one packet, in the same order as the PCAP. + +```json +{ + "trace_id": "c7f1...", + "event_id": 42, + "pcap_index": 42, + "ts_ns": 1736451234567890123, + "direction": "c2s", + "flow": { + "src_ip": "10.0.0.10", + "src_port": 51012, + "dst_ip": "10.0.0.20", + "dst_port": 502 + }, + "expected": { + "modbus": { + "transaction_id": 513, + "unit_id": 1, + "function_code": 3 + }, + "fields": { + "starting_address": 0, + "quantity": 10 + } + } +} +``` + +Example: `docs/examples/trace.meta.jsonl` + +Fields: + +- `trace_id` (string, optional): Trace identifier. +- `event_id` (integer, optional): Event identifier from generator. +- `pcap_index` (integer, optional): Packet index for reference. +- `ts_ns` (integer, optional): Timestamp in nanoseconds. +- `direction` (string, required): `c2s` (request) or `s2c` (response). +- `flow` (object, required): Flow metadata used for request/response tracking. +- `expected` (object, optional): Expected Modbus header and/or field values.
+ +`expected.modbus`: + +- `transaction_id` (u16, optional) +- `unit_id` (u8, optional) +- `function_code` (u8, optional) + +`expected.fields`: + +- Arbitrary JSON object whose keys match descriptor field names. +- Values are compared against parsed output. + +## Modbus Descriptor JSON (`modbus.json`) + +Top-level: + +```json +{ + "functions": [ + { + "function": 3, + "name": "read_holding_registers", + "request": [ + {"name":"starting_address","type":"u16"}, + {"name":"quantity","type":"u16"} + ], + "response": [ + {"name":"byte_count","type":"u8"}, + {"name":"registers","type":"bytes","length_from":"byte_count"} + ] + } + ] +} +``` + +Example: `docs/examples/modbus.json` + +Function descriptor: + +- `function` (u8, required): Function code. +- `name` (string, optional): Human-readable function name. +- `request` (array, optional): Field list for client-to-server PDUs. +- `response` (array, optional): Field list for server-to-client PDUs. + +Field descriptor: + +- `name` (string, required): Field name used in output JSON. +- `type` (string, required): `u8`, `u16`, `u32`, `i16`, `i32`, `bytes`. +- `length` (integer, optional): Fixed length for `bytes`. +- `length_from` (string, optional): Name of a previous numeric field. +- `scale` (number, optional): Multiply numeric values by this scale. +- `enum_map` (object, optional): Map numeric strings to JSON values. + +Notes: + +- `length_from` uses values parsed earlier in the same descriptor. +- `bytes` output is an array of integers. + +## Report JSON (`report.json`) + +Structure: + +```json +{ + "summary": { + "total_packets": 1000, + "total_findings": 8, + "fatal": 1, + "error": 4, + "warn": 3, + "info": 0 + }, + "findings": [ + { + "pcap_index": 7, + "event_id": 42, + "severity": "error", + "code": "mbap_protocol", + "message": "Protocol id is 1, expected 0", + "flow": { + "src_ip": "10.0.0.10", + "src_port": 51012, + "dst_ip": "10.0.0.20", + "dst_port": 502 + }, + "observed": {"payload_len": 42, "mbap_length": 10}, + "expected": null + } + ] +} +``` + +Example: `docs/examples/report.json` + +Summary fields: + +- `total_packets`: Total packets processed. +- `total_findings`: Total findings emitted. +- `fatal`, `error`, `warn`, `info`: Counts by severity. + +Finding fields: + +- `pcap_index` (u64): Index in PCAP stream. +- `event_id` (u64, optional): Event identifier from metadata. +- `severity` (string): `fatal`, `error`, `warn`, `info`. +- `code` (string): Short machine-friendly code. +- `message` (string): Human-readable description. +- `flow` (object, optional): Source/destination addresses. +- `observed` (JSON, optional): Observed values for comparison. +- `expected` (JSON, optional): Expected values for comparison. + +## Validation Modes + +- `mvp`: Core checks only. +- `strict`: Treat MBAP length mismatch as a stopping condition. + +## Internal Module API (Rust) + +Primary modules: + +- `config`: Descriptor types and loader for JSON config. +- `meta`: JSONL metadata structs. +- `decode`: PCAP packet decode to TCP payload. +- `mbap`: MBAP parsing utilities. +- `modbus_desc`: Descriptor-based field parsing. +- `state`: Outstanding request tracking. +- `validate`: End-to-end validation pipeline. +- `report`: Report data structures. 
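
A minimal sketch of how these modules compose for one TCP payload, assuming it lives inside the crate (the binary declares the modules in `main.rs`); `check_payload` is a hypothetical helper, not part of the patch:

```rust
// Hypothetical helper showing the module flow for one TCP payload that
// carries exactly one Modbus ADU (the generator constraint this checker assumes).
use anyhow::Result;

use crate::{config, mbap, modbus_desc};

fn check_payload(cfg: &config::Config, payload: &[u8], is_request: bool) -> Result<()> {
    let adu = mbap::parse_mbap(payload)?;        // MBAP header + function code + PDU bytes
    let base_fc = adu.function_code & 0x7f;      // mask the exception bit
    if let Some(func) = cfg.find_function(base_fc) {
        // Pick the request or response field list from the descriptor JSON.
        let fields = if is_request { &func.request } else { &func.response };
        let parsed = modbus_desc::parse_with_descriptor(&adu.pdu_data, fields)?;
        println!("unit={} fc={} fields={}", adu.unit_id, base_fc, parsed);
    }
    Ok(())
}
```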
diff --git a/docs/design.md b/docs/design.md new file mode 100644 index 0000000..f59fb7d --- /dev/null +++ b/docs/design.md @@ -0,0 +1,357 @@ +## Conversation summary (so far) + +* You’re building a **Modbus/TCP traffic generation system for security use** with this pipeline: + **feature extraction → hybrid diffusion generates features → generator turns features into raw packets → checker validates traffic**. +* I proposed a checker that validates traffic at **multiple layers**: + + 1. frame/IP/TCP parseability, + 2. optional TCP conversation sanity, + 3. Modbus/TCP structural + semantic validity (MBAP + PDU), + 4. optional “feature-fidelity” checks against generator sidecar metadata. +* I recommended a **generator → checker contract** that is easy to debug and automate: + **PCAP/PCAPNG + JSONL sidecar** (1 metadata line per packet/event). +* You then shared your existing **descriptor-driven Modbus parser** (Rust) and a short doc. + The public API/type model is: `Config → FunctionDescriptor → FieldDescriptor(FieldType, length/length_from/scale/enum_map)` + and `parse_sawp_message(...)` returns JSON containing `unit`, `function`, optional `exception`, and `fields` . +* I said: the **descriptor engine is very reusable** for the checker, but the checker’s entry point should ideally work on **raw Modbus/TCP bytes** (MBAP+PDU), not require `sawp_modbus::Message`. + Also: for checker ergonomics, prefer structured error types over `Result` (but you can keep JSON output for the MVP). + +--- + +# Implementation document: Modbus/TCP Checker (Rust) + +This document is a practical, “not too complex” plan to implement the checker while still following good practices where they don’t add much difficulty. + +## 1) What the checker does (goals / non-goals) + +### Goals + +The checker verifies that generator output is: + +1. **Parsable** as TCP/IP traffic, +2. **Modbus/TCP-valid** at the application level (MBAP + PDU rules), +3. **Consistent** in request/response pairing (Transaction ID matching), +4. Optionally **matches the expected features** (function code, unit id, quantities, payload size, timing tolerances, etc.). + +### Non-goals (to keep it simple) + +To avoid turning this into a full Wireshark, we deliberately **do not** implement: + +* full TCP stream reassembly (segments split/merged), +* full TCP state machine with retransmits/out-of-order handling, +* IP/TCP checksum verification by default. + +Instead, we enforce a **generator constraint**: **one Modbus ADU per TCP payload** (no segmentation, no coalescing). This single constraint dramatically reduces checker complexity and is realistic for generated traces. + +> Trade-off: best practice would handle segmentation/coalescing and reassembly; difficulty rises a lot. The “one ADU per TCP payload” rule is the best complexity/benefit lever for this project. + +--- + +## 2) Generator output contract (what the checker consumes) + +### Recommended output (MVP-friendly and debuggable) + +**(A) PCAP or PCAPNG file** + +* `trace.pcapng` (or `.pcap`) containing the raw generated packets + +**(B) Sidecar JSONL metadata file** + +* `trace.meta.jsonl` where each line describes the corresponding packet/event (same order) + +This is the easiest way to: + +* reproduce failures, +* correlate packet index with expected semantic fields, +* produce actionable reports. 
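
As a sketch of how the checker side consumes this contract (it mirrors the lockstep loop in `src/validate.rs`; file names are placeholders):

```rust
// Sketch: stream the PCAP and the JSONL sidecar in lockstep; packet i pairs with line i.
use std::fs::File;
use std::io::{BufRead, BufReader};

use anyhow::{Context, Result};
use pcap::Capture;

fn main() -> Result<()> {
    let mut lines = BufReader::new(File::open("trace.meta.jsonl")?).lines();
    let mut cap = Capture::from_file("trace.pcapng").context("open pcap")?;

    let mut index = 0u64;
    while let Ok(packet) = cap.next_packet() {
        // A missing metadata line here is a fatal alignment error.
        let Some(line) = lines.next().transpose()? else {
            anyhow::bail!("missing metadata line for packet {index}");
        };
        let meta: serde_json::Value = serde_json::from_str(&line)?;
        println!("packet {index}: {} bytes, direction {}", packet.data.len(), meta["direction"]);
        index += 1;
    }
    Ok(())
}
```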
+ +### JSONL schema (minimal + optional) + +**Minimal fields (recommended):** + +* `trace_id` (string/uuid) +* `event_id` (monotonic integer) +* `pcap_index` (or implicit by line number) +* `ts_ns` timestamp +* `direction` (`"c2s"` or `"s2c"`) +* `flow` (src/dst ip/port) + +**Optional `expected` block (for feature-fidelity checks):** + +* `expected.modbus.transaction_id`, `unit_id`, `function_code`, and `expected.fields` (names matching your descriptor JSON). + +Example line: + +```json +{ + "trace_id": "c7f1...", + "event_id": 42, + "pcap_index": 42, + "ts_ns": 1736451234567890123, + "direction": "c2s", + "flow": {"src_ip":"10.0.0.10","src_port":51012,"dst_ip":"10.0.0.20","dst_port":502}, + "expected": { + "modbus": {"transaction_id": 513, "unit_id": 1, "function_code": 3}, + "fields": {"starting_address": 0, "quantity": 10} + } +} +``` + +> Trade-off: best practice is “self-describing PCAP” (pcapng custom blocks, or embedding metadata); difficulty higher. JSONL sidecar is dead simple and works well. + +--- + +## 3) Workflow (starting from generator output) + +### Step 0 — Load inputs + +1. Read `trace.meta.jsonl` into a lightweight iterator (don’t load all if trace is huge). +2. Open `trace.pcapng` and stream packets in order. + +### Step 1 — Align packets and metadata + +For each packet index `i`: + +* read packet `i` from PCAP +* read metadata line `i` from JSONL + If mismatch (missing line/packet), record a **Fatal** alignment error and stop (or continue with “best effort”, your call). + +### Step 2 — Decode packet and extract TCP payload + +Decode: + +* link layer (Ethernet/SLL/RAW depending on PCAP linktype), +* IPv4/IPv6, +* TCP, +* extract TCP payload bytes. + +Minimal checks: + +* packet parses, +* TCP payload length > 0 when direction indicates Modbus message, +* port 502 is present on either side (configurable if you generate non-502). + +### Step 3 — Parse Modbus/TCP ADU + +Assuming payload contains exactly one ADU: + +* parse MBAP (7 bytes) + PDU +* validate basic MBAP invariants +* parse function code and PDU data +* decide request vs response based on `direction` +* parse PDU data using descriptor map (your reusable part) + +### Step 4 — Stateful consistency checks + +Maintain per-flow state: + +* request/response pairing by `(transaction_id, unit_id)` +* outstanding request table with timeout/window limits + +### Step 5 — Feature-fidelity checks (optional) + +If `expected` exists in JSONL: + +* compare decoded modbus header + parsed fields with expected values +* compare sizes and (optionally) timing with tolerances + +### Step 6 — Emit report + +Output: + +* `report.json` with summary + per-finding samples (packet indices, flow key, reason, extracted fields) +* optional `report.txt` for quick reading + +--- + +## 4) Reusing your existing parser (what to keep, what to adjust) + +You already have: + +* A descriptor model (`Config/FunctionDescriptor/FieldDescriptor/FieldType`) +* A function that returns a JSON representation with the shape the checker wants (`unit`, `function`, optional `exception`, `fields`) + +### 4.1 What is immediately reusable + +**Highly reusable for the checker:** + +* Descriptor loading (serde) +* Field decoding logic (length/length_from, scale, enum_map) +* The “JSON output” idea for reporting and debugging + +### 4.2 Small design adjustment to make reuse clean (recommended) + +Your checker will naturally see **raw TCP payload bytes**. 
So the lowest-friction integration is: + +* Implement a tiny **MBAP parser** in the checker: + + * returns `(transaction_id, protocol_id, length, unit_id, function_code, pdu_data)` +* Then call your descriptor-based decoder on `pdu_data` (bytes **after** function code) + +Your doc shows the parser conceptually returns JSON with `fields` and supports request vs response descriptors, which maps perfectly to `direction`. + +**Suggested public entrypoint to expose from your parser module:** + +* `parse_with_descriptor(pdu_data: &[u8], unit: u8, function: u8, fields: &Vec<FieldDescriptor>) -> Result<serde_json::Value>` + +If it’s currently private, just make it `pub(crate)` or `pub` and reuse it. This avoids binding the checker to `sawp_modbus::Message` and keeps implementation simple. + +> Trade-off: best practice would be to return a typed struct + typed errors; easier to maintain long term but more refactor work. For your “don’t make it hard” requirement, keeping JSON output + simple error types is totally fine for the first version. + +### 4.3 How the checker chooses which descriptor to use + +* If `direction == c2s` → request descriptor +* If `direction == s2c` → response descriptor + This matches the intent of having `request` and `response` descriptor vectors in your model. + +--- + +## 5) Checker internal design (simple but extensible) + +### 5.1 Core data structures + +* `FlowKey { src_ip, src_port, dst_ip, dst_port, ip_version }` +* `PacketCtx { trace_id, event_id, pcap_index, ts_ns, direction, flow }` +* `DecodedModbus { transaction_id, protocol_id, length, unit_id, function_code, is_exception, exception_code?, pdu_data, parsed_fields_json? }` + +### 5.2 “Rules” model (optional, but keeps code tidy) + +Instead of huge if/else blocks, implement a few rules that return findings: + +* `RuleMbapValid` +* `RuleFunctionPduWellFormed` (basic length sanity) +* `RuleTxIdPairing` +* `RuleExpectedMatch` (only if sidecar has expected) + +If you don’t want a formal trait system initially, just implement these as functions that append to a `Vec`. + +### 5.3 Findings + severity + +Use a compact severity scale: + +* `Fatal`: cannot parse / cannot continue reliably +* `Error`: protocol invalid +* `Warn`: unusual but maybe acceptable +* `Info`: stats + +A finding should include: + +* `pcap_index`, `event_id`, `flow`, `severity`, `code`, `message` +* optional `observed` and `expected` snippets + +--- + +## 6) What the checker validates (MVP vs stricter) + +### MVP validations (recommended first milestone) + +1. PCAP + JSONL aligned +2. Parse Ethernet/IP/TCP and extract payload +3. MBAP: + + * payload length ≥ 7 + * length field consistency (basic) +4. PDU: + + * function code exists + * exception handling if `fc & 0x80 != 0` +5. Descriptor parse success (request/response based on direction) +6.
Transaction pairing: + + * every response matches an outstanding request by transaction_id/unit_id + * no duplicate outstanding txid unless you allow it + +### “Strict mode” additions (still reasonable) + +* enforce unit_id range (if you want) +* enforce function-code-specific invariants using parsed fields + + * e.g., `byte_count == 2 * quantity` for register reads/writes (if present in descriptor) +* timeouts: + + * response must arrive within configured window + +### Heavy features (avoid unless needed) + +* TCP reassembly and multi-ADU per segment +* checksum verification +* handling retransmits/out-of-order robustly + +--- + +## 7) Dependencies (crates) for the checker + +### Minimal set (keeps implementation easy) + +* **PCAP reading** + + * `pcap` (libpcap-backed; you already use it in your codebase) +* **Packet decoding** + + * `pnet_packet` (you already use `pnet` patterns) +* **Config + sidecar + report** + + * `serde`, `serde_json` +* **Errors + logging** + + * `anyhow` (fast to integrate) and/or `thiserror` (nicer structured errors) + * `tracing`, `tracing-subscriber` +* **Utilities** + + * `hashbrown` (optional; std HashMap is fine) + * `hex` (useful for debug/trailing bytes like your parser does) + +### If you want to reduce external requirements (optional alternative) + +* Replace `pcap` with `pcap-file` (pure Rust; no libpcap dependency) +* Replace `pnet` with `etherparse` (often simpler APIs) + +> Trade-off: “best practice” for portability is pure Rust (`pcap-file` + `etherparse`). +> “Best practice” for least effort *given your current code* is reusing `pcap` + `pnet`. + +--- + +## 8) Suggested project layout (simple) + +``` +checker/ + src/ + main.rs # CLI entry + config.rs # descriptor loading + meta.rs # JSONL reader structs + pcap_in.rs # pcap streaming + decode.rs # ethernet/ip/tcp extract payload + mbap.rs # Modbus/TCP MBAP parsing + modbus_desc.rs # reuse your parse_with_descriptor + types + state.rs # outstanding tx table + validate.rs # main validation pipeline + report.rs # report structs + JSON output +``` + +--- + +## 9) Practical implementation tips (to keep it from getting “hard”) + +1. **Enforce generator constraints**: + + * one ADU per TCP payload + * no splitting/coalescing + This keeps checker complexity low and makes failure reasons obvious. + +2. **Keep JSON output for parsed fields** at first: + + * You already have a clean JSON shape (`unit`, `function`, `fields`) + * Great for debugging mismatches with `expected.fields` + +3. **Add strictness as “modes”**: + + * `--mode=mvp | strict` + * or config file toggles + +4. **Fail-fast vs best-effort**: + + * For CI or batch filtering, fail-fast on `Fatal` is fine. + * For research/debugging, best-effort (continue and collect findings) is more useful. 
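
Tip 2 above is essentially a JSON-to-JSON comparison; a simplified standalone sketch of it (names are illustrative, the checker's own version lives in `check_expected`):

```rust
// Sketch: diff observed parsed-field JSON against `expected.fields` from the sidecar.
use serde_json::{json, Value};

fn field_mismatches(expected: &Value, observed: &Value) -> Vec<String> {
    let mut out = Vec::new();
    if let (Some(exp), Some(obs)) = (expected.as_object(), observed.as_object()) {
        for (key, want) in exp {
            match obs.get(key) {
                Some(got) if got == want => {}
                Some(got) => out.push(format!("{key}: expected {want}, observed {got}")),
                None => out.push(format!("{key}: missing in parsed output")),
            }
        }
    }
    out
}

fn main() {
    let expected = json!({"starting_address": 0, "quantity": 10});
    let observed = json!({"starting_address": 0, "quantity": 8});
    for m in field_mismatches(&expected, &observed) {
        println!("mismatch: {m}"); // prints: quantity: expected 10, observed 8
    }
}
```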
+ +--- + diff --git a/docs/examples/modbus.json b/docs/examples/modbus.json new file mode 100644 index 0000000..8cd9190 --- /dev/null +++ b/docs/examples/modbus.json @@ -0,0 +1,16 @@ +{ + "functions": [ + { + "function": 3, + "name": "read_holding_registers", + "request": [ + {"name":"starting_address","type":"u16"}, + {"name":"quantity","type":"u16"} + ], + "response": [ + {"name":"byte_count","type":"u8"}, + {"name":"registers","type":"bytes","length_from":"byte_count"} + ] + } + ] +} diff --git a/docs/examples/report.json b/docs/examples/report.json new file mode 100644 index 0000000..9967922 --- /dev/null +++ b/docs/examples/report.json @@ -0,0 +1,31 @@ +{ + "summary": { + "total_packets": 1, + "total_findings": 1, + "fatal": 0, + "error": 1, + "warn": 0, + "info": 0 + }, + "findings": [ + { + "pcap_index": 0, + "event_id": 1, + "severity": "error", + "code": "expected_field_mismatch", + "message": "Field mismatch for quantity", + "flow": { + "src_ip": "10.0.0.10", + "src_port": 51012, + "dst_ip": "10.0.0.20", + "dst_port": 502 + }, + "observed": { + "field": "quantity", + "observed": 1, + "expected": 2 + }, + "expected": null + } + ] +} diff --git a/docs/examples/trace.meta.jsonl b/docs/examples/trace.meta.jsonl new file mode 100644 index 0000000..c10cd5c --- /dev/null +++ b/docs/examples/trace.meta.jsonl @@ -0,0 +1 @@ +{"trace_id":"example-trace","event_id":1,"pcap_index":0,"ts_ns":1736451234567890000,"direction":"c2s","flow":{"src_ip":"10.0.0.10","src_port":51012,"dst_ip":"10.0.0.20","dst_port":502},"expected":{"modbus":{"transaction_id":513,"unit_id":1,"function_code":3},"fields":{"starting_address":0,"quantity":2}}} diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..8a407de --- /dev/null +++ b/src/config.rs @@ -0,0 +1,59 @@ +use std::collections::HashMap; +use std::fs; +use std::path::Path; + +use anyhow::{Context, Result}; +use serde::Deserialize; + +#[derive(Debug, Deserialize)] +pub struct Config { + pub functions: Vec<FunctionDescriptor>, +} + +impl Config { + pub fn load(path: &Path) -> Result<Self> { + let raw = fs::read_to_string(path) + .with_context(|| format!("Unable to read config file {}", path.display()))?; + let config: Config = serde_json::from_str(&raw) + .with_context(|| format!("Unable to parse config JSON {}", path.display()))?; + Ok(config) + } + + pub fn find_function(&self, function_code: u8) -> Option<&FunctionDescriptor> { + self.functions + .iter() + .find(|func| func.function == function_code) + } +} + +#[derive(Debug, Deserialize)] +pub struct FunctionDescriptor { + pub function: u8, + pub name: Option<String>, + #[serde(default)] + pub request: Vec<FieldDescriptor>, + #[serde(default)] + pub response: Vec<FieldDescriptor>, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct FieldDescriptor { + pub name: String, + #[serde(rename = "type")] + pub field_type: FieldType, + pub length: Option<usize>, + pub length_from: Option<String>, + pub scale: Option<f64>, + pub enum_map: Option<HashMap<String, serde_json::Value>>, +} + +#[derive(Debug, Deserialize, Clone, Copy)] +#[serde(rename_all = "snake_case")] +pub enum FieldType { + U8, + U16, + U32, + I16, + I32, + Bytes, +} diff --git a/src/decode.rs b/src/decode.rs new file mode 100644 index 0000000..3459f1e --- /dev/null +++ b/src/decode.rs @@ -0,0 +1,76 @@ +use std::net::IpAddr; + +use anyhow::{Context, Result}; +use pcap::Linktype; +use pnet_packet::ethernet::{EtherTypes, EthernetPacket}; +use pnet_packet::ip::IpNextHeaderProtocols; +use pnet_packet::ipv4::Ipv4Packet; +use pnet_packet::ipv6::Ipv6Packet; +use pnet_packet::tcp::TcpPacket; +use pnet_packet::Packet; + +#[derive(Debug)] +pub struct 
TcpPacketInfo { + pub src_ip: IpAddr, + pub dst_ip: IpAddr, + pub src_port: u16, + pub dst_port: u16, + pub payload: Vec<u8>, +} + +pub fn extract_tcp_payload(linktype: Linktype, data: &[u8]) -> Result<TcpPacketInfo> { + match linktype { + Linktype::ETHERNET => parse_ethernet(data), + Linktype::RAW => parse_raw_ip(data), + other => anyhow::bail!("Unsupported linktype: {other:?}"), + } +} + +fn parse_ethernet(data: &[u8]) -> Result<TcpPacketInfo> { + let eth = EthernetPacket::new(data).context("Unable to parse ethernet")?; + match eth.get_ethertype() { + EtherTypes::Ipv4 => parse_ipv4(eth.payload()), + EtherTypes::Ipv6 => parse_ipv6(eth.payload()), + other => anyhow::bail!("Unsupported ethertype: {other:?}"), + } +} + +fn parse_raw_ip(data: &[u8]) -> Result<TcpPacketInfo> { + // Dispatch on the IP version nibble; Ipv4Packet::new would also accept IPv6 bytes. + match data.first().map(|byte| byte >> 4) { + Some(4) => parse_ipv4(data), + Some(6) => parse_ipv6(data), + _ => anyhow::bail!("Unsupported raw IP packet"), + } +} + +fn parse_ipv4(data: &[u8]) -> Result<TcpPacketInfo> { + let packet = Ipv4Packet::new(data).context("Unable to parse IPv4")?; + if packet.get_next_level_protocol() != IpNextHeaderProtocols::Tcp { + anyhow::bail!("Not a TCP IPv4 packet"); + } + let tcp = TcpPacket::new(packet.payload()).context("Unable to parse TCP")?; + Ok(TcpPacketInfo { + src_ip: IpAddr::V4(packet.get_source()), + dst_ip: IpAddr::V4(packet.get_destination()), + src_port: tcp.get_source(), + dst_port: tcp.get_destination(), + payload: tcp.payload().to_vec(), + }) +} + +fn parse_ipv6(data: &[u8]) -> Result<TcpPacketInfo> { + let packet = Ipv6Packet::new(data).context("Unable to parse IPv6")?; + if packet.get_next_header() != IpNextHeaderProtocols::Tcp { + anyhow::bail!("Not a TCP IPv6 packet"); + } + let tcp = TcpPacket::new(packet.payload()).context("Unable to parse TCP")?; + Ok(TcpPacketInfo { + src_ip: IpAddr::V6(packet.get_source()), + dst_ip: IpAddr::V6(packet.get_destination()), + src_port: tcp.get_source(), + dst_port: tcp.get_destination(), + payload: tcp.payload().to_vec(), + }) +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..b8bd81a --- /dev/null +++ b/src/main.rs @@ -0,0 +1,110 @@ +mod config; +mod decode; +mod mbap; +mod meta; +mod modbus_desc; +mod report; +mod state; +mod validate; + +use std::env; +use std::path::PathBuf; + +use anyhow::{Context, Result}; + +#[derive(Debug)] +struct Args { + pcap_path: PathBuf, + meta_path: PathBuf, + config_path: PathBuf, + report_path: PathBuf, + port: u16, + mode: validate::Mode, + fail_fast: bool, +} + +fn usage() -> &'static str { + "Usage: checker --pcap <path> --meta <path> --config <path> \ +--report <path> [--port 502] [--mode mvp|strict] [--fail-fast]" +} + +fn parse_args() -> Result<Args> { + let mut args = env::args().skip(1); + let mut pcap_path = None; + let mut meta_path = None; + let mut config_path = None; + let mut report_path = None; + let mut port = 502u16; + let mut mode = validate::Mode::Mvp; + let mut fail_fast = false; + + while let Some(arg) = args.next() { + match arg.as_str() { + "--pcap" => pcap_path = args.next().map(PathBuf::from), + "--meta" => meta_path = args.next().map(PathBuf::from), + "--config" => config_path = args.next().map(PathBuf::from), + "--report" => report_path = args.next().map(PathBuf::from), + "--port" => { + let value = args.next().context("Missing value after --port")?; + port = value.parse::<u16>().context("Invalid --port value")?; + } + "--mode" => { + let value = args.next().context("Missing value after --mode")?; + mode = match value.as_str() { + "mvp" => validate::Mode::Mvp, + "strict" => validate::Mode::Strict, 
+ _ => anyhow::bail!("Unknown --mode {value}"), + }; + } + "--fail-fast" => fail_fast = true, + "--help" | "-h" => { + println!("{}", usage()); + std::process::exit(0); + } + other => anyhow::bail!("Unknown argument: {other}"), + } + } + + let pcap_path = pcap_path.context("Missing --pcap")?; + let meta_path = meta_path.context("Missing --meta")?; + let config_path = config_path.context("Missing --config")?; + let report_path = report_path.unwrap_or_else(|| PathBuf::from("report.json")); + + Ok(Args { + pcap_path, + meta_path, + config_path, + report_path, + port, + mode, + fail_fast, + }) +} + +fn main() -> Result<()> { + let args = parse_args().context(usage())?; + let config = config::Config::load(&args.config_path) + .with_context(|| format!("Failed to load config at {}", args.config_path.display()))?; + + let report = validate::run(validate::ValidateArgs { + pcap_path: args.pcap_path, + meta_path: args.meta_path, + report_path: args.report_path, + port: args.port, + mode: args.mode, + fail_fast: args.fail_fast, + config, + })?; + + println!( + "Processed {} packets, findings: {} (fatal: {}, error: {}, warn: {}, info: {})", + report.summary.total_packets, + report.summary.total_findings, + report.summary.fatal, + report.summary.error, + report.summary.warn, + report.summary.info + ); + + Ok(()) +} diff --git a/src/mbap.rs b/src/mbap.rs new file mode 100644 index 0000000..d99df98 --- /dev/null +++ b/src/mbap.rs @@ -0,0 +1,45 @@ +use anyhow::{Context, Result}; + +#[derive(Debug)] +pub struct MbapParsed { + pub transaction_id: u16, + pub protocol_id: u16, + pub length: u16, + pub unit_id: u8, + pub function_code: u8, + pub pdu_data: Vec<u8>, + pub length_mismatch: bool, +} + +pub fn parse_mbap(payload: &[u8]) -> Result<MbapParsed> { + if payload.len() < 7 { + anyhow::bail!("Payload shorter than MBAP header"); + } + + let transaction_id = u16::from_be_bytes([payload[0], payload[1]]); + let protocol_id = u16::from_be_bytes([payload[2], payload[3]]); + let length = u16::from_be_bytes([payload[4], payload[5]]); + let unit_id = payload[6]; + + let total_len = 6usize + length as usize; + let length_mismatch = payload.len() != total_len; + + if payload.len() < 8 { + anyhow::bail!("Payload missing function code"); + } + let function_code = payload[7]; + let pdu_data = payload + .get(8..) + .context("Missing PDU data")? 
+ .to_vec(); + + Ok(MbapParsed { + transaction_id, + protocol_id, + length, + unit_id, + function_code, + pdu_data, + length_mismatch, + }) +} diff --git a/src/meta.rs b/src/meta.rs new file mode 100644 index 0000000..11262f6 --- /dev/null +++ b/src/meta.rs @@ -0,0 +1,75 @@ +use std::net::IpAddr; + +use anyhow::{Context, Result}; +use serde::Deserialize; + +#[derive(Debug, Deserialize)] +pub struct MetaLine { + pub trace_id: Option<String>, + pub event_id: Option<u64>, + pub pcap_index: Option<u64>, + pub ts_ns: Option<u64>, + pub direction: Direction, + pub flow: Flow, + pub expected: Option<Expected>, +} + +#[derive(Debug, Deserialize, Clone, Copy)] +#[serde(rename_all = "lowercase")] +pub enum Direction { + C2s, + S2c, +} + +#[derive(Debug, Deserialize)] +pub struct Flow { + pub src_ip: String, + pub src_port: u16, + pub dst_ip: String, + pub dst_port: u16, +} + +impl Flow { + pub fn to_key(&self) -> Result<FlowKey> { + let src_ip = self.src_ip.parse::<IpAddr>().context("Invalid src_ip")?; + let dst_ip = self.dst_ip.parse::<IpAddr>().context("Invalid dst_ip")?; + Ok(FlowKey { + src_ip, + src_port: self.src_port, + dst_ip, + dst_port: self.dst_port, + }) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct FlowKey { + pub src_ip: IpAddr, + pub src_port: u16, + pub dst_ip: IpAddr, + pub dst_port: u16, +} + +impl FlowKey { + pub fn reversed(&self) -> Self { + FlowKey { + src_ip: self.dst_ip, + src_port: self.dst_port, + dst_ip: self.src_ip, + dst_port: self.src_port, + } + } +} + +#[derive(Debug, Deserialize)] +pub struct Expected { + pub modbus: Option<ExpectedModbus>, + pub fields: Option<serde_json::Value>, +} + +#[derive(Debug, Deserialize)] +pub struct ExpectedModbus { + pub transaction_id: Option<u16>, + pub unit_id: Option<u8>, + pub function_code: Option<u8>, +} diff --git a/src/modbus_desc.rs b/src/modbus_desc.rs new file mode 100644 index 0000000..683aade --- /dev/null +++ b/src/modbus_desc.rs @@ -0,0 +1,120 @@ +use anyhow::{Context, Result}; +use serde_json::Value; + +use crate::config::{FieldDescriptor, FieldType}; + +pub fn parse_with_descriptor(pdu_data: &[u8], fields: &[FieldDescriptor]) -> Result<Value> { + let mut offset = 0usize; + let mut values = serde_json::Map::new(); + + for field in fields { + let length = resolve_length(field, &values)?; + let value = match field.field_type { + FieldType::U8 => { + let bytes = read_bytes(pdu_data, &mut offset, 1)?; + Value::from(bytes[0]) + } + FieldType::U16 => { + let bytes = read_bytes(pdu_data, &mut offset, 2)?; + Value::from(u16::from_be_bytes([bytes[0], bytes[1]])) + } + FieldType::U32 => { + let bytes = read_bytes(pdu_data, &mut offset, 4)?; + Value::from(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])) + } + FieldType::I16 => { + let bytes = read_bytes(pdu_data, &mut offset, 2)?; + Value::from(i16::from_be_bytes([bytes[0], bytes[1]])) + } + FieldType::I32 => { + let bytes = read_bytes(pdu_data, &mut offset, 4)?; + Value::from(i32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])) + } + FieldType::Bytes => { + let len = length.context("bytes field missing length")?; + let bytes = read_bytes(pdu_data, &mut offset, len)?; + Value::from(bytes.iter().map(|b| Value::from(*b)).collect::<Vec<Value>>()) + } + }; + + let value = apply_scale(&value, field.scale); + let value = apply_enum_map(&value, field.enum_map.as_ref()); + values.insert(field.name.clone(), value); + } + + Ok(Value::Object(values)) +} + +fn resolve_length( + field: &FieldDescriptor, + values: &serde_json::Map<String, Value>, +) -> Result<Option<usize>> { + if let Some(length) = field.length { + return Ok(Some(length)); + } + if let Some(source) = &field.length_from { + let 
value = values + .get(source) + .context(format!("length_from refers to missing field {source}"))?; + let len = match value { + Value::Number(num) => num + .as_u64() + .context(format!("length_from {source} not unsigned"))?, + _ => anyhow::bail!("length_from {source} not numeric"), + }; + return Ok(Some(len as usize)); + } + Ok(None) +} + +fn read_bytes<'a>(data: &'a [u8], offset: &mut usize, len: usize) -> Result<&'a [u8]> { + let end = *offset + len; + if end > data.len() { + anyhow::bail!("PDU too short for field"); + } + let bytes = &data[*offset..end]; + *offset = end; + Ok(bytes) +} + +fn apply_scale(value: &Value, scale: Option<f64>) -> Value { + let Some(scale) = scale else { + return value.clone(); + }; + match value { + Value::Number(num) => { + if let Some(i) = num.as_i64() { + Value::from((i as f64) * scale) + } else if let Some(u) = num.as_u64() { + Value::from((u as f64) * scale) + } else if let Some(f) = num.as_f64() { + Value::from(f * scale) + } else { + value.clone() + } + } + _ => value.clone(), + } +} + +fn apply_enum_map( + value: &Value, + enum_map: Option<&std::collections::HashMap<String, Value>>, +) -> Value { + let Some(enum_map) = enum_map else { + return value.clone(); + }; + if let Value::Number(num) = value { + let key = if let Some(i) = num.as_i64() { + i.to_string() + } else if let Some(u) = num.as_u64() { + u.to_string() + } else { + return value.clone(); + }; + if let Some(mapped) = enum_map.get(&key) { + return mapped.clone(); + } + } + value.clone() +} diff --git a/src/report.rs b/src/report.rs new file mode 100644 index 0000000..d414adc --- /dev/null +++ b/src/report.rs @@ -0,0 +1,46 @@ +use serde::Serialize; + +#[derive(Debug, Serialize)] +pub struct Report { + pub summary: Summary, + pub findings: Vec<Finding>, +} + +#[derive(Debug, Serialize)] +pub struct Summary { + pub total_packets: u64, + pub total_findings: u64, + pub fatal: u64, + pub error: u64, + pub warn: u64, + pub info: u64, +} + +#[derive(Debug, Serialize, Clone, Copy, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum Severity { + Fatal, + Error, + Warn, + Info, +} + +#[derive(Debug, Serialize)] +pub struct Finding { + pub pcap_index: u64, + pub event_id: Option<u64>, + pub severity: Severity, + pub code: String, + pub message: String, + pub flow: Option<FlowSummary>, + pub observed: Option<serde_json::Value>, + pub expected: Option<serde_json::Value>, +} + +#[derive(Debug, Serialize)] +pub struct FlowSummary { + pub src_ip: String, + pub src_port: u16, + pub dst_ip: String, + pub dst_port: u16, +} diff --git a/src/state.rs b/src/state.rs new file mode 100644 index 0000000..2be9398 --- /dev/null +++ b/src/state.rs @@ -0,0 +1,59 @@ +use std::collections::HashMap; + +use crate::meta::{Direction, FlowKey}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct FlowPair { + pub client_ip: std::net::IpAddr, + pub client_port: u16, + pub server_ip: std::net::IpAddr, + pub server_port: u16, +} + +impl FlowPair { + pub fn from_flow(direction: Direction, flow: FlowKey) -> Self { + match direction { + Direction::C2s => FlowPair { + client_ip: flow.src_ip, + client_port: flow.src_port, + server_ip: flow.dst_ip, + server_port: flow.dst_port, + }, + Direction::S2c => FlowPair { + client_ip: flow.dst_ip, + client_port: flow.dst_port, + server_ip: flow.src_ip, + server_port: flow.src_port, + }, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct OutstandingKey { + pub flow: FlowPair, + pub transaction_id: u16, + pub unit_id: u8, +} + +#[derive(Debug, Clone)] +pub struct PendingRequest { + pub pcap_index: u64, + pub event_id: Option<u64>, + pub 
ts_ns: Option<u64>, +} + +#[derive(Default)] +pub struct StateTable { + outstanding: HashMap<OutstandingKey, PendingRequest>, +} + +impl StateTable { + pub fn insert(&mut self, key: OutstandingKey, pending: PendingRequest) -> Option<PendingRequest> { + self.outstanding.insert(key, pending) + } + + pub fn remove(&mut self, key: &OutstandingKey) -> Option<PendingRequest> { + self.outstanding.remove(key) + } +} diff --git a/src/validate.rs b/src/validate.rs new file mode 100644 index 0000000..8673b78 --- /dev/null +++ b/src/validate.rs @@ -0,0 +1,535 @@ +use std::fs::File; +use std::io::{BufRead, BufReader, Write}; +use std::path::PathBuf; + +use anyhow::{Context, Result}; +use pcap::Capture; + +use crate::config::Config; +use crate::decode; +use crate::mbap; +use crate::meta::{Direction, MetaLine}; +use crate::modbus_desc; +use crate::report::{Finding, FlowSummary, Report, Severity, Summary}; +use crate::state::{FlowPair, OutstandingKey, PendingRequest, StateTable}; + +#[derive(Debug, Clone, Copy)] +pub enum Mode { + Mvp, + Strict, +} + +pub struct ValidateArgs { + pub pcap_path: PathBuf, + pub meta_path: PathBuf, + pub report_path: PathBuf, + pub port: u16, + pub mode: Mode, + pub fail_fast: bool, + pub config: Config, +} + +pub fn run(args: ValidateArgs) -> Result<Report> { + let meta_file = File::open(&args.meta_path) + .with_context(|| format!("Failed to open {}", args.meta_path.display()))?; + let mut meta_lines = BufReader::new(meta_file).lines(); + + let mut cap = Capture::from_file(&args.pcap_path) + .with_context(|| format!("Failed to open {}", args.pcap_path.display()))?; + let linktype = cap.get_datalink(); + + let mut findings = Vec::new(); + let mut state = StateTable::default(); + let mut packet_index = 0u64; + let mut total_packets = 0u64; + + while let Ok(packet) = cap.next_packet() { + total_packets += 1; + let line = meta_lines + .next() + .transpose() + .context("Failed reading metadata line")?; + if line.is_none() { + findings.push(fatal( + packet_index, + None, + "meta_alignment", + "Missing metadata line for packet", + None, + None, + )); + break; + } + let line = line.unwrap(); + let meta: MetaLine = serde_json::from_str(&line) + .with_context(|| format!("Invalid JSONL line at packet {packet_index}"))?; + process_packet( + &args, + &meta, + packet_index, + packet.data, + linktype, + &mut state, + &mut findings, + )?; + + if args.fail_fast && findings.iter().any(|f| f.severity == Severity::Fatal) { + break; + } + packet_index += 1; + } + + if meta_lines.next().is_some() { + findings.push(fatal( + packet_index, + None, + "meta_alignment", + "Metadata has extra lines beyond pcap", + None, + None, + )); + } + + let summary = summarize(total_packets, &findings); + let report = Report { summary, findings }; + write_report(&args.report_path, &report)?; + Ok(report) +} + +fn process_packet( + args: &ValidateArgs, + meta: &MetaLine, + packet_index: u64, + data: &[u8], + linktype: pcap::Linktype, + state: &mut StateTable, + findings: &mut Vec<Finding>, +) -> Result<()> { + let _flow = match meta.flow.to_key() { + Ok(flow) => flow, + Err(err) => { + findings.push(error( + packet_index, + meta.event_id, + "flow_parse", + format!("Flow parse error: {err}"), + None, + None, + )); + return Ok(()); + } + }; + + let decoded = match decode::extract_tcp_payload(linktype, data) { + Ok(decoded) => decoded, + Err(err) => { + findings.push(error( + packet_index, + meta.event_id, + "packet_decode", + format!("Decode error: {err}"), + Some(flow_summary(meta)), + None, + )); + return Ok(()); + } + }; + + if decoded.src_port != args.port && decoded.dst_port != args.port { 
findings.push(warn( + packet_index, + meta.event_id, + "port_mismatch", + format!( + "Packet does not use expected Modbus port {}", + args.port + ), + Some(flow_summary(meta)), + None, + )); + } + + if decoded.payload.is_empty() { + findings.push(error( + packet_index, + meta.event_id, + "empty_payload", + "TCP payload is empty", + Some(flow_summary(meta)), + None, + )); + return Ok(()); + } + + let mbap = match mbap::parse_mbap(&decoded.payload) { + Ok(parsed) => parsed, + Err(err) => { + findings.push(error( + packet_index, + meta.event_id, + "mbap_parse", + format!("MBAP parse error: {err}"), + Some(flow_summary(meta)), + None, + )); + return Ok(()); + } + }; + + if mbap.protocol_id != 0 { + findings.push(error( + packet_index, + meta.event_id, + "mbap_protocol", + format!("Protocol id is {}, expected 0", mbap.protocol_id), + Some(flow_summary(meta)), + None, + )); + } + + if mbap.length_mismatch { + let observed = serde_json::json!({ "payload_len": decoded.payload.len(), "mbap_length": mbap.length }); + findings.push(warn( + packet_index, + meta.event_id, + "mbap_length", + "MBAP length does not match payload length", + Some(flow_summary(meta)), + Some(observed), + )); + if matches!(args.mode, Mode::Strict) { + return Ok(()); + } + } + + let base_function = mbap.function_code & 0x7f; + let is_exception = mbap.function_code & 0x80 != 0; + + let parsed_fields = if is_exception { + None + } else if let Some(func) = args.config.find_function(base_function) { + let fields = match meta.direction { + Direction::C2s => &func.request, + Direction::S2c => &func.response, + }; + match modbus_desc::parse_with_descriptor(&mbap.pdu_data, fields) { + Ok(value) => Some(value), + Err(err) => { + findings.push(error( + packet_index, + meta.event_id, + "descriptor_parse", + format!("Descriptor parse error: {err}"), + Some(flow_summary(meta)), + None, + )); + None + } + } + } else { + findings.push(warn( + packet_index, + meta.event_id, + "descriptor_missing", + format!("No descriptor for function {}", base_function), + Some(flow_summary(meta)), + None, + )); + None + }; + + update_state( + meta, + packet_index, + mbap.transaction_id, + mbap.unit_id, + state, + findings, + ); + + if let Some(expected) = &meta.expected { + check_expected(meta, packet_index, &mbap, &parsed_fields, expected, findings); + } + + if is_exception && mbap.pdu_data.is_empty() { + findings.push(error( + packet_index, + meta.event_id, + "exception_code_missing", + "Exception response missing exception code", + Some(flow_summary(meta)), + None, + )); + } + + Ok(()) +} + +fn update_state( + meta: &MetaLine, + packet_index: u64, + transaction_id: u16, + unit_id: u8, + state: &mut StateTable, + findings: &mut Vec<Finding>, +) { + let flow_key = match meta.flow.to_key() { + Ok(flow) => flow, + Err(_) => return, + }; + let flow = FlowPair::from_flow(meta.direction, flow_key); + let key = OutstandingKey { + flow, + transaction_id, + unit_id, + }; + + match meta.direction { + Direction::C2s => { + if state.insert( + key, + PendingRequest { + pcap_index: packet_index, + event_id: meta.event_id, + ts_ns: meta.ts_ns, + }, + ) + .is_some() + { + findings.push(warn( + packet_index, + meta.event_id, + "duplicate_txid", + "Duplicate outstanding transaction id", + Some(flow_summary(meta)), + None, + )); + } + } + Direction::S2c => { + if state.remove(&key).is_none() { + findings.push(error( + packet_index, + meta.event_id, + "unmatched_response", + "Response without matching request", + Some(flow_summary(meta)), + None, + )); + } + } + } +} + +fn 
check_expected( + meta: &MetaLine, + packet_index: u64, + mbap: &mbap::MbapParsed, + parsed_fields: &Option<serde_json::Value>, + expected: &crate::meta::Expected, + findings: &mut Vec<Finding>, +) { + if let Some(modbus) = &expected.modbus { + if let Some(expected_tx) = modbus.transaction_id { + if expected_tx != mbap.transaction_id { + findings.push(error( + packet_index, + meta.event_id, + "expected_txid", + "Transaction id mismatch", + Some(flow_summary(meta)), + Some(serde_json::json!({ + "observed": mbap.transaction_id, + "expected": expected_tx + })), + )); + } + } + if let Some(expected_unit) = modbus.unit_id { + if expected_unit != mbap.unit_id { + findings.push(error( + packet_index, + meta.event_id, + "expected_unit", + "Unit id mismatch", + Some(flow_summary(meta)), + Some(serde_json::json!({ + "observed": mbap.unit_id, + "expected": expected_unit + })), + )); + } + } + if let Some(expected_fc) = modbus.function_code { + let observed_fc = mbap.function_code & 0x7f; + if expected_fc != observed_fc { + findings.push(error( + packet_index, + meta.event_id, + "expected_function", + "Function code mismatch", + Some(flow_summary(meta)), + Some(serde_json::json!({ + "observed": observed_fc, + "expected": expected_fc + })), + )); + } + } + } + + if let Some(expected_fields) = &expected.fields { + if let Some(observed_fields) = parsed_fields { + let expected_obj = expected_fields.as_object(); + let observed_obj = observed_fields.as_object(); + if let (Some(expected_obj), Some(observed_obj)) = (expected_obj, observed_obj) { + for (key, expected_value) in expected_obj { + match observed_obj.get(key) { + Some(observed_value) if values_equal(expected_value, observed_value) => {} + Some(observed_value) => findings.push(error( + packet_index, + meta.event_id, + "expected_field_mismatch", + format!("Field mismatch for {key}"), + Some(flow_summary(meta)), + Some(serde_json::json!({ + "field": key, + "observed": observed_value, + "expected": expected_value + })), + )), + None => findings.push(error( + packet_index, + meta.event_id, + "expected_field_missing", + format!("Expected field missing: {key}"), + Some(flow_summary(meta)), + Some(serde_json::json!({ + "field": key, + "expected": expected_value + })), + )), + } + } + } + } else { + findings.push(warn( + packet_index, + meta.event_id, + "expected_fields_unparsed", + "Expected fields present but parsing failed", + Some(flow_summary(meta)), + None, + )); + } + } +} + +fn values_equal(expected: &serde_json::Value, observed: &serde_json::Value) -> bool { + if expected == observed { + return true; + } + match (expected, observed) { + (serde_json::Value::Number(a), serde_json::Value::Number(b)) => { + a.as_f64().zip(b.as_f64()).map(|(a, b)| (a - b).abs() < f64::EPSILON).unwrap_or(false) + } + _ => false, + } +} + +fn summarize(total_packets: u64, findings: &[Finding]) -> Summary { + let mut summary = Summary { + total_packets, + total_findings: findings.len() as u64, + fatal: 0, + error: 0, + warn: 0, + info: 0, + }; + for finding in findings { + match finding.severity { + Severity::Fatal => summary.fatal += 1, + Severity::Error => summary.error += 1, + Severity::Warn => summary.warn += 1, + Severity::Info => summary.info += 1, + } + } + summary +} + +fn write_report(path: &PathBuf, report: &Report) -> Result<()> { + let mut file = File::create(path) + .with_context(|| format!("Failed to create report {}", path.display()))?; + let data = serde_json::to_vec_pretty(report)?; + file.write_all(&data)?; + Ok(()) +} + +fn flow_summary(meta: &MetaLine) -> FlowSummary { 
FlowSummary { + src_ip: meta.flow.src_ip.clone(), + src_port: meta.flow.src_port, + dst_ip: meta.flow.dst_ip.clone(), + dst_port: meta.flow.dst_port, + } +} + +fn fatal( + pcap_index: u64, + event_id: Option<u64>, + code: &str, + message: impl Into<String>, + flow: Option<FlowSummary>, + observed: Option<serde_json::Value>, +) -> Finding { + Finding { + pcap_index, + event_id, + severity: Severity::Fatal, + code: code.to_string(), + message: message.into(), + flow, + observed, + expected: None, + } +} + +fn error( + pcap_index: u64, + event_id: Option<u64>, + code: &str, + message: impl Into<String>, + flow: Option<FlowSummary>, + observed: Option<serde_json::Value>, +) -> Finding { + Finding { + pcap_index, + event_id, + severity: Severity::Error, + code: code.to_string(), + message: message.into(), + flow, + observed, + expected: None, + } +} + +fn warn( + pcap_index: u64, + event_id: Option<u64>, + code: &str, + message: impl Into<String>, + flow: Option<FlowSummary>, + observed: Option<serde_json::Value>, +) -> Finding { + Finding { + pcap_index, + event_id, + severity: Severity::Warn, + code: code.to_string(), + message: message.into(), + flow, + observed, + expected: None, + } +}