Implemented checker as described in internal-docs/notes4coding/checker_design.md

This commit is contained in:
DaZuo0122
2026-01-15 19:07:26 +08:00
commit c1c36cdf56
19 changed files with 2172 additions and 0 deletions

2
.cargo/config.toml Normal file
View File

@@ -0,0 +1,2 @@
[build]
rustflags = ["-L", "C:/npcap-sdk-1.15/Lib/x64"]

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/target

371
Cargo.lock generated Normal file
View File

@@ -0,0 +1,371 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cc"
version = "1.2.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "checker"
version = "0.1.0"
dependencies = [
"anyhow",
"pcap",
"pnet_packet",
"serde",
"serde_json",
]
[[package]]
name = "errno"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1"
dependencies = [
"errno-dragonfly",
"libc",
"winapi",
]
[[package]]
name = "errno-dragonfly"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "find-msvc-tools"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41"
[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "itoa"
version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "libc"
version = "0.2.180"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
[[package]]
name = "libloading"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
dependencies = [
"cfg-if",
"windows-link",
]
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "no-std-net"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43794a0ace135be66a25d3ae77d41b91615fb68ae937f904090203e81f755b65"
[[package]]
name = "pcap"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2eecc2ddc671ec563b5b39f846556aade68a65d1afb14d8fe6b30b0457d75"
dependencies = [
"bitflags",
"errno",
"libc",
"libloading",
"pkg-config",
"regex",
"windows-sys",
]
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "pnet_base"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffc190d4067df16af3aba49b3b74c469e611cad6314676eaf1157f31aa0fb2f7"
dependencies = [
"no-std-net",
]
[[package]]
name = "pnet_macros"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13325ac86ee1a80a480b0bc8e3d30c25d133616112bb16e86f712dcf8a71c863"
dependencies = [
"proc-macro2",
"quote",
"regex",
"syn",
]
[[package]]
name = "pnet_macros_support"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eed67a952585d509dd0003049b1fc56b982ac665c8299b124b90ea2bdb3134ab"
dependencies = [
"pnet_base",
]
[[package]]
name = "pnet_packet"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c96ebadfab635fcc23036ba30a7d33a80c39e8461b8bd7dc7bb186acb96560f"
dependencies = [
"glob",
"pnet_base",
"pnet_macros",
"pnet_macros_support",
]
[[package]]
name = "proc-macro2"
version = "1.0.105"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [
"itoa",
"memchr",
"serde",
"serde_core",
"zmij",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "syn"
version = "2.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-sys"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
[[package]]
name = "windows_i686_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
[[package]]
name = "windows_i686_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
[[package]]
name = "windows_x86_64_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
[[package]]
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
[[package]]
name = "zmij"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd8f3f50b848df28f887acb68e41201b5aea6bc8a8dacc00fb40635ff9a72fea"

11
Cargo.toml Normal file
View File

@@ -0,0 +1,11 @@
[package]
name = "checker"
version = "0.1.0"
edition = "2024"
[dependencies]
anyhow = "1"
pcap = "2"
pnet_packet = "0.35"
serde = { version = "1", features = ["derive"] }
serde_json = "1"

69
README.md Normal file
View File

@@ -0,0 +1,69 @@
# Checker
Modbus/TCP traffic checker that validates generated PCAP/PCAPNG traces against
Modbus rules, request/response pairing, and optional expected fields supplied
via JSONL sidecar metadata.
## What It Does
- Parses Ethernet/RAW PCAP packets, extracts TCP payloads
- Validates MBAP header and basic Modbus/TCP invariants
- Parses PDU fields using descriptor JSON (request/response)
- Tracks outstanding requests and flags unmatched responses
- Compares observed values with optional expected fields in JSONL
- Emits a JSON report with findings and a summary
## Build
```bash
cargo build
```
## Run
```bash
cargo run -- \
--pcap trace.pcapng \
--meta trace.meta.jsonl \
--config modbus.json \
--report report.json \
--port 502 \
--mode mvp
```
Sample CLI with the example files:
```bash
cargo run -- \
--pcap trace.pcapng \
--meta docs/examples/trace.meta.jsonl \
--config docs/examples/modbus.json \
--report report.json
```
## CLI Options
- `--pcap <path>`: PCAP or PCAPNG input file
- `--meta <path>`: JSONL sidecar metadata (1 line per packet)
- `--config <path>`: Modbus descriptor JSON
- `--report <path>`: Report JSON output (default: `report.json`)
- `--port <u16>`: Modbus/TCP port (default: `502`)
- `--mode mvp|strict`: Validation mode (default: `mvp`)
- `--fail-fast`: Stop on first fatal error
## Files and Formats
See `docs/api.md` for the full schema of:
- `trace.meta.jsonl` lines
- Modbus descriptor JSON
- `report.json` output
Example files live in `docs/examples/`:
- `docs/examples/trace.meta.jsonl`
- `docs/examples/modbus.json`
- `docs/examples/report.json`
## Notes
- This checker assumes one Modbus ADU per TCP payload.
- TCP reassembly and checksum validation are not implemented.

188
docs/api.md Normal file
View File

@@ -0,0 +1,188 @@
# API Documentation
This document describes the input/output formats and the checker program API.
## CLI API
Command:
```bash
checker --pcap <trace.pcapng> --meta <trace.meta.jsonl> --config <modbus.json> \
--report <report.json> [--port 502] [--mode mvp|strict] [--fail-fast]
```
Exit behavior:
- Returns a non-zero exit code only on process-level errors (I/O, parse failures).
- Validation findings are written to the report file.
## JSONL Sidecar (`trace.meta.jsonl`)
Each line corresponds to one packet, in the same order as the PCAP.
```json
{
"trace_id": "c7f1...",
"event_id": 42,
"pcap_index": 42,
"ts_ns": 1736451234567890123,
"direction": "c2s",
"flow": {
"src_ip": "10.0.0.10",
"src_port": 51012,
"dst_ip": "10.0.0.20",
"dst_port": 502
},
"expected": {
"modbus": {
"transaction_id": 513,
"unit_id": 1,
"function_code": 3
},
"fields": {
"starting_address": 0,
"quantity": 10
}
}
}
```
Example: `docs/examples/trace.meta.jsonl`
Fields:
- `trace_id` (string, optional): Trace identifier.
- `event_id` (integer, optional): Event identifier from generator.
- `pcap_index` (integer, optional): Packet index for reference.
- `ts_ns` (integer, optional): Timestamp in nanoseconds.
- `direction` (string, required): `c2s` (request) or `s2c` (response).
- `flow` (object, required): Flow metadata used for request/response tracking.
- `expected` (object, optional): Expected Modbus header and/or field values.
`expected.modbus`:
- `transaction_id` (u16, optional)
- `unit_id` (u8, optional)
- `function_code` (u8, optional)
`expected.fields`:
- Arbitrary JSON object whose keys match descriptor field names.
- Values are compared against parsed output.
## Modbus Descriptor JSON (`modbus.json`)
Top-level:
```json
{
"functions": [
{
"function": 3,
"name": "read_holding_registers",
"request": [
{"name":"starting_address","type":"u16"},
{"name":"quantity","type":"u16"}
],
"response": [
{"name":"byte_count","type":"u8"},
{"name":"registers","type":"bytes","length_from":"byte_count"}
]
}
]
}
```
Example: `docs/examples/modbus.json`
Function descriptor:
- `function` (u8, required): Function code.
- `name` (string, optional): Human-readable function name.
- `request` (array, optional): Field list for client-to-server PDUs.
- `response` (array, optional): Field list for server-to-client PDUs.
Field descriptor:
- `name` (string, required): Field name used in output JSON.
- `type` (string, required): `u8`, `u16`, `u32`, `i16`, `i32`, `bytes`.
- `length` (integer, optional): Fixed length for `bytes`.
- `length_from` (string, optional): Name of a previous numeric field.
- `scale` (number, optional): Multiply numeric values by this scale.
- `enum_map` (object, optional): Map numeric strings to JSON values.
Notes:
- `length_from` uses values parsed earlier in the same descriptor.
- `bytes` output is an array of integers.
## Report JSON (`report.json`)
Structure:
```json
{
"summary": {
"total_packets": 1000,
"total_findings": 8,
"fatal": 1,
"error": 4,
"warn": 3,
"info": 0
},
"findings": [
{
"pcap_index": 7,
"event_id": 42,
"severity": "error",
"code": "mbap_protocol",
"message": "Protocol id is 1, expected 0",
"flow": {
"src_ip": "10.0.0.10",
"src_port": 51012,
"dst_ip": "10.0.0.20",
"dst_port": 502
},
"observed": {"payload_len": 42, "mbap_length": 10},
"expected": null
}
]
}
```
Example: `docs/examples/report.json`
Summary fields:
- `total_packets`: Total packets processed.
- `total_findings`: Total findings emitted.
- `fatal`, `error`, `warn`, `info`: Counts by severity.
Finding fields:
- `pcap_index` (u64): Index in PCAP stream.
- `event_id` (u64, optional): Event identifier from metadata.
- `severity` (string): `fatal`, `error`, `warn`, `info`.
- `code` (string): Short machine-friendly code.
- `message` (string): Human-readable description.
- `flow` (object, optional): Source/destination addresses.
- `observed` (JSON, optional): Observed values for comparison.
- `expected` (JSON, optional): Expected values for comparison.
## Validation Modes
- `mvp`: Core checks only.
- `strict`: Treat MBAP length mismatch as a stopping condition.
## Internal Module API (Rust)
Primary modules:
- `config`: Descriptor types and loader for JSON config.
- `meta`: JSONL metadata structs.
- `decode`: PCAP packet decode to TCP payload.
- `mbap`: MBAP parsing utilities.
- `modbus_desc`: Descriptor-based field parsing.
- `state`: Outstanding request tracking.
- `validate`: End-to-end validation pipeline.
- `report`: Report data structures.

357
docs/design.md Normal file
View File

@@ -0,0 +1,357 @@
## Conversation summary (so far)
* You're building a **Modbus/TCP traffic generation system for security use** with this pipeline:
**feature extraction → hybrid diffusion generates features → generator turns features into raw packets → checker validates traffic**.
* I proposed a checker that validates traffic at **multiple layers**:
1. frame/IP/TCP parseability,
2. optional TCP conversation sanity,
3. Modbus/TCP structural + semantic validity (MBAP + PDU),
4. optional “feature-fidelity” checks against generator sidecar metadata.
* I recommended a **generator → checker contract** that is easy to debug and automate:
**PCAP/PCAPNG + JSONL sidecar** (1 metadata line per packet/event).
* You then shared your existing **descriptor-driven Modbus parser** (Rust) and a short doc.
The public API/type model is: `Config → FunctionDescriptor → FieldDescriptor(FieldType, length/length_from/scale/enum_map)`
and `parse_sawp_message(...)` returns JSON containing `unit`, `function`, optional `exception`, and `fields`.
* I said: the **descriptor engine is very reusable** for the checker, but the checker's entry point should ideally work on **raw Modbus/TCP bytes** (MBAP+PDU), not require `sawp_modbus::Message`.
Also: for checker ergonomics, prefer structured error types over `Result<Value, String>` (but you can keep JSON output for the MVP).
---
# Implementation document: Modbus/TCP Checker (Rust)
This document is a practical, “not too complex” plan to implement the checker while still following good practices where they don't add much difficulty.
## 1) What the checker does (goals / non-goals)
### Goals
The checker verifies that generator output is:
1. **Parsable** as TCP/IP traffic,
2. **Modbus/TCP-valid** at the application level (MBAP + PDU rules),
3. **Consistent** in request/response pairing (Transaction ID matching),
4. Optionally **matches the expected features** (function code, unit id, quantities, payload size, timing tolerances, etc.).
### Non-goals (to keep it simple)
To avoid turning this into a full Wireshark, we deliberately **do not** implement:
* full TCP stream reassembly (segments split/merged),
* full TCP state machine with retransmits/out-of-order handling,
* IP/TCP checksum verification by default.
Instead, we enforce a **generator constraint**: **one Modbus ADU per TCP payload** (no segmentation, no coalescing). This single constraint dramatically reduces checker complexity and is realistic for generated traces.
> Trade-off: best practice would handle segmentation/coalescing and reassembly; difficulty rises a lot. The “one ADU per TCP payload” rule is the best complexity/benefit lever for this project.
---
## 2) Generator output contract (what the checker consumes)
### Recommended output (MVP-friendly and debuggable)
**(A) PCAP or PCAPNG file**
* `trace.pcapng` (or `.pcap`) containing the raw generated packets
**(B) Sidecar JSONL metadata file**
* `trace.meta.jsonl` where each line describes the corresponding packet/event (same order)
This is the easiest way to:
* reproduce failures,
* correlate packet index with expected semantic fields,
* produce actionable reports.
### JSONL schema (minimal + optional)
**Minimal fields (recommended):**
* `trace_id` (string/uuid)
* `event_id` (monotonic integer)
* `pcap_index` (or implicit by line number)
* `ts_ns` timestamp
* `direction` (`"c2s"` or `"s2c"`)
* `flow` (src/dst ip/port)
**Optional `expected` block (for feature-fidelity checks):**
* `expected.modbus.transaction_id`, `unit_id`, `function_code`, and `expected.fields` (names matching your descriptor JSON).
Example line:
```json
{
"trace_id": "c7f1...",
"event_id": 42,
"pcap_index": 42,
"ts_ns": 1736451234567890123,
"direction": "c2s",
"flow": {"src_ip":"10.0.0.10","src_port":51012,"dst_ip":"10.0.0.20","dst_port":502},
"expected": {
"modbus": {"transaction_id": 513, "unit_id": 1, "function_code": 3},
"fields": {"starting_address": 0, "quantity": 10}
}
}
```
> Trade-off: best practice is “self-describing PCAP” (pcapng custom blocks, or embedding metadata); difficulty higher. JSONL sidecar is dead simple and works well.
---
## 3) Workflow (starting from generator output)
### Step 0 — Load inputs
1. Read `trace.meta.jsonl` into a lightweight iterator (don't load it all if the trace is huge).
2. Open `trace.pcapng` and stream packets in order.
### Step 1 — Align packets and metadata
For each packet index `i`:
* read packet `i` from PCAP
* read metadata line `i` from JSONL
If mismatch (missing line/packet), record a **Fatal** alignment error and stop (or continue with “best effort”, your call).
### Step 2 — Decode packet and extract TCP payload
Decode:
* link layer (Ethernet/SLL/RAW depending on PCAP linktype),
* IPv4/IPv6,
* TCP,
* extract TCP payload bytes.
Minimal checks:
* packet parses,
* TCP payload length > 0 when direction indicates Modbus message,
* port 502 is present on either side (configurable if you generate non-502).
### Step 3 — Parse Modbus/TCP ADU
Assuming payload contains exactly one ADU:
* parse MBAP (7 bytes) + PDU
* validate basic MBAP invariants
* parse function code and PDU data
* decide request vs response based on `direction`
* parse PDU data using descriptor map (your reusable part)
### Step 4 — Stateful consistency checks
Maintain per-flow state:
* request/response pairing by `(transaction_id, unit_id)`
* outstanding request table with timeout/window limits
### Step 5 — Feature-fidelity checks (optional)
If `expected` exists in JSONL:
* compare decoded modbus header + parsed fields with expected values
* compare sizes and (optionally) timing with tolerances
### Step 6 — Emit report
Output:
* `report.json` with summary + per-finding samples (packet indices, flow key, reason, extracted fields)
* optional `report.txt` for quick reading
---
## 4) Reusing your existing parser (what to keep, what to adjust)
You already have:
* A descriptor model (`Config/FunctionDescriptor/FieldDescriptor/FieldType`)
* A function that returns a JSON representation with the shape the checker wants (`unit`, `function`, optional `exception`, `fields`)
### 4.1 What is immediately reusable
**Highly reusable for the checker:**
* Descriptor loading (serde)
* Field decoding logic (length/length_from, scale, enum_map)
* The “JSON output” idea for reporting and debugging
### 4.2 Small design adjustment to make reuse clean (recommended)
Your checker will naturally see **raw TCP payload bytes**. So the lowest-friction integration is:
* Implement a tiny **MBAP parser** in the checker:
* returns `(transaction_id, protocol_id, length, unit_id, function_code, pdu_data)`
* Then call your descriptor-based decoder on `pdu_data` (bytes **after** function code)
Your doc shows the parser conceptually returns JSON with `fields` and supports request vs response descriptors, which maps perfectly to `direction`.
**Suggested public entrypoint to expose from your parser module:**
* `parse_with_descriptor(pdu_data: &[u8], unit: u8, function: u8, fields: &Vec<FieldDescriptor>) -> Result<Value, String>`
If it's currently private, just make it `pub(crate)` or `pub` and reuse it. This avoids binding the checker to `sawp_modbus::Message` and keeps implementation simple.
> Trade-off: best practice would be to return a typed struct + typed errors; easier to maintain long term but more refactor work. For your “don't make it hard” requirement, keeping JSON output + simple error types is totally fine for the first version.
### 4.3 How the checker chooses which descriptor to use
* If `direction == c2s` → request descriptor
* If `direction == s2c` → response descriptor
This matches the intent of having `request` and `response` descriptor vectors in your model.
---
## 5) Checker internal design (simple but extensible)
### 5.1 Core data structures
* `FlowKey { src_ip, src_port, dst_ip, dst_port, ip_version }`
* `PacketCtx { trace_id, event_id, pcap_index, ts_ns, direction, flow }`
* `DecodedModbus { transaction_id, protocol_id, length, unit_id, function_code, is_exception, exception_code?, pdu_data, parsed_fields_json? }`
### 5.2 “Rules” model (optional, but keeps code tidy)
Instead of huge if/else blocks, implement a few rules that return findings:
* `RuleMbapValid`
* `RuleFunctionPduWellFormed` (basic length sanity)
* `RuleTxIdPairing`
* `RuleExpectedMatch` (only if sidecar has expected)
If you don't want a formal trait system initially, just implement these as functions that append to a `Vec<Finding>`.
### 5.3 Findings + severity
Use a compact severity scale:
* `Fatal`: cannot parse / cannot continue reliably
* `Error`: protocol invalid
* `Warn`: unusual but maybe acceptable
* `Info`: stats
A finding should include:
* `pcap_index`, `event_id`, `flow`, `severity`, `code`, `message`
* optional `observed` and `expected` snippets
---
## 6) What the checker validates (MVP vs stricter)
### MVP validations (recommended first milestone)
1. PCAP + JSONL aligned
2. Parse Ethernet/IP/TCP and extract payload
3. MBAP:
* payload length ≥ 7
* length field consistency (basic)
4. PDU:
* function code exists
* exception handling if `fc & 0x80 != 0`
5. Descriptor parse success (request/response based on direction)
6. Transaction pairing:
* every response matches an outstanding request by transaction_id/unit_id
* no duplicate outstanding txid unless you allow it
### “Strict mode” additions (still reasonable)
* enforce unit_id range (if you want)
* enforce function-code-specific invariants using parsed fields
* e.g., `byte_count == 2 * quantity` for register reads/writes (if present in descriptor)
* timeouts:
* response must arrive within configured window
### Heavy features (avoid unless needed)
* TCP reassembly and multi-ADU per segment
* checksum verification
* handling retransmits/out-of-order robustly
---
## 7) Dependencies (crates) for the checker
### Minimal set (keeps implementation easy)
* **PCAP reading**
* `pcap` (libpcap-backed; you already use it in your codebase)
* **Packet decoding**
* `pnet_packet` (you already use `pnet` patterns)
* **Config + sidecar + report**
* `serde`, `serde_json`
* **Errors + logging**
* `anyhow` (fast to integrate) and/or `thiserror` (nicer structured errors)
* `tracing`, `tracing-subscriber`
* **Utilities**
* `hashbrown` (optional; std HashMap is fine)
* `hex` (useful for debug/trailing bytes like your parser does)
### If you want to reduce external requirements (optional alternative)
* Replace `pcap` with `pcap-file` (pure Rust; no libpcap dependency)
* Replace `pnet` with `etherparse` (often simpler APIs)
> Trade-off: “best practice” for portability is pure Rust (`pcap-file` + `etherparse`).
> “Best practice” for least effort *given your current code* is reusing `pcap` + `pnet`.
---
## 8) Suggested project layout (simple)
```
checker/
src/
main.rs # CLI entry
config.rs # descriptor loading
meta.rs # JSONL reader structs
pcap_in.rs # pcap streaming
decode.rs # ethernet/ip/tcp extract payload
mbap.rs # Modbus/TCP MBAP parsing
modbus_desc.rs # reuse your parse_with_descriptor + types
state.rs # outstanding tx table
validate.rs # main validation pipeline
report.rs # report structs + JSON output
```
---
## 9) Practical implementation tips (to keep it from getting “hard”)
1. **Enforce generator constraints**:
* one ADU per TCP payload
* no splitting/coalescing
This keeps checker complexity low and makes failure reasons obvious.
2. **Keep JSON output for parsed fields** at first:
* You already have a clean JSON shape (`unit`, `function`, `fields`)
* Great for debugging mismatches with `expected.fields`
3. **Add strictness as “modes”**:
* `--mode=mvp | strict`
* or config file toggles
4. **Fail-fast vs best-effort**:
* For CI or batch filtering, fail-fast on `Fatal` is fine.
* For research/debugging, best-effort (continue and collect findings) is more useful.
---

16
docs/examples/modbus.json Normal file
View File

@@ -0,0 +1,16 @@
{
"functions": [
{
"function": 3,
"name": "read_holding_registers",
"request": [
{"name":"starting_address","type":"u16"},
{"name":"quantity","type":"u16"}
],
"response": [
{"name":"byte_count","type":"u8"},
{"name":"registers","type":"bytes","length_from":"byte_count"}
]
}
]
}

31
docs/examples/report.json Normal file
View File

@@ -0,0 +1,31 @@
{
"summary": {
"total_packets": 1,
"total_findings": 1,
"fatal": 0,
"error": 1,
"warn": 0,
"info": 0
},
"findings": [
{
"pcap_index": 0,
"event_id": 1,
"severity": "error",
"code": "expected_field_mismatch",
"message": "Field mismatch for quantity",
"flow": {
"src_ip": "10.0.0.10",
"src_port": 51012,
"dst_ip": "10.0.0.20",
"dst_port": 502
},
"observed": {
"field": "quantity",
"observed": 1,
"expected": 2
},
"expected": null
}
]
}

View File

@@ -0,0 +1 @@
{"trace_id":"example-trace","event_id":1,"pcap_index":0,"ts_ns":1736451234567890000,"direction":"c2s","flow":{"src_ip":"10.0.0.10","src_port":51012,"dst_ip":"10.0.0.20","dst_port":502},"expected":{"modbus":{"transaction_id":513,"unit_id":1,"function_code":3},"fields":{"starting_address":0,"quantity":2}}}

59
src/config.rs Normal file
View File

@@ -0,0 +1,59 @@
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use anyhow::{Context, Result};
use serde::Deserialize;
/// Top-level Modbus descriptor configuration, deserialized from the config JSON.
#[derive(Debug, Deserialize)]
pub struct Config {
/// Function descriptors taken from the `functions` array of the config JSON.
pub functions: Vec<FunctionDescriptor>,
}
impl Config {
    /// Reads the descriptor file at `path` and deserializes it from JSON.
    ///
    /// # Errors
    /// Returns an error (with the path attached as context) when the file
    /// cannot be read or when its contents do not deserialize into a `Config`.
    pub fn load(path: &Path) -> Result<Self> {
        let text = fs::read_to_string(path)
            .with_context(|| format!("Unable to read config file {}", path.display()))?;
        serde_json::from_str::<Config>(&text)
            .with_context(|| format!("Unable to parse config JSON {}", path.display()))
    }

    /// Returns the first descriptor whose `function` equals `function_code`,
    /// or `None` when no descriptor in `functions` matches.
    pub fn find_function(&self, function_code: u8) -> Option<&FunctionDescriptor> {
        for descriptor in &self.functions {
            if descriptor.function == function_code {
                return Some(descriptor);
            }
        }
        None
    }
}
/// Descriptor for a single Modbus function code and its request/response field layouts.
#[derive(Debug, Deserialize)]
pub struct FunctionDescriptor {
/// Function code this descriptor applies to.
pub function: u8,
/// Optional human-readable function name.
pub name: Option<String>,
/// Field list for request PDUs; an absent key deserializes to an empty list.
#[serde(default)]
pub request: Vec<FieldDescriptor>,
/// Field list for response PDUs; an absent key deserializes to an empty list.
#[serde(default)]
pub response: Vec<FieldDescriptor>,
}
/// Description of one field inside a request or response PDU.
///
/// The field semantics below follow docs/api.md; the parser that consumes
/// them is not part of this file.
#[derive(Debug, Deserialize, Clone)]
pub struct FieldDescriptor {
/// Field name (docs/api.md: used as the key in output JSON).
pub name: String,
/// Wire type of the field; read from the JSON key `type`.
#[serde(rename = "type")]
pub field_type: FieldType,
/// Optional fixed length (docs/api.md: applies to `bytes` fields).
pub length: Option<usize>,
/// Optional name of a previously parsed numeric field that supplies the length.
pub length_from: Option<String>,
/// Optional scale factor (docs/api.md: numeric values are multiplied by it).
pub scale: Option<f64>,
/// Optional mapping from numeric strings to JSON values.
pub enum_map: Option<HashMap<String, serde_json::Value>>,
}
/// Field types accepted in descriptor JSON.
///
/// With `rename_all = "snake_case"` the variants are spelled `u8`, `u16`,
/// `u32`, `i16`, `i32` and `bytes` in the config file.
#[derive(Debug, Deserialize, Clone, Copy)]
#[serde(rename_all = "snake_case")]
pub enum FieldType {
U8,
U16,
U32,
I16,
I32,
Bytes,
}

76
src/decode.rs Normal file
View File

@@ -0,0 +1,76 @@
use std::net::IpAddr;
use anyhow::{Context, Result};
use pcap::Linktype;
use pnet_packet::ethernet::{EtherTypes, EthernetPacket};
use pnet_packet::ip::IpNextHeaderProtocols;
use pnet_packet::ipv4::Ipv4Packet;
use pnet_packet::ipv6::Ipv6Packet;
use pnet_packet::tcp::TcpPacket;
use pnet_packet::Packet;
/// Addressing information and payload extracted from one decoded TCP packet.
#[derive(Debug)]
pub struct TcpPacketInfo {
/// Source address taken from the IP header.
pub src_ip: IpAddr,
/// Destination address taken from the IP header.
pub dst_ip: IpAddr,
/// Source port taken from the TCP header.
pub src_port: u16,
/// Destination port taken from the TCP header.
pub dst_port: u16,
/// Owned copy of the TCP payload bytes.
pub payload: Vec<u8>,
}
/// Decodes one captured packet down to its TCP payload.
///
/// `linktype` selects the outermost parser: Ethernet frames and raw IP
/// packets are supported.
///
/// # Errors
/// Fails for any other link type, or when one of the nested parsers fails.
pub fn extract_tcp_payload(linktype: Linktype, data: &[u8]) -> Result<TcpPacketInfo> {
    if linktype == Linktype::ETHERNET {
        return parse_ethernet(data);
    }
    if linktype == Linktype::RAW {
        return parse_raw_ip(data);
    }
    anyhow::bail!("Unsupported linktype: {linktype:?}")
}
/// Parses an Ethernet frame and forwards its payload to the IPv4 or IPv6 parser.
///
/// # Errors
/// Fails when the frame cannot be parsed, when the EtherType is neither IPv4
/// nor IPv6, or when the selected IP parser fails.
fn parse_ethernet(data: &[u8]) -> Result<TcpPacketInfo> {
    let frame = EthernetPacket::new(data).context("Unable to parse ethernet")?;
    let ethertype = frame.get_ethertype();
    if ethertype == EtherTypes::Ipv4 {
        parse_ipv4(frame.payload())
    } else if ethertype == EtherTypes::Ipv6 {
        parse_ipv6(frame.payload())
    } else {
        anyhow::bail!("Unsupported ethertype: {ethertype:?}")
    }
}
/// Parses a raw IP packet (no link-layer header) by dispatching on the IP version.
///
/// The version is read from the high nibble of the first byte. Probing with
/// `Ipv4Packet::new` is not sufficient: it only checks that the buffer is long
/// enough for an IPv4 header, so any IPv6 packet would be accepted as IPv4 and
/// then mis-decoded.
///
/// # Errors
/// Fails when the buffer is empty, when the version is neither 4 nor 6, or when
/// the version-specific parser rejects the packet.
fn parse_raw_ip(data: &[u8]) -> Result<TcpPacketInfo> {
    match data.first().map(|first| first >> 4) {
        Some(4) => parse_ipv4(data),
        Some(6) => parse_ipv6(data),
        _ => anyhow::bail!("Unsupported raw IP packet"),
    }
}
/// Parses an IPv4 packet carrying TCP and returns its addressing and TCP payload.
///
/// # Errors
/// Fails when the buffer is not a parsable IPv4 packet, when the next-level
/// protocol is not TCP, or when the TCP header cannot be parsed.
fn parse_ipv4(data: &[u8]) -> Result<TcpPacketInfo> {
    let ip = Ipv4Packet::new(data).context("Unable to parse IPv4")?;
    let carries_tcp = ip.get_next_level_protocol() == IpNextHeaderProtocols::Tcp;
    if !carries_tcp {
        anyhow::bail!("Not a TCP IPv4 packet");
    }
    let segment = TcpPacket::new(ip.payload()).context("Unable to parse TCP")?;
    let src_ip = IpAddr::V4(ip.get_source());
    let dst_ip = IpAddr::V4(ip.get_destination());
    let info = TcpPacketInfo {
        src_ip,
        dst_ip,
        src_port: segment.get_source(),
        dst_port: segment.get_destination(),
        payload: segment.payload().to_vec(),
    };
    Ok(info)
}
/// Parses an IPv6 packet carrying TCP and returns its addressing and TCP payload.
///
/// Only the `next_header` value of the fixed IPv6 header is inspected.
///
/// # Errors
/// Fails when the buffer is not a parsable IPv6 packet, when the next header is
/// not TCP, or when the TCP header cannot be parsed.
fn parse_ipv6(data: &[u8]) -> Result<TcpPacketInfo> {
    let ip = Ipv6Packet::new(data).context("Unable to parse IPv6")?;
    let carries_tcp = ip.get_next_header() == IpNextHeaderProtocols::Tcp;
    if !carries_tcp {
        anyhow::bail!("Not a TCP IPv6 packet");
    }
    let segment = TcpPacket::new(ip.payload()).context("Unable to parse TCP")?;
    let src_ip = IpAddr::V6(ip.get_source());
    let dst_ip = IpAddr::V6(ip.get_destination());
    let info = TcpPacketInfo {
        src_ip,
        dst_ip,
        src_port: segment.get_source(),
        dst_port: segment.get_destination(),
        payload: segment.payload().to_vec(),
    };
    Ok(info)
}

110
src/main.rs Normal file
View File

@@ -0,0 +1,110 @@
mod config;
mod decode;
mod mbap;
mod meta;
mod modbus_desc;
mod report;
mod state;
mod validate;
use std::env;
use std::path::PathBuf;
use anyhow::{Context, Result};
/// Parsed command-line arguments of the checker binary.
#[derive(Debug)]
struct Args {
/// Value of `--pcap` (required).
pcap_path: PathBuf,
/// Value of `--meta` (required).
meta_path: PathBuf,
/// Value of `--config` (required).
config_path: PathBuf,
/// Value of `--report`; `report.json` when the flag is not given.
report_path: PathBuf,
/// Value of `--port`; 502 when the flag is not given.
port: u16,
/// Value of `--mode`; `Mvp` when the flag is not given.
mode: validate::Mode,
/// Whether `--fail-fast` was passed.
fail_fast: bool,
}
/// Returns the one-line command-line usage text.
fn usage() -> &'static str {
    concat!(
        "Usage: checker --pcap <trace.pcapng> --meta <trace.meta.jsonl> --config <modbus.json> ",
        "--report <report.json> [--port 502] [--mode mvp|strict] [--fail-fast]"
    )
}
/// Parses the process arguments (skipping the program name) into [`Args`].
///
/// `--help` / `-h` prints the usage text and exits the process with status 0.
///
/// # Errors
/// Fails on an unknown argument, a missing or invalid `--port` / `--mode` value,
/// or when `--pcap`, `--meta` or `--config` did not receive a value.
fn parse_args() -> Result<Args> {
    let mut argv = env::args().skip(1);

    let mut pcap: Option<PathBuf> = None;
    let mut meta: Option<PathBuf> = None;
    let mut config: Option<PathBuf> = None;
    let mut report: Option<PathBuf> = None;
    let mut port: u16 = 502;
    let mut mode = validate::Mode::Mvp;
    let mut fail_fast = false;

    while let Some(flag) = argv.next() {
        match flag.as_str() {
            "--help" | "-h" => {
                println!("{}", usage());
                std::process::exit(0);
            }
            "--fail-fast" => fail_fast = true,
            // Path flags: a flag without a following value leaves the slot empty,
            // which is reported later as a missing argument (or falls back to the default).
            "--pcap" => pcap = argv.next().map(PathBuf::from),
            "--meta" => meta = argv.next().map(PathBuf::from),
            "--config" => config = argv.next().map(PathBuf::from),
            "--report" => report = argv.next().map(PathBuf::from),
            "--port" => {
                let raw = argv.next().context("Missing value after --port")?;
                port = raw.parse::<u16>().context("Invalid --port value")?;
            }
            "--mode" => {
                let value = argv.next().context("Missing value after --mode")?;
                mode = match value.as_str() {
                    "mvp" => validate::Mode::Mvp,
                    "strict" => validate::Mode::Strict,
                    _ => anyhow::bail!("Unknown --mode {value}"),
                };
            }
            other => anyhow::bail!("Unknown argument: {other}"),
        }
    }

    // Fields are evaluated in order, so the first missing required flag is reported.
    Ok(Args {
        pcap_path: pcap.context("Missing --pcap")?,
        meta_path: meta.context("Missing --meta")?,
        config_path: config.context("Missing --config")?,
        report_path: report.unwrap_or_else(|| PathBuf::from("report.json")),
        port,
        mode,
        fail_fast,
    })
}
fn main() -> Result<()> {
let args = parse_args().context(usage())?;
let config = config::Config::load(&args.config_path)
.with_context(|| format!("Failed to load config at {}", args.config_path.display()))?;
let report = validate::run(validate::ValidateArgs {
pcap_path: args.pcap_path,
meta_path: args.meta_path,
report_path: args.report_path,
port: args.port,
mode: args.mode,
fail_fast: args.fail_fast,
config,
})?;
println!(
"Processed {} packets, findings: {} (fatal: {}, error: {}, warn: {}, info: {})",
report.summary.total_packets,
report.summary.total_findings,
report.summary.fatal,
report.summary.error,
report.summary.warn,
report.summary.info
);
Ok(())
}

45
src/mbap.rs Normal file
View File

@@ -0,0 +1,45 @@
use anyhow::{Context, Result};
/// Result of splitting a TCP payload into its MBAP header fields, function code and PDU data.
#[derive(Debug)]
pub struct MbapParsed {
/// Big-endian u16 from payload bytes 0..2.
pub transaction_id: u16,
/// Big-endian u16 from payload bytes 2..4.
pub protocol_id: u16,
/// Big-endian u16 from payload bytes 4..6 (the header's length field).
pub length: u16,
/// Payload byte 6.
pub unit_id: u8,
/// Payload byte 7.
pub function_code: u8,
/// Every payload byte after the function code (offset 8 onwards).
pub pdu_data: Vec<u8>,
/// True when the payload length differs from `6 + length`.
pub length_mismatch: bool,
}
pub fn parse_mbap(payload: &[u8]) -> Result<MbapParsed> {
if payload.len() < 7 {
anyhow::bail!("Payload shorter than MBAP header");
}
let transaction_id = u16::from_be_bytes([payload[0], payload[1]]);
let protocol_id = u16::from_be_bytes([payload[2], payload[3]]);
let length = u16::from_be_bytes([payload[4], payload[5]]);
let unit_id = payload[6];
let total_len = 6usize + length as usize;
let length_mismatch = payload.len() != total_len;
if payload.len() < 8 {
anyhow::bail!("Payload missing function code");
}
let function_code = payload[7];
let pdu_data = payload
.get(8..)
.context("Missing PDU data")?
.to_vec();
Ok(MbapParsed {
transaction_id,
protocol_id,
length,
unit_id,
function_code,
pdu_data,
length_mismatch,
})
}

75
src/meta.rs Normal file
View File

@@ -0,0 +1,75 @@
use std::net::IpAddr;
use anyhow::{Context, Result};
use serde::Deserialize;
/// One line of the JSONL metadata file; the validator reads one line per captured packet.
#[derive(Debug, Deserialize)]
pub struct MetaLine {
/// Optional trace identifier (semantics defined by the metadata producer — TODO confirm).
pub trace_id: Option<String>,
/// Optional event identifier; copied into findings raised for this packet.
pub event_id: Option<u64>,
/// Optional packet index recorded by the producer (presumably the position in the pcap — TODO confirm).
pub pcap_index: Option<u64>,
/// Optional timestamp (presumably nanoseconds, per the name — TODO confirm); stored with pending requests.
pub ts_ns: Option<u64>,
/// Direction of the packet; selects request vs. response handling.
pub direction: Direction,
/// Source and destination endpoints of the packet as recorded in the metadata.
pub flow: Flow,
/// Optional expectations to compare the decoded packet against.
pub expected: Option<Expected>,
}
/// Packet direction; spelled in lowercase in the JSON (`c2s`, `s2c`).
#[derive(Debug, Deserialize, Clone, Copy)]
#[serde(rename_all = "lowercase")]
pub enum Direction {
/// The flow's source is the client and its destination the server.
C2s,
/// The flow's source is the server and its destination the client.
S2c,
}
/// Endpoints of a packet as recorded in the metadata, with IP addresses kept as text.
#[derive(Debug, Deserialize)]
pub struct Flow {
/// Source IP address as text; parsed by `Flow::to_key`.
pub src_ip: String,
/// Source port.
pub src_port: u16,
/// Destination IP address as text; parsed by `Flow::to_key`.
pub dst_ip: String,
/// Destination port.
pub dst_port: u16,
}
impl Flow {
    /// Converts the textual endpoints into a [`FlowKey`] with parsed IP addresses.
    ///
    /// # Errors
    /// Fails when `src_ip` or `dst_ip` is not a valid IP address; the source
    /// address is checked first.
    pub fn to_key(&self) -> Result<FlowKey> {
        let source: IpAddr = self.src_ip.parse().context("Invalid src_ip")?;
        let destination: IpAddr = self.dst_ip.parse().context("Invalid dst_ip")?;
        let key = FlowKey {
            src_ip: source,
            src_port: self.src_port,
            dst_ip: destination,
            dst_port: self.dst_port,
        };
        Ok(key)
    }
}
/// Directional flow identifier with parsed addresses; usable as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct FlowKey {
    /// Source IP address.
    pub src_ip: IpAddr,
    /// Source port.
    pub src_port: u16,
    /// Destination IP address.
    pub dst_ip: IpAddr,
    /// Destination port.
    pub dst_port: u16,
}

impl FlowKey {
    /// Returns the key for the opposite direction: source and destination swapped.
    pub fn reversed(&self) -> Self {
        let FlowKey {
            src_ip,
            src_port,
            dst_ip,
            dst_port,
        } = *self;
        Self {
            src_ip: dst_ip,
            src_port: dst_port,
            dst_ip: src_ip,
            dst_port: src_port,
        }
    }
}
/// Expectations attached to a metadata line.
#[derive(Debug, Deserialize)]
pub struct Expected {
/// Expected header-level values (transaction id, unit id, function code).
pub modbus: Option<ExpectedModbus>,
/// Expected decoded fields; compared key by key when both this value and the decoded fields are JSON objects.
pub fields: Option<serde_json::Value>,
}
/// Expected header-level values; each one is only checked when present.
#[derive(Debug, Deserialize)]
pub struct ExpectedModbus {
/// Expected transaction id.
pub transaction_id: Option<u16>,
/// Expected unit id.
pub unit_id: Option<u8>,
/// Expected function code; compared against the observed code with its top bit (0x80) cleared.
pub function_code: Option<u8>,
}

120
src/modbus_desc.rs Normal file
View File

@@ -0,0 +1,120 @@
use anyhow::{Context, Result};
use serde_json::Value;
use crate::config::{FieldDescriptor, FieldType};
/// Decodes `pdu_data` field by field according to `fields` and returns a JSON object
/// keyed by field name.
///
/// Fields are consumed in order from the start of `pdu_data`; multi-byte integers are
/// big-endian. After decoding, the field's `scale` and then its `enum_map` are applied.
/// Bytes left over after the last field are ignored.
///
/// # Errors
/// Fails when a length reference cannot be resolved, when a `bytes` field has no
/// length, or when the data is too short for a field.
pub fn parse_with_descriptor(pdu_data: &[u8], fields: &[FieldDescriptor]) -> Result<Value> {
    let mut cursor = 0usize;
    let mut decoded = serde_json::Map::new();

    for descriptor in fields {
        // Resolved for every field (not only `bytes`) so a broken reference is always reported.
        let declared_len = resolve_length(descriptor, &decoded)?;

        let raw = match descriptor.field_type {
            FieldType::U8 => {
                let b = read_bytes(pdu_data, &mut cursor, 1)?;
                Value::from(b[0])
            }
            FieldType::U16 => {
                let b = read_bytes(pdu_data, &mut cursor, 2)?;
                Value::from(u16::from_be_bytes([b[0], b[1]]))
            }
            FieldType::I16 => {
                let b = read_bytes(pdu_data, &mut cursor, 2)?;
                Value::from(i16::from_be_bytes([b[0], b[1]]))
            }
            FieldType::U32 => {
                let b = read_bytes(pdu_data, &mut cursor, 4)?;
                Value::from(u32::from_be_bytes([b[0], b[1], b[2], b[3]]))
            }
            FieldType::I32 => {
                let b = read_bytes(pdu_data, &mut cursor, 4)?;
                Value::from(i32::from_be_bytes([b[0], b[1], b[2], b[3]]))
            }
            FieldType::Bytes => {
                let len = declared_len.context("bytes field missing length")?;
                let chunk = read_bytes(pdu_data, &mut cursor, len)?;
                Value::Array(chunk.iter().copied().map(Value::from).collect())
            }
        };

        let scaled = apply_scale(&raw, descriptor.scale);
        let mapped = apply_enum_map(&scaled, descriptor.enum_map.as_ref());
        decoded.insert(descriptor.name.clone(), mapped);
    }

    Ok(Value::Object(decoded))
}
fn resolve_length(
field: &FieldDescriptor,
values: &serde_json::Map<String, Value>,
) -> Result<Option<usize>> {
if let Some(length) = field.length {
return Ok(Some(length));
}
if let Some(source) = &field.length_from {
let value = values
.get(source)
.context(format!("length_from refers to missing field {source}"))?;
let len = match value {
Value::Number(num) => num
.as_u64()
.context(format!("length_from {source} not unsigned"))?,
_ => anyhow::bail!("length_from {source} not numeric"),
};
return Ok(Some(len as usize));
}
Ok(None)
}
fn read_bytes<'a>(data: &'a [u8], offset: &mut usize, len: usize) -> Result<&'a [u8]> {
let end = *offset + len;
if end > data.len() {
anyhow::bail!("PDU too short for field");
}
let bytes = &data[*offset..end];
*offset = end;
Ok(bytes)
}
/// Multiplies a numeric JSON value by `scale`, returning the product as a float value.
///
/// Returns a clone of `value` when `scale` is `None`, when `value` is not a number,
/// or when the number cannot be read as `i64`, `u64` or `f64`.
fn apply_scale(value: &Value, scale: Option<f64>) -> Value {
    if let (Some(factor), Value::Number(num)) = (scale, value) {
        // Try the integer representations first, then fall back to the float one.
        let widened = num
            .as_i64()
            .map(|signed| signed as f64)
            .or_else(|| num.as_u64().map(|unsigned| unsigned as f64))
            .or_else(|| num.as_f64());
        if let Some(number) = widened {
            return Value::from(number * factor);
        }
    }
    value.clone()
}
/// Replaces an integer JSON value by its entry in `enum_map`, looked up by the
/// integer's decimal string.
///
/// Returns a clone of `value` when there is no map, when `value` is not an integer
/// number, or when the map has no entry for it.
fn apply_enum_map(
    value: &Value,
    enum_map: Option<&std::collections::HashMap<String, Value>>,
) -> Value {
    let lookup = || -> Option<Value> {
        let table = enum_map?;
        let Value::Number(num) = value else {
            return None;
        };
        let key = num
            .as_i64()
            .map(|signed| signed.to_string())
            .or_else(|| num.as_u64().map(|unsigned| unsigned.to_string()))?;
        table.get(&key).cloned()
    };
    lookup().unwrap_or_else(|| value.clone())
}

46
src/report.rs Normal file
View File

@@ -0,0 +1,46 @@
use serde::Serialize;
/// Complete validation result: aggregate counters plus the individual findings.
#[derive(Debug, Serialize)]
pub struct Report {
/// Aggregate counters over `findings`.
pub summary: Summary,
/// All findings, in the order they were raised.
pub findings: Vec<Finding>,
}
/// Aggregate counters of a validation run.
#[derive(Debug, Serialize)]
pub struct Summary {
/// Number of packets read from the capture.
pub total_packets: u64,
/// Total number of findings of any severity.
pub total_findings: u64,
/// Number of findings with severity `Fatal`.
pub fatal: u64,
/// Number of findings with severity `Error`.
pub error: u64,
/// Number of findings with severity `Warn`.
pub warn: u64,
/// Number of findings with severity `Info`.
pub info: u64,
}
/// Severity of a finding; serialized in lowercase (`fatal`, `error`, `warn`, `info`).
#[derive(Debug, Serialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
Fatal,
Error,
Warn,
Info,
}
/// A single issue detected while validating a packet or the capture/metadata alignment.
#[derive(Debug, Serialize)]
pub struct Finding {
/// Zero-based index of the packet (as counted by the validator) the finding refers to.
pub pcap_index: u64,
/// Event id copied from the packet's metadata line, when available.
pub event_id: Option<u64>,
/// Severity of the finding.
pub severity: Severity,
/// Short machine-readable identifier of the finding kind (e.g. `mbap_parse`).
pub code: String,
/// Human-readable description.
pub message: String,
/// Flow endpoints from the metadata, when available.
pub flow: Option<FlowSummary>,
/// Optional JSON detail describing what was observed.
pub observed: Option<serde_json::Value>,
/// Optional JSON detail describing what was expected.
pub expected: Option<serde_json::Value>,
}
/// Flow endpoints as reported in a finding; addresses are kept as text.
#[derive(Debug, Serialize)]
pub struct FlowSummary {
/// Source IP address as text.
pub src_ip: String,
/// Source port.
pub src_port: u16,
/// Destination IP address as text.
pub dst_ip: String,
/// Destination port.
pub dst_port: u16,
}

59
src/state.rs Normal file
View File

@@ -0,0 +1,59 @@
use std::collections::HashMap;
use crate::meta::{Direction, FlowKey};
/// Direction-independent identity of a connection, expressed as client and server endpoints.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct FlowPair {
/// IP address of the client endpoint.
pub client_ip: std::net::IpAddr,
/// Port of the client endpoint.
pub client_port: u16,
/// IP address of the server endpoint.
pub server_ip: std::net::IpAddr,
/// Port of the server endpoint.
pub server_port: u16,
}
impl FlowPair {
    /// Builds the client/server view of a directional flow.
    ///
    /// For `C2s` the flow's source is the client; for `S2c` the flow's source is
    /// the server. Both directions of one connection therefore map to the same pair.
    pub fn from_flow(direction: Direction, flow: FlowKey) -> Self {
        let source = (flow.src_ip, flow.src_port);
        let destination = (flow.dst_ip, flow.dst_port);
        let (client, server) = match direction {
            Direction::C2s => (source, destination),
            Direction::S2c => (destination, source),
        };
        FlowPair {
            client_ip: client.0,
            client_port: client.1,
            server_ip: server.0,
            server_port: server.1,
        }
    }
}
/// Key identifying an outstanding request in the state table.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct OutstandingKey {
/// Connection the request belongs to.
pub flow: FlowPair,
/// Transaction id of the request.
pub transaction_id: u16,
/// Unit id of the request.
pub unit_id: u8,
}
/// Bookkeeping stored for a request that has not yet been matched by a response.
#[derive(Debug, Clone)]
pub struct PendingRequest {
/// Index of the request packet as counted by the validator.
pub pcap_index: u64,
/// Event id from the request's metadata line, when available.
pub event_id: Option<u64>,
/// Timestamp from the request's metadata line, when available.
pub ts_ns: Option<u64>,
}
/// Table of outstanding requests; starts empty via `Default`.
#[derive(Default)]
pub struct StateTable {
// Requests seen so far that have not been removed by a matching response.
outstanding: HashMap<OutstandingKey, PendingRequest>,
}
impl StateTable {
/// Records `pending` under `key`, returning the entry it replaced, if any.
pub fn insert(&mut self, key: OutstandingKey, pending: PendingRequest) -> Option<PendingRequest> {
self.outstanding.insert(key, pending)
}
/// Removes and returns the entry stored under `key`, or `None` when there is none.
pub fn remove(&mut self, key: &OutstandingKey) -> Option<PendingRequest> {
self.outstanding.remove(key)
}
}

535
src/validate.rs Normal file
View File

@@ -0,0 +1,535 @@
use std::fs::File;
use std::io::{BufRead, BufReader, Write};
use std::path::PathBuf;
use anyhow::{Context, Result};
use pcap::Capture;
use crate::config::Config;
use crate::decode;
use crate::mbap;
use crate::meta::{Direction, MetaLine};
use crate::modbus_desc;
use crate::report::{Finding, FlowSummary, Report, Severity, Summary};
use crate::state::{FlowPair, OutstandingKey, PendingRequest, StateTable};
/// Validation mode, selected on the command line with `--mode mvp|strict`.
#[derive(Debug, Clone, Copy)]
pub enum Mode {
/// Default mode: an MBAP length mismatch is reported and the packet is still processed further.
Mvp,
/// An MBAP length mismatch is reported and processing of that packet stops there.
Strict,
}
/// Inputs of a validation run.
pub struct ValidateArgs {
/// Capture file to read packets from.
pub pcap_path: PathBuf,
/// JSONL metadata file, read one line per packet.
pub meta_path: PathBuf,
/// Destination of the JSON report.
pub report_path: PathBuf,
/// Expected Modbus TCP port; packets using it on neither side raise a warning.
pub port: u16,
/// Validation mode.
pub mode: Mode,
/// Stop at the first packet that produced a fatal finding.
pub fail_fast: bool,
/// Function descriptors used to decode PDU fields.
pub config: Config,
}
pub fn run(args: ValidateArgs) -> Result<Report> {
let meta_file = File::open(&args.meta_path)
.with_context(|| format!("Failed to open {}", args.meta_path.display()))?;
let mut meta_lines = BufReader::new(meta_file).lines();
let mut cap = Capture::from_file(&args.pcap_path)
.with_context(|| format!("Failed to open {}", args.pcap_path.display()))?;
let linktype = cap.get_datalink();
let mut findings = Vec::new();
let mut state = StateTable::default();
let mut packet_index = 0u64;
let mut total_packets = 0u64;
while let Ok(packet) = cap.next_packet() {
total_packets += 1;
let line = meta_lines
.next()
.transpose()
.context("Failed reading metadata line")?;
if line.is_none() {
findings.push(fatal(
packet_index,
None,
"meta_alignment",
"Missing metadata line for packet",
None,
None,
));
break;
}
let line = line.unwrap();
let meta: MetaLine = serde_json::from_str(&line)
.with_context(|| format!("Invalid JSONL line at packet {packet_index}"))?;
process_packet(
&args,
&meta,
packet_index,
packet.data,
linktype,
&mut state,
&mut findings,
)?;
if args.fail_fast && findings.iter().any(|f| f.severity == Severity::Fatal) {
break;
}
packet_index += 1;
}
if meta_lines.next().is_some() {
findings.push(fatal(
packet_index,
None,
"meta_alignment",
"Metadata has extra lines beyond pcap",
None,
None,
));
}
let summary = summarize(total_packets, &findings);
let report = Report { summary, findings };
write_report(&args.report_path, &report)?;
Ok(report)
}
/// Validates a single packet against its metadata line, appending any findings.
///
/// Checks run in this order: metadata flow parsing, link/IP/TCP decoding, expected
/// port, non-empty payload, MBAP parsing, protocol id, MBAP length, descriptor-based
/// field decoding, request/response state tracking, metadata expectations, and
/// finally the presence of an exception code on exception responses. Several checks
/// end processing of the packet early after recording a finding.
///
/// Always returns `Ok(())` in the code below; problems are reported as findings.
fn process_packet(
args: &ValidateArgs,
meta: &MetaLine,
packet_index: u64,
data: &[u8],
linktype: pcap::Linktype,
state: &mut StateTable,
findings: &mut Vec<Finding>,
) -> Result<()> {
// The parsed key is only used to validate the metadata addresses here.
let _flow = match meta.flow.to_key() {
Ok(flow) => flow,
Err(err) => {
findings.push(error(
packet_index,
meta.event_id,
"flow_parse",
format!("Flow parse error: {err}"),
None,
None,
));
return Ok(());
}
};
let decoded = match decode::extract_tcp_payload(linktype, data) {
Ok(decoded) => decoded,
Err(err) => {
findings.push(error(
packet_index,
meta.event_id,
"packet_decode",
format!("Decode error: {err}"),
Some(flow_summary(meta)),
None,
));
return Ok(());
}
};
// Warn (but keep going) when neither side of the packet uses the expected port.
if decoded.src_port != args.port && decoded.dst_port != args.port {
findings.push(warn(
packet_index,
meta.event_id,
"port_mismatch",
format!(
"Packet does not use expected Modbus port {}",
args.port
),
Some(flow_summary(meta)),
None,
));
}
if decoded.payload.is_empty() {
findings.push(error(
packet_index,
meta.event_id,
"empty_payload",
"TCP payload is empty",
Some(flow_summary(meta)),
None,
));
return Ok(());
}
let mbap = match mbap::parse_mbap(&decoded.payload) {
Ok(parsed) => parsed,
Err(err) => {
findings.push(error(
packet_index,
meta.event_id,
"mbap_parse",
format!("MBAP parse error: {err}"),
Some(flow_summary(meta)),
None,
));
return Ok(());
}
};
if mbap.protocol_id != 0 {
findings.push(error(
packet_index,
meta.event_id,
"mbap_protocol",
format!("Protocol id is {}, expected 0", mbap.protocol_id),
Some(flow_summary(meta)),
None,
));
}
if mbap.length_mismatch {
let observed = serde_json::json!({ "payload_len": decoded.payload.len(), "mbap_length": mbap.length });
findings.push(warn(
packet_index,
meta.event_id,
"mbap_length",
"MBAP length does not match payload length",
Some(flow_summary(meta)),
Some(observed),
));
// Strict mode stops here: no field decoding, state update or expectation checks.
if matches!(args.mode, Mode::Strict) {
return Ok(());
}
}
// The top bit of the function code marks an exception; the low 7 bits select the descriptor.
let base_function = mbap.function_code & 0x7f;
let is_exception = mbap.function_code & 0x80 != 0;
// `None` for exceptions, for functions without a descriptor, and when decoding fails.
let parsed_fields = if is_exception {
None
} else if let Some(func) = args.config.find_function(base_function) {
let fields = match meta.direction {
Direction::C2s => &func.request,
Direction::S2c => &func.response,
};
match modbus_desc::parse_with_descriptor(&mbap.pdu_data, fields) {
Ok(value) => Some(value),
Err(err) => {
findings.push(error(
packet_index,
meta.event_id,
"descriptor_parse",
format!("Descriptor parse error: {err}"),
Some(flow_summary(meta)),
None,
));
None
}
}
} else {
findings.push(warn(
packet_index,
meta.event_id,
"descriptor_missing",
format!("No descriptor for function {}", base_function),
Some(flow_summary(meta)),
None,
));
None
};
update_state(
meta,
packet_index,
mbap.transaction_id,
mbap.unit_id,
state,
findings,
);
if let Some(expected) = &meta.expected {
check_expected(meta, packet_index, &mbap, &parsed_fields, expected, findings);
}
// An exception response must carry at least one byte after the function code.
if is_exception && mbap.pdu_data.is_empty() {
findings.push(error(
packet_index,
meta.event_id,
"exception_code_missing",
"Exception response missing exception code",
Some(flow_summary(meta)),
None,
));
}
Ok(())
}
fn update_state(
meta: &MetaLine,
packet_index: u64,
transaction_id: u16,
unit_id: u8,
state: &mut StateTable,
findings: &mut Vec<Finding>,
) {
let flow_key = match meta.flow.to_key() {
Ok(flow) => flow,
Err(_) => return,
};
let flow = FlowPair::from_flow(meta.direction, flow_key);
let key = OutstandingKey {
flow,
transaction_id,
unit_id,
};
match meta.direction {
Direction::C2s => {
if state.insert(
key,
PendingRequest {
pcap_index: packet_index,
event_id: meta.event_id,
ts_ns: meta.ts_ns,
},
)
.is_some()
{
findings.push(warn(
packet_index,
meta.event_id,
"duplicate_txid",
"Duplicate outstanding transaction id",
Some(flow_summary(meta)),
None,
));
}
}
Direction::S2c => {
if state.remove(&key).is_none() {
findings.push(error(
packet_index,
meta.event_id,
"unmatched_response",
"Response without matching request",
Some(flow_summary(meta)),
None,
));
}
}
}
}
fn check_expected(
meta: &MetaLine,
packet_index: u64,
mbap: &mbap::MbapParsed,
parsed_fields: &Option<serde_json::Value>,
expected: &crate::meta::Expected,
findings: &mut Vec<Finding>,
) {
if let Some(modbus) = &expected.modbus {
if let Some(expected_tx) = modbus.transaction_id {
if expected_tx != mbap.transaction_id {
findings.push(error(
packet_index,
meta.event_id,
"expected_txid",
"Transaction id mismatch",
Some(flow_summary(meta)),
Some(serde_json::json!({
"observed": mbap.transaction_id,
"expected": expected_tx
})),
));
}
}
if let Some(expected_unit) = modbus.unit_id {
if expected_unit != mbap.unit_id {
findings.push(error(
packet_index,
meta.event_id,
"expected_unit",
"Unit id mismatch",
Some(flow_summary(meta)),
Some(serde_json::json!({
"observed": mbap.unit_id,
"expected": expected_unit
})),
));
}
}
if let Some(expected_fc) = modbus.function_code {
let observed_fc = mbap.function_code & 0x7f;
if expected_fc != observed_fc {
findings.push(error(
packet_index,
meta.event_id,
"expected_function",
"Function code mismatch",
Some(flow_summary(meta)),
Some(serde_json::json!({
"observed": observed_fc,
"expected": expected_fc
})),
));
}
}
}
if let Some(expected_fields) = &expected.fields {
if let Some(observed_fields) = parsed_fields {
let expected_obj = expected_fields.as_object();
let observed_obj = observed_fields.as_object();
if let (Some(expected_obj), Some(observed_obj)) = (expected_obj, observed_obj) {
for (key, expected_value) in expected_obj {
match observed_obj.get(key) {
Some(observed_value) if values_equal(expected_value, observed_value) => {}
Some(observed_value) => findings.push(error(
packet_index,
meta.event_id,
"expected_field_mismatch",
format!("Field mismatch for {key}"),
Some(flow_summary(meta)),
Some(serde_json::json!({
"field": key,
"observed": observed_value,
"expected": expected_value
})),
)),
None => findings.push(error(
packet_index,
meta.event_id,
"expected_field_missing",
format!("Expected field missing: {key}"),
Some(flow_summary(meta)),
Some(serde_json::json!({
"field": key,
"expected": expected_value
})),
)),
}
}
}
} else {
findings.push(warn(
packet_index,
meta.event_id,
"expected_fields_unparsed",
"Expected fields present but parsing failed",
Some(flow_summary(meta)),
None,
));
}
}
}
/// Compares an expected JSON value with an observed one, tolerating floating-point rounding.
///
/// Values that are structurally equal always match. Otherwise two numbers match
/// when at least one of them is a float and they differ by no more than a few
/// units of rounding error relative to their magnitude. A fixed absolute
/// `f64::EPSILON` is too strict for that: a scaled value such as `33 * 0.1`
/// differs from the literal `3.3` by more than `f64::EPSILON`. Two integers are
/// only equal when structurally equal.
fn values_equal(expected: &serde_json::Value, observed: &serde_json::Value) -> bool {
    if expected == observed {
        return true;
    }
    let (serde_json::Value::Number(a), serde_json::Value::Number(b)) = (expected, observed) else {
        return false;
    };
    // Integers compare exactly (handled by `==` above); the tolerance is only for
    // floats, so large distinct integers are never treated as equal.
    if !(a.is_f64() || b.is_f64()) {
        return false;
    }
    let (Some(a), Some(b)) = (a.as_f64(), b.as_f64()) else {
        return false;
    };
    // Scale the tolerance with the operands, but never below the absolute epsilon
    // so values near zero keep at least the previous tolerance.
    let magnitude = a.abs().max(b.abs()).max(1.0);
    (a - b).abs() <= 4.0 * f64::EPSILON * magnitude
}
fn summarize(total_packets: u64, findings: &[Finding]) -> Summary {
let mut summary = Summary {
total_packets,
total_findings: findings.len() as u64,
fatal: 0,
error: 0,
warn: 0,
info: 0,
};
for finding in findings {
match finding.severity {
Severity::Fatal => summary.fatal += 1,
Severity::Error => summary.error += 1,
Severity::Warn => summary.warn += 1,
Severity::Info => summary.info += 1,
}
}
summary
}
/// Serializes `report` as pretty-printed JSON and writes it to `path`.
///
/// The report is serialized before the file is created, so a serialization
/// failure cannot leave behind an empty or truncated report file.
///
/// # Errors
/// Fails when the report cannot be serialized, or when the file cannot be created
/// or written; I/O errors name the offending path.
fn write_report(path: &std::path::Path, report: &Report) -> Result<()> {
    let data = serde_json::to_vec_pretty(report).context("Failed to serialize report")?;
    let mut file = File::create(path)
        .with_context(|| format!("Failed to create report {}", path.display()))?;
    file.write_all(&data)
        .with_context(|| format!("Failed to write report {}", path.display()))?;
    Ok(())
}
/// Copies the flow endpoints of a metadata line into the form used inside findings.
fn flow_summary(meta: &MetaLine) -> FlowSummary {
    let flow = &meta.flow;
    let src_ip = flow.src_ip.clone();
    let dst_ip = flow.dst_ip.clone();
    FlowSummary {
        src_ip,
        src_port: flow.src_port,
        dst_ip,
        dst_port: flow.dst_port,
    }
}
fn fatal(
pcap_index: u64,
event_id: Option<u64>,
code: &str,
message: impl Into<String>,
flow: Option<FlowSummary>,
observed: Option<serde_json::Value>,
) -> Finding {
Finding {
pcap_index,
event_id,
severity: Severity::Fatal,
code: code.to_string(),
message: message.into(),
flow,
observed,
expected: None,
}
}
fn error(
pcap_index: u64,
event_id: Option<u64>,
code: &str,
message: impl Into<String>,
flow: Option<FlowSummary>,
observed: Option<serde_json::Value>,
) -> Finding {
Finding {
pcap_index,
event_id,
severity: Severity::Error,
code: code.to_string(),
message: message.into(),
flow,
observed,
expected: None,
}
}
fn warn(
pcap_index: u64,
event_id: Option<u64>,
code: &str,
message: impl Into<String>,
flow: Option<FlowSummary>,
observed: Option<serde_json::Value>,
) -> Finding {
Finding {
pcap_index,
event_id,
severity: Severity::Warn,
code: code.to_string(),
message: message.into(),
flow,
observed,
expected: None,
}
}