From ba28bb2cef92662e6b5ce727b5e1f4fb16cf3ad0 Mon Sep 17 00:00:00 2001 From: Behnam RK Date: Fri, 12 Jun 2026 17:58:04 +0330 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20DX=20overhaul=20=E2=80=94=20safe=20?= =?UTF-8?q?dev=20loop,=20deterministic=20VPN=20check,=20CI,=20docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operating and extending dezhban meant copy-pasting diagnostic commands by hand, with no way to test firewall rules without risking lockout and no config validation. This adds a fast, root-free dev loop and a deterministic endpoint sanity check that won't cry wolf under a full-tunnel VPN. - validate / print-rules / doctor subcommands + global -v/--verbose - build-tagged firewall.RenderRules (pure, exec-free) on all 3 OSes — inspect a block/guard ruleset without applying it - swap UDP-dial endpoint check (false-positives under full-tunnel default route) for deterministic subnet-containment; doctor --discover shells out on macOS to find the connected VPN's real server IP - scripts/*.sh (dev, rules, doctor, install-local, reinstall, panic) + matching Makefile targets; dev + vpn-guard sample configs - CI matrix (vet/test on 3 OSes + build-all) guards the build-tag seam - docs: lockout-recovery runbook + full config reference Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 64 ++++++ Makefile | 46 ++++- README.md | 44 +++-- cmd/dezhban/main.go | 272 ++++++++++++++++++++++++-- configs/dezhban.dev.json | 23 +++ configs/dezhban.vpn-guard.json | 23 +++ docs/CONFIG.md | 63 ++++++ docs/TROUBLESHOOTING.md | 93 +++++++++ internal/firewall/render_darwin.go | 13 ++ internal/firewall/render_linux.go | 11 ++ internal/firewall/render_windows.go | 11 ++ internal/netdetect/discover_darwin.go | 175 +++++++++++++++++ internal/netdetect/discover_other.go | 25 +++ internal/netdetect/netdetect.go | 155 ++++++++++++++- internal/netdetect/netdetect_test.go | 69 ++++++- internal/runner/runner.go | 17 ++ 
scripts/dev.sh | 13 ++ scripts/doctor.sh | 13 ++ scripts/install-local.sh | 26 +++ scripts/panic.sh | 10 + scripts/reinstall.sh | 12 ++ scripts/rules.sh | 22 +++ scripts/uninstall-local.sh | 10 + 23 files changed, 1163 insertions(+), 47 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 configs/dezhban.dev.json create mode 100644 configs/dezhban.vpn-guard.json create mode 100644 docs/CONFIG.md create mode 100644 docs/TROUBLESHOOTING.md create mode 100644 internal/firewall/render_darwin.go create mode 100644 internal/firewall/render_linux.go create mode 100644 internal/firewall/render_windows.go create mode 100644 internal/netdetect/discover_darwin.go create mode 100644 internal/netdetect/discover_other.go create mode 100755 scripts/dev.sh create mode 100755 scripts/doctor.sh create mode 100755 scripts/install-local.sh create mode 100755 scripts/panic.sh create mode 100755 scripts/reinstall.sh create mode 100755 scripts/rules.sh create mode 100755 scripts/uninstall-local.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..993778b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,64 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + +permissions: + contents: read + +jobs: + # Vet + test on every OS. Each runner compiles and tests its OWN build-tagged + # firewall backend (pf/nft/wfp), so a backend that fails to build — e.g. a + # missing RenderRules on a non-host OS — is caught here, not at release time. + test: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + - run: go vet ./... + - run: go test ./... + + # Verify gofmt cleanliness once (formatting is OS-independent). 
+ fmt: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + - name: gofmt + run: | + unformatted="$(gofmt -l .)" + if [ -n "$unformatted" ]; then + echo "These files need gofmt:"; echo "$unformatted"; exit 1 + fi + + # Cross-compile all five release targets and assert every artifact exists. + build-all: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + - run: make build-all + - name: verify artifacts + run: | + set -eu + for f in \ + dist/dezhban-darwin-arm64 \ + dist/dezhban-darwin-amd64 \ + dist/dezhban-linux-amd64 \ + dist/dezhban-linux-arm64 \ + dist/dezhban-windows-amd64.exe; do + test -f "$f" || { echo "missing artifact: $f"; exit 1; } + done + echo "all 5 artifacts present" diff --git a/Makefile b/Makefile index 36315d4..9c4893c 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,14 @@ PLATFORMS := \ linux/arm64 \ windows/amd64 -.PHONY: build vet test build-all clean +# Config used by the dev-loop targets. Override on the command line, e.g. +# make rules CONFIG=configs/dezhban.vpn-guard.json +CONFIG ?= configs/dezhban.local.json +MODE ?= guard + +.PHONY: build vet test build-all clean lint \ + run-dry validate rules doctor \ + install-local reinstall uninstall-local panic build: ## Build for the host platform into ./$(BINARY) go build $(LDFLAGS) -o $(BINARY) $(PKG) @@ -30,6 +37,43 @@ vet: ## Static checks test: ## Run all tests go test ./... 
+lint: ## golangci-lint if installed, else gofmt + vet + @if command -v golangci-lint >/dev/null 2>&1; then \ + golangci-lint run; \ + else \ + echo "golangci-lint not found; running gofmt + go vet"; \ + test -z "$$(gofmt -l .)" || { echo "gofmt needed:"; gofmt -l .; exit 1; }; \ + go vet ./...; \ + fi + +# --- dev loop (no root) ----------------------------------------------------- + +run-dry: ## Build + run the monitor in dry-run (no firewall touch) + CONFIG=$(CONFIG) sh scripts/dev.sh + +validate: ## Load + validate CONFIG without side effects + go run $(PKG) validate --config $(CONFIG) + +rules: ## Print the ruleset for MODE (guard|fullblock|legacy) without applying + go run $(PKG) print-rules --mode $(MODE) --config $(CONFIG) + +doctor: ## Diagnose VPN guard config (add ARGS=--discover on macOS) + go run $(PKG) doctor --config $(CONFIG) $(ARGS) + +# --- service lifecycle (sudo) ---------------------------------------------- + +install-local: ## Validate, build, install config + service, start it + CONFIG=$(CONFIG) sh scripts/install-local.sh + +reinstall: ## Tear down then install fresh + CONFIG=$(CONFIG) sh scripts/reinstall.sh + +uninstall-local: ## Stop + unregister the service + sh scripts/uninstall-local.sh + +panic: ## Force-remove dezhban's rules (lockout escape hatch) + sh scripts/panic.sh + build-all: ## Cross-compile every platform into ./$(DIST) @mkdir -p $(DIST) @for p in $(PLATFORMS); do \ diff --git a/README.md b/README.md index f770599..1014dc7 100644 --- a/README.md +++ b/README.md @@ -80,20 +80,25 @@ linked). 
## Usage ``` -dezhban [flags] +dezhban [-v] [flags] Commands: - run Run the monitor→decision→enforcement loop (root) - block Manually block network egress (root) - unblock Remove dezhban's firewall rules (root) - status Show version, config, service, and block state - panic Force-remove dezhban's rules even with no daemon (root) - install Register dezhban as a boot-persistent OS service (root) - uninstall Remove the OS service (root) - start Start the installed service (root) - stop Stop the installed service (removes firewall rules) (root) - detect-vpn Print detected VPN tunnel interfaces for config - version Print the version + run Run the monitor→decision→enforcement loop (root) + block Manually block network egress (root) + unblock Remove dezhban's firewall rules (root) + status Show version, config, service, and block state + validate Load + validate a config file (no root, no effects) + print-rules Print the ruleset a block/guard would apply, without applying it + doctor Diagnose VPN guard config (tunnels, endpoints, lockout risks) + panic Force-remove dezhban's rules even with no daemon (root) + install Register dezhban as a boot-persistent OS service (root) + uninstall Remove the OS service (root) + start Start the installed service (root) + stop Stop the installed service (removes firewall rules) (root) + detect-vpn Print detected VPN tunnel interfaces for config + version Print the version + +Global: -v / --verbose override the configured log level to debug ``` Privileged commands require root/admin and print a clear error otherwise. @@ -118,6 +123,21 @@ sudo dezhban panic # standalone teardown, no daem - `block --guard` — install the VPN interface guard (see below). - `unblock --force` — accepted for symmetry (`unblock` is already unconditional). 
+### Diagnose & test safely (no root) + +Inspect and validate before you risk a block — none of these touch the firewall: + +```bash +dezhban validate --config # parse + validate, summarize +dezhban print-rules --mode guard --config # exact ruleset, not applied +dezhban doctor --config # tunnels, subnets, endpoint sanity +dezhban doctor --discover --config # macOS: find the VPN's real server IP +``` + +`print-rules --mode` takes `guard`, `fullblock`, or `legacy`. See +[docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) for the lockout-recovery +runbook and [docs/CONFIG.md](docs/CONFIG.md) for the full config reference. + ## Configuration JSON, with durations as strings (e.g. `"30s"`). See diff --git a/cmd/dezhban/main.go b/cmd/dezhban/main.go index 7c31bde..7adf7b2 100644 --- a/cmd/dezhban/main.go +++ b/cmd/dezhban/main.go @@ -36,23 +36,33 @@ import ( // version is overridden at build time via -ldflags "-X main.version=...". var version = "dev" +// verbose is the global -v/--verbose flag, stripped from args before dispatch. +// When set it overrides the configured log level to debug. 
+var verbose bool + const usage = `dezhban — network kill switch Usage: - dezhban [flags] + dezhban [-v] [flags] Commands: - run Run the monitor→decision→enforcement loop - block Manually block network egress - unblock Remove dezhban's firewall rules - status Show version, config, and current state - panic Force-remove dezhban's rules even if the daemon is dead - install Register dezhban as a boot-persistent OS service - uninstall Remove the OS service - start Start the installed service - stop Stop the installed service (removes firewall rules) - detect-vpn Print detected VPN tunnel interfaces to help fill the vpn config - version Print the version + run Run the monitor→decision→enforcement loop + block Manually block network egress + unblock Remove dezhban's firewall rules + status Show version, config, and current state + validate Load and validate a config file (no root, no side effects) + print-rules Print the firewall ruleset a block/guard would apply, without applying it + doctor Diagnose VPN guard config (tunnels, endpoints, lockout risks) + panic Force-remove dezhban's rules even if the daemon is dead + install Register dezhban as a boot-persistent OS service + uninstall Remove the OS service + start Start the installed service + stop Stop the installed service (removes firewall rules) + detect-vpn Print detected VPN tunnel interfaces to help fill the vpn config + version Print the version + +Global flags: + -v, --verbose Override the configured log level to debug Run "dezhban -h" for command flags.` @@ -61,6 +71,7 @@ func main() { } func run(args []string) int { + args = stripVerbose(args) if len(args) == 0 { fmt.Fprintln(os.Stderr, usage) return 2 @@ -76,13 +87,19 @@ func run(args []string) int { return cmdUnblock(rest) case "status": return cmdStatus(rest) + case "validate": + return cmdValidate(rest) + case "print-rules": + return cmdPrintRules(rest) + case "doctor": + return cmdDoctor(rest) case "panic": return cmdPanic(rest) case "install", 
"uninstall", "start", "stop": return cmdService(cmd, rest) case "detect-vpn": return cmdDetectVPN(rest) - case "version", "--version", "-v": + case "version", "--version": fmt.Println("dezhban", version) return 0 case "help", "--help", "-h": @@ -94,6 +111,35 @@ func run(args []string) int { } } +// stripVerbose removes the global -v/--verbose flag (which may appear before or +// after the subcommand) from args and records it in the package-level verbose. +// Pulling it out here lets every subcommand's FlagSet stay unaware of it. +func stripVerbose(args []string) []string { + out := make([]string, 0, len(args)) + for _, a := range args { + switch a { + case "-v", "--v", "-verbose", "--verbose": + verbose = true + default: + out = append(out, a) + } + } + return out +} + +// effectiveLevel is the log level after applying the global -v/--verbose override. +func effectiveLevel(cfg *config.Config) string { + if verbose { + return "debug" + } + return cfg.LogLevel +} + +// newLogger builds a logger honoring the -v/--verbose override. +func newLogger(cfg *config.Config) *slog.Logger { + return logging.New(effectiveLevel(cfg)) +} + // requireRoot prints a clear error and returns false if not privileged. 
func requireRoot(cmd string) bool { if privilege.IsPrivileged() { @@ -119,7 +165,7 @@ func cmdRun(args []string) int { fmt.Fprintln(os.Stderr, "config error:", err) return 1 } - log := logging.New(cfg.LogLevel) + log := newLogger(cfg) if *dryRun { return runDryRun(cfg, log) @@ -137,7 +183,7 @@ func cmdRun(args []string) int { build := func(l *slog.Logger) (runner.Options, error) { return assembleOptions(cfg, l) } - if err := svc.Run(build, log, cfg.LogLevel, *cfgPath); err != nil { + if err := svc.Run(build, log, effectiveLevel(cfg), *cfgPath); err != nil { log.Error("run loop failed", "err", err) return 1 } @@ -221,7 +267,7 @@ func cmdBlock(args []string) int { fmt.Fprintln(os.Stderr, "config error:", err) return 1 } - log := logging.New(cfg.LogLevel) + log := newLogger(cfg) if !requireRoot("block") { return 1 } @@ -543,6 +589,200 @@ func quoteJoin(ss []string) string { return strings.Join(q, ", ") } +// cmdValidate loads and validates a config without running anything or touching +// the firewall — a fast, root-free pre-flight. config.Load already runs +// Validate(), so a clean load is a valid config; print a one-line summary so the +// operator can eyeball the loaded values. 
+func cmdValidate(args []string) int { + fs := flag.NewFlagSet("validate", flag.ExitOnError) + cfgPath := fs.String("config", "", "path to config file (JSON)") + _ = fs.Parse(args) + + cfg, err := loadConfig(*cfgPath) + if err != nil { + fmt.Fprintln(os.Stderr, "config invalid:", err) + return 1 + } + src := *cfgPath + if src == "" { + src = "(defaults — no --config given)" + } + blocked := cfg.BlockedCountries + if len(blocked) == 0 { + blocked = []string{"(none)"} + } + fmt.Printf("config OK: %s\n", src) + fmt.Printf(" blocked countries: %s\n", strings.Join(blocked, ", ")) + fmt.Printf(" poll interval: %s\n", cfg.PollInterval) + fmt.Printf(" fail-closed: %t\n", cfg.FailClosed) + fmt.Printf(" vpn guard: %t\n", cfg.VPN.Enabled) + if cfg.VPN.Enabled { + fmt.Printf(" vpn tunnels: %s\n", strings.Join(cfg.VPN.TunnelInterfaces, ", ")) + fmt.Printf(" vpn endpoints: %s\n", strings.Join(cfg.VPN.Endpoints, ", ")) + } + return 0 +} + +// policyForMode builds the firewall Policy the named mode would apply, mirroring +// the run loop's guard/full-block construction (runner.runVPN). It is the single +// source print-rules renders from. NOTE: keep in sync with runner.runVPN; a +// future refactor should extract a shared constructor in the firewall package. +func policyForMode(cfg *config.Config, log *slog.Logger, mode string) (firewall.Policy, error) { + al := buildAllowlist(cfg, log) + switch mode { + case "guard": + return firewall.Policy{ + Mode: firewall.ModeGuard, + Allowlist: al, + TunnelIfaces: resolveTunnels(cfg, log), + VPNEndpoints: parseEndpoints(cfg.VPN.Endpoints, log), + }, nil + case "fullblock": + return firewall.Policy{ + Mode: firewall.ModeFullBlock, + Allowlist: al, + TunnelIfaces: resolveTunnels(cfg, log), + VPNEndpoints: parseEndpoints(cfg.VPN.Endpoints, log), + }, nil + case "legacy": + // Legacy direct model: full block with the dst-IP allowlist, no tunnel. 
+ return firewall.Policy{Mode: firewall.ModeFullBlock, Allowlist: al}, nil + default: + return firewall.Policy{}, fmt.Errorf("unknown mode %q (valid: guard, fullblock, legacy)", mode) + } +} + +// cmdPrintRules renders the exact firewall ruleset a given policy would install +// and prints it to stdout WITHOUT applying it — the safe way to inspect a block +// or guard before risking a lockout. No root: rendering is pure. Diagnostic logs +// (allowlist resolution, etc.) go to stderr, so stdout is just the ruleset. +func cmdPrintRules(args []string) int { + fs := flag.NewFlagSet("print-rules", flag.ExitOnError) + cfgPath := fs.String("config", "", "path to config file (JSON)") + mode := fs.String("mode", "guard", "policy to render: guard, fullblock, or legacy") + _ = fs.Parse(args) + + cfg, err := loadConfig(*cfgPath) + if err != nil { + fmt.Fprintln(os.Stderr, "config error:", err) + return 1 + } + pol, err := policyForMode(cfg, newLogger(cfg), *mode) + if err != nil { + fmt.Fprintln(os.Stderr, err) + return 2 + } + rules, err := firewall.RenderRules(pol) + if err != nil { + fmt.Fprintln(os.Stderr, "render failed:", err) + return 1 + } + fmt.Print(rules) + return 0 +} + +// cmdDoctor diagnoses the VPN guard configuration without root or side effects: +// it validates config, lists tunnel interfaces and their subnets, and flags any +// endpoint that sits inside a tunnel's own subnet (a guaranteed lockout). With +// --discover it additionally runs the macOS-only best-effort hunt for the +// connected VPN's real server IP, automating the manual netstat/scutil dance. 
+func cmdDoctor(args []string) int { + fs := flag.NewFlagSet("doctor", flag.ExitOnError) + cfgPath := fs.String("config", "", "path to config file (JSON)") + discover := fs.Bool("discover", false, "best-effort: find the connected VPN's real server IP (macOS only)") + _ = fs.Parse(args) + + cfg, err := loadConfig(*cfgPath) + if err != nil { + fmt.Fprintln(os.Stderr, "config invalid:", err) + return 1 + } + log := newLogger(cfg) + + fmt.Println("dezhban doctor") + fmt.Println() + fmt.Println("config: OK (loaded and validated)") + fmt.Printf(" vpn guard enabled: %t\n", cfg.VPN.Enabled) + fmt.Println() + + tunnels := resolveTunnels(cfg, log) + fmt.Println("tunnels:") + if len(tunnels) == 0 { + fmt.Println(" (none — set vpn.tunnelInterfaces or vpn.autodetect)") + } else { + nets, _ := netdetect.TunnelSubnets(tunnels) + subsByIface := map[string][]string{} + for _, tn := range nets { + subsByIface[tn.Iface] = append(subsByIface[tn.Iface], tn.Subnet.String()) + } + for _, t := range tunnels { + if subs := subsByIface[t]; len(subs) > 0 { + fmt.Printf(" %s — %s\n", t, strings.Join(subs, ", ")) + } else { + fmt.Printf(" %s — no subnet (interface down or absent?)\n", t) + } + } + } + fmt.Println() + + endpoints := parseEndpoints(cfg.VPN.Endpoints, log) + fmt.Println("endpoints:") + var bad []netdetect.EndpointRoute + if len(endpoints) == 0 { + fmt.Println(" (none configured)") + } else { + bad, _ = netdetect.CheckEndpointRouting(endpoints, tunnels) + internal := map[string]netdetect.EndpointRoute{} + for _, b := range bad { + internal[b.Endpoint.String()] = b + } + for _, ep := range endpoints { + if b, ok := internal[ep.String()]; ok { + fmt.Printf(" %s — MISCONFIGURED: inside %s's subnet %s\n", ep, b.Iface, b.Subnet) + } else { + fmt.Printf(" %s — ok (assumed reachable on the physical interface)\n", ep) + } + } + } + if len(bad) > 0 { + fmt.Println() + fmt.Println("fixes:") + for _, b := range bad { + fmt.Printf(" - %s is a tunnel-internal address (inside %s %s); set 
vpn.endpoints to\n", b.Endpoint, b.Iface, b.Subnet) + fmt.Println(" your VPN server's PUBLIC IP from your VPN client config.") + } + } + fmt.Println() + + if *discover { + fmt.Println("discover (best-effort, macOS):") + cands, err := netdetect.DiscoverEndpoints() + switch { + case err != nil: + fmt.Println(" ", err) + case len(cands) == 0: + fmt.Println(" no physical-side public transport sockets found — is the VPN connected?") + default: + configured := map[string]bool{} + for _, ep := range endpoints { + configured[ep.String()] = true + } + for _, c := range cands { + line := fmt.Sprintf(" %s:%d", c.Server, c.Port) + if c.VPN != "" { + line += " [" + c.VPN + "]" + } + if !configured[c.Server.String()] { + line += " <- not in vpn.endpoints" + } + fmt.Println(line) + } + fmt.Println(" add any missing server IP to vpn.endpoints and drop stale entries.") + } + } + return 0 +} + func cmdStatus(args []string) int { fs := flag.NewFlagSet("status", flag.ExitOnError) cfgPath := fs.String("config", "", "path to config file (JSON)") diff --git a/configs/dezhban.dev.json b/configs/dezhban.dev.json new file mode 100644 index 0000000..756c592 --- /dev/null +++ b/configs/dezhban.dev.json @@ -0,0 +1,23 @@ +{ + "pollInterval": "5s", + "blockedCountries": [], + "failClosed": false, + "hysteresis": 1, + "providers": [ + "https://ipinfo.io/json", + "http://ip-api.com/json", + "https://ifconfig.co/json" + ], + "allowlist": { + "dns": ["1.1.1.1", "8.8.8.8"], + "hosts": [] + }, + "vpn": { + "enabled": false, + "tunnelInterfaces": [], + "endpoints": [], + "autodetect": false + }, + "providerQuorum": false, + "logLevel": "debug" +} diff --git a/configs/dezhban.vpn-guard.json b/configs/dezhban.vpn-guard.json new file mode 100644 index 0000000..051bd7c --- /dev/null +++ b/configs/dezhban.vpn-guard.json @@ -0,0 +1,23 @@ +{ + "pollInterval": "30s", + "blockedCountries": ["RU", "IR"], + "failClosed": true, + "hysteresis": 3, + "providers": [ + "https://ipinfo.io/json", + 
"http://ip-api.com/json", + "https://ifconfig.co/json" + ], + "allowlist": { + "dns": ["1.1.1.1", "8.8.8.8"], + "hosts": [] + }, + "vpn": { + "enabled": true, + "tunnelInterfaces": ["utun4"], + "endpoints": ["203.0.113.10"], + "autodetect": false + }, + "providerQuorum": false, + "logLevel": "info" +} diff --git a/docs/CONFIG.md b/docs/CONFIG.md new file mode 100644 index 0000000..d79f21d --- /dev/null +++ b/docs/CONFIG.md @@ -0,0 +1,63 @@ +# Configuration reference + +dezhban reads a JSON config. Durations are strings (Go syntax, e.g. `"30s"`, +`"5m"`). A missing `--config` loads built-in defaults. Validate any file without +running it: + +```sh +dezhban validate --config path/to/config.json +``` + +## Fields + +| Field | Type | Default | Notes | +|---|---|---|---| +| `pollInterval` | duration string | `"30s"` | How often the public IP / country is checked. Must be > 0. | +| `blockedCountries` | `[]string` | `[]` | ISO-3166 alpha-2 codes (e.g. `"RU"`, `"IR"`). Upper-cased on load; each must be exactly 2 letters. A match triggers a block. | +| `failClosed` | bool | `true` | When the country can't be determined, block anyway (security-first). The allowlist stays open so recovery still works. | +| `hysteresis` | int | `3` | Consecutive agreeing readings required before toggling block/allow. Must be ≥ 1. Damps flapping. | +| `providers` | `[]string` | 3 geo-IP URLs | Geo-location endpoints, tried for redundancy. At least one required. | +| `allowlist.dns` | `[]string` | `[]` | Resolver IPs kept reachable while blocking, so hostname re-resolution works. | +| `allowlist.hosts` | `[]string` | `[]` | Extra host IPs always allowed. Provider IPs are added automatically at block time. | +| `providerQuorum` | bool | `false` | Require a majority of providers to agree on the country before acting. | +| `logLevel` | string | `"info"` | One of `debug`, `info`, `warn`, `error`. The `-v`/`--verbose` flag overrides this to `debug`. 
| +| `vpn` | object | disabled | VPN interface-guard config — see below. | + +## `vpn` block + +For hosts behind a full-tunnel VPN, the guard cuts the **physical** interface +while keeping the **tunnel** open, instead of the destination-IP allowlist (which +is meaningless under a tunnel). Opt-in — a misconfigured guard can lock you out. + +| Field | Type | Default | Notes | +|---|---|---|---| +| `vpn.enabled` | bool | `false` | Turns on guard mode. | +| `vpn.tunnelInterfaces` | `[]string` | `[]` | Tunnel interface names (e.g. `["utun4"]`). Required unless `autodetect` is set. Run `dezhban detect-vpn` to find them. | +| `vpn.endpoints` | `[]string` | `[]` | VPN server **public IP(s)** reachable on the physical interface — kept open so the tunnel can stay up and reconnect. Required when `enabled`. Must be valid IPs. | +| `vpn.autodetect` | bool | `false` | Discover the tunnel interface(s) at runtime via `netdetect`. Explicit `tunnelInterfaces` always win. Endpoints are **never** autodetected (a wrong guess leaks). | + +### Validation rules (enforced by `validate` and at load) + +- `pollInterval` > 0 +- `hysteresis` ≥ 1 +- at least one `providers` entry +- every `blockedCountries` code is 2 letters +- when `vpn.enabled`: `tunnelInterfaces` non-empty **or** `autodetect` true +- when `vpn.enabled`: at least one `endpoints` entry, each a valid IP + +### Getting `vpn.endpoints` right + +A wrong or tunnel-internal endpoint is the #1 lockout cause — see +[TROUBLESHOOTING.md](TROUBLESHOOTING.md). Verify before enabling: + +```sh +dezhban doctor --config # flags endpoints inside a tunnel subnet +dezhban doctor --discover --config # macOS: print the VPN's real server IP +``` + +## Sample configs + +- [`configs/dezhban.example.json`](../configs/dezhban.example.json) — reference, legacy (non-VPN) mode. +- [`configs/dezhban.vpn-guard.json`](../configs/dezhban.vpn-guard.json) — VPN guard mode. 
+- [`configs/dezhban.dev.json`](../configs/dezhban.dev.json) — debug logging, fast poll, no blocking; for local dry-runs. +- `configs/dezhban.local.json` — your private config (git-ignored; may hold a real endpoint IP). diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md new file mode 100644 index 0000000..8cbc6f1 --- /dev/null +++ b/docs/TROUBLESHOOTING.md @@ -0,0 +1,93 @@ +# Troubleshooting + +## I'm locked out — no network after a block + +dezhban is fail-closed: a crashed `run`, a misconfigured guard, or a stale VPN +endpoint leaves the block-all rule in place by design (the kill switch must not +fail open). The escape hatch removes dezhban's rules with no daemon involved: + +```sh +sudo dezhban panic # or: make panic +dezhban status +``` + +`panic` only touches rules tagged `dezhban` (the pf anchor / nft table / WFP +sublayer), so it is always safe and a no-op on a clean system. After it runs, +connectivity is restored. Then fix the cause below before re-enabling the guard. + +## VPN guard: tunnel dies, DNS fails ("no such host"), country lookups time out + +Symptom (from the daemon log): + +``` +msg="vpn guard active (startup)" tunnels=[utun4] endpoints=1 +msg="country lookup failed" err="... dial tcp: lookup ip-api.com: no such host" +``` + +**Cause.** In guard mode dezhban blocks the physical interface except egress to +`vpn.endpoints`, keeping the VPN's encrypted transport alive so the tunnel can +stay up. If `vpn.endpoints` is **wrong** (a stale server IP) or **internal to the +tunnel** (an address like `10.0.0.x` that only exists inside the tunnel), the +real transport is blocked → the tunnel drops → all traffic (DNS included) routed +over the dead tunnel fails → the host locks itself out. 
+ +The failure chain: + +``` +wrong/internal vpn.endpoints + → physical-side `pass to <endpoint>` matches nothing real + → VPN transport blocked on the physical link + → tunnel drops, can't reconnect (its path to the server is cut) + → DNS + everything over the tunnel fails → lockout +``` + +**Recover, then diagnose:** + +```sh +sudo dezhban panic # restore connectivity +dezhban doctor # tunnels, subnets, endpoint sanity +dezhban doctor --discover # macOS: find the VPN's REAL server IP +``` + +`doctor` flags any endpoint that sits inside a tunnel's own subnet (a guaranteed +lockout). `--discover` (macOS) inspects the connected VPN's live sockets and +prints the actual server IP:port it talks to on the physical link — compare that +against `vpn.endpoints`. + +**Fix.** Set `vpn.endpoints` to the VPN server's **public IP** — the address the +client sends encrypted packets to on the physical interface. Get it from your VPN +client's config, or from `dezhban doctor --discover`. Then: + +```sh +dezhban validate --config <path> # confirm it parses +sudo make reinstall # tear down + reinstall the service +``` + +### Note for NetworkExtension VPNs (macOS) + +Some macOS VPN clients (Lightway/RocketTunnel, WireGuard-go, Xray/V2Box) run their +transport inside a system extension and bind it directly to the physical +interface. `route get <endpoint>` will show such an endpoint going via the tunnel +even when it's correct — that's why dezhban's check uses **subnet containment**, +not a route probe, and why `--discover` reads live sockets instead. The pf rule +still matches the provider's physical-side socket, so a correct **public** +endpoint works even though `route get` is misleading. + +## Preview rules before applying them + +Never find out what a block does by getting locked out.
Render the exact ruleset +first, no root, no side effects: + +```sh +dezhban print-rules --mode guard --config # or: make rules MODE=guard +dezhban print-rules --mode fullblock --config +dezhban print-rules --mode legacy --config +``` + +## Config won't load + +```sh +dezhban validate --config # prints the precise validation error +``` + +See [CONFIG.md](CONFIG.md) for every field and its constraints. diff --git a/internal/firewall/render_darwin.go b/internal/firewall/render_darwin.go new file mode 100644 index 0000000..0a7a61e --- /dev/null +++ b/internal/firewall/render_darwin.go @@ -0,0 +1,13 @@ +//go:build darwin + +package firewall + +// RenderRules returns the exact pf ruleset text the darwin backend would load +// for policy p, WITHOUT applying it. It is pure (no pfctl, no root, no live +// firewall state), which is what makes `dezhban print-rules` safe: an operator +// can inspect precisely what a block/guard would install before risking a +// lockout. The host build compiles only its own backend, so this resolves to the +// platform whose firewall actually runs. +func RenderRules(p Policy) (string, error) { + return renderRuleset(p), nil +} diff --git a/internal/firewall/render_linux.go b/internal/firewall/render_linux.go new file mode 100644 index 0000000..b729e31 --- /dev/null +++ b/internal/firewall/render_linux.go @@ -0,0 +1,11 @@ +//go:build linux + +package firewall + +// RenderRules returns the exact nftables ruleset text the linux backend would +// load for policy p, WITHOUT applying it. See render_darwin.go for the rationale: +// pure rendering is what lets `dezhban print-rules` show a block/guard before it +// is installed. 
+func RenderRules(p Policy) (string, error) { + return renderNftRuleset(p), nil +} diff --git a/internal/firewall/render_windows.go b/internal/firewall/render_windows.go new file mode 100644 index 0000000..662524e --- /dev/null +++ b/internal/firewall/render_windows.go @@ -0,0 +1,11 @@ +//go:build windows + +package firewall + +// RenderRules returns the exact PowerShell/WFP script the windows backend would +// run for policy p, WITHOUT executing it. See render_darwin.go for the rationale: +// pure rendering is what lets `dezhban print-rules` show a block/guard before it +// is installed. +func RenderRules(p Policy) (string, error) { + return renderBlockScript(p), nil +} diff --git a/internal/netdetect/discover_darwin.go b/internal/netdetect/discover_darwin.go new file mode 100644 index 0000000..5bc21b0 --- /dev/null +++ b/internal/netdetect/discover_darwin.go @@ -0,0 +1,175 @@ +//go:build darwin + +package netdetect + +import ( + "bufio" + "context" + "net" + "net/netip" + "os/exec" + "strconv" + "strings" + "time" +) + +// Candidate is a guessed VPN server endpoint observed on the physical interface: +// the far side of an encrypted-transport socket that a VPN client opened directly +// on the WAN link (bypassing its own tunnel). VPN names the connected service it +// is attributed to, when known. +type Candidate struct { + VPN string + Server netip.Addr + Port int +} + +// DiscoverEndpoints heuristically finds the real server address(es) of the +// currently-connected VPN on macOS — automating the manual scutil/netstat/lsof +// hunt. The signal: a full-tunnel VPN routes all app traffic over its tunnel +// (local address = the tunnel's IP), so the ONLY sockets whose local address is a +// PHYSICAL interface IP and whose foreign address is PUBLIC are the VPN's own +// encrypted transport to its server. We read those from `netstat` and attribute +// them to the connected service named by `scutil --nc list`. 
+// +// Best-effort and macOS-only: it shells out, parses human output, and focuses on +// IPv4. Treat results as candidates to verify against your VPN client's config, +// not gospel — other daemons can also hold a physical-side public socket. +func DiscoverEndpoints() ([]Candidate, error) { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + phys, err := physicalIPv4s() + if err != nil { + return nil, err + } + vpn := connectedVPNName(ctx) // "" if none/unreadable; non-fatal + + var cands []Candidate + seen := map[string]bool{} + for _, proto := range []string{"tcp", "udp"} { + out, err := exec.CommandContext(ctx, "netstat", "-anv", "-p", proto).Output() + if err != nil { + continue // proto table unavailable; try the other + } + for _, c := range parseNetstat(string(out), phys) { + key := c.Server.String() + ":" + strconv.Itoa(c.Port) + if seen[key] { + continue + } + seen[key] = true + c.VPN = vpn + cands = append(cands, c) + } + } + return cands, nil +} + +// physicalIPv4s collects the IPv4 addresses of every up, non-loopback, +// non-tunnel interface — the source addresses a VPN's WAN transport would bind. +func physicalIPv4s() (map[netip.Addr]bool, error) { + ifaces, err := net.Interfaces() + if err != nil { + return nil, err + } + out := map[netip.Addr]bool{} + for _, ifc := range ifaces { + if ifc.Flags&net.FlagLoopback != 0 || ifc.Flags&net.FlagUp == 0 { + continue + } + if isTunnelName(ifc.Name) { + continue + } + addrs, err := ifc.Addrs() + if err != nil { + continue + } + for _, a := range addrs { + ipnet, ok := a.(*net.IPNet) + if !ok { + continue + } + if ip, ok := netip.AddrFromSlice(ipnet.IP); ok { + ip = ip.Unmap() + if ip.Is4() { + out[ip] = true + } + } + } + } + return out, nil +} + +// parseNetstat extracts (foreign public IPv4, port) pairs from `netstat -anv` +// output where the local address is one of our physical interface IPs. 
macOS +// formats addresses as IP.PORT (dot before the port), so the port is the segment +// after the final dot. +func parseNetstat(out string, phys map[netip.Addr]bool) []Candidate { + var cands []Candidate + sc := bufio.NewScanner(strings.NewReader(out)) + for sc.Scan() { + f := strings.Fields(sc.Text()) + if len(f) < 5 { + continue + } + // Column 0 is the protocol (tcp4/tcp6/udp4/…); only IPv4 rows here. + if !strings.HasSuffix(f[0], "4") { + continue + } + local, _, ok := splitHostPort(f[3]) + if !ok || !phys[local] { + continue + } + foreign, port, ok := splitHostPort(f[4]) + if !ok || port == 0 { + continue + } + // The server side must be a routable public address — this is what + // separates the VPN's WAN transport from LAN chatter to the gateway. + if foreign.IsGlobalUnicast() && !foreign.IsPrivate() { + cands = append(cands, Candidate{Server: foreign, Port: port}) + } + } + return cands +} + +// splitHostPort parses macOS netstat's IP.PORT form (e.g. "192.168.88.112.64656" +// or "*.443"). Returns ok=false for wildcards and unparsable addresses. +func splitHostPort(s string) (netip.Addr, int, bool) { + i := strings.LastIndex(s, ".") + if i < 0 { + return netip.Addr{}, 0, false + } + host, portStr := s[:i], s[i+1:] + addr, err := netip.ParseAddr(host) + if err != nil { + return netip.Addr{}, 0, false + } + port, err := strconv.Atoi(portStr) + if err != nil { + return netip.Addr{}, 0, false + } + return addr.Unmap(), port, true +} + +// connectedVPNName returns the name of the first Connected service in +// `scutil --nc list`, or "" if none/unavailable. +func connectedVPNName(ctx context.Context) string { + out, err := exec.CommandContext(ctx, "scutil", "--nc", "list").Output() + if err != nil { + return "" + } + sc := bufio.NewScanner(strings.NewReader(string(out))) + for sc.Scan() { + line := sc.Text() + if !strings.Contains(line, "(Connected)") { + continue + } + // The friendly name is the first double-quoted field on the line.
+ if a := strings.IndexByte(line, '"'); a >= 0 { + if b := strings.IndexByte(line[a+1:], '"'); b >= 0 { + return line[a+1 : a+1+b] + } + } + } + return "" +} diff --git a/internal/netdetect/discover_other.go b/internal/netdetect/discover_other.go new file mode 100644 index 0000000..202b1aa --- /dev/null +++ b/internal/netdetect/discover_other.go @@ -0,0 +1,25 @@ +//go:build !darwin + +package netdetect + +import ( + "errors" + "net/netip" +) + +// ErrDiscoverUnsupported is returned by DiscoverEndpoints on platforms without a +// discovery implementation. Endpoint auto-discovery currently exists only on +// macOS, where the connected VPN's WAN transport is observable via netstat/scutil. +var ErrDiscoverUnsupported = errors.New("vpn endpoint auto-discovery is only supported on macOS") + +// Candidate mirrors the darwin type so callers compile on every platform. +type Candidate struct { + VPN string + Server netip.Addr + Port int +} + +// DiscoverEndpoints is unsupported off macOS. +func DiscoverEndpoints() ([]Candidate, error) { + return nil, ErrDiscoverUnsupported +} diff --git a/internal/netdetect/netdetect.go b/internal/netdetect/netdetect.go index 3cd4655..3f9d279 100644 --- a/internal/netdetect/netdetect.go +++ b/internal/netdetect/netdetect.go @@ -8,7 +8,12 @@ // physical uplink (a PPPoE/cellular WAN, which is point-to-point and not a VPN) // would keep that uplink open and leak unencrypted traffic past the kill switch. // We therefore match on tunnel name only and do NOT trust the point-to-point -// flag alone, since ordinary WAN links carry it too. VPN *endpoint* detection is +// flag alone, since ordinary WAN links carry it too. A tunnel name is also not +// sufficient on its own: macOS spawns system utun interfaces (utun0–utun3 for +// Handoff, AirDrop, iCloud Private Relay) that match the name pattern but carry +// no routable address. 
Guarding those would cut ordinary traffic while missing +// the real VPN, so we additionally require a global-unicast address — the mark +// of an interface that actually routes. VPN *endpoint* detection is // likewise not automated: a wrong endpoint punches a hole in the block, or if // missing prevents reconnection, so endpoints stay explicit in config. Explicit // config values always win over detection. @@ -16,6 +21,7 @@ package netdetect import ( "net" + "net/netip" "strings" ) @@ -52,17 +58,45 @@ func isTunnelName(name string) bool { return false } +// hasGlobalUnicast reports whether any of addrs is a global-unicast IP — the +// signature of an interface that actually carries routable traffic. It excludes +// loopback, IPv4 link-local (169.254/16), IPv6 link-local (fe80::/10), multicast +// and the unspecified address. macOS system utun interfaces (utun0–utun3) carry +// only IPv6 link-local or nothing, so this filters them out while keeping every +// real full-tunnel VPN, which must assign a routable address (often private, +// e.g. 10.x — still global-unicast) to move traffic. +func hasGlobalUnicast(addrs []net.Addr) bool { + for _, a := range addrs { + var ip net.IP + switch v := a.(type) { + case *net.IPNet: + ip = v.IP + case *net.IPAddr: + ip = v.IP + } + if ip != nil && ip.IsGlobalUnicast() { + return true + } + } + return false +} + // isTunnelIface decides whether an interface is a usable tunnel: it must be up, -// non-loopback, and match a tunnel name. The point-to-point flag is intentionally -// NOT sufficient on its own — physical WAN links (PPPoE, cellular) carry it too, -// and trusting it would keep a physical uplink open in guard mode (a leak). Split -// out from TunnelInterfaces so the classification is testable without a live -// interface list. -func isTunnelIface(name string, flags net.Flags) bool { +// non-loopback, match a tunnel name, and carry a global-unicast address. 
The +// point-to-point flag is intentionally NOT sufficient on its own — physical WAN +// links (PPPoE, cellular) carry it too, and trusting it would keep a physical +// uplink open in guard mode (a leak). The address check is what separates a real +// VPN utun from a macOS system utun that shares the name pattern but routes +// nothing. Split out from TunnelInterfaces so the classification is testable +// without a live interface list. +func isTunnelIface(name string, flags net.Flags, addrs []net.Addr) bool { if flags&net.FlagLoopback != 0 || flags&net.FlagUp == 0 { return false } - return isTunnelName(name) + if !isTunnelName(name) { + return false + } + return hasGlobalUnicast(addrs) } // TunnelInterfaces returns the names of up, non-loopback interfaces that look @@ -76,9 +110,112 @@ func TunnelInterfaces() ([]string, error) { } var out []string for _, ifc := range ifaces { - if isTunnelIface(ifc.Name, ifc.Flags) { + // Can't read the addresses → can't confirm the iface routes → skip it. + // Never guard an interface we cannot verify (a false guard is a leak or + // a lockout); leaving it out is the safe failure. + addrs, err := ifc.Addrs() + if err != nil { + continue + } + if isTunnelIface(ifc.Name, ifc.Flags, addrs) { out = append(out, ifc.Name) } } return out, nil } + +// TunnelNet pairs a tunnel interface with one of its on-link subnets — the +// network its assigned address sits in (e.g. utun4 with inet 10.0.0.1/24 → +// 10.0.0.0/24). It is the unit CheckEndpointRouting and `doctor` reason over. +type TunnelNet struct { + Iface string + Subnet netip.Prefix +} + +// ifaceAddrs reads the addresses of a named interface. A package var so tests +// can supply synthetic interfaces without a live network. +var ifaceAddrs = func(name string) ([]net.Addr, error) { + ifc, err := net.InterfaceByName(name) + if err != nil { + return nil, err + } + return ifc.Addrs() +} + +// prefixFromIPNet converts a *net.IPNet to a normalized (network-address) +// netip.Prefix. 
Returns ok=false for addresses or masks it cannot represent. +func prefixFromIPNet(n *net.IPNet) (netip.Prefix, bool) { + ip, ok := netip.AddrFromSlice(n.IP) + if !ok { + return netip.Prefix{}, false + } + ip = ip.Unmap() + ones, _ := n.Mask.Size() // (0,0) for a non-contiguous mask we can't express + pfx := netip.PrefixFrom(ip, ones) + if !pfx.IsValid() { + return netip.Prefix{}, false + } + return pfx.Masked(), true +} + +// TunnelSubnets returns the on-link subnets of the given tunnel interfaces. An +// interface that is absent or down is skipped (not an error) — it simply +// contributes no subnet. Pure stdlib, fully portable, sends no packets. +func TunnelSubnets(tunnels []string) ([]TunnelNet, error) { + var out []TunnelNet + for _, name := range tunnels { + addrs, err := ifaceAddrs(name) + if err != nil { + continue + } + for _, a := range addrs { + ipnet, ok := a.(*net.IPNet) + if !ok { + continue + } + if pfx, ok := prefixFromIPNet(ipnet); ok { + out = append(out, TunnelNet{Iface: name, Subnet: pfx}) + } + } + } + return out, nil +} + +// EndpointRoute records a misconfigured VPN endpoint: one that falls inside a +// tunnel interface's own subnet, naming the tunnel and the subnet that contains it. +type EndpointRoute struct { + Endpoint netip.Addr + Iface string + Subnet netip.Prefix +} + +// CheckEndpointRouting flags every endpoint that sits INSIDE a tunnel's own +// subnet. A VPN endpoint must be the server's address reachable on the PHYSICAL +// interface: the guard keeps it open there so the encrypted transport survives a +// FULL BLOCK or tunnel drop and the host can recover. An endpoint that is itself +// a tunnel-internal address (e.g. the tunnel's peer at 10.0.0.x) can never serve +// that role — pinning a physical-side `pass to <endpoint>` for it is futile, the +// tunnel can't reconnect through itself, and the host locks itself out.
+// +// This is deterministic and has no false positives under a full-tunnel VPN: a +// full tunnel owns the default route, so a route-table probe would report EVERY +// public endpoint as "via the tunnel" — useless. Subnet containment instead only +// fires on addresses that are genuinely internal to the tunnel. It does NOT catch +// a wrong-but-public endpoint (a stale server IP); only observing the live VPN's +// real socket does — that is what `doctor --discover` is for. +func CheckEndpointRouting(endpoints []netip.Addr, tunnels []string) ([]EndpointRoute, error) { + nets, err := TunnelSubnets(tunnels) + if err != nil { + return nil, err + } + var bad []EndpointRoute + for _, ep := range endpoints { + for _, tn := range nets { + if tn.Subnet.Contains(ep) { + bad = append(bad, EndpointRoute{Endpoint: ep, Iface: tn.Iface, Subnet: tn.Subnet}) + break + } + } + } + return bad, nil +} diff --git a/internal/netdetect/netdetect_test.go b/internal/netdetect/netdetect_test.go index 8959856..657cc16 100644 --- a/internal/netdetect/netdetect_test.go +++ b/internal/netdetect/netdetect_test.go @@ -1,7 +1,9 @@ package netdetect import ( + "fmt" "net" + "net/netip" "testing" ) @@ -27,26 +29,75 @@ func TestIsTunnelName(t *testing.T) { } func TestIsTunnelIface(t *testing.T) { + // addr fixtures: a routable (global-unicast) address vs link-local / none. 
+ routable := []net.Addr{&net.IPNet{IP: net.ParseIP("10.8.0.2"), Mask: net.CIDRMask(24, 32)}} + linkLocalOnly := []net.Addr{&net.IPNet{IP: net.ParseIP("fe80::1"), Mask: net.CIDRMask(64, 128)}} + var noAddr []net.Addr + cases := []struct { name string flags net.Flags + addrs []net.Addr want bool }{ - {"utun4", net.FlagUp, true}, - {"eth0", net.FlagUp | net.FlagPointToPoint, false}, // p2p alone is NOT enough (WAN links carry it) - {"ppp0", net.FlagUp | net.FlagPointToPoint, false}, // physical PPPoE/cellular WAN: not a tunnel - {"utun4", 0, false}, // down: skip - {"lo0", net.FlagUp | net.FlagLoopback, false}, // loopback: skip - {"wg0", net.FlagUp | net.FlagLoopback, false}, // loopback wins even with tunnel name - {"eth0", net.FlagUp, false}, // plain iface + {"utun4", net.FlagUp, routable, true}, // real VPN: tunnel name + routable addr + {"utun0", net.FlagUp, linkLocalOnly, false}, // macOS system utun: link-local only + {"utun1", net.FlagUp, noAddr, false}, // macOS system utun: no address + {"eth0", net.FlagUp | net.FlagPointToPoint, routable, false}, // p2p alone is NOT enough (WAN links carry it) + {"ppp0", net.FlagUp | net.FlagPointToPoint, routable, false}, // physical PPPoE/cellular WAN: not a tunnel + {"utun4", 0, routable, false}, // down: skip + {"lo0", net.FlagUp | net.FlagLoopback, routable, false}, // loopback: skip + {"wg0", net.FlagUp | net.FlagLoopback, routable, false}, // loopback wins even with tunnel name + {"eth0", net.FlagUp, routable, false}, // plain iface } for _, c := range cases { - if got := isTunnelIface(c.name, c.flags); got != c.want { - t.Errorf("isTunnelIface(%q, %v) = %v, want %v", c.name, c.flags, got, c.want) + if got := isTunnelIface(c.name, c.flags, c.addrs); got != c.want { + t.Errorf("isTunnelIface(%q, %v, %v) = %v, want %v", c.name, c.flags, c.addrs, got, c.want) } } } +func TestCheckEndpointRouting(t *testing.T) { + // Stub the interface reader so the test needs no live network. 
utun4 carries a + // tunnel-internal /24 plus an IPv6 link-local (which must NOT match anything); + // utun9 is "down"/absent (read error) → contributes no subnet. + addrs := map[string][]net.Addr{ + "utun4": { + &net.IPNet{IP: net.ParseIP("10.0.0.1"), Mask: net.CIDRMask(24, 32)}, + &net.IPNet{IP: net.ParseIP("fe80::1"), Mask: net.CIDRMask(64, 128)}, + }, + } + orig := ifaceAddrs + ifaceAddrs = func(name string) ([]net.Addr, error) { + a, ok := addrs[name] + if !ok { + return nil, fmt.Errorf("no such interface %s", name) + } + return a, nil + } + defer func() { ifaceAddrs = orig }() + + eps := []netip.Addr{ + netip.MustParseAddr("10.0.0.5"), // INTERNAL to utun4's 10.0.0.0/24 → flag + netip.MustParseAddr("5.253.65.186"), // public (stale server) → NOT subnet-detectable, pass + netip.MustParseAddr("5.253.65.43"), // public (correct server) → pass + } + bad, err := CheckEndpointRouting(eps, []string{"utun4", "utun9"}) + if err != nil { + t.Fatalf("CheckEndpointRouting error: %v", err) + } + + if len(bad) != 1 { + t.Fatalf("flagged %v, want exactly 10.0.0.5", bad) + } + if bad[0].Endpoint.String() != "10.0.0.5" || bad[0].Iface != "utun4" { + t.Errorf("flagged %+v, want endpoint 10.0.0.5 on utun4", bad[0]) + } + if bad[0].Subnet.String() != "10.0.0.0/24" { + t.Errorf("subnet = %s, want 10.0.0.0/24", bad[0].Subnet) + } +} + func TestTunnelInterfacesNeverErrorsAndExcludesLoopback(t *testing.T) { got, err := TunnelInterfaces() if err != nil { diff --git a/internal/runner/runner.go b/internal/runner/runner.go index 54e134b..52efac6 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -14,6 +14,7 @@ import ( "github.com/behnam-rk/dezhban/internal/decision" "github.com/behnam-rk/dezhban/internal/firewall" "github.com/behnam-rk/dezhban/internal/monitor" + "github.com/behnam-rk/dezhban/internal/netdetect" ) // probeEgressBudget caps how long the VPN recovery probe may hold the guard @@ -105,6 +106,22 @@ func (o Options) runVPN(ctx context.Context) error { 
} o.Log.Info("vpn guard active (startup)", "tunnels", o.Tunnels, "endpoints", len(o.Endpoints)) + // A VPN endpoint must be reachable on the PHYSICAL interface. An endpoint that + // is itself a tunnel-internal address can't be — the physical-side pass-to rule + // is futile, and cutting the tunnel cuts the only path to the endpoint, so it + // can never reconnect and the host locks itself out. Warn loudly at startup + // rather than discover it at the next FULL BLOCK. (run `dezhban doctor` for the + // full picture, including a stale-but-public endpoint this check can't see.) + if bad, err := netdetect.CheckEndpointRouting(o.Endpoints, o.Tunnels); err != nil { + o.Log.Debug("could not check endpoint routing", "err", err) + } else { + for _, br := range bad { + o.Log.Warn("vpn endpoint is inside the tunnel's own subnet — guaranteed lockout if blocked; "+ + "set vpn.endpoints to the server IP reachable on the physical interface", + "endpoint", br.Endpoint, "subnet", br.Subnet, "iface", br.Iface) + } + } + blocked := false // applied posture: false = GUARD, true = FULL BLOCK tick := time.NewTicker(o.Interval) defer tick.Stop() diff --git a/scripts/dev.sh b/scripts/dev.sh new file mode 100755 index 0000000..93cc49d --- /dev/null +++ b/scripts/dev.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# dev.sh — build, then run the monitor in dry-run (no firewall touch, no root). +# Polls country and logs each reading so you can watch detection without risk. +# +# Override the config with CONFIG=path; defaults to the dev template. +set -eu +cd "$(dirname "$0")/.." + +CONFIG="${CONFIG:-configs/dezhban.dev.json}" + +make build +echo "running dry-run with $CONFIG (Ctrl-C to stop) ..." +exec ./dezhban -v run --dry-run --config "$CONFIG" diff --git a/scripts/doctor.sh b/scripts/doctor.sh new file mode 100755 index 0000000..6f9c293 --- /dev/null +++ b/scripts/doctor.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# doctor.sh [extra args] — diagnose the VPN guard config (tunnels, endpoints, +# lockout risks). 
No root. Pass --discover to hunt the connected VPN's real +# server IP on macOS, e.g.: scripts/doctor.sh --discover +# +# Override the config with CONFIG=path; defaults to the local config. +set -eu +cd "$(dirname "$0")/.." + +CONFIG="${CONFIG:-configs/dezhban.local.json}" +[ -f "$CONFIG" ] || CONFIG="configs/dezhban.vpn-guard.json" + +exec go run ./cmd/dezhban doctor --config "$CONFIG" "$@" diff --git a/scripts/install-local.sh b/scripts/install-local.sh new file mode 100755 index 0000000..3547d9f --- /dev/null +++ b/scripts/install-local.sh @@ -0,0 +1,26 @@ +#!/bin/sh +# install-local.sh — build, install the config to the system path, register the +# service, and start it. Requires sudo (service registration is privileged). +# +# Override the config with CONFIG=path; defaults to the local config, falling +# back to the example. The system config path is /etc/dezhban/dezhban.json. +set -eu +cd "$(dirname "$0")/.." + +CONFIG="${CONFIG:-configs/dezhban.local.json}" +[ -f "$CONFIG" ] || CONFIG="configs/dezhban.example.json" +SYS_CONFIG=/etc/dezhban/dezhban.json + +echo "validating $CONFIG ..." +go run ./cmd/dezhban validate --config "$CONFIG" + +make build + +echo "installing config -> $SYS_CONFIG" +sudo mkdir -p /etc/dezhban +sudo install -m 600 "$CONFIG" "$SYS_CONFIG" + +echo "registering and starting service ..." +sudo ./dezhban install --config "$SYS_CONFIG" +sudo ./dezhban start +./dezhban status || true diff --git a/scripts/panic.sh b/scripts/panic.sh new file mode 100755 index 0000000..4b65aba --- /dev/null +++ b/scripts/panic.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# panic.sh — the lockout escape hatch. Force-removes dezhban's firewall rules +# even if no daemon is running, then prints status. Requires sudo. Use this when +# a misconfigured guard or a crashed `run` has cut your connectivity. +set -eu +cd "$(dirname "$0")/.." 
+ +make build +sudo ./dezhban panic +./dezhban status || true diff --git a/scripts/reinstall.sh b/scripts/reinstall.sh new file mode 100755 index 0000000..98189e5 --- /dev/null +++ b/scripts/reinstall.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# reinstall.sh — tear down any existing service/rules, then install fresh. The +# one-shot for iterating on config or a new build. Requires sudo. +set -eu +cd "$(dirname "$0")/.." + +make build +echo "removing any existing install ..." +sudo ./dezhban stop || true +sudo ./dezhban uninstall || true + +exec sh scripts/install-local.sh diff --git a/scripts/rules.sh b/scripts/rules.sh new file mode 100755 index 0000000..8f8982a --- /dev/null +++ b/scripts/rules.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# rules.sh [mode] — print the firewall ruleset a policy would apply, WITHOUT +# applying it. No root. With no mode argument, prints all three modes. +# +# Override the config with CONFIG=path; defaults to the local config. +set -eu +cd "$(dirname "$0")/.." + +CONFIG="${CONFIG:-configs/dezhban.local.json}" +[ -f "$CONFIG" ] || CONFIG="configs/dezhban.vpn-guard.json" + +print_mode() { + echo "===== mode: $1 =====" + go run ./cmd/dezhban print-rules --mode "$1" --config "$CONFIG" + echo +} + +if [ "$#" -ge 1 ]; then + print_mode "$1" +else + for m in guard fullblock legacy; do print_mode "$m"; done +fi diff --git a/scripts/uninstall-local.sh b/scripts/uninstall-local.sh new file mode 100755 index 0000000..c0bbf52 --- /dev/null +++ b/scripts/uninstall-local.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# uninstall-local.sh — stop and unregister the service. Requires sudo. Stopping +# the service removes dezhban's firewall rules; if anything lingers, run panic.sh. +set -eu +cd "$(dirname "$0")/.." + +make build +sudo ./dezhban stop || true +sudo ./dezhban uninstall +echo "uninstalled. 
if rules persist, run: scripts/panic.sh" From 642e24ae84d0733724f7a36180903e64e175f637 Mon Sep 17 00:00:00 2001 From: Behnam RK Date: Fri, 12 Jun 2026 18:06:30 +0330 Subject: [PATCH 2/2] fix: fail-closed on tunnel-internal VPN endpoint + harden subnet check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR #13 review: - runner.runVPN now refuses to start (before touching the firewall) when a configured vpn.endpoint sits inside a tunnel's own subnet, instead of only warning. A warn scrolls past in a service log; the next FULL BLOCK then locks the host out — the exact incident that motivated the PR. Read errors stay non-fatal (can't classify ≠ misconfigured). - prefixFromIPNet rejects a mask whose Size() returns (0,0) (non-contiguous / unrepresentable) rather than treating it as 0.0.0.0/0, which would make Contains match every endpoint and falsely flag them all as tunnel-internal. - CI: run go test -race once on ubuntu (runner toggles posture from a ticker loop with a recovery probe); skipped on windows where -race needs gcc. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/ci.yml | 5 +++++ internal/netdetect/netdetect.go | 9 ++++++++- internal/runner/runner.go | 35 ++++++++++++++++++--------------- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 993778b..f111efd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,11 @@ jobs: go-version-file: go.mod - run: go vet ./... - run: go test ./... + # Race detector once (ubuntu): the runner toggles posture from a ticker + # loop with a recovery probe — catch data races there. Skipped on windows + # where -race needs a gcc toolchain that isn't preinstalled. + - if: matrix.os == 'ubuntu-latest' + run: go test -race ./... # Verify gofmt cleanliness once (formatting is OS-independent). 
fmt: diff --git a/internal/netdetect/netdetect.go b/internal/netdetect/netdetect.go index 3f9d279..2ffe3e8 100644 --- a/internal/netdetect/netdetect.go +++ b/internal/netdetect/netdetect.go @@ -150,7 +150,14 @@ func prefixFromIPNet(n *net.IPNet) (netip.Prefix, bool) { return netip.Prefix{}, false } ip = ip.Unmap() - ones, _ := n.Mask.Size() // (0,0) for a non-contiguous mask we can't express + ones, bits := n.Mask.Size() + // Size returns (0,0) for a non-contiguous mask it cannot express. A real /0 + // reports a non-zero bit length (32 or 128) and is kept; only (0,0) is rejected + // — treating an unrepresentable mask as 0.0.0.0/0 would make Contains match + // EVERY endpoint and falsely flag them all as tunnel-internal. + if ones == 0 && bits == 0 { + return netip.Prefix{}, false + } pfx := netip.PrefixFrom(ip, ones) if !pfx.IsValid() { return netip.Prefix{}, false diff --git a/internal/runner/runner.go b/internal/runner/runner.go index 52efac6..0160d56 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -89,6 +89,25 @@ func Run(ctx context.Context, o Options) error { // the same hysteresis streak in the Decider, so one allowed reading does not // lift the block — it takes `Hysteresis` consecutive allowed probes. func (o Options) runVPN(ctx context.Context) error { + // A VPN endpoint must be reachable on the PHYSICAL interface. An endpoint that + // is itself a tunnel-internal address can't be — the physical-side pass-to rule + // is futile, and cutting the tunnel cuts the only path to the endpoint, so it + // can never reconnect and the host locks itself out. Refuse to start BEFORE + // touching the firewall rather than discover it at the next FULL BLOCK: a warn + // scrolls past in a service log, but a config that cannot recover must not run. + // (run `dezhban doctor` for the full picture, including a stale-but-public + // endpoint this check can't see.) A read error — can't classify — is non-fatal.
+ if bad, err := netdetect.CheckEndpointRouting(o.Endpoints, o.Tunnels); err != nil { + o.Log.Debug("could not check endpoint routing", "err", err) + } else if len(bad) > 0 { + for _, br := range bad { + o.Log.Error("vpn endpoint is inside the tunnel's own subnet — guaranteed lockout if blocked; "+ + "set vpn.endpoints to the server IP reachable on the physical interface", + "endpoint", br.Endpoint, "subnet", br.Subnet, "iface", br.Iface) + } + return fmt.Errorf("refusing to start: %d vpn endpoint(s) are tunnel-internal and would lock the host out on full block (run `dezhban doctor` to fix)", len(bad)) + } + guard := firewall.Policy{ Mode: firewall.ModeGuard, TunnelIfaces: o.Tunnels, @@ -106,22 +125,6 @@ func (o Options) runVPN(ctx context.Context) error { } o.Log.Info("vpn guard active (startup)", "tunnels", o.Tunnels, "endpoints", len(o.Endpoints)) - // A VPN endpoint must be reachable on the PHYSICAL interface. An endpoint that - // is itself a tunnel-internal address can't be — the physical-side pass-to rule - // is futile, and cutting the tunnel cuts the only path to the endpoint, so it - // can never reconnect and the host locks itself out. Warn loudly at startup - // rather than discover it at the next FULL BLOCK. (run `dezhban doctor` for the - // full picture, including a stale-but-public endpoint this check can't see.) - if bad, err := netdetect.CheckEndpointRouting(o.Endpoints, o.Tunnels); err != nil { - o.Log.Debug("could not check endpoint routing", "err", err) - } else { - for _, br := range bad { - o.Log.Warn("vpn endpoint is inside the tunnel's own subnet — guaranteed lockout if blocked; "+ - "set vpn.endpoints to the server IP reachable on the physical interface", - "endpoint", br.Endpoint, "subnet", br.Subnet, "iface", br.Iface) - } - } - blocked := false // applied posture: false = GUARD, true = FULL BLOCK tick := time.NewTicker(o.Interval) defer tick.Stop()