From 5fa9cc8339b0e2e0968cbfd3bc269d58e8b55f2e Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 20:48:47 -0600 Subject: [PATCH] Add Technical_Decomp_Ignore --- Technical_Decomp_Ignore.md | 59 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 Technical_Decomp_Ignore.md diff --git a/Technical_Decomp_Ignore.md b/Technical_Decomp_Ignore.md new file mode 100644 index 0000000..7f3d7f7 --- /dev/null +++ b/Technical_Decomp_Ignore.md @@ -0,0 +1,59 @@ +## enroll/ignore.py + +### IgnorePolicy (dataclass) + +#### Purpose: the “don’t accidentally harvest secrets” gatekeeper. + +#### Fields: + +- deny_globs: list of fnmatch patterns whose matching paths are denied unless dangerous=True +- deny_globs defaults include /etc/shadow, /etc/ssl/private/*, SSH host keys, letsencrypt, etc. +- allow_binary_globs: explicit allowlist of binary-ish config artifacts (APT keyrings etc.) +- max_file_bytes: hard cap; default 256 KB +- sample_bytes: how many bytes to inspect for content heuristics; default 64 KB +- dangerous: if True, skips the deny-glob and sensitive-content checks (the .log, size, file-type and binary checks still apply) + +#### Methods: + +##### `__post_init__` + +If deny_globs or allow_binary_globs weren’t passed, it fills them with the defaults. + +##### iter_effective_lines(content: bytes) + +Yields “meaningful” lines from a bytes blob by skipping: + +- empty lines +- line comments starting with #, ;, //, or * +- C-style block comments /* ... */ (best-effort state machine) + +This is used so secret scanning doesn’t trigger on commented-out examples. + +##### deny_reason(path: str) -> Optional[str] + +Returns a short deny code if the file should not be harvested; otherwise None. + +The decision pipeline is: + +- If path.endswith(".log") → "log_file" (always denied, regardless of dangerous).
+- If not dangerous: + - if path matches any deny glob → "denied_path" +- os.stat() (following symlinks): + - if stat fails → "unreadable" + - if size > max_file_bytes → "too_large" + - if the path is not a regular file or is a symlink → "not_regular_file" +- Read up to sample_bytes: + - if read fails → "unreadable" +- Binary-like detection: + - if the sample contains NUL (b"\x00"): + - if path matches allow-binary globs → allowed + - else → "binary_like" + - Note: this binary check still applies even when dangerous=True. +- If not dangerous: + - scan “effective lines” against regex patterns like: + - PEM private key headers + - password = ... + - keywords (token, secret, api_key, etc.) + - if any pattern matches → "sensitive_content" + +If nothing triggers, return None (allowed). \ No newline at end of file