Remote mode and dangerous flag, other tweaks

* Add remote mode for harvesting a remote machine via a local workstation (no need to install enroll remotely).
   Optionally use `--no-sudo` if you don't want the remote user to have passwordless sudo when conducting the
   harvest, although you'll end up with less useful data (the same as running `enroll harvest` on a machine
   without sudo).
 * Add `--dangerous` flag to capture even sensitive data (use at your own risk!)
 * Do a better job of capturing other config files in `/etc/<package>/`, even if that package doesn't normally
   ship or manage those files.
This commit is contained in:
Miguel Jacq 2025-12-17 17:02:16 +11:00
parent 026416d158
commit 6a36a9d2d5
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
13 changed files with 1083 additions and 155 deletions

View file

@ -38,9 +38,13 @@ BLOCK_END = b"*/"
@dataclass
class IgnorePolicy:
deny_globs: list[str] = None
deny_globs: Optional[list[str]] = None
max_file_bytes: int = 256_000
sample_bytes: int = 64_000
# If True, be much less conservative about collecting potentially
# sensitive files. This disables deny globs (e.g. /etc/shadow,
# /etc/ssl/private/*) and skips heuristic content scanning.
dangerous: bool = False
def __post_init__(self) -> None:
if self.deny_globs is None:
@ -69,9 +73,10 @@ class IgnorePolicy:
yield raw
def deny_reason(self, path: str) -> Optional[str]:
for g in self.deny_globs:
if fnmatch.fnmatch(path, g):
return "denied_path"
if not self.dangerous:
for g in self.deny_globs or []:
if fnmatch.fnmatch(path, g):
return "denied_path"
try:
st = os.stat(path, follow_symlinks=True)
@ -93,9 +98,10 @@ class IgnorePolicy:
if b"\x00" in data:
return "binary_like"
for line in self.iter_effective_lines(data):
for pat in SENSITIVE_CONTENT_PATTERNS:
if pat.search(line):
return "sensitive_content"
if not self.dangerous:
for line in self.iter_effective_lines(data):
for pat in SENSITIVE_CONTENT_PATTERNS:
if pat.search(line):
return "sensitive_content"
return None