From 0a0f067111eb9b30b0589fdb459c216544a5972c Mon Sep 17 00:00:00 2001
From: Miguel Jacq
Date: Mon, 22 Jun 2026 10:57:54 +1000
Subject: [PATCH] Add other common strings that could represent sensitive values to ignore unless in --dangerous mode

---
Review notes (placed below the three-dash marker, so they are not part of
the commit message):

* enroll/ignore.py: SENSITIVE_CONTENT_PATTERNS now also accepts "DSA " in
  the PEM private-key header. The previous `password\s*=` check is replaced
  by a case-insensitive, verbose-mode ((?ix)) pattern that matches a key
  containing one of the listed keywords, optionally quoted, with any
  [A-Za-z0-9_.-]* prefix/suffix, followed by optional whitespace and then
  a colon or an equals sign.
* Because the keyword may carry an arbitrary prefix/suffix from that
  character class, several of the longer alternatives (auth_token,
  access_token, client_secret, credentials, aws_secret_access_key, ...) are
  already subsumed by the shorter ones (token, secret, credential,
  access_key). The azure tenant/client id and gcp_service_account
  alternatives are not subsumed and do add coverage.
* tests/test_ignore.py: two table-driven tests write each sample line to a
  file under tmp_path and assert that IgnorePolicy().deny_reason() returns
  "sensitive_content" for it. Only positive cases are covered.

 enroll/ignore.py     | 36 ++++++++++++++++++++++++++++++++++--
 tests/test_ignore.py | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/enroll/ignore.py b/enroll/ignore.py
index a7bf297..f1225e3 100644
--- a/enroll/ignore.py
+++ b/enroll/ignore.py
@@ -46,9 +46,41 @@ DEFAULT_ALLOW_BINARY_GLOBS = [
     "/etc/pki/rpm-gpg/*",
 ]
 
+# Conservative secret patterns for default/safe harvesting. These are
+# intentionally biased towards false positives: operators can opt in with
+# --dangerous or targeted include/exclude review when a file is genuinely
+# needed.
+#
+# The assignment pattern catches INI/YAML/JSON/TOML-ish keys such as:
+#   password: hunter2
+#   "client_secret": "..."
+#   aws_secret_access_key = ...
+#   GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json
 SENSITIVE_CONTENT_PATTERNS = [
-    re.compile(rb"-----BEGIN (RSA |EC |OPENSSH |)PRIVATE KEY-----"),
-    re.compile(rb"(?i)\bpassword\s*="),
+    re.compile(rb"-----BEGIN (RSA |EC |OPENSSH |DSA |)PRIVATE KEY-----"),
+    re.compile(
+        rb"""(?ix)
+        (^|[^A-Za-z0-9])
+        [\"']?
+        (
+            [A-Za-z0-9_.-]*
+            (
+                password|passwd|passphrase|
+                token|auth[_-]?token|access[_-]?token|refresh[_-]?token|
+                secret|client[_-]?secret|secret[_-]?key|
+                api[_-]?key|access[_-]?key|private[_-]?key|
+                credential|credentials|
+                aws[_-]?access[_-]?key[_-]?id|aws[_-]?secret[_-]?access[_-]?key|
+                azure[_-]?client[_-]?secret|azure[_-]?tenant[_-]?id|azure[_-]?client[_-]?id|
+                google[_-]?application[_-]?credentials|gcp[_-]?service[_-]?account|
+                service[_-]?account[_-]?key
+            )
+            [A-Za-z0-9_.-]*
+        )
+        [\"']?
+        \s*[:=]
+        """
+    ),
     re.compile(rb"(?i)\b(pass|passwd|token|secret|api[_-]?key)\b"),
 ]
 
diff --git a/tests/test_ignore.py b/tests/test_ignore.py
index 2ba9a90..8f088be 100644
--- a/tests/test_ignore.py
+++ b/tests/test_ignore.py
@@ -172,6 +172,40 @@ def test_deny_reason_private_key(tmp_path: Path):
     assert reason == "sensitive_content"
 
 
+def test_deny_reason_sensitive_common_assignment_keys(tmp_path: Path):
+    pol = IgnorePolicy()
+    cases = {
+        "password_yaml": "password: hunter2\n",
+        "password_json": '{"password": "hunter2"}\n',
+        "db_password": "db_password: hunter2\n",
+        "client_secret": "client_secret: abc123\n",
+        "secret_key": "secret_key = abc123\n",
+        "auth_token": "auth_token: abc123\n",
+        "passphrase": "passphrase: abc123\n",
+        "credentials": "credentials = abc123\n",
+    }
+    for name, text in cases.items():
+        config = tmp_path / name
+        config.write_text(text, encoding="utf-8")
+        assert pol.deny_reason(str(config)) == "sensitive_content", name
+
+
+def test_deny_reason_sensitive_common_cloud_assignment_keys(tmp_path: Path):
+    pol = IgnorePolicy()
+    cases = {
+        "aws_access_key_id": "aws_access_key_id = AKIAIOSFODNN7EXAMPLE\n",
+        "aws_secret_access_key": "aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCY\n",
+        "azure_client_secret": "azure_client_secret: abc123\n",
+        "google_application_credentials": "GOOGLE_APPLICATION_CREDENTIALS=/etc/app/key.json\n",
+        "gcp_service_account": "gcp_service_account: svc@example.iam.gserviceaccount.com\n",
+        "service_account_key": "service_account_key: abc123\n",
+    }
+    for name, text in cases.items():
+        config = tmp_path / name
+        config.write_text(text, encoding="utf-8")
+        assert pol.deny_reason(str(config)) == "sensitive_content", name
+
+
 def test_deny_reason_too_large(tmp_path: Path):
     pol = IgnorePolicy(max_file_bytes=100)
     large = tmp_path / "large.txt"