Add other common strings that could represent sensitive values to ignore unless in --dangerous mode

This commit is contained in:
Miguel Jacq 2026-06-22 10:57:54 +10:00
parent e2b61bcdf1
commit 0a0f067111
Signed by: mig5
GPG key ID: 03906B4110AAD3B8
2 changed files with 68 additions and 2 deletions

View file

@ -46,9 +46,41 @@ DEFAULT_ALLOW_BINARY_GLOBS = [
"/etc/pki/rpm-gpg/*",
]
# Conservative secret patterns for default/safe harvesting. These are
# intentionally biased towards false positives: operators can opt in with
# --dangerous or targeted include/exclude review when a file is genuinely
# needed.
#
# The assignment pattern catches INI/YAML/JSON/TOML-ish keys such as:
#   password: hunter2
#   "client_secret": "..."
#   aws_secret_access_key = ...
#   GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json
#
# All entries are bytes patterns, so they only match against bytes input.
SENSITIVE_CONTENT_PATTERNS = [
    # PEM private-key headers: generic, RSA, EC, OpenSSH and DSA variants.
    re.compile(rb"-----BEGIN (RSA |EC |OPENSSH |DSA |)PRIVATE KEY-----"),
    # Assignment-style keys: an optionally quoted identifier that contains one
    # of the sensitive words, followed by ':' or '='. Matching is
    # case-insensitive, and the identifier may carry any prefix/suffix made of
    # letters, digits, '_', '.' or '-' (e.g. "db_password", "AUTH_TOKEN_V2").
    # MULTILINE is not set, so '^' only anchors at the start of the content;
    # later lines are still covered because the preceding newline satisfies
    # the non-alphanumeric alternative.
    re.compile(
        rb"""(?ix)
        (^|[^A-Za-z0-9])
        [\"']?
        (
            [A-Za-z0-9_.-]*
            (
                password|passwd|passphrase|
                token|auth[_-]?token|access[_-]?token|refresh[_-]?token|
                secret|client[_-]?secret|secret[_-]?key|
                api[_-]?key|access[_-]?key|private[_-]?key|
                credential|credentials|
                aws[_-]?access[_-]?key[_-]?id|aws[_-]?secret[_-]?access[_-]?key|
                azure[_-]?client[_-]?secret|azure[_-]?tenant[_-]?id|azure[_-]?client[_-]?id|
                google[_-]?application[_-]?credentials|gcp[_-]?service[_-]?account|
                service[_-]?account[_-]?key
            )
            [A-Za-z0-9_.-]*
        )
        [\"']?
        \s*[:=]
        """
    ),
    # Bare sensitive words anywhere in the content, even without an
    # assignment (word-boundary delimited, case-insensitive).
    re.compile(rb"(?i)\b(pass|passwd|token|secret|api[_-]?key)\b"),
]

View file

@ -172,6 +172,40 @@ def test_deny_reason_private_key(tmp_path: Path):
assert reason == "sensitive_content"
def test_deny_reason_sensitive_common_assignment_keys(tmp_path: Path):
    """Files assigning common secret-like keys are denied as sensitive content.

    Each sample is written to its own file under ``tmp_path`` and the default
    ``IgnorePolicy`` must report ``"sensitive_content"`` for that path.
    """
    policy = IgnorePolicy()
    # (file name, file content) pairs; the name doubles as the assertion
    # message so a failure identifies the offending sample.
    samples = [
        ("password_yaml", "password: hunter2\n"),
        ("password_json", '{"password": "hunter2"}\n'),
        ("db_password", "db_password: hunter2\n"),
        ("client_secret", "client_secret: abc123\n"),
        ("secret_key", "secret_key = abc123\n"),
        ("auth_token", "auth_token: abc123\n"),
        ("passphrase", "passphrase: abc123\n"),
        ("credentials", "credentials = abc123\n"),
    ]
    for label, body in samples:
        target = tmp_path / label
        target.write_text(body, encoding="utf-8")
        reason = policy.deny_reason(str(target))
        assert reason == "sensitive_content", label
def test_deny_reason_sensitive_common_cloud_assignment_keys(tmp_path: Path):
    """Files assigning cloud-provider credential keys are denied as sensitive.

    Each sample is written to its own file under ``tmp_path`` and the default
    ``IgnorePolicy`` must report ``"sensitive_content"`` for that path.
    """
    policy = IgnorePolicy()
    # (file name, file content) pairs; the name doubles as the assertion
    # message so a failure identifies the offending sample.
    samples = [
        ("aws_access_key_id", "aws_access_key_id = AKIAIOSFODNN7EXAMPLE\n"),
        (
            "aws_secret_access_key",
            "aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCY\n",
        ),
        ("azure_client_secret", "azure_client_secret: abc123\n"),
        (
            "google_application_credentials",
            "GOOGLE_APPLICATION_CREDENTIALS=/etc/app/key.json\n",
        ),
        (
            "gcp_service_account",
            "gcp_service_account: svc@example.iam.gserviceaccount.com\n",
        ),
        ("service_account_key", "service_account_key: abc123\n"),
    ]
    for label, body in samples:
        target = tmp_path / label
        target.write_text(body, encoding="utf-8")
        reason = policy.deny_reason(str(target))
        assert reason == "sensitive_content", label
def test_deny_reason_too_large(tmp_path: Path):
pol = IgnorePolicy(max_file_bytes=100)
large = tmp_path / "large.txt"