Capture other files in the user's home directory
Some checks failed
CI / test (push) Failing after 1m57s
Lint / test (push) Successful in 32s
Trivy / test (push) Successful in 27s

Such as `.bashrc`, `.bash_aliases`, `.profile`, if these files differ from the `/etc/skel` defaults
This commit is contained in:
Miguel Jacq 2026-01-05 15:02:22 +11:00
parent e68ec0bffc
commit a1433d645f
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
3 changed files with 110 additions and 1 deletions

View file

@ -2,6 +2,7 @@
* Introduce `enroll explain` - a tool to analyze and explain what's in (or not in) a harvest and why.
* Centralise the cron and logrotate handling into their respective roles; previously there was some duplication between roles based on harvest discovery.
* Capture other files in the user's home directory: `.bashrc`, `.profile` and `.bash_logout` when they differ from the `/etc/skel` defaults, and `.bash_aliases` whenever it is present.
# 0.2.3

View file

@ -5,6 +5,7 @@ import json
import os
import re
import shutil
import stat
import time
from dataclasses import dataclass, asdict, field
from typing import Dict, List, Optional, Set
@ -157,6 +158,54 @@ MAX_FILES_CAP = 4000
MAX_UNOWNED_FILES_PER_ROLE = 500
def _files_differ(a: str, b: str, *, max_bytes: int = 2_000_000) -> bool:
    """Report whether file `a` is anything other than a byte-for-byte copy of `b`.

    The answer is deliberately biased towards "different" (True), so that a
    caller deciding whether to capture `a` errs on the side of capturing:

    - Either path cannot be stat'ed, or is not a regular file (symlinks are
      followed first): True.
    - The two sizes disagree: True.
    - The size exceeds `max_bytes`: True, without reading any content.
    - Opening or reading either file raises OSError: True.

    Only when both files are regular, equally sized, within `max_bytes` and
    have identical content is False returned.
    """
    chunk_size = 1024 * 64

    # Stat `a` first, then `b`; any failure or non-regular file short-circuits.
    sizes = []
    for path in (a, b):
        try:
            info = os.stat(path, follow_symlinks=True)
        except OSError:
            return True
        if not stat.S_ISREG(info.st_mode):
            return True
        sizes.append(info.st_size)

    size_a, size_b = sizes
    # Sizes already match when the second test runs, so checking `a` suffices
    # to skip expensive reads of unexpectedly large files.
    if size_a != size_b or size_a > max_bytes:
        return True

    try:
        with open(a, "rb") as left, open(b, "rb") as right:
            # Walk `a` chunk by chunk, pulling the matching chunk from `b`.
            for block in iter(lambda: left.read(chunk_size), b""):
                if right.read(chunk_size) != block:
                    return True
            # `a` is exhausted: the files match only if `b` is exhausted too.
            return bool(right.read(chunk_size))
    except OSError:
        return True
def _merge_parent_dirs(
existing_dirs: List[ManagedDir],
managed_files: List[ManagedFile],
@ -1319,6 +1368,18 @@ def harvest(
users_role_name = "users"
users_role_seen = seen_by_role.setdefault(users_role_name, set())
skel_dir = "/etc/skel"
# Dotfiles to harvest for non-system users. For the common "skeleton"
# files, only capture if the user's copy differs from /etc/skel.
skel_dotfiles = [
(".bashrc", "user_shell_rc"),
(".profile", "user_profile"),
(".bash_logout", "user_shell_logout"),
]
extra_dotfiles = [
(".bash_aliases", "user_shell_aliases"),
]
for u in user_records:
users_list.append(
{
@ -1353,6 +1414,48 @@ def harvest(
seen_global=captured_global,
)
# Capture common per-user shell dotfiles when they differ from /etc/skel.
# These still go through IgnorePolicy and user path filters.
home = (u.home or "").rstrip("/")
if home and home.startswith("/"):
for rel, reason in skel_dotfiles:
upath = os.path.join(home, rel)
if not os.path.exists(upath):
continue
skel_path = os.path.join(skel_dir, rel)
if not _files_differ(upath, skel_path, max_bytes=policy.max_file_bytes):
continue
_capture_file(
bundle_dir=bundle_dir,
role_name=users_role_name,
abs_path=upath,
reason=reason,
policy=policy,
path_filter=path_filter,
managed_out=users_managed,
excluded_out=users_excluded,
seen_role=users_role_seen,
seen_global=captured_global,
)
# Capture other common per-user shell files unconditionally if present.
for rel, reason in extra_dotfiles:
upath = os.path.join(home, rel)
if not os.path.exists(upath):
continue
_capture_file(
bundle_dir=bundle_dir,
role_name=users_role_name,
abs_path=upath,
reason=reason,
policy=policy,
path_filter=path_filter,
managed_out=users_managed,
excluded_out=users_excluded,
seen_role=users_role_seen,
seen_global=captured_global,
)
users_snapshot = UsersSnapshot(
role_name=users_role_name,
users=users_list,

View file

@ -819,7 +819,12 @@ def _manifest_from_bundle_dir(
group = str(u.get("primary_group") or owner)
break
mode = "0600" if mf.get("reason") == "authorized_keys" else "0644"
# Prefer the harvested file mode so we preserve any deliberate
# permissions (e.g. 0600 for certain dotfiles). For authorized_keys,
# enforce 0600 regardless.
mode = mf.get("mode") or "0644"
if mf.get("reason") == "authorized_keys":
mode = "0600"
ssh_files.append(
{
"dest": dest,