Consolidate logrotate and cron files into their main service/package roles if they exist. Standardise on MAX_FILES_CAP in one place.

This commit is contained in:
Miguel Jacq 2025-12-28 09:30:21 +11:00
parent cae6246177
commit 303c1b0dd8
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
3 changed files with 208 additions and 57 deletions

View file

@ -125,7 +125,9 @@ ALLOWED_UNOWNED_EXTS = {
"", # allow extensionless (common in /etc/default and /etc/init.d)
}
MAX_UNOWNED_FILES_PER_ROLE = 400
MAX_FILES_CAP = 4000
MAX_UNOWNED_FILES_PER_ROLE = 500
# Directories that are shared across many packages; never attribute unowned files in these trees to a single package.
SHARED_ETC_TOPDIRS = {
@ -324,7 +326,7 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [
]
def _iter_matching_files(spec: str, *, cap: int = 2000) -> List[str]:
def _iter_matching_files(spec: str, *, cap: int = MAX_FILES_CAP) -> List[str]:
"""Expand a glob spec and also walk directories to collect files."""
out: List[str] = []
for p in glob.glob(spec):
@ -963,43 +965,141 @@ def harvest(
for mf in users_managed:
already.add(mf.path)
# Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles.
svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps}
pkg_by_role: Dict[str, PackageSnapshot] = {p.role_name: p for p in pkg_snaps}
def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]:
    """Map a shared cron.d/logrotate.d snippet onto an already-known role.

    Returns ``(role_name, reason)`` when `path` lives under
    ``/etc/logrotate.d/`` or ``/etc/cron.d/`` and its filename (or the part
    of the filename before the first dot) resolves, via ``_safe_name``, to a
    key of ``svc_by_role`` or ``pkg_by_role``. Returns ``None`` otherwise.
    """
    # Decide which kind of snippet this is; anything else is not re-attributed.
    if path.startswith("/etc/logrotate.d/"):
        snippet_reason = "logrotate_snippet"
    elif path.startswith("/etc/cron.d/"):
        snippet_reason = "cron_snippet"
    else:
        return None

    filename = os.path.basename(path)
    # Full filename first, then the stem before the first dot. When there is
    # no dot the stem equals the filename, and dict.fromkeys() collapses the
    # duplicate while keeping the lookup order.
    stem = filename.split(".", 1)[0]
    for name in dict.fromkeys((filename, stem)):
        if not name:
            # Empty candidates (e.g. the stem of a dotfile) never match a role.
            continue
        role = _safe_name(name)
        if role in svc_by_role or role in pkg_by_role:
            return (role, snippet_reason)
    return None
# Capture essential system config/state (even if package-owned).
for path, reason in _iter_system_capture_paths():
if path in already:
continue
target = _target_role_for_shared_snippet(path)
if path_filter.is_excluded(path):
etc_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
if target:
rn, _ = target
if rn in svc_by_role:
svc_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="user_excluded")
)
elif rn in pkg_by_role:
pkg_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="user_excluded")
)
else:
etc_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
already.add(path)
continue
deny = policy.deny_reason(path)
if deny:
etc_excluded.append(ExcludedFile(path=path, reason=deny))
if target:
rn, _ = target
if rn in svc_by_role:
svc_by_role[rn].excluded.append(
ExcludedFile(path=path, reason=deny)
)
elif rn in pkg_by_role:
pkg_by_role[rn].excluded.append(
ExcludedFile(path=path, reason=deny)
)
else:
etc_excluded.append(ExcludedFile(path=path, reason=deny))
already.add(path)
continue
try:
owner, group, mode = stat_triplet(path)
except OSError:
etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
if target:
rn, _ = target
if rn in svc_by_role:
svc_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="unreadable")
)
elif rn in pkg_by_role:
pkg_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="unreadable")
)
else:
etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
already.add(path)
continue
src_rel = path.lstrip("/")
role_for_copy = etc_role_name
reason_for_role = reason
if target:
role_for_copy, reason_for_role = target
try:
_copy_into_bundle(bundle_dir, etc_role_name, path, src_rel)
_copy_into_bundle(bundle_dir, role_for_copy, path, src_rel)
except OSError:
etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
if target:
rn, _ = target
if rn in svc_by_role:
svc_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="unreadable")
)
elif rn in pkg_by_role:
pkg_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="unreadable")
)
else:
etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
already.add(path)
continue
etc_managed.append(
ManagedFile(
path=path,
src_rel=src_rel,
owner=owner,
group=group,
mode=mode,
reason=reason,
)
mf = ManagedFile(
path=path,
src_rel=src_rel,
owner=owner,
group=group,
mode=mode,
reason=reason_for_role,
)
if target:
rn, _ = target
if rn in svc_by_role:
svc_by_role[rn].managed_files.append(mf)
elif rn in pkg_by_role:
pkg_by_role[rn].managed_files.append(mf)
else:
etc_managed.append(mf)
already.add(path)
# Walk /etc for remaining unowned config-ish files
@ -1016,45 +1116,106 @@ def harvest(
if not _is_confish(path):
continue
target = _target_role_for_shared_snippet(path)
if path_filter.is_excluded(path):
etc_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
if target:
rn, _ = target
if rn in svc_by_role:
svc_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="user_excluded")
)
elif rn in pkg_by_role:
pkg_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="user_excluded")
)
else:
etc_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
already.add(path)
continue
deny = policy.deny_reason(path)
if deny:
etc_excluded.append(ExcludedFile(path=path, reason=deny))
if target:
rn, _ = target
if rn in svc_by_role:
svc_by_role[rn].excluded.append(
ExcludedFile(path=path, reason=deny)
)
elif rn in pkg_by_role:
pkg_by_role[rn].excluded.append(
ExcludedFile(path=path, reason=deny)
)
else:
etc_excluded.append(ExcludedFile(path=path, reason=deny))
already.add(path)
continue
try:
owner, group, mode = stat_triplet(path)
except OSError:
etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
if target:
rn, _ = target
if rn in svc_by_role:
svc_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="unreadable")
)
elif rn in pkg_by_role:
pkg_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="unreadable")
)
else:
etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
already.add(path)
continue
src_rel = path.lstrip("/")
role_for_copy = etc_role_name
reason_for_role = "custom_unowned"
if target:
role_for_copy, reason_for_role = target
try:
_copy_into_bundle(bundle_dir, etc_role_name, path, src_rel)
_copy_into_bundle(bundle_dir, role_for_copy, path, src_rel)
except OSError:
etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
if target:
rn, _ = target
if rn in svc_by_role:
svc_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="unreadable")
)
elif rn in pkg_by_role:
pkg_by_role[rn].excluded.append(
ExcludedFile(path=path, reason="unreadable")
)
else:
etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
already.add(path)
continue
etc_managed.append(
ManagedFile(
path=path,
src_rel=src_rel,
owner=owner,
group=group,
mode=mode,
reason="custom_unowned",
)
mf = ManagedFile(
path=path,
src_rel=src_rel,
owner=owner,
group=group,
mode=mode,
reason=reason_for_role,
)
if target:
rn, _ = target
if rn in svc_by_role:
svc_by_role[rn].managed_files.append(mf)
elif rn in pkg_by_role:
pkg_by_role[rn].managed_files.append(mf)
else:
etc_managed.append(mf)
scanned += 1
if scanned >= 2000:
if scanned >= MAX_FILES_CAP:
etc_notes.append(
"Reached file cap (2000) while scanning /etc for unowned files."
f"Reached file cap ({MAX_FILES_CAP}) while scanning /etc for unowned files."
)
break
if scanned >= 2000:
if scanned >= MAX_FILES_CAP:
break
etc_custom_snapshot = EtcCustomSnapshot(
@ -1146,7 +1307,7 @@ def harvest(
_scan_usr_local_tree(
"/usr/local/etc",
require_executable=False,
cap=2000,
cap=MAX_FILES_CAP,
reason="usr_local_etc_custom",
)
@ -1154,7 +1315,7 @@ def harvest(
_scan_usr_local_tree(
"/usr/local/bin",
require_executable=True,
cap=2000,
cap=MAX_FILES_CAP,
reason="usr_local_bin_script",
)
@ -1188,7 +1349,7 @@ def harvest(
files, inc_notes = expand_includes(
path_filter.iter_include_patterns(),
exclude=path_filter,
max_files=4000,
max_files=MAX_FILES_CAP,
)
included_files = files
extra_notes.extend(inc_notes)

View file

@ -138,7 +138,6 @@ def _copy_artifacts(
# If a file was successfully templatised by JinjaTurtle, do NOT
# also materialise the raw copy in the destination files dir.
# (This keeps the output minimal and avoids redundant "raw" files.)
if exclude_rels and rel in exclude_rels:
try:
if os.path.isfile(dst):
@ -165,7 +164,7 @@ def _write_role_scaffold(role_dir: str) -> None:
def _write_playbook_all(path: str, roles: List[str]) -> None:
pb_lines = [
"---",
"- name: Apply all roles on host",
"- name: Apply all roles on all hosts",
" hosts: all",
" become: true",
" roles:",
@ -179,7 +178,7 @@ def _write_playbook_all(path: str, roles: List[str]) -> None:
def _write_playbook_host(path: str, fqdn: str, roles: List[str]) -> None:
pb_lines = [
"---",
f"- name: Apply enroll roles on {fqdn}",
f"- name: Apply all roles on {fqdn}",
f" hosts: {fqdn}",
" become: true",
" roles:",
@ -390,9 +389,9 @@ def _render_generic_files_tasks(
# Using first_found makes roles work in both modes:
# - site-mode: inventory/host_vars/<host>/<role>/.files/...
# - non-site: roles/<role>/files/...
return f"""# Generated by enroll (data-driven tasks)
return f"""# Generated by enroll
- name: Deploy systemd unit files (templates)
- name: Deploy any systemd unit files (templates)
ansible.builtin.template:
src: "{{{{ item.src_rel }}}}.j2"
dest: "{{{{ item.dest }}}}"
@ -406,7 +405,7 @@ def _render_generic_files_tasks(
| list }}}}
notify: "{{{{ item.notify | default([]) }}}}"
- name: Deploy systemd unit files (copies)
- name: Deploy any systemd unit files (raw files)
vars:
_enroll_ff:
files:
@ -433,7 +432,7 @@ def _render_generic_files_tasks(
| list
| length) > 0
- name: Deploy other managed files (templates)
- name: Deploy any other managed files (templates)
ansible.builtin.template:
src: "{{{{ item.src_rel }}}}.j2"
dest: "{{{{ item.dest }}}}"
@ -447,7 +446,7 @@ def _render_generic_files_tasks(
| list }}}}
notify: "{{{{ item.notify | default([]) }}}}"
- name: Deploy other managed files (copies)
- name: Deploy any other managed files (raw files)
vars:
_enroll_ff:
files:
@ -668,11 +667,6 @@ def _manifest_from_bundle_dir(
manifested_service_roles: List[str] = []
manifested_pkg_roles: List[str] = []
# In site_mode, raw harvested files are stored under host-specific inventory
# to avoid cross-host clobber while still sharing a role definition.
# -------------------------
# -------------------------
# Users role (non-system users)
# -------------------------
@ -793,7 +787,7 @@ def _manifest_from_bundle_dir(
# tasks (data-driven)
users_tasks = """---
# Generated by enroll (data-driven tasks)
# Generated by enroll
- name: Ensure groups exist
ansible.builtin.group:
@ -893,8 +887,6 @@ Generated non-system user accounts and SSH public material.
manifested_users_roles.append(role)
# -------------------------
# -------------------------
# etc_custom role (unowned /etc not already attributed)
# -------------------------
@ -1212,8 +1204,6 @@ User-requested extra file harvesting.
manifested_usr_local_custom_roles.append(role)
# -------------------------
# -------------------------
# Service roles
# -------------------------
@ -1315,7 +1305,7 @@ User-requested extra file harvesting.
task_parts: List[str] = []
task_parts.append(
f"""---
# Generated by enroll (data-driven tasks)
# Generated by enroll
- name: Install packages for {role}
ansible.builtin.apt:
@ -1474,7 +1464,7 @@ Generated from `{unit}`.
task_parts: List[str] = []
task_parts.append(
f"""---
# Generated by enroll (data-driven tasks)
# Generated by enroll
- name: Install packages for {role}
ansible.builtin.apt:

View file

@ -174,7 +174,7 @@ def expand_includes(
patterns: Sequence[CompiledPathPattern],
*,
exclude: Optional[PathFilter] = None,
max_files: int = 4000,
max_files: int,
) -> Tuple[List[str], List[str]]:
"""Expand include patterns into concrete file paths.