Consolidate logrotate.d and cron.d snippets into their matching service/package roles where such roles exist. Standardise on MAX_FILES_CAP in one place

Miguel Jacq 2025-12-28 09:30:21 +11:00
parent cae6246177
commit 303c1b0dd8
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
3 changed files with 208 additions and 57 deletions
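In short: when the harvester meets a file under /etc/logrotate.d/ or /etc/cron.d/, it now checks whether the snippet's filename (or its stem before the first dot) matches an existing service or package role, and if so attributes the file to that role instead of the catch-all etc_custom role. A minimal standalone sketch of that mapping rule, assuming a simplified _safe_name and hypothetical role names (these are not the harvester's real helpers):

import os
import re
from typing import Optional, Set, Tuple

def _safe_name(name: str) -> str:
    # Simplified stand-in for the harvester's role-name sanitiser (assumption).
    return re.sub(r"[^a-z0-9_]+", "_", name.lower())

def target_role(path: str, existing_roles: Set[str]) -> Optional[Tuple[str, str]]:
    """Return (role_name, reason) when a shared snippet maps onto an existing role."""
    prefixes = {"/etc/logrotate.d/": "logrotate_snippet", "/etc/cron.d/": "cron_snippet"}
    for prefix, reason in prefixes.items():
        if not path.startswith(prefix):
            continue
        base = os.path.basename(path)
        # Try the full filename first, then the stem before the first dot.
        for candidate in (base, base.split(".", 1)[0]):
            rn = _safe_name(candidate)
            if rn in existing_roles:
                return (rn, reason)
        return None
    return None

roles = {"nginx", "postgresql"}  # hypothetical roles already harvested on this host
print(target_role("/etc/logrotate.d/nginx", roles))   # ('nginx', 'logrotate_snippet')
print(target_role("/etc/cron.d/certbot", roles))      # None -> stays with etc_custom
print(target_role("/etc/ssh/sshd_config", roles))     # None -> not a shared snippet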


@@ -125,7 +125,9 @@ ALLOWED_UNOWNED_EXTS = {
     "",  # allow extensionless (common in /etc/default and /etc/init.d)
 }
 
-MAX_UNOWNED_FILES_PER_ROLE = 400
+MAX_FILES_CAP = 4000
+MAX_UNOWNED_FILES_PER_ROLE = 500
 
 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package.
 SHARED_ETC_TOPDIRS = {
@@ -324,7 +326,7 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [
 ]
 
-def _iter_matching_files(spec: str, *, cap: int = 2000) -> List[str]:
+def _iter_matching_files(spec: str, *, cap: int = MAX_FILES_CAP) -> List[str]:
     """Expand a glob spec and also walk directories to collect files."""
     out: List[str] = []
     for p in glob.glob(spec):
@@ -963,43 +965,141 @@ def harvest(
     for mf in users_managed:
         already.add(mf.path)
 
+    # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles.
+    svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps}
+    pkg_by_role: Dict[str, PackageSnapshot] = {p.role_name: p for p in pkg_snaps}
+
+    def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]:
+        """If `path` is a shared snippet, return (role_name, reason) to attach to."""
+        base = os.path.basename(path)
+        # Try full filename and stem (before first dot).
+        candidates: List[str] = [base]
+        if "." in base:
+            candidates.append(base.split(".", 1)[0])
+        seen: Set[str] = set()
+        uniq: List[str] = []
+        for c in candidates:
+            if c and c not in seen:
+                seen.add(c)
+                uniq.append(c)
+        if path.startswith("/etc/logrotate.d/"):
+            for c in uniq:
+                rn = _safe_name(c)
+                if rn in svc_by_role or rn in pkg_by_role:
+                    return (rn, "logrotate_snippet")
+            return None
+        if path.startswith("/etc/cron.d/"):
+            for c in uniq:
+                rn = _safe_name(c)
+                if rn in svc_by_role or rn in pkg_by_role:
+                    return (rn, "cron_snippet")
+            return None
+        return None
+
     # Capture essential system config/state (even if package-owned).
     for path, reason in _iter_system_capture_paths():
         if path in already:
             continue
+        target = _target_role_for_shared_snippet(path)
         if path_filter.is_excluded(path):
-            etc_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
+            if target:
+                rn, _ = target
+                if rn in svc_by_role:
+                    svc_by_role[rn].excluded.append(
+                        ExcludedFile(path=path, reason="user_excluded")
+                    )
+                elif rn in pkg_by_role:
+                    pkg_by_role[rn].excluded.append(
+                        ExcludedFile(path=path, reason="user_excluded")
+                    )
+            else:
+                etc_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
+            already.add(path)
             continue
         deny = policy.deny_reason(path)
         if deny:
-            etc_excluded.append(ExcludedFile(path=path, reason=deny))
+            if target:
+                rn, _ = target
+                if rn in svc_by_role:
+                    svc_by_role[rn].excluded.append(
+                        ExcludedFile(path=path, reason=deny)
+                    )
+                elif rn in pkg_by_role:
+                    pkg_by_role[rn].excluded.append(
+                        ExcludedFile(path=path, reason=deny)
+                    )
+            else:
+                etc_excluded.append(ExcludedFile(path=path, reason=deny))
+            already.add(path)
             continue
         try:
             owner, group, mode = stat_triplet(path)
         except OSError:
-            etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
+            if target:
+                rn, _ = target
+                if rn in svc_by_role:
+                    svc_by_role[rn].excluded.append(
+                        ExcludedFile(path=path, reason="unreadable")
+                    )
+                elif rn in pkg_by_role:
+                    pkg_by_role[rn].excluded.append(
+                        ExcludedFile(path=path, reason="unreadable")
+                    )
+            else:
+                etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
+            already.add(path)
             continue
         src_rel = path.lstrip("/")
+        role_for_copy = etc_role_name
+        reason_for_role = reason
+        if target:
+            role_for_copy, reason_for_role = target
         try:
-            _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel)
+            _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel)
         except OSError:
-            etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
+            if target:
+                rn, _ = target
+                if rn in svc_by_role:
+                    svc_by_role[rn].excluded.append(
+                        ExcludedFile(path=path, reason="unreadable")
+                    )
+                elif rn in pkg_by_role:
+                    pkg_by_role[rn].excluded.append(
+                        ExcludedFile(path=path, reason="unreadable")
+                    )
+            else:
+                etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
+            already.add(path)
             continue
-        etc_managed.append(
-            ManagedFile(
-                path=path,
-                src_rel=src_rel,
-                owner=owner,
-                group=group,
-                mode=mode,
-                reason=reason,
-            )
-        )
+        mf = ManagedFile(
+            path=path,
+            src_rel=src_rel,
+            owner=owner,
+            group=group,
+            mode=mode,
+            reason=reason_for_role,
+        )
+        if target:
+            rn, _ = target
+            if rn in svc_by_role:
+                svc_by_role[rn].managed_files.append(mf)
+            elif rn in pkg_by_role:
+                pkg_by_role[rn].managed_files.append(mf)
+        else:
+            etc_managed.append(mf)
         already.add(path)
 
     # Walk /etc for remaining unowned config-ish files
@@ -1016,45 +1116,106 @@ def harvest(
             if not _is_confish(path):
                 continue
+            target = _target_role_for_shared_snippet(path)
             if path_filter.is_excluded(path):
-                etc_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
+                if target:
+                    rn, _ = target
+                    if rn in svc_by_role:
+                        svc_by_role[rn].excluded.append(
+                            ExcludedFile(path=path, reason="user_excluded")
+                        )
+                    elif rn in pkg_by_role:
+                        pkg_by_role[rn].excluded.append(
+                            ExcludedFile(path=path, reason="user_excluded")
+                        )
+                else:
+                    etc_excluded.append(ExcludedFile(path=path, reason="user_excluded"))
+                already.add(path)
                 continue
             deny = policy.deny_reason(path)
             if deny:
-                etc_excluded.append(ExcludedFile(path=path, reason=deny))
+                if target:
+                    rn, _ = target
+                    if rn in svc_by_role:
+                        svc_by_role[rn].excluded.append(
+                            ExcludedFile(path=path, reason=deny)
+                        )
+                    elif rn in pkg_by_role:
+                        pkg_by_role[rn].excluded.append(
+                            ExcludedFile(path=path, reason=deny)
+                        )
+                else:
+                    etc_excluded.append(ExcludedFile(path=path, reason=deny))
+                already.add(path)
                 continue
             try:
                 owner, group, mode = stat_triplet(path)
             except OSError:
-                etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
+                if target:
+                    rn, _ = target
+                    if rn in svc_by_role:
+                        svc_by_role[rn].excluded.append(
+                            ExcludedFile(path=path, reason="unreadable")
+                        )
+                    elif rn in pkg_by_role:
+                        pkg_by_role[rn].excluded.append(
+                            ExcludedFile(path=path, reason="unreadable")
+                        )
+                else:
+                    etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
+                already.add(path)
                 continue
             src_rel = path.lstrip("/")
+            role_for_copy = etc_role_name
+            reason_for_role = "custom_unowned"
+            if target:
+                role_for_copy, reason_for_role = target
             try:
-                _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel)
+                _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel)
             except OSError:
-                etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
+                if target:
+                    rn, _ = target
+                    if rn in svc_by_role:
+                        svc_by_role[rn].excluded.append(
+                            ExcludedFile(path=path, reason="unreadable")
+                        )
+                    elif rn in pkg_by_role:
+                        pkg_by_role[rn].excluded.append(
+                            ExcludedFile(path=path, reason="unreadable")
+                        )
+                else:
+                    etc_excluded.append(ExcludedFile(path=path, reason="unreadable"))
+                already.add(path)
                 continue
-            etc_managed.append(
-                ManagedFile(
-                    path=path,
-                    src_rel=src_rel,
-                    owner=owner,
-                    group=group,
-                    mode=mode,
-                    reason="custom_unowned",
-                )
-            )
+            mf = ManagedFile(
+                path=path,
+                src_rel=src_rel,
+                owner=owner,
+                group=group,
+                mode=mode,
+                reason=reason_for_role,
+            )
+            if target:
+                rn, _ = target
+                if rn in svc_by_role:
+                    svc_by_role[rn].managed_files.append(mf)
+                elif rn in pkg_by_role:
+                    pkg_by_role[rn].managed_files.append(mf)
+            else:
+                etc_managed.append(mf)
             scanned += 1
-            if scanned >= 2000:
+            if scanned >= MAX_FILES_CAP:
                 etc_notes.append(
-                    "Reached file cap (2000) while scanning /etc for unowned files."
+                    f"Reached file cap ({MAX_FILES_CAP}) while scanning /etc for unowned files."
                 )
                 break
-        if scanned >= 2000:
+        if scanned >= MAX_FILES_CAP:
             break
 
     etc_custom_snapshot = EtcCustomSnapshot(
@@ -1146,7 +1307,7 @@ def harvest(
         _scan_usr_local_tree(
             "/usr/local/etc",
             require_executable=False,
-            cap=2000,
+            cap=MAX_FILES_CAP,
             reason="usr_local_etc_custom",
         )
@@ -1154,7 +1315,7 @@ def harvest(
         _scan_usr_local_tree(
             "/usr/local/bin",
             require_executable=True,
-            cap=2000,
+            cap=MAX_FILES_CAP,
             reason="usr_local_bin_script",
         )
@@ -1188,7 +1349,7 @@ def harvest(
         files, inc_notes = expand_includes(
             path_filter.iter_include_patterns(),
             exclude=path_filter,
-            max_files=4000,
+            max_files=MAX_FILES_CAP,
         )
         included_files = files
         extra_notes.extend(inc_notes)
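The same constant now backs every scan cap in this file, so the limit is raised or lowered in exactly one place. A compact sketch of the pattern, assuming a hypothetical scan_tree helper (the real scanners also filter, copy into the bundle, and record exclusions):

import os
from typing import List, Tuple

MAX_FILES_CAP = 4000  # single source of truth for every scanner in the module

def scan_tree(root: str, cap: int = MAX_FILES_CAP) -> Tuple[List[str], List[str]]:
    """Collect up to `cap` file paths under `root`, noting when the cap is hit."""
    found: List[str] = []
    notes: List[str] = []
    for dirpath, _dirs, files in os.walk(root):
        for fn in files:
            found.append(os.path.join(dirpath, fn))
            if len(found) >= cap:
                notes.append(f"Reached file cap ({cap}) while scanning {root}.")
                return found, notes
    return found, notes

paths, notes = scan_tree("/usr/local/etc")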


@@ -138,7 +138,6 @@ def _copy_artifacts(
         # If a file was successfully templatised by JinjaTurtle, do NOT
         # also materialise the raw copy in the destination files dir.
-        # (This keeps the output minimal and avoids redundant "raw" files.)
         if exclude_rels and rel in exclude_rels:
             try:
                 if os.path.isfile(dst):
@@ -165,7 +164,7 @@ def _write_role_scaffold(role_dir: str) -> None:
 def _write_playbook_all(path: str, roles: List[str]) -> None:
     pb_lines = [
         "---",
-        "- name: Apply all roles on host",
+        "- name: Apply all roles on all hosts",
         "  hosts: all",
         "  become: true",
         "  roles:",
@@ -179,7 +178,7 @@ def _write_playbook_all(path: str, roles: List[str]) -> None:
 def _write_playbook_host(path: str, fqdn: str, roles: List[str]) -> None:
     pb_lines = [
         "---",
-        f"- name: Apply enroll roles on {fqdn}",
+        f"- name: Apply all roles on {fqdn}",
         f"  hosts: {fqdn}",
         "  become: true",
         "  roles:",
@@ -390,9 +389,9 @@ def _render_generic_files_tasks(
     # Using first_found makes roles work in both modes:
     # - site-mode: inventory/host_vars/<host>/<role>/.files/...
     # - non-site: roles/<role>/files/...
-    return f"""# Generated by enroll (data-driven tasks)
+    return f"""# Generated by enroll
 
-- name: Deploy systemd unit files (templates)
+- name: Deploy any systemd unit files (templates)
   ansible.builtin.template:
     src: "{{{{ item.src_rel }}}}.j2"
     dest: "{{{{ item.dest }}}}"
@@ -406,7 +405,7 @@ def _render_generic_files_tasks(
        | list }}}}
   notify: "{{{{ item.notify | default([]) }}}}"
 
-- name: Deploy systemd unit files (copies)
+- name: Deploy any systemd unit files (raw files)
   vars:
     _enroll_ff:
       files:
@@ -433,7 +432,7 @@ def _render_generic_files_tasks(
        | list
        | length) > 0
 
-- name: Deploy other managed files (templates)
+- name: Deploy any other managed files (templates)
   ansible.builtin.template:
     src: "{{{{ item.src_rel }}}}.j2"
     dest: "{{{{ item.dest }}}}"
@@ -447,7 +446,7 @@ def _render_generic_files_tasks(
        | list }}}}
   notify: "{{{{ item.notify | default([]) }}}}"
 
-- name: Deploy other managed files (copies)
+- name: Deploy any other managed files (raw files)
   vars:
     _enroll_ff:
       files:
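The first_found comment in the first hunk of _render_generic_files_tasks above describes two lookup locations per file. Roughly, the candidate order could be sketched as below; the two paths come from that comment, while the helper itself and the example values are illustrative only:

def candidate_file_paths(host: str, role: str, src_rel: str) -> list[str]:
    # Host-specific inventory copy wins in site-mode; the role's own files/
    # directory is the fallback in non-site mode.
    return [
        f"inventory/host_vars/{host}/{role}/.files/{src_rel}",
        f"roles/{role}/files/{src_rel}",
    ]

print(candidate_file_paths("web01.example.com", "nginx", "etc/logrotate.d/nginx"))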
@@ -668,11 +667,6 @@ def _manifest_from_bundle_dir(
     manifested_service_roles: List[str] = []
     manifested_pkg_roles: List[str] = []
 
-    # In site_mode, raw harvested files are stored under host-specific inventory
-    # to avoid cross-host clobber while still sharing a role definition.
-
-    # -------------------------
     # -------------------------
     # Users role (non-system users)
     # -------------------------
@@ -793,7 +787,7 @@ def _manifest_from_bundle_dir(
     # tasks (data-driven)
     users_tasks = """---
-# Generated by enroll (data-driven tasks)
+# Generated by enroll
 
 - name: Ensure groups exist
   ansible.builtin.group:
@@ -893,8 +887,6 @@ Generated non-system user accounts and SSH public material.
     manifested_users_roles.append(role)
 
-    # -------------------------
     # -------------------------
     # etc_custom role (unowned /etc not already attributed)
     # -------------------------
@@ -1212,8 +1204,6 @@ User-requested extra file harvesting.
     manifested_usr_local_custom_roles.append(role)
 
-    # -------------------------
     # -------------------------
     # Service roles
     # -------------------------
@@ -1315,7 +1305,7 @@ User-requested extra file harvesting.
         task_parts: List[str] = []
         task_parts.append(
             f"""---
-# Generated by enroll (data-driven tasks)
+# Generated by enroll
 
 - name: Install packages for {role}
   ansible.builtin.apt:
@@ -1474,7 +1464,7 @@ Generated from `{unit}`.
         task_parts: List[str] = []
         task_parts.append(
             f"""---
-# Generated by enroll (data-driven tasks)
+# Generated by enroll
 
 - name: Install packages for {role}
   ansible.builtin.apt:


@@ -174,7 +174,7 @@ def expand_includes(
     patterns: Sequence[CompiledPathPattern],
     *,
     exclude: Optional[PathFilter] = None,
-    max_files: int = 4000,
+    max_files: int,
 ) -> Tuple[List[str], List[str]]:
     """Expand include patterns into concrete file paths.