diff --git a/enroll/harvest.py b/enroll/harvest.py index 0543355..d4cfacd 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -125,7 +125,9 @@ ALLOWED_UNOWNED_EXTS = { "", # allow extensionless (common in /etc/default and /etc/init.d) } -MAX_UNOWNED_FILES_PER_ROLE = 400 +MAX_FILES_CAP = 4000 + +MAX_UNOWNED_FILES_PER_ROLE = 500 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package. SHARED_ETC_TOPDIRS = { @@ -324,7 +326,7 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ] -def _iter_matching_files(spec: str, *, cap: int = 2000) -> List[str]: +def _iter_matching_files(spec: str, *, cap: int = MAX_FILES_CAP) -> List[str]: """Expand a glob spec and also walk directories to collect files.""" out: List[str] = [] for p in glob.glob(spec): @@ -963,43 +965,141 @@ def harvest( for mf in users_managed: already.add(mf.path) + # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. + svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} + pkg_by_role: Dict[str, PackageSnapshot] = {p.role_name: p for p in pkg_snaps} + + def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]: + """If `path` is a shared snippet, return (role_name, reason) to attach to.""" + base = os.path.basename(path) + + # Try full filename and stem (before first dot). + candidates: List[str] = [base] + if "." in base: + candidates.append(base.split(".", 1)[0]) + + seen: Set[str] = set() + uniq: List[str] = [] + for c in candidates: + if c and c not in seen: + seen.add(c) + uniq.append(c) + + if path.startswith("/etc/logrotate.d/"): + for c in uniq: + rn = _safe_name(c) + if rn in svc_by_role or rn in pkg_by_role: + return (rn, "logrotate_snippet") + return None + + if path.startswith("/etc/cron.d/"): + for c in uniq: + rn = _safe_name(c) + if rn in svc_by_role or rn in pkg_by_role: + return (rn, "cron_snippet") + return None + + return None + # Capture essential system config/state (even if package-owned). for path, reason in _iter_system_capture_paths(): if path in already: continue + target = _target_role_for_shared_snippet(path) + if path_filter.is_excluded(path): - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + already.add(path) continue deny = policy.deny_reason(path) if deny: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + already.add(path) continue try: owner, group, mode = stat_triplet(path) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue src_rel = path.lstrip("/") + role_for_copy = etc_role_name + reason_for_role = reason + if target: + role_for_copy, reason_for_role = target + try: - _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue - etc_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + mf = ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason_for_role, ) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].managed_files.append(mf) + elif rn in pkg_by_role: + pkg_by_role[rn].managed_files.append(mf) + else: + etc_managed.append(mf) + already.add(path) # Walk /etc for remaining unowned config-ish files @@ -1016,45 +1116,106 @@ def harvest( if not _is_confish(path): continue + target = _target_role_for_shared_snippet(path) + if path_filter.is_excluded(path): - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + already.add(path) continue deny = policy.deny_reason(path) if deny: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + already.add(path) continue try: owner, group, mode = stat_triplet(path) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue src_rel = path.lstrip("/") + role_for_copy = etc_role_name + reason_for_role = "custom_unowned" + if target: + role_for_copy, reason_for_role = target + try: - _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue - etc_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason="custom_unowned", - ) + mf = ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason_for_role, ) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].managed_files.append(mf) + elif rn in pkg_by_role: + pkg_by_role[rn].managed_files.append(mf) + else: + etc_managed.append(mf) scanned += 1 - if scanned >= 2000: + if scanned >= MAX_FILES_CAP: etc_notes.append( - "Reached file cap (2000) while scanning /etc for unowned files." + f"Reached file cap ({MAX_FILES_CAP}) while scanning /etc for unowned files." ) break - if scanned >= 2000: + if scanned >= MAX_FILES_CAP: break etc_custom_snapshot = EtcCustomSnapshot( @@ -1146,7 +1307,7 @@ def harvest( _scan_usr_local_tree( "/usr/local/etc", require_executable=False, - cap=2000, + cap=MAX_FILES_CAP, reason="usr_local_etc_custom", ) @@ -1154,7 +1315,7 @@ def harvest( _scan_usr_local_tree( "/usr/local/bin", require_executable=True, - cap=2000, + cap=MAX_FILES_CAP, reason="usr_local_bin_script", ) @@ -1188,7 +1349,7 @@ def harvest( files, inc_notes = expand_includes( path_filter.iter_include_patterns(), exclude=path_filter, - max_files=4000, + max_files=MAX_FILES_CAP, ) included_files = files extra_notes.extend(inc_notes) diff --git a/enroll/manifest.py b/enroll/manifest.py index 2f28eab..d5ebff7 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -138,7 +138,6 @@ def _copy_artifacts( # If a file was successfully templatised by JinjaTurtle, do NOT # also materialise the raw copy in the destination files dir. - # (This keeps the output minimal and avoids redundant "raw" files.) if exclude_rels and rel in exclude_rels: try: if os.path.isfile(dst): @@ -165,7 +164,7 @@ def _write_role_scaffold(role_dir: str) -> None: def _write_playbook_all(path: str, roles: List[str]) -> None: pb_lines = [ "---", - "- name: Apply all roles on host", + "- name: Apply all roles on all hosts", " hosts: all", " become: true", " roles:", @@ -179,7 +178,7 @@ def _write_playbook_all(path: str, roles: List[str]) -> None: def _write_playbook_host(path: str, fqdn: str, roles: List[str]) -> None: pb_lines = [ "---", - f"- name: Apply enroll roles on {fqdn}", + f"- name: Apply all roles on {fqdn}", f" hosts: {fqdn}", " become: true", " roles:", @@ -390,9 +389,9 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - return f"""# Generated by enroll (data-driven tasks) + return f"""# Generated by enroll -- name: Deploy systemd unit files (templates) +- name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -406,7 +405,7 @@ def _render_generic_files_tasks( | list }}}} notify: "{{{{ item.notify | default([]) }}}}" -- name: Deploy systemd unit files (copies) +- name: Deploy any systemd unit files (raw files) vars: _enroll_ff: files: @@ -433,7 +432,7 @@ def _render_generic_files_tasks( | list | length) > 0 -- name: Deploy other managed files (templates) +- name: Deploy any other managed files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -447,7 +446,7 @@ def _render_generic_files_tasks( | list }}}} notify: "{{{{ item.notify | default([]) }}}}" -- name: Deploy other managed files (copies) +- name: Deploy any other managed files (raw files) vars: _enroll_ff: files: @@ -668,11 +667,6 @@ def _manifest_from_bundle_dir( manifested_service_roles: List[str] = [] manifested_pkg_roles: List[str] = [] - # In site_mode, raw harvested files are stored under host-specific inventory - # to avoid cross-host clobber while still sharing a role definition. - - # ------------------------- - # ------------------------- # Users role (non-system users) # ------------------------- @@ -793,7 +787,7 @@ def _manifest_from_bundle_dir( # tasks (data-driven) users_tasks = """--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Ensure groups exist ansible.builtin.group: @@ -893,8 +887,6 @@ Generated non-system user accounts and SSH public material. manifested_users_roles.append(role) - # ------------------------- - # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -1212,8 +1204,6 @@ User-requested extra file harvesting. manifested_usr_local_custom_roles.append(role) - # ------------------------- - # ------------------------- # Service roles # ------------------------- @@ -1315,7 +1305,7 @@ User-requested extra file harvesting. task_parts: List[str] = [] task_parts.append( f"""--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Install packages for {role} ansible.builtin.apt: @@ -1474,7 +1464,7 @@ Generated from `{unit}`. task_parts: List[str] = [] task_parts.append( f"""--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Install packages for {role} ansible.builtin.apt: diff --git a/enroll/pathfilter.py b/enroll/pathfilter.py index 9df4afa..6541ca9 100644 --- a/enroll/pathfilter.py +++ b/enroll/pathfilter.py @@ -174,7 +174,7 @@ def expand_includes( patterns: Sequence[CompiledPathPattern], *, exclude: Optional[PathFilter] = None, - max_files: int = 4000, + max_files: int, ) -> Tuple[List[str], List[str]]: """Expand include patterns into concrete file paths.