From a235028f3b28467687cc33a6996ec35efb2bf97f Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 13:34:37 +1100 Subject: [PATCH 01/85] black --- enroll/manifest.py | 2 +- enroll/remote.py | 2 +- enroll/sopsutil.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index afb8b88..e55418c 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -567,7 +567,7 @@ def _tar_dir_to_with_progress( cols = shutil.get_terminal_size((80, 20)).columns msg = msg[: cols - 1] except Exception: - pass # nosec + pass # nosec os.write(2, ("\r" + msg).encode("utf-8", errors="replace")) with tarfile.open(tar_path, mode="w:gz") as tf: diff --git a/enroll/remote.py b/enroll/remote.py index 7ad8dc4..469248d 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -200,7 +200,7 @@ def remote_harvest( # Stream a tarball back to the local machine (avoid creating a tar file on the remote). cmd = f"tar -cz -C {rbundle} ." - _stdin, stdout, stderr = ssh.exec_command(cmd) # nosec + _stdin, stdout, stderr = ssh.exec_command(cmd) # nosec with open(local_tgz, "wb") as f: while True: chunk = stdout.read(1024 * 128) diff --git a/enroll/sopsutil.py b/enroll/sopsutil.py index 6c0c881..de36d4f 100644 --- a/enroll/sopsutil.py +++ b/enroll/sopsutil.py @@ -2,7 +2,7 @@ from __future__ import annotations import os import shutil -import subprocess # nosec +import subprocess # nosec import tempfile from pathlib import Path from typing import Iterable, List, Optional @@ -62,7 +62,7 @@ def encrypt_file_binary( ], capture_output=True, check=False, - ) # nosec + ) # nosec if res.returncode != 0: raise SopsError( "sops encryption failed:\n" @@ -112,7 +112,7 @@ def decrypt_file_binary_to( ], capture_output=True, check=False, - ) # nosec + ) # nosec if res.returncode != 0: raise SopsError( "sops decryption failed:\n" From 591ecaa23569c78e9b473dce59ab592f4234500f Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 13:41:22 
+1100 Subject: [PATCH 02/85] Add pre-commit config --- .pre-commit-config.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..2fd6c83 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +repos: + - repo: https://github.com/pycqa/flake8 + rev: 7.3.0 + hooks: + - id: flake8 + args: ["--select=F"] + types: [python] + + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 25.11.0 + hooks: + - id: black + language_version: python3 + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer From bfa2f4a7243daf9e585ffb36cba7623c5c746905 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 13:44:26 +1100 Subject: [PATCH 03/85] Add bandit to pre-commit --- .pre-commit-config.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2fd6c83..62c3791 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,3 +17,8 @@ repos: hooks: - id: trailing-whitespace - id: end-of-file-fixer + + - repo: https://github.com/PyCQA/bandit + rev: 1.9.2 + hooks: + - id: bandit From e94bd86c75531a82092db610ca1b6a7c88bbe707 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 13:45:59 +1100 Subject: [PATCH 04/85] Add files param to bandit pre-commit --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 62c3791..09c6889 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,3 +22,4 @@ repos: rev: 1.9.2 hooks: - id: bandit + files: ^enroll/ From 55e50ebf59f7e0a2a35ac44b62cb3f0229c75eed Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 13:50:00 +1100 Subject: [PATCH 05/85] Fix end of file/whitespace per pre-commit --- CHANGELOG.md | 2 +- 
README.md | 1 - enroll.svg | 1 - tests.sh | 4 ++-- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1505f1a..d8ca4b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,7 +39,7 @@ * Merge pkg_ and roles created based on file/service detection * Avoid idempotency issue with users (`password_lock`) - * Rename subcommands/args ('export' is now 'enroll', '--bundle' is now '--harvest') + * Rename subcommands/args ('export' is now 'enroll', '--bundle' is now '--harvest') * Don't try and start systemd services that were Inactive at harvest time * Capture miscellaneous files in /etc under their own `etc_custom` role, but not backup files * Add tests diff --git a/README.md b/README.md index cd3bba4..19b5377 100644 --- a/README.md +++ b/README.md @@ -357,4 +357,3 @@ My Forgejo doesn't yet support proper federation, and for that reason I've not o Instead, you can e-mail me (see the pyproject.toml for details) or contact me on the Fediverse: https://goto.mig5.net/@mig5 - diff --git a/enroll.svg b/enroll.svg index c986e1f..0ee1590 100644 --- a/enroll.svg +++ b/enroll.svg @@ -109,4 +109,3 @@ enroll - diff --git a/tests.sh b/tests.sh index ea7ad59..6becc39 100755 --- a/tests.sh +++ b/tests.sh @@ -15,10 +15,10 @@ poetry run \ --harvest "${BUNDLE_DIR}" \ --out "${ANSIBLE_DIR}" -builtin cd "${ANSIBLE_DIR}" +builtin cd "${ANSIBLE_DIR}" # Lint -ansible-lint "${ANSIBLE_DIR}" +ansible-lint "${ANSIBLE_DIR}" # Run ansible-playbook playbook.yml -i "localhost," -c local --check --diff From b5d2b99174a984ed941d5796a47f3d05e800bfb1 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 14:59:51 +1100 Subject: [PATCH 06/85] Add diff mode --- CHANGELOG.md | 5 + README.md | 388 +++++++++++------------- debian/changelog | 9 +- enroll/cli.py | 190 ++++++++++++ enroll/diff.py | 757 +++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 6 files changed, 1131 insertions(+), 220 deletions(-) create mode 100644 enroll/diff.py diff 
--git a/CHANGELOG.md b/CHANGELOG.md index d8ca4b5..81eed41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.1 + + * Add `diff` subcommand which can compare two harvests and send email or webhook notifications in different + formats. + # 0.1.0 * Add remote mode for harvesting a remote machine via a local workstation (no need to install enroll remotely) diff --git a/README.md b/README.md index 19b5377..68c35e5 100644 --- a/README.md +++ b/README.md @@ -4,217 +4,175 @@ Enroll logo -**enroll** inspects a Linux machine (currently Debian-only) and generates Ansible roles for things it finds running on the machine. +**enroll** inspects a Linux machine (currently Debian-only) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. It aims to be **optimistic and noninteractive**: -- Detects packages that have been installed -- Detects Debian package ownership of `/etc` files using dpkg's local database. +- Detects packages that have been installed. +- Detects Debian package ownership of `/etc` files using dpkg’s local database. - Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available). - Also captures **service-relevant custom/unowned files** under `/etc//...` (e.g. drop-in config includes). - Defensively excludes likely secrets (path denylist + content sniff + size caps). -- Captures non-system users that exist on the system, and their SSH public keys -- Captures miscellaneous `/etc` files that it can't attribute to a package, and installs it in an `etc_custom` role -- Avoids trying to start systemd services that were detected as being Inactive during harvest +- Captures non-system users and their SSH public keys. +- Captures miscellaneous `/etc` files it can’t attribute to a package and installs them in an `etc_custom` role. +- Avoids trying to start systemd services that were detected as inactive during harvest. 
--- -# Two modes: single-site vs multi-site (`--fqdn`) +## Mental model -**enroll** has two distinct ways to generate Ansible: +`enroll` works in two phases: -## 1) Single-site mode (default: *no* `--fqdn`) -Use this when you're enrolling **one server** (or you're generating a "golden" role set you intend to reuse). +1) **Harvest**: collect host facts + relevant files into a harvest bundle (`state.json` + harvested artifacts) +2) **Manifest**: turn that harvest into Ansible roles/playbooks (and optionally inventory) -**What you get** -- Config, templates, and defaults are primarily **contained inside each role**. -- Raw config files (when not templated) live in the role's `files/`. -- Template variables (when templated) live in the role's `defaults/main.yml`. +Additionally: -**Pros** -- Roles are more **self-contained** and easier to understand. -- Better starting point for **provisioning new servers**, because the role contains most of what it needs. -- Less inventory abstraction/duplication. +- **Diff**: compare two harvests and report what changed (packages/services/users/files) since the previous snapshot. -**Cons** -- Less convenient for quickly enrolling multiple hosts with divergent configs (you'll do more manual work to make roles flexible across hosts). +--- -## 2) Multi-site mode (`--fqdn`) -Use this when you want to enroll **several existing servers** quickly, especially if they differ. +## Output modes: single-site vs multi-site (`--fqdn`) -**What you get** -- Roles are **shared** across hosts, but host-specific data lives in inventory. -- Host inventory drives what's managed: - - which files to deploy for that host - - which packages are relevant for that host - - which services should be enabled/started for that host -- For non-templated config, raw files live in host-specific inventory under `.files/` (per role). +`enroll manifest` (and `enroll single-shot`) support two distinct output styles. 
-**Pros** -- Fastest way to retrofit **multiple servers** into config management. -- Avoids shared-role "host A breaks host B" problems by keeping host-specific state in inventory. -- Better fit when you already have a fleet and want to capture/reflect reality first. +### Single-site mode (default: *no* `--fqdn`) +Use when enrolling **one server** (or generating a “golden” role set you intend to reuse). -**Cons** -- More abstraction: roles become more "data-driven". -- Potential duplication: raw files may exist per-host in inventory (even if identical). -- Harder to use the roles to **provision a brand-new server** without also building an inventory for that new host, because multi-site output assumes the server already exists and is being retrofitted. +**Characteristics** +- Roles are more self-contained. +- Raw config files live in the role’s `files/`. +- Template variables live in the role’s `defaults/main.yml`. + +### Multi-site mode (`--fqdn`) +Use when enrolling **several existing servers** quickly, especially if they differ. + +**Characteristics** +- Roles are shared, host-specific state lives in inventory. +- Host inventory drives what gets managed (files/packages/services). +- Non-templated raw files live per-host under `inventory/host_vars///.files/...`. **Rule of thumb** -- If your goal is *"make this one server reproducible / provisionable"* → start with **single-site**. -- If your goal is *"get several already-running servers under management quickly"* → use **multi-site**. +- “Make this one server reproducible/provisionable” → start with **single-site** +- “Get multiple already-running servers under management quickly” → use **multi-site** --- -# Key concepts +## Subcommands -## Harvest +### `enroll harvest` +Harvest state about a host and write a harvest bundle. -**enroll** begins by 'harvesting' known state about your host. 
This includes detecting what running services exist, what packages have been installed 'manually' (that is, stuff that doesn't come out of the box with the OS), and anything 'custom' in `/etc` that it can't attribute to a specific package. +**What it captures (high level)** +- Detected services + service-relevant packages +- “Manual” packages +- Changed-from-default config (plus related custom/unowned files under service dirs) +- Non-system users + SSH public keys +- Misc `/etc` that can’t be attributed to a package (`etc_custom` role) -It also detects if any config files have been *changed* from their packaged defaults. If they have, it will attempt to 'harvest' them. If the config file is identical to how it comes with the package, then it doesn't bother harvesting it, because there's little value in config-managing it if it's identical to what you get by simply installing the package! +**Common flags** +- Remote harvesting: + - `--remote-host`, `--remote-user`, `--remote-port` + - `--no-sudo` (if you don’t want/need sudo) +- Sensitive-data behaviour: + - default: tries to avoid likely secrets + - `--dangerous`: disables secret-safety checks (see “Sensitive data” below) +- Encrypt bundles at rest: + - `--sops `: writes a single encrypted `harvest.tar.gz.sops` instead of a plaintext directory -The harvest writes a state.json file explaining all the data it harvested and, if it chose not to harvest something, explanations as to why that is the case (see below: sensitive data). +--- -### Remote harvesting (workstation → remote) +### `enroll manifest` +Generate Ansible output from an existing harvest bundle. 
-If you'd prefer not to install **enroll** on the target host, you can run the harvest over SSH from your workstation and pull the harvest bundle back locally: +**Inputs** +- `--harvest /path/to/harvest` (directory) + or `--harvest /path/to/harvest.tar.gz.sops` (if using `--sops`) -```bash -enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest -``` +**Output** +- In plaintext mode: an Ansible repo-like directory structure (roles/playbooks, and inventory in multi-site mode). +- In `--sops` mode: a single encrypted file `manifest.tar.gz.sops` containing the generated output. -- `--remote-port` defaults to `22` -- `--remote-user` defaults to your local `$USER` +**Common flags** +- `--fqdn `: enables **multi-site** output style -This uploads a self-contained `enroll` zipapp to a temporary directory on the remote host, runs `harvest` there, then downloads the resulting harvest bundle to the `--out` directory on your workstation. +--- -**Privilege note:** A "full" harvest typically needs root access. Remote harvesting assumes the remote user can run `sudo` **without a password prompt** (NOPASSWD) so the harvest can run non-interactively. If you don't want this, pass `--no-sudo` as well. +### `enroll single-shot` +Convenience wrapper that runs **harvest → manifest** in one command. + +Use this when you want “get me something workable ASAP”. + +Supports the same general flags as harvest/manifest, including `--fqdn`, remote harvest flags, and `--sops`. + +--- + +### `enroll diff` +Compare two harvest bundles and report what changed. + +**What it reports** +- Packages added/removed +- Services enabled added/removed, plus key state changes +- Users added/removed, plus field changes (uid/gid/home/shell/groups, etc.) 
+- Managed files added/removed/changed (metadata + content hash changes where available) + +**Inputs** +- `--old ` and `--new ` (directories or `state.json` paths) +- `--sops` when comparing SOPS-encrypted harvest bundles + +**Output formats** +- `--format json` (default for webhooks) +- `--format markdown` / `--format text` (human-oriented) + +**Notifications** +- Webhook: + - `--webhook ` + - `--webhook-format json|markdown|text` + - `--webhook-header 'Header-Name: value'` (repeatable) +- Email (optional): + - `--email-to ` (plus optional SMTP/sendmail-related flags, depending on your install) + +--- ## Sensitive data -**enroll** doesn't make any assumptions about how you might handle sensitive data from your config files, in Ansible. Some people might use SOPS, others might use Vault, others might do something else entirely. +By default, `enroll` does **not** assume how you handle secrets in Ansible. It will attempt to avoid harvesting likely sensitive data (private keys, passwords, tokens, etc.). This can mean it skips some config files you may ultimately want to manage. -For this reason, **enroll** will attempt to read config files, and if it detects data that looks like a sensitive SSH/SSL private key, or password, or API key, etc, then it won't harvest it for config management. +If you opt in to collecting everything: -This inevitably means that it will deliberately miss some important config files that you probably *want* to manage in Ansible. +### `--dangerous` +**WARNING:** disables “likely secret” safety checks. This can copy private keys, TLS key material, API tokens, database passwords, and other credentials into the harvest output **in plaintext**. -Nonetheless, in the Harvest 'state' file, there should be an explanation of 'excluded files'. You can parse or inspect this file to find what it chose to ignore, and then you know what you might want to augment the results with later, once you 'manifest' the harvest into Ansible configuration. 
+If you intend to keep harvests/manifests long-term (especially in git), strongly consider encrypting them at rest. -Nonetheless, in some cases it may be appropriate to truly grab as much as you can, including secrets. For that, read on for the `--dangerous` flag. +### Encrypt bundles at rest with `--sops` +`--sops` encrypts the harvest and/or manifest outputs into a single `.tar.gz.sops` file (GPG). This is for **storage-at-rest**, not for direct “Ansible SOPS inventory” workflows. -### Opting in to fetching sensitive data: `--dangerous` - -**WARNING:** `--dangerous` disables enroll's "likely a secret" safety checks. This can cause private keys, TLS key material, API tokens, database passwords, and other credentials to be copied into your harvest output **in plaintext**. - -Only use `--dangerous` if you explicitly want to scoop up sensitive files and you understand where the harvest output is stored, who can read it, and how it will be handled (backups, git commits, etc, as well as risk of using `--out` with a shared `/tmp` location where other users could see the data). We offer no liability if your sensitive data is compromised through the use of this tool! - -**Strong recommendation:** If you plan to keep harvested files long-term (especially in git), encrypt secrets at rest. A common approach is to use **SOPS** and then use the **community.sops** Ansible collection to load/decrypt encrypted content during deploy. - -Install the collection: - -```bash -ansible-galaxy collection install community.sops -``` - -Then you can use the collection's lookup/vars plugins or modules to decrypt or load SOPS-encrypted vars at runtime. - -Note the section below **also** talks about SOPS, but this is in the context of simply encrypting the data generated by `enroll` at rest for safe-keeping, **not** for direct integration with Ansible. 
- - -### Encrypting harvest/manifests at rest with `--sops` - -If you want to use `--dangerous` (or you simply want to keep the harvested artifacts private when they're sitting on disk, in git, etc), you can pass `--sops` to `harvest`, `manifest`, or `single-shot`. - -To use `--sops`, you will need to have [sops](https://github.com/getsops/sops) installed on your `$PATH`. - -- `--sops` expects one or more **GPG key fingerprints**. If `sops` is not on the `$PATH`, **enroll** will error. -- `harvest --sops ...` writes a *single* encrypted file (`harvest.tar.gz.sops`) instead of a plaintext directory. -- `manifest --sops ...` (and `single-shot --sops ...`) will: - - decrypt the harvest bundle with `sops -d` (if the `--harvest` input is an encrypted file), then generate manifests as normal - - bundle the entire generated Ansible output into a *single* encrypted file (`manifest.tar.gz.sops`) - -⚠️ **Important:** `manifest --sops` (and `single-shot --sops`) produces **one encrypted file**. It is **not** an Ansible repo you can point `ansible-playbook` at directly. It is **not** the same as using SOPS inventory with the Ansible SOPS collection. - -To use the encrypted SOPS manifest, decrypt and extract it first, then run Ansible from inside the extracted `manifest/` directory: - -```bash -sops -d /path/to/manifest.tar.gz.sops | tar -xzvf - -cd manifest -ansible-playbook ... -``` - -Example: - -```bash -# Harvest (encrypted-at-rest) -enroll harvest --out /tmp/enroll-harvest --dangerous --sops - -# Manifest (encrypted-at-rest) -enroll manifest --harvest /tmp/enroll-harvest/harvest.tar.gz.sops --out /tmp/enroll-ansible --sops - -# Decrypt/extract manifest output for inspection / ansible runs -cd /tmp/enroll-ansible -sops -d manifest.tar.gz.sops | tar -xzvf - -cd manifest -``` - -(If you want to manually inspect an encrypted harvest bundle, extract it into its own directory, e.g. `mkdir -p harvest && sops -d harvest.tar.gz.sops | tar -xzvf - -C harvest`.) 
- - - -## Manifest - -The 'manifest' subcommand expects to be given a path to the 'harvest' obtained in the first step. It will then attempt to generate Ansible roles and playbooks (and potentially 'inventory') from that harvest. - -Manifesting is the most complex step because a lot of people will have opinions on how Ansible roles and inventory should work. No solution is perfect for everyone. However, **enroll** tries to strike a reasonable balance. - -Remember, the purpose of this tool is to save **time** getting your systems into a decently-managed state. It's still up to you to wrangle it into a form that works for you on an ongoing basis. +⚠️ Important: `manifest --sops` produces one encrypted file. You must decrypt + extract it before running `ansible-playbook`. --- -# Single-shot mode for the impatient sysadmin +## JinjaTurtle integration (both modes) -**enroll** has a 'single-shot' subcommand which combines the two other phases (harvest and manifest) into one. Use it to generate both the harvest and then manifest ansible from that harvest all in one go. Perfect if you're in a hurry! +If [JinjaTurtle](https://git.mig5.net/mig5/jinjaturtle) is installed, `enroll` can generate Jinja2 templates for ini/json/xml/toml-style config. ---- - -# JinjaTurtle integration (both modes) - -If you also have my other tool [JinjaTurtle](https://git.mig5.net/mig5/jinjaturtle) installed, **enroll** will attempt to create Jinja2 templates for any ini/json/xml/toml style configuration that it finds. - -- Templates live in the **role** (`roles//templates/...`) +- Templates live in `roles//templates/...` - Variables live in: - - **single-site**: `roles//defaults/main.yml` - - **multi-site** (`--fqdn`): `inventory/host_vars//.yml` + - single-site: `roles//defaults/main.yml` + - multi-site: `inventory/host_vars//.yml` -JinjaTurtle will be used automatically if it is detected on the `$PATH`. 
You can also be explicit and pass `--jinjaturtle`, but this will throw an error if JinjaTurtle is not on the `$PATH`. - -If you *do* have JinjaTurtle installed, but *don't* wish to make use of it, you can use `--no-jinjaturtle`, in which case all config files will be kept as 'raw' files. +You can force it on with `--jinjaturtle` or disable with `--no-jinjaturtle`. --- -# How multi-site avoids "shared role breaks a host" +## How multi-site avoids “shared role breaks a host” -In multi-site mode, **roles are data-driven**. The role contains generic tasks like: - -- "deploy all files listed for this host" -- "install packages listed for this host" -- "apply systemd enable/start state listed for this host" - -The host inventory is what decides which files/packages/services apply to that host. This prevents the classic failure mode where host2 adds a config file to a shared role and host1 then fails trying to deploy a file it never had. - -Raw non-templated files are stored under: - -- `inventory/host_vars///.files/...` - -…and the host's role variables describe which of those files should be deployed. +In multi-site mode, roles are **data-driven**. The role tasks are generic (“deploy the files listed for this host”, “install the packages listed for this host”, “apply systemd enable/start state listed for this host”). Host inventory decides what applies per-host, avoiding the classic “host2 adds config, host1 breaks” failure mode. 
--- # Install ## Ubuntu/Debian apt repository - ```bash sudo mkdir -p /usr/share/keyrings curl -fsSL https://mig5.net/static/mig5.asc | sudo gpg --dearmor -o /usr/share/keyrings/mig5.gpg @@ -224,25 +182,19 @@ sudo apt install enroll ``` ## AppImage - -Download the AppImage file from the Releases page (verify with GPG if you wish, my fingerprint is [here](https://mig5.net/static/mig5.asc)), -then make it executable and run it: +Download it from my Releases page, then: ```bash chmod +x Enroll.AppImage ./Enroll.AppImage ``` -### Pip/PipX - +## Pip/PipX ```bash pip install enroll ``` -### Poetry - -Clone this repository with git, then: - +## Poetry (dev) ```bash poetry install poetry run enroll --help @@ -250,110 +202,110 @@ poetry run enroll --help --- -# Usage +## Found a bug / have a suggestion? -## 1. Harvest state/information about the host +My Forgejo doesn’t currently support federation, so I haven’t opened registration/login for issues. -On the host (root recommended to harvest as much data as possible): +Instead, email me (see `pyproject.toml`) or contact me on the Fediverse: +https://goto.mig5.net/@mig5 + +--- + +# Examples + +## Harvest + +### Local harvest ```bash enroll harvest --out /tmp/enroll-harvest ``` -### Remote harvest over SSH (no enroll install required on the remote host, no need for --out) +### Remote harvest over SSH ```bash -enroll harvest --remote-host myhost.example.com --remote-user myuser +enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest ``` -### `--dangerous` (captures potentially sensitive files — read the warning above) - +### `--dangerous` ```bash enroll harvest --out /tmp/enroll-harvest --dangerous ``` -Remote + dangerous: - +### Remote + dangerous: ```bash enroll harvest --remote-host myhost.example.com --remote-user myuser --dangerous ``` -### `--sops` (encrypt bundles at rest) - -`--sops` bundles and encrypts the output as a single SOPS-encrypted `.tar.gz.sops` file (GPG). 
This is particularly useful if you're using `--dangerous`. - +### `--sops` (encrypt at rest) ```bash # Encrypted harvest bundle (writes /tmp/enroll-harvest/harvest.tar.gz.sops) enroll harvest --out /tmp/enroll-harvest --dangerous --sops +``` -# Encrypted manifest bundle (writes /tmp/enroll-ansible/manifest.tar.gz.sops) +--- + +## Manifest + +### Single-site (default: no --fqdn) +```bash +enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible +``` + +### Multi-site (--fqdn) +```bash +enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "$(hostname -f)" +``` + +### Manifest with `--sops` +```bash +# Generate encrypted manifest bundle (writes /tmp/enroll-ansible/manifest.tar.gz.sops) enroll manifest --harvest /tmp/enroll-harvest/harvest.tar.gz.sops --out /tmp/enroll-ansible --sops # Decrypt/extract the manifest bundle, then run Ansible from inside ./manifest/ cd /tmp/enroll-ansible sops -d manifest.tar.gz.sops | tar -xzvf - cd manifest -ansible-playbook ./playbook.yml ``` - -## 2. 
Generate Ansible manifests (roles/playbook) from that harvest - -### Single-site (default: no --fqdn) - -Good for one server, or for producing roles you want to reuse to provision new machines: - -```bash -enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible -``` - -### Multi-site (--fqdn) - -Best when enrolling multiple already-running servers into one repo: - -```bash -enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "$(hostname -f)" -``` +--- ## Single-shot -Alternatively, do both steps in one shot: - ```bash enroll single-shot --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "$(hostname -f)" ``` -Remote single-shot (run harvest over SSH, then manifest locally): +Remote single-shot (run harvest over SSH, then manifest locally): ```bash -enroll single-shot --remote-host myhost.example.com --remote-user myuser --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "myhost.example.com" +enroll single-shot --remote-host myhost.example.com --remote-user myuser --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "myhost.example.com" ``` -In multi-site mode (`--fqdn`), you can run single-shot repeatedly against multiple hosts while reusing the same `--out` directory so each host merges into the existing Ansible repo. +--- +## Diff -## 3. Run Ansible +### Compare two harvest directories +```bash +enroll diff --old /path/to/harvestA --new /path/to/harvestB --format json +``` + +### Diff + webhook notify +```bash +enroll diff --old /path/to/golden/harvest --new /path/to/new/harvest --webhook https://nr.mig5.net/forms/webhooks/xxxx --webhook-format json --webhook-header 'X-Enroll-Secret: xxxx' +``` + +`diff` mode also supports email sending and text or markdown format, as well as `--exit-code` mode to trigger a return code of 2 (useful for crons or CI) + +--- + +## Run Ansible ### Single-site - -You can run it however you prefer (local connection or your own inventory). 
Example: - ```bash ansible-playbook -i "localhost," -c local /tmp/enroll-ansible/playbook.yml ``` ### Multi-site (--fqdn) - -In multi-site mode, enroll generates an ansible.cfg, `host_vars` inventory, and a host-specific playbook: - ```bash ansible-playbook /tmp/enroll-ansible/playbooks/"$(hostname -f)".yml ``` - ---- - -# Found a bug, have a suggestion? - -My Forgejo doesn't yet support proper federation, and for that reason I've not opened up registration/login to use the issue queue. - -Instead, you can e-mail me (see the pyproject.toml for details) or contact me on the Fediverse: - -https://goto.mig5.net/@mig5 diff --git a/debian/changelog b/debian/changelog index 16f7a0d..0cc5861 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.1.1) unstable; urgency=medium + + * Add `diff` subcommand which can compare two harvests and send email or webhook notifications in different + formats. + + -- Miguel Jacq Thu, 18 Dec 2025 15:00:00 +1100 + enroll (0.1.0) unstable; urgency=medium * Add remote mode for harvesting a remote machine via a local workstation (no need to install enroll remotely) @@ -12,7 +19,7 @@ enroll (0.1.0) unstable; urgency=medium ship or manage those files. 
* Don't collect files ending in `.log` - -- Miguel Jacq Tue, 17 Dec 2025 18:00:00 +1100 + -- Miguel Jacq Wed, 17 Dec 2025 18:00:00 +1100 enroll (0.0.5) unstable; urgency=medium diff --git a/enroll/cli.py b/enroll/cli.py index 60e48a2..2d8ed5e 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -8,6 +8,7 @@ from pathlib import Path from typing import Optional from .cache import new_harvest_cache_dir +from .diff import compare_harvests, format_report, post_webhook, send_email from .harvest import harvest from .manifest import manifest from .remote import remote_harvest @@ -211,6 +212,90 @@ def main() -> None: _add_common_manifest_args(s) _add_remote_args(s) + d = sub.add_parser("diff", help="Compare two harvests and report differences") + d.add_argument( + "--old", + required=True, + help=( + "Old/baseline harvest (directory, a path to state.json, a tarball, or a SOPS-encrypted bundle)." + ), + ) + d.add_argument( + "--new", + required=True, + help=( + "New/current harvest (directory, a path to state.json, a tarball, or a SOPS-encrypted bundle)." 
+ ), + ) + d.add_argument( + "--sops", + action="store_true", + help="Allow SOPS-encrypted harvest bundle inputs (requires `sops` on PATH).", + ) + d.add_argument( + "--format", + choices=["text", "markdown", "json"], + default="text", + help="Report output format (default: text).", + ) + d.add_argument( + "--out", + help="Write the report to this file instead of stdout.", + ) + d.add_argument( + "--exit-code", + action="store_true", + help="Exit with status 2 if differences are detected.", + ) + d.add_argument( + "--notify-always", + action="store_true", + help="Send webhook/email even when there are no differences.", + ) + d.add_argument( + "--webhook", + help="POST the report to this URL (only when differences are detected, unless --notify-always).", + ) + d.add_argument( + "--webhook-format", + choices=["json", "text", "markdown"], + default="json", + help="Payload format for --webhook (default: json).", + ) + d.add_argument( + "--webhook-header", + action="append", + default=[], + metavar="K:V", + help="Extra HTTP header for --webhook (repeatable), e.g. 'Authorization: Bearer ...'.", + ) + d.add_argument( + "--email-to", + action="append", + default=[], + help="Email the report to this address (repeatable; only when differences are detected unless --notify-always).", + ) + d.add_argument( + "--email-from", + help="From address for --email-to (default: enroll@).", + ) + d.add_argument( + "--email-subject", + help="Subject for --email-to (default: 'enroll diff report').", + ) + d.add_argument( + "--smtp", + help="SMTP server host[:port] for --email-to. 
If omitted, uses local sendmail.", + ) + d.add_argument( + "--smtp-user", + help="SMTP username (optional).", + ) + d.add_argument( + "--smtp-password-env", + help="Environment variable containing SMTP password (optional).", + ) + args = ap.parse_args() remote_host: Optional[str] = getattr(args, "remote_host", None) @@ -287,6 +372,61 @@ def main() -> None: ) if getattr(args, "sops", None) and out_enc: print(str(out_enc)) + elif args.cmd == "diff": + report, has_changes = compare_harvests( + args.old, + args.new, + sops_mode=bool(getattr(args, "sops", False)), + ) + + txt = format_report(report, fmt=str(getattr(args, "format", "text"))) + out_path = getattr(args, "out", None) + if out_path: + p = Path(out_path).expanduser() + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(txt, encoding="utf-8") + else: + print(txt, end="" if txt.endswith("\n") else "\n") + + should_notify = has_changes or bool(getattr(args, "notify_always", False)) + + webhook = getattr(args, "webhook", None) + if webhook and should_notify: + wf = str(getattr(args, "webhook_format", "json")) + payload = format_report(report, fmt=wf) + body = payload.encode("utf-8") + headers = {} + if wf == "json": + headers["Content-Type"] = "application/json" + else: + headers["Content-Type"] = "text/plain; charset=utf-8" + for hv in getattr(args, "webhook_header", []) or []: + if ":" in hv: + k, v = hv.split(":", 1) + headers[k.strip()] = v.strip() + status, _resp = post_webhook(webhook, body, headers=headers) + if status and status >= 400: + raise SystemExit(f"error: webhook returned HTTP {status}") + + to_addrs = getattr(args, "email_to", []) or [] + if to_addrs and should_notify: + subject = getattr(args, "email_subject", None) or "enroll diff report" + smtp_pw = None + pw_env = getattr(args, "smtp_password_env", None) + if pw_env: + smtp_pw = os.environ.get(str(pw_env)) + send_email( + to_addrs=list(to_addrs), + subject=str(subject), + body=txt, + from_addr=getattr(args, "email_from", None), + 
smtp=getattr(args, "smtp", None), + smtp_user=getattr(args, "smtp_user", None), + smtp_password=smtp_pw, + ) + + if getattr(args, "exit_code", False) and has_changes: + raise SystemExit(2) elif args.cmd == "single-shot": sops_fps = getattr(args, "sops", None) if remote_host: @@ -379,5 +519,55 @@ def main() -> None: fqdn=args.fqdn, jinjaturtle=_jt_mode(args), ) + elif args.cmd == "diff": + report, has_changes = compare_harvests( + args.old, args.new, sops_mode=bool(getattr(args, "sops", False)) + ) + + rendered = format_report(report, fmt=str(args.format)) + if args.out: + Path(args.out).expanduser().write_text(rendered, encoding="utf-8") + else: + print(rendered, end="") + + do_notify = bool(has_changes or getattr(args, "notify_always", False)) + + if do_notify and getattr(args, "webhook", None): + wf = str(getattr(args, "webhook_format", "json")) + body = format_report(report, fmt=wf).encode("utf-8") + headers = {"User-Agent": "enroll"} + if wf == "json": + headers["Content-Type"] = "application/json" + else: + headers["Content-Type"] = "text/plain; charset=utf-8" + for hv in getattr(args, "webhook_header", []) or []: + if ":" not in hv: + raise SystemExit( + "error: --webhook-header must be in the form 'K:V'" + ) + k, v = hv.split(":", 1) + headers[k.strip()] = v.strip() + status, _ = post_webhook(str(args.webhook), body, headers=headers) + if status and status >= 400: + raise SystemExit(f"error: webhook returned HTTP {status}") + + if do_notify and (getattr(args, "email_to", []) or []): + subject = getattr(args, "email_subject", None) or "enroll diff report" + smtp_password = None + pw_env = getattr(args, "smtp_password_env", None) + if pw_env: + smtp_password = os.environ.get(str(pw_env)) + send_email( + to_addrs=list(getattr(args, "email_to", []) or []), + subject=str(subject), + body=rendered, + from_addr=getattr(args, "email_from", None), + smtp=getattr(args, "smtp", None), + smtp_user=getattr(args, "smtp_user", None), + smtp_password=smtp_password, + ) + + 
if getattr(args, "exit_code", False) and has_changes: + raise SystemExit(2) except SopsError as e: raise SystemExit(f"error: {e}") diff --git a/enroll/diff.py b/enroll/diff.py new file mode 100644 index 0000000..9b396fc --- /dev/null +++ b/enroll/diff.py @@ -0,0 +1,757 @@ +from __future__ import annotations + +import hashlib +import json +import os +import shutil +import subprocess # nosec +import tarfile +import tempfile +import urllib.request +from contextlib import ExitStack +from dataclasses import dataclass +from datetime import datetime, timezone +from email.message import EmailMessage +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Tuple + +from .remote import _safe_extract_tar +from .sopsutil import decrypt_file_binary_to, require_sops_cmd + + +def _utc_now_iso() -> str: + return datetime.now(tz=timezone.utc).isoformat() + + +def _sha256(path: Path) -> str: + h = hashlib.sha256() + with open(path, "rb") as f: + while True: + chunk = f.read(1024 * 1024) + if not chunk: + break + h.update(chunk) + return h.hexdigest() + + +@dataclass +class BundleRef: + """A prepared harvest bundle. + + `dir` is a directory containing state.json + artifacts/. + `tempdir` is set when the bundle needed extraction into a temp directory. + """ + + dir: Path + tempdir: Optional[tempfile.TemporaryDirectory] = None + + @property + def state_path(self) -> Path: + return self.dir / "state.json" + + +def _bundle_from_input(path: str, *, sops_mode: bool) -> BundleRef: + """Resolve a user-supplied path to a harvest bundle directory. + + Accepts: + - a bundle directory + - a path to state.json inside a bundle directory + - (sops mode or .sops) a SOPS-encrypted tar.gz bundle + - a plain tar.gz/tgz bundle + """ + + p = Path(path).expanduser() + + # Accept the state.json path directly (harvest often prints this). 
+ if p.is_file() and p.name == "state.json": + p = p.parent + + if p.is_dir(): + return BundleRef(dir=p) + + if not p.exists(): + raise RuntimeError(f"Harvest path not found: {p}") + + # Auto-enable sops mode if it looks like an encrypted bundle. + is_sops = p.name.endswith(".sops") + if sops_mode or is_sops: + require_sops_cmd() + td = tempfile.TemporaryDirectory(prefix="enroll-harvest-") + td_path = Path(td.name) + try: + os.chmod(td_path, 0o700) + except OSError: + pass + + tar_path = td_path / "harvest.tar.gz" + out_dir = td_path / "bundle" + out_dir.mkdir(parents=True, exist_ok=True) + try: + os.chmod(out_dir, 0o700) + except OSError: + pass + + decrypt_file_binary_to(p, tar_path, mode=0o600) + with tarfile.open(tar_path, mode="r:gz") as tf: + _safe_extract_tar(tf, out_dir) + + return BundleRef(dir=out_dir, tempdir=td) + + # Plain tarballs (useful for operators who rsync/zip harvests around). + if p.suffixes[-2:] == [".tar", ".gz"] or p.suffix == ".tgz": + td = tempfile.TemporaryDirectory(prefix="enroll-harvest-") + td_path = Path(td.name) + try: + os.chmod(td_path, 0o700) + except OSError: + pass + out_dir = td_path / "bundle" + out_dir.mkdir(parents=True, exist_ok=True) + try: + os.chmod(out_dir, 0o700) + except OSError: + pass + with tarfile.open(p, mode="r:gz") as tf: + _safe_extract_tar(tf, out_dir) + return BundleRef(dir=out_dir, tempdir=td) + + raise RuntimeError( + f"Harvest path is not a directory, state.json, encrypted bundle, or tarball: {p}" + ) + + +def _load_state(bundle_dir: Path) -> Dict[str, Any]: + sp = bundle_dir / "state.json" + with open(sp, "r", encoding="utf-8") as f: + return json.load(f) + + +def _all_packages(state: Dict[str, Any]) -> List[str]: + pkgs = set(state.get("manual_packages", []) or []) + pkgs |= set(state.get("manual_packages_skipped", []) or []) + for s in state.get("services", []) or []: + for p in s.get("packages", []) or []: + pkgs.add(p) + return sorted(pkgs) + + +def _service_units(state: Dict[str, Any]) -> Dict[str, 
Dict[str, Any]]: + out: Dict[str, Dict[str, Any]] = {} + for s in state.get("services", []) or []: + unit = s.get("unit") + if unit: + out[str(unit)] = s + return out + + +def _users_by_name(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: + users = (state.get("users") or {}).get("users") or [] + out: Dict[str, Dict[str, Any]] = {} + for u in users: + name = u.get("name") + if name: + out[str(name)] = u + return out + + +@dataclass(frozen=True) +class FileRec: + path: str + role: str + src_rel: str + owner: Optional[str] + group: Optional[str] + mode: Optional[str] + reason: Optional[str] + + +def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, Any]]]: + # Services + for s in state.get("services", []) or []: + role = s.get("role_name") or "unknown" + for mf in s.get("managed_files", []) or []: + yield str(role), mf + + # Package roles + for p in state.get("package_roles", []) or []: + role = p.get("role_name") or "unknown" + for mf in p.get("managed_files", []) or []: + yield str(role), mf + + # Users + u = state.get("users") or {} + u_role = u.get("role_name") or "users" + for mf in u.get("managed_files", []) or []: + yield str(u_role), mf + + # etc_custom + ec = state.get("etc_custom") or {} + ec_role = ec.get("role_name") or "etc_custom" + for mf in ec.get("managed_files", []) or []: + yield str(ec_role), mf + + +def _file_index(bundle_dir: Path, state: Dict[str, Any]) -> Dict[str, FileRec]: + """Return mapping of absolute path -> FileRec. + + If duplicates occur, the first one wins (should be rare by design). 
+ """ + + out: Dict[str, FileRec] = {} + for role, mf in _iter_managed_files(state): + p = mf.get("path") + src_rel = mf.get("src_rel") + if not p or not src_rel: + continue + p = str(p) + if p in out: + continue + out[p] = FileRec( + path=p, + role=str(role), + src_rel=str(src_rel), + owner=mf.get("owner"), + group=mf.get("group"), + mode=mf.get("mode"), + reason=mf.get("reason"), + ) + return out + + +def _artifact_path(bundle_dir: Path, rec: FileRec) -> Path: + return bundle_dir / "artifacts" / rec.role / rec.src_rel + + +def compare_harvests( + old_path: str, + new_path: str, + *, + sops_mode: bool = False, +) -> Tuple[Dict[str, Any], bool]: + """Compare two harvests. + + Returns (report, has_changes). + """ + with ExitStack() as stack: + old_b = _bundle_from_input(old_path, sops_mode=sops_mode) + new_b = _bundle_from_input(new_path, sops_mode=sops_mode) + if old_b.tempdir: + stack.callback(old_b.tempdir.cleanup) + if new_b.tempdir: + stack.callback(new_b.tempdir.cleanup) + + old_state = _load_state(old_b.dir) + new_state = _load_state(new_b.dir) + + old_pkgs = set(_all_packages(old_state)) + new_pkgs = set(_all_packages(new_state)) + + pkgs_added = sorted(new_pkgs - old_pkgs) + pkgs_removed = sorted(old_pkgs - new_pkgs) + + old_units = _service_units(old_state) + new_units = _service_units(new_state) + units_added = sorted(set(new_units) - set(old_units)) + units_removed = sorted(set(old_units) - set(new_units)) + + units_changed: List[Dict[str, Any]] = [] + for unit in sorted(set(old_units) & set(new_units)): + a = old_units[unit] + b = new_units[unit] + ch: Dict[str, Any] = {} + for k in [ + "active_state", + "sub_state", + "unit_file_state", + "condition_result", + ]: + if a.get(k) != b.get(k): + ch[k] = {"old": a.get(k), "new": b.get(k)} + a_pk = set(a.get("packages", []) or []) + b_pk = set(b.get("packages", []) or []) + if a_pk != b_pk: + ch["packages"] = { + "added": sorted(b_pk - a_pk), + "removed": sorted(a_pk - b_pk), + } + if ch: + 
units_changed.append({"unit": unit, "changes": ch}) + + old_users = _users_by_name(old_state) + new_users = _users_by_name(new_state) + users_added = sorted(set(new_users) - set(old_users)) + users_removed = sorted(set(old_users) - set(new_users)) + + users_changed: List[Dict[str, Any]] = [] + for name in sorted(set(old_users) & set(new_users)): + a = old_users[name] + b = new_users[name] + ch: Dict[str, Any] = {} + for k in [ + "uid", + "gid", + "gecos", + "home", + "shell", + "primary_group", + ]: + if a.get(k) != b.get(k): + ch[k] = {"old": a.get(k), "new": b.get(k)} + a_sg = set(a.get("supplementary_groups", []) or []) + b_sg = set(b.get("supplementary_groups", []) or []) + if a_sg != b_sg: + ch["supplementary_groups"] = { + "added": sorted(b_sg - a_sg), + "removed": sorted(a_sg - b_sg), + } + if ch: + users_changed.append({"name": name, "changes": ch}) + + old_files = _file_index(old_b.dir, old_state) + new_files = _file_index(new_b.dir, new_state) + old_paths_set = set(old_files) + new_paths_set = set(new_files) + + files_added = sorted(new_paths_set - old_paths_set) + files_removed = sorted(old_paths_set - new_paths_set) + + # Hash cache to avoid reading the same file more than once. + hash_cache: Dict[str, str] = {} + + def _hash_for(bundle_dir: Path, rec: FileRec) -> Optional[str]: + ap = _artifact_path(bundle_dir, rec) + if not ap.exists() or not ap.is_file(): + return None + key = str(ap) + if key in hash_cache: + return hash_cache[key] + hash_cache[key] = _sha256(ap) + return hash_cache[key] + + files_changed: List[Dict[str, Any]] = [] + for p in sorted(old_paths_set & new_paths_set): + a = old_files[p] + b = new_files[p] + ch: Dict[str, Any] = {} + + # Role movement is itself interesting (e.g., file ownership attribution changed). 
+ if a.role != b.role: + ch["role"] = {"old": a.role, "new": b.role} + for k in ["owner", "group", "mode", "reason"]: + av = getattr(a, k) + bv = getattr(b, k) + if av != bv: + ch[k] = {"old": av, "new": bv} + + ha = _hash_for(old_b.dir, a) + hb = _hash_for(new_b.dir, b) + if ha is None or hb is None: + if ha != hb: + ch["content"] = { + "old": "missing" if ha is None else "present", + "new": "missing" if hb is None else "present", + } + else: + if ha != hb: + ch["content"] = {"old_sha256": ha, "new_sha256": hb} + + if ch: + files_changed.append({"path": p, "changes": ch}) + + has_changes = any( + [ + pkgs_added, + pkgs_removed, + units_added, + units_removed, + units_changed, + users_added, + users_removed, + users_changed, + files_added, + files_removed, + files_changed, + ] + ) + + def _mtime_iso(p: Path) -> Optional[str]: + try: + ts = p.stat().st_mtime + except OSError: + return None + return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat() + + report: Dict[str, Any] = { + "generated_at": _utc_now_iso(), + "old": { + "input": old_path, + "bundle_dir": str(old_b.dir), + "state_mtime": _mtime_iso(old_b.state_path), + "host": (old_state.get("host") or {}).get("hostname"), + }, + "new": { + "input": new_path, + "bundle_dir": str(new_b.dir), + "state_mtime": _mtime_iso(new_b.state_path), + "host": (new_state.get("host") or {}).get("hostname"), + }, + "packages": {"added": pkgs_added, "removed": pkgs_removed}, + "services": { + "enabled_added": units_added, + "enabled_removed": units_removed, + "changed": units_changed, + }, + "users": { + "added": users_added, + "removed": users_removed, + "changed": users_changed, + }, + "files": { + "added": [ + { + "path": p, + "role": new_files[p].role, + "reason": new_files[p].reason, + } + for p in files_added + ], + "removed": [ + { + "path": p, + "role": old_files[p].role, + "reason": old_files[p].reason, + } + for p in files_removed + ], + "changed": files_changed, + }, + } + + return report, has_changes + + +def 
format_report(report: Dict[str, Any], *, fmt: str = "text") -> str: + fmt = (fmt or "text").lower() + if fmt == "json": + return json.dumps(report, indent=2, sort_keys=True) + if fmt == "markdown": + return _report_markdown(report) + return _report_text(report) + + +def _report_text(report: Dict[str, Any]) -> str: + lines: List[str] = [] + old = report.get("old", {}) + new = report.get("new", {}) + lines.append( + f"enroll diff report (generated {report.get('generated_at')})\n" + f"old: {old.get('input')} (host={old.get('host')}, state_mtime={old.get('state_mtime')})\n" + f"new: {new.get('input')} (host={new.get('host')}, state_mtime={new.get('state_mtime')})" + ) + + pk = report.get("packages", {}) + lines.append("\nPackages") + lines.append(f" added: {len(pk.get('added', []) or [])}") + lines.append(f" removed: {len(pk.get('removed', []) or [])}") + for p in pk.get("added", []) or []: + lines.append(f" + {p}") + for p in pk.get("removed", []) or []: + lines.append(f" - {p}") + + sv = report.get("services", {}) + lines.append("\nServices (enabled systemd units)") + for u in sv.get("enabled_added", []) or []: + lines.append(f" + {u}") + for u in sv.get("enabled_removed", []) or []: + lines.append(f" - {u}") + for ch in sv.get("changed", []) or []: + unit = ch.get("unit") + lines.append(f" * {unit} changed") + for k, v in (ch.get("changes") or {}).items(): + if k == "packages": + a = (v or {}).get("added", []) + r = (v or {}).get("removed", []) + if a: + lines.append(f" packages +: {', '.join(a)}") + if r: + lines.append(f" packages -: {', '.join(r)}") + else: + lines.append(f" {k}: {v.get('old')} -> {v.get('new')}") + + us = report.get("users", {}) + lines.append("\nUsers") + for u in us.get("added", []) or []: + lines.append(f" + {u}") + for u in us.get("removed", []) or []: + lines.append(f" - {u}") + for ch in us.get("changed", []) or []: + name = ch.get("name") + lines.append(f" * {name} changed") + for k, v in (ch.get("changes") or {}).items(): + if k == 
"supplementary_groups": + a = (v or {}).get("added", []) + r = (v or {}).get("removed", []) + if a: + lines.append(f" groups +: {', '.join(a)}") + if r: + lines.append(f" groups -: {', '.join(r)}") + else: + lines.append(f" {k}: {v.get('old')} -> {v.get('new')}") + + fl = report.get("files", {}) + lines.append("\nFiles") + for e in fl.get("added", []) or []: + lines.append( + f" + {e.get('path')} (role={e.get('role')}, reason={e.get('reason')})" + ) + for e in fl.get("removed", []) or []: + lines.append( + f" - {e.get('path')} (role={e.get('role')}, reason={e.get('reason')})" + ) + for ch in fl.get("changed", []) or []: + p = ch.get("path") + lines.append(f" * {p} changed") + for k, v in (ch.get("changes") or {}).items(): + if k == "content": + if "old_sha256" in (v or {}): + lines.append(" content: sha256 changed") + else: + lines.append(f" content: {v.get('old')} -> {v.get('new')}") + else: + lines.append(f" {k}: {v.get('old')} -> {v.get('new')}") + + if not any( + [ + (pk.get("added") or []), + (pk.get("removed") or []), + (sv.get("enabled_added") or []), + (sv.get("enabled_removed") or []), + (sv.get("changed") or []), + (us.get("added") or []), + (us.get("removed") or []), + (us.get("changed") or []), + (fl.get("added") or []), + (fl.get("removed") or []), + (fl.get("changed") or []), + ] + ): + lines.append("\nNo differences detected.") + + return "\n".join(lines) + "\n" + + +def _report_markdown(report: Dict[str, Any]) -> str: + old = report.get("old", {}) + new = report.get("new", {}) + out: List[str] = [] + out.append("# enroll diff report\n") + out.append(f"Generated: `{report.get('generated_at')}`\n") + out.append( + f"- **Old**: `{old.get('input')}` (host={old.get('host')}, state_mtime={old.get('state_mtime')})\n" + f"- **New**: `{new.get('input')}` (host={new.get('host')}, state_mtime={new.get('state_mtime')})\n" + ) + + pk = report.get("packages", {}) + out.append("## Packages\n") + out.append(f"- Added: {len(pk.get('added', []) or [])}\n") + for p in 
pk.get("added", []) or []: + out.append(f" - `+ {p}`\n") + out.append(f"- Removed: {len(pk.get('removed', []) or [])}\n") + for p in pk.get("removed", []) or []: + out.append(f" - `- {p}`\n") + + sv = report.get("services", {}) + out.append("## Services (enabled systemd units)\n") + if sv.get("enabled_added"): + out.append("- Enabled added\n") + for u in sv.get("enabled_added", []) or []: + out.append(f" - `+ {u}`\n") + if sv.get("enabled_removed"): + out.append("- Enabled removed\n") + for u in sv.get("enabled_removed", []) or []: + out.append(f" - `- {u}`\n") + if sv.get("changed"): + out.append("- Changed\n") + for ch in sv.get("changed", []) or []: + unit = ch.get("unit") + out.append(f" - `{unit}`\n") + for k, v in (ch.get("changes") or {}).items(): + if k == "packages": + a = (v or {}).get("added", []) + r = (v or {}).get("removed", []) + if a: + out.append( + f" - packages added: {', '.join('`'+x+'`' for x in a)}\n" + ) + if r: + out.append( + f" - packages removed: {', '.join('`'+x+'`' for x in r)}\n" + ) + else: + out.append(f" - {k}: `{v.get('old')}` → `{v.get('new')}`\n") + + us = report.get("users", {}) + out.append("## Users\n") + if us.get("added"): + out.append("- Added\n") + for u in us.get("added", []) or []: + out.append(f" - `+ {u}`\n") + if us.get("removed"): + out.append("- Removed\n") + for u in us.get("removed", []) or []: + out.append(f" - `- {u}`\n") + if us.get("changed"): + out.append("- Changed\n") + for ch in us.get("changed", []) or []: + name = ch.get("name") + out.append(f" - `{name}`\n") + for k, v in (ch.get("changes") or {}).items(): + if k == "supplementary_groups": + a = (v or {}).get("added", []) + r = (v or {}).get("removed", []) + if a: + out.append( + f" - groups added: {', '.join('`'+x+'`' for x in a)}\n" + ) + if r: + out.append( + f" - groups removed: {', '.join('`'+x+'`' for x in r)}\n" + ) + else: + out.append(f" - {k}: `{v.get('old')}` → `{v.get('new')}`\n") + + fl = report.get("files", {}) + out.append("## Files\n") + 
if fl.get("added"): + out.append("- Added\n") + for e in fl.get("added", []) or []: + out.append( + f" - `+ {e.get('path')}` (role={e.get('role')}, reason={e.get('reason')})\n" + ) + if fl.get("removed"): + out.append("- Removed\n") + for e in fl.get("removed", []) or []: + out.append( + f" - `- {e.get('path')}` (role={e.get('role')}, reason={e.get('reason')})\n" + ) + if fl.get("changed"): + out.append("- Changed\n") + for ch in fl.get("changed", []) or []: + p = ch.get("path") + out.append(f" - `{p}`\n") + for k, v in (ch.get("changes") or {}).items(): + if k == "content": + if "old_sha256" in (v or {}): + out.append(" - content: sha256 changed\n") + else: + out.append( + f" - content: `{v.get('old')}` → `{v.get('new')}`\n" + ) + else: + out.append(f" - {k}: `{v.get('old')}` → `{v.get('new')}`\n") + + if not any( + [ + (pk.get("added") or []), + (pk.get("removed") or []), + (sv.get("enabled_added") or []), + (sv.get("enabled_removed") or []), + (sv.get("changed") or []), + (us.get("added") or []), + (us.get("removed") or []), + (us.get("changed") or []), + (fl.get("added") or []), + (fl.get("removed") or []), + (fl.get("changed") or []), + ] + ): + out.append("\n_No differences detected._\n") + + return "".join(out) + + +def post_webhook( + url: str, + body: bytes, + *, + headers: Optional[Dict[str, str]] = None, + timeout_s: int = 10, +) -> Tuple[int, str]: + req = urllib.request.Request(url=url, data=body, method="POST") + for k, v in (headers or {}).items(): + req.add_header(k, v) + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: # nosec + status = int(getattr(resp, "status", 0) or 0) + text = resp.read().decode("utf-8", errors="replace") + return status, text + except Exception as e: + raise RuntimeError(f"webhook POST failed: {e}") from e + + +def send_email( + *, + to_addrs: List[str], + subject: str, + body: str, + from_addr: Optional[str] = None, + smtp: Optional[str] = None, + smtp_user: Optional[str] = None, + smtp_password: 
Optional[str] = None, +) -> None: + if not to_addrs: + raise RuntimeError("email: no recipients") + + msg = EmailMessage() + msg["To"] = ", ".join(to_addrs) + if from_addr: + msg["From"] = from_addr + else: + host = os.uname().nodename + msg["From"] = f"enroll@{host}" + msg["Subject"] = subject + msg.set_content(body) + + # Preferred: use local sendmail if smtp wasn't specified. + if not smtp: + sendmail = shutil.which("sendmail") + if not sendmail: + raise RuntimeError( + "email: no --smtp provided and sendmail not found on PATH" + ) + p = subprocess.run( + [sendmail, "-t", "-i"], + input=msg.as_bytes(), + capture_output=True, + check=False, + ) # nosec + if p.returncode != 0: + raise RuntimeError( + "email: sendmail failed:\n" + f" rc: {p.returncode}\n" + f" stderr: {p.stderr.decode('utf-8', errors='replace').strip()}" + ) + return + + import smtplib + + host = smtp + port = 25 + if ":" in smtp: + host, port_s = smtp.rsplit(":", 1) + try: + port = int(port_s) + except ValueError: + raise RuntimeError(f"email: invalid smtp port in {smtp!r}") + + with smtplib.SMTP(host, port, timeout=10) as s: + s.ehlo() + try: + s.starttls() + s.ehlo() + except Exception: + # STARTTLS is optional; ignore if unsupported. + pass # nosec + if smtp_user: + s.login(smtp_user, smtp_password or "") + s.send_message(msg) diff --git a/pyproject.toml b/pyproject.toml index ac65b02..5231ad9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.0" +version = "0.1.1" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" From 4660a0703e2471d35a3b6153d295b9195a7c4c93 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 17:11:04 +1100 Subject: [PATCH 07/85] Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or symlinks) and store in `usr_local_custom` role, similar to `etc_custom`. 
--- CHANGELOG.md | 5 ++ debian/changelog | 7 ++ enroll/diff.py | 6 ++ enroll/harvest.py | 106 ++++++++++++++++++++++++++ enroll/ignore.py | 5 ++ enroll/manifest.py | 102 +++++++++++++++++++++++++ pyproject.toml | 2 +- tests/test_diff_usr_local_custom.py | 111 ++++++++++++++++++++++++++++ tests/test_harvest.py | 41 +++++++++- tests/test_manifest.py | 73 ++++++++++++++++++ tests/test_misc_coverage.py | 96 ++++++++++++++++++++++++ 11 files changed, 551 insertions(+), 3 deletions(-) create mode 100644 tests/test_diff_usr_local_custom.py create mode 100644 tests/test_misc_coverage.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 81eed41..0e80a13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.2 + + * Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or + symlinks) and store in `usr_local_custom` role, similar to `etc_custom`. + # 0.1.1 * Add `diff` subcommand which can compare two harvests and send email or webhook notifications in different diff --git a/debian/changelog b/debian/changelog index 0cc5861..0b16cfa 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.1.2) unstable; urgency=medium + + * Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or + symlinks) and store in `usr_local_custom` role, similar to `etc_custom`. 
+ + -- Miguel Jacq Thu, 18 Dec 2025 17:07:00 +1100 + enroll (0.1.1) unstable; urgency=medium * Add `diff` subcommand which can compare two harvests and send email or webhook notifications in different diff --git a/enroll/diff.py b/enroll/diff.py index 9b396fc..e2861c9 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -190,6 +190,12 @@ def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, for mf in ec.get("managed_files", []) or []: yield str(ec_role), mf + # usr_local_custom + ul = state.get("usr_local_custom") or {} + ul_role = ul.get("role_name") or "usr_local_custom" + for mf in ul.get("managed_files", []) or []: + yield str(ul_role), mf + def _file_index(bundle_dir: Path, state: Dict[str, Any]) -> Dict[str, FileRec]: """Return mapping of absolute path -> FileRec. diff --git a/enroll/harvest.py b/enroll/harvest.py index ef93903..659bebc 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -78,6 +78,14 @@ class EtcCustomSnapshot: notes: List[str] +@dataclass +class UsrLocalCustomSnapshot: + role_name: str + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + ALLOWED_UNOWNED_EXTS = { ".conf", ".cfg", @@ -701,6 +709,103 @@ def harvest( notes=etc_notes, ) + # ------------------------- + # usr_local_custom role (/usr/local/etc + /usr/local/bin scripts) + # ------------------------- + ul_notes: List[str] = [] + ul_excluded: List[ExcludedFile] = [] + ul_managed: List[ManagedFile] = [] + ul_role_name = "usr_local_custom" + + # Extend the already-captured set with etc_custom. 
+ already_all: Set[str] = set(already) + for mf in etc_managed: + already_all.add(mf.path) + + def _scan_usr_local_tree( + root: str, *, require_executable: bool, cap: int, reason: str + ) -> None: + scanned = 0 + if not os.path.isdir(root): + return + for dirpath, _, filenames in os.walk(root): + for fn in filenames: + path = os.path.join(dirpath, fn) + if path in already_all: + continue + if not os.path.isfile(path) or os.path.islink(path): + continue + if require_executable: + try: + owner, group, mode = stat_triplet(path) + except OSError: + ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + try: + if (int(mode, 8) & 0o111) == 0: + continue + except ValueError: + # If mode parsing fails, be conservative and skip. + continue + else: + try: + owner, group, mode = stat_triplet(path) + except OSError: + ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + deny = policy.deny_reason(path) + if deny: + ul_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, ul_role_name, path, src_rel) + except OSError: + ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + ul_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason, + ) + ) + + already_all.add(path) + scanned += 1 + if scanned >= cap: + ul_notes.append(f"Reached file cap ({cap}) while scanning {root}.") + return + + # /usr/local/etc: capture all non-binary regular files (filtered by IgnorePolicy) + _scan_usr_local_tree( + "/usr/local/etc", + require_executable=False, + cap=2000, + reason="usr_local_etc_custom", + ) + + # /usr/local/bin: capture executable scripts only (skip non-executable text) + _scan_usr_local_tree( + "/usr/local/bin", + require_executable=True, + cap=2000, + reason="usr_local_bin_script", + ) + + usr_local_custom_snapshot = UsrLocalCustomSnapshot( + role_name=ul_role_name, 
+ managed_files=ul_managed, + excluded=ul_excluded, + notes=ul_notes, + ) + state = { "host": {"hostname": os.uname().nodename, "os": "debian"}, "users": asdict(users_snapshot), @@ -709,6 +814,7 @@ def harvest( "manual_packages_skipped": manual_pkgs_skipped, "package_roles": [asdict(p) for p in pkg_snaps], "etc_custom": asdict(etc_custom_snapshot), + "usr_local_custom": asdict(usr_local_custom_snapshot), } state_path = os.path.join(bundle_dir, "state.json") diff --git a/enroll/ignore.py b/enroll/ignore.py index d8ffce9..93ba423 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -23,6 +23,11 @@ DEFAULT_DENY_GLOBS = [ "/etc/gshadow", "/etc/*shadow", "/etc/letsencrypt/*", + "/usr/local/etc/ssl/private/*", + "/usr/local/etc/ssh/ssh_host_*", + "/usr/local/etc/*shadow", + "/usr/local/etc/*gshadow", + "/usr/local/etc/letsencrypt/*", ] SENSITIVE_CONTENT_PATTERNS = [ diff --git a/enroll/manifest.py b/enroll/manifest.py index e55418c..6909c5c 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -629,6 +629,7 @@ def _manifest_from_bundle_dir( package_roles: List[Dict[str, Any]] = state.get("package_roles", []) users_snapshot: Dict[str, Any] = state.get("users", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) + usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) site_mode = fqdn is not None and fqdn != "" @@ -661,6 +662,7 @@ def _manifest_from_bundle_dir( manifested_users_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] + manifested_usr_local_custom_roles: List[str] = [] manifested_service_roles: List[str] = [] manifested_pkg_roles: List[str] = [] @@ -999,6 +1001,105 @@ Unowned /etc config files not attributed to packages or services. 
# ------------------------- + # ------------------------- + + # ------------------------- + # usr_local_custom role (/usr/local/etc + /usr/local/bin scripts) + # ------------------------- + if usr_local_custom_snapshot and usr_local_custom_snapshot.get("managed_files"): + role = usr_local_custom_snapshot.get("role_name", "usr_local_custom") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = usr_local_custom_snapshot.get("managed_files", []) + excluded = usr_local_custom_snapshot.get("excluded", []) + notes = usr_local_custom_snapshot.get("notes", []) + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + # Copy only the non-templated artifacts (templates live in the role). + if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = "---\n" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + # No handlers needed for this role, but keep a valid YAML document. 
+ with open( + os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\n") + + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\ndependencies: []\n") + + readme = ( + """# usr_local_custom\n\n""" + "Unowned /usr/local files (scripts in /usr/local/bin and config under /usr/local/etc).\n\n" + "## Managed files\n" + + ("\n".join([f"- {mf.get('path')}" for mf in managed_files]) or "- (none)") + + "\n\n## Excluded\n" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + "\n\n## Notes\n" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + "\n" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_usr_local_custom_roles.append(role) + + # ------------------------- + # ------------------------- # Service roles # ------------------------- @@ -1310,6 +1411,7 @@ Generated for package `{pkg}`. 
manifested_pkg_roles + manifested_service_roles + manifested_etc_custom_roles + + manifested_usr_local_custom_roles + manifested_users_roles ) diff --git a/pyproject.toml b/pyproject.toml index 5231ad9..b5a07ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.1" +version = "0.1.2" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/tests/test_diff_usr_local_custom.py b/tests/test_diff_usr_local_custom.py new file mode 100644 index 0000000..88d594f --- /dev/null +++ b/tests/test_diff_usr_local_custom.py @@ -0,0 +1,111 @@ +import json +from pathlib import Path + +from enroll.diff import compare_harvests + + +def _write_bundle(root: Path, state: dict, artifacts: dict[str, bytes]) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + for rel, data in artifacts.items(): + p = root / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_bytes(data) + + +def test_diff_includes_usr_local_custom_files(tmp_path: Path): + old = tmp_path / "old" + new = tmp_path / "new" + + old_state = { + "host": {"hostname": "h1", "os": "debian"}, + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "package_roles": [], + "manual_packages": ["curl"], + "manual_packages_skipped": [], + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + } + ], + "excluded": [], + "notes": [], + }, + } + new_state = { + **old_state, + "manual_packages": ["curl", "htop"], + 
"usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + }, + { + "path": "/usr/local/bin/myscript", + "src_rel": "usr/local/bin/myscript", + "owner": "root", + "group": "root", + "mode": "0755", + "reason": "usr_local_bin_script", + }, + ], + "excluded": [], + "notes": [], + }, + } + + _write_bundle( + old, + old_state, + { + "artifacts/usr_local_custom/usr/local/etc/myapp.conf": b"myapp=1\n", + }, + ) + _write_bundle( + new, + new_state, + { + "artifacts/usr_local_custom/usr/local/etc/myapp.conf": b"myapp=2\n", + "artifacts/usr_local_custom/usr/local/bin/myscript": b"#!/bin/sh\necho hi\n", + }, + ) + + report, has_changes = compare_harvests(str(old), str(new)) + assert has_changes is True + + # Packages: htop was added. + assert report["packages"]["added"] == ["htop"] + + # Files: /usr/local/etc/myapp.conf should be detected as changed (content sha differs). + changed_paths = {c["path"] for c in report["files"]["changed"]} + assert "/usr/local/etc/myapp.conf" in changed_paths + + # Files: new script was added. + added_paths = {a["path"] for a in report["files"]["added"]} + assert "/usr/local/bin/myscript" in added_paths diff --git a/tests/test_harvest.py b/tests/test_harvest.py index 8e19fb4..a832c81 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -23,30 +23,51 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( real_islink = os.path.islink # Fake filesystem: two /etc files exist, only one is dpkg-owned. + # Also include some /usr/local files to populate usr_local_custom. 
files = { "/etc/openvpn/server.conf": b"server", "/etc/default/keyboard": b"kbd", + "/usr/local/etc/myapp.conf": b"myapp=1\n", + "/usr/local/bin/myscript": b"#!/bin/sh\necho hi\n", + # non-executable text under /usr/local/bin should be skipped + "/usr/local/bin/readme.txt": b"hello\n", + } + dirs = { + "/etc", + "/etc/openvpn", + "/etc/default", + "/usr", + "/usr/local", + "/usr/local/etc", + "/usr/local/bin", } - dirs = {"/etc", "/etc/openvpn", "/etc/default"} def fake_isfile(p: str) -> bool: if p.startswith("/etc/") or p == "/etc": return p in files + if p.startswith("/usr/local/"): + return p in files return real_isfile(p) def fake_isdir(p: str) -> bool: if p.startswith("/etc"): return p in dirs + if p.startswith("/usr/local") or p in ("/usr", "/usr/local"): + return p in dirs return real_isdir(p) def fake_islink(p: str) -> bool: if p.startswith("/etc"): return False + if p.startswith("/usr/local"): + return False return real_islink(p) def fake_exists(p: str) -> bool: if p.startswith("/etc"): return p in files or p in dirs + if p.startswith("/usr/local") or p in ("/usr", "/usr/local"): + return p in files or p in dirs return real_exists(p) def fake_walk(root: str): @@ -57,6 +78,10 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( yield ("/etc/openvpn", [], ["server.conf"]) elif root == "/etc/default": yield ("/etc/default", [], ["keyboard"]) + elif root == "/usr/local/etc": + yield ("/usr/local/etc", [], ["myapp.conf"]) + elif root == "/usr/local/bin": + yield ("/usr/local/bin", [], ["myscript", "readme.txt"]) else: yield (root, [], []) @@ -109,7 +134,13 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( monkeypatch.setattr(h, "list_manual_packages", lambda: ["openvpn", "curl"]) monkeypatch.setattr(h, "collect_non_system_users", lambda: []) - monkeypatch.setattr(h, "stat_triplet", lambda p: ("root", "root", "0644")) + def fake_stat_triplet(p: str): + if p == "/usr/local/bin/myscript": + return ("root", "root", "0755") + # 
/usr/local/bin/readme.txt remains non-executable + return ("root", "root", "0644") + + monkeypatch.setattr(h, "stat_triplet", fake_stat_triplet) # Avoid needing source files on disk by implementing our own bundle copier def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): @@ -139,3 +170,9 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( assert any( mf["path"] == "/etc/default/keyboard" for mf in etc_custom["managed_files"] ) + + # /usr/local content is attributed to usr_local_custom + ul = st["usr_local_custom"] + assert any(mf["path"] == "/usr/local/etc/myapp.conf" for mf in ul["managed_files"]) + assert any(mf["path"] == "/usr/local/bin/myscript" for mf in ul["managed_files"]) + assert all(mf["path"] != "/usr/local/bin/readme.txt" for mf in ul["managed_files"]) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 99040b0..92c3dfc 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -47,6 +47,29 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): "excluded": [], "notes": [], }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + }, + { + "path": "/usr/local/bin/myscript", + "src_rel": "usr/local/bin/myscript", + "owner": "root", + "group": "root", + "mode": "0755", + "reason": "usr_local_bin_script", + }, + ], + "excluded": [], + "notes": [], + }, "services": [ { "unit": "foo.service", @@ -92,6 +115,26 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): "kbd", encoding="utf-8" ) + # Create artifacts for usr_local_custom files so copy works + (bundle / "artifacts" / "usr_local_custom" / "usr" / "local" / "etc").mkdir( + parents=True, exist_ok=True + ) + ( + bundle + / "artifacts" + / "usr_local_custom" + / "usr" + / "local" + / "etc" + / 
"myapp.conf" + ).write_text("myapp=1\n", encoding="utf-8") + (bundle / "artifacts" / "usr_local_custom" / "usr" / "local" / "bin").mkdir( + parents=True, exist_ok=True + ) + ( + bundle / "artifacts" / "usr_local_custom" / "usr" / "local" / "bin" / "myscript" + ).write_text("#!/bin/sh\necho hi\n", encoding="utf-8") + manifest(str(bundle), str(out)) # Service role: systemd management should be gated on foo_manage_unit and a probe. @@ -119,6 +162,7 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): pb = (out / "playbook.yml").read_text(encoding="utf-8") assert "- users" in pb assert "- etc_custom" in pb + assert "- usr_local_custom" in pb assert "- curl" in pb assert "- foo" in pb @@ -168,6 +212,21 @@ def test_manifest_site_mode_creates_host_inventory_and_raw_files(tmp_path: Path) "excluded": [], "notes": [], }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + } + ], + "excluded": [], + "notes": [], + }, "services": [ { "unit": "foo.service", @@ -197,6 +256,20 @@ def test_manifest_site_mode_creates_host_inventory_and_raw_files(tmp_path: Path) bundle.mkdir(parents=True, exist_ok=True) (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + # Artifacts for usr_local_custom file so copy works. + (bundle / "artifacts" / "usr_local_custom" / "usr" / "local" / "etc").mkdir( + parents=True, exist_ok=True + ) + ( + bundle + / "artifacts" + / "usr_local_custom" + / "usr" + / "local" + / "etc" + / "myapp.conf" + ).write_text("myapp=1\n", encoding="utf-8") + manifest(str(bundle), str(out), fqdn=fqdn) # Host playbook exists. 
diff --git a/tests/test_misc_coverage.py b/tests/test_misc_coverage.py new file mode 100644 index 0000000..b4250fc --- /dev/null +++ b/tests/test_misc_coverage.py @@ -0,0 +1,96 @@ +import stat +from pathlib import Path + +import pytest + +from enroll.cache import _safe_component, new_harvest_cache_dir +from enroll.ignore import IgnorePolicy +from enroll.sopsutil import ( + SopsError, + _pgp_arg, + decrypt_file_binary_to, + encrypt_file_binary, +) + + +def test_safe_component_sanitizes_and_bounds_length(): + assert _safe_component(" ") == "unknown" + assert _safe_component("a/b c") == "a_b_c" + assert _safe_component("x" * 200) == "x" * 64 + + +def test_new_harvest_cache_dir_uses_xdg_cache_home(tmp_path: Path, monkeypatch): + monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "xdg")) + hc = new_harvest_cache_dir(hint="my host/01") + assert hc.dir.exists() + assert "my_host_01" in hc.dir.name + assert str(hc.dir).startswith(str(tmp_path / "xdg")) + # best-effort: ensure directory is not world-readable on typical FS + try: + mode = stat.S_IMODE(hc.dir.stat().st_mode) + assert mode & 0o077 == 0 + except OSError: + pass + + +def test_ignore_policy_denies_binary_and_sensitive_content(tmp_path: Path): + p_bin = tmp_path / "binfile" + p_bin.write_bytes(b"abc\x00def") + assert IgnorePolicy().deny_reason(str(p_bin)) == "binary_like" + + p_secret = tmp_path / "secret.conf" + p_secret.write_text("password=foo\n", encoding="utf-8") + assert IgnorePolicy().deny_reason(str(p_secret)) == "sensitive_content" + + # dangerous mode disables heuristic scanning (but still checks file-ness/size) + assert IgnorePolicy(dangerous=True).deny_reason(str(p_secret)) is None + + +def test_ignore_policy_denies_usr_local_shadow_by_glob(): + # This should short-circuit before stat() (path doesn't need to exist). 
+ assert IgnorePolicy().deny_reason("/usr/local/etc/shadow") == "denied_path" + + +def test_sops_pgp_arg_and_encrypt_decrypt_roundtrip(tmp_path: Path, monkeypatch): + assert _pgp_arg([" ABC ", "DEF"]) == "ABC,DEF" + with pytest.raises(SopsError): + _pgp_arg([]) + + # Stub out sops and subprocess. + import enroll.sopsutil as s + + monkeypatch.setattr(s, "require_sops_cmd", lambda: "sops") + + class R: + def __init__(self, rc: int, out: bytes, err: bytes = b""): + self.returncode = rc + self.stdout = out + self.stderr = err + + calls = [] + + def fake_run(cmd, capture_output, check): + calls.append(cmd) + # Return a deterministic payload so we can assert file writes. + if "--encrypt" in cmd: + return R(0, b"ENCRYPTED") + if "--decrypt" in cmd: + return R(0, b"PLAINTEXT") + return R(1, b"", b"bad") + + monkeypatch.setattr(s.subprocess, "run", fake_run) + + src = tmp_path / "src.bin" + src.write_bytes(b"x") + enc = tmp_path / "out.sops" + dec = tmp_path / "out.bin" + + encrypt_file_binary(src, enc, pgp_fingerprints=["ABC"], mode=0o600) + assert enc.read_bytes() == b"ENCRYPTED" + + decrypt_file_binary_to(enc, dec, mode=0o644) + assert dec.read_bytes() == b"PLAINTEXT" + + # Sanity: we invoked encrypt and decrypt. + assert any("--encrypt" in c for c in calls) + assert any("--decrypt" in c for c in calls) From 25add369dc44db59b95d7286546b26dcbee9a8c7 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 17:24:45 +1100 Subject: [PATCH 08/85] README.md update --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 68c35e5..6645437 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ It aims to be **optimistic and noninteractive**: - Defensively excludes likely secrets (path denylist + content sniff + size caps). - Captures non-system users and their SSH public keys. - Captures miscellaneous `/etc` files it can’t attribute to a package and installs them in an `etc_custom` role. 
+- Ditto for /usr/local/bin (for non-binary files) and /usr/local/etc - Avoids trying to start systemd services that were detected as inactive during harvest. --- From 240e79706f18d0092fa54698c2e16b7c2ddd127b Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 20 Dec 2025 17:47:00 +1100 Subject: [PATCH 09/85] Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` arguments. --- CHANGELOG.md | 5 + README.md | 26 ++++ enroll/cli.py | 78 +++++++++++- enroll/diff.py | 6 + enroll/harvest.py | 110 ++++++++++++++++ enroll/manifest.py | 115 +++++++++++++++++ enroll/pathfilter.py | 293 +++++++++++++++++++++++++++++++++++++++++++ enroll/remote.py | 21 +++- tests/test_cli.py | 45 ++++++- 9 files changed, 687 insertions(+), 12 deletions(-) create mode 100644 enroll/pathfilter.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e80a13..2d8d6e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.3 + + * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` + arguments. + # 0.1.2 * Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or diff --git a/README.md b/README.md index 6645437..84a6965 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ Harvest state about a host and write a harvest bundle. - Changed-from-default config (plus related custom/unowned files under service dirs) - Non-system users + SSH public keys - Misc `/etc` that can’t be attributed to a package (`etc_custom` role) +- Optional user-specified extra files/dirs via `--include-path` (emitted as an `extra_paths` role at manifest time) **Common flags** - Remote harvesting: @@ -79,6 +80,14 @@ Harvest state about a host and write a harvest bundle. 
- `--dangerous`: disables secret-safety checks (see “Sensitive data” below) - Encrypt bundles at rest: - `--sops `: writes a single encrypted `harvest.tar.gz.sops` instead of a plaintext directory +- Path selection (include/exclude): + - `--include-path ` (repeatable): add extra files/dirs to harvest (even from locations normally ignored, like `/home`). Still subject to secret-safety checks unless `--dangerous`. + - `--exclude-path ` (repeatable): skip files/dirs even if they would normally be harvested. + - Pattern syntax: + - plain path: matches that file; directories match the directory + everything under it + - glob (default): supports `*` and `**` (prefix with `glob:` to force) + - regex: prefix with `re:` or `regex:` + - Precedence: excludes win over includes. --- @@ -227,6 +236,23 @@ enroll harvest --out /tmp/enroll-harvest enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest ``` +### Include paths (`--include-path`) +```bash +# Add a few dotfiles from /home (still secret-safe unless --dangerous) +enroll harvest --out /tmp/enroll-harvest --include-path '/home/*/.bashrc' --include-path '/home/*/.profile' +``` + +### Exclude paths (`--exclude-path`) +```bash +# Skip specific /usr/local/bin entries (or patterns) +enroll harvest --out /tmp/enroll-harvest --exclude-path '/usr/local/bin/docker-*' --exclude-path '/usr/local/bin/some-tool' +``` + +### Regex include +```bash +enroll harvest --out /tmp/enroll-harvest --include-path 're:^/home/[^/]+/\.config/myapp/.*$' +``` + ### `--dangerous` ```bash enroll harvest --out /tmp/enroll-harvest --dangerous diff --git a/enroll/cli.py b/enroll/cli.py index 2d8ed5e..f6efe11 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -125,6 +125,27 @@ def main() -> None: action="store_true", help="Collect files more aggressively (may include secrets). 
Disables secret-avoidance checks.", ) + h.add_argument( + "--include-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Include extra file paths to harvest (repeatable). Supports globs (including '**') and regex via 're:'. " + "Included files are still filtered by IgnorePolicy unless --dangerous is used." + ), + ) + h.add_argument( + "--exclude-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Exclude file paths from harvesting (repeatable). Supports globs (including '**') and regex via 're:'. " + "Excludes apply to all harvesting, including defaults." + ), + ) + h.add_argument( "--sops", nargs="+", @@ -186,6 +207,27 @@ def main() -> None: action="store_true", help="Collect files more aggressively (may include secrets). Disables secret-avoidance checks.", ) + s.add_argument( + "--include-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Include extra file paths to harvest (repeatable). Supports globs (including '**') and regex via 're:'. " + "Included files are still filtered by IgnorePolicy unless --dangerous is used." + ), + ) + s.add_argument( + "--exclude-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Exclude file paths from harvesting (repeatable). Supports globs (including '**') and regex via 're:'. " + "Excludes apply to all harvesting, including defaults." 
+ ), + ) + s.add_argument( "--sops", nargs="+", @@ -320,6 +362,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) @@ -338,6 +382,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) print(str(state)) else: @@ -350,7 +396,12 @@ def main() -> None: os.chmod(tmp_bundle, 0o700) except OSError: pass - harvest(str(tmp_bundle), dangerous=bool(args.dangerous)) + harvest( + str(tmp_bundle), + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) ) @@ -360,7 +411,12 @@ def main() -> None: raise SystemExit( "error: --out is required unless --remote-host is set" ) - path = harvest(args.out, dangerous=bool(args.dangerous)) + path = harvest( + args.out, + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) print(path) elif args.cmd == "manifest": out_enc = manifest( @@ -446,6 +502,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) @@ -473,6 +531,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, 
"include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) manifest( str(harvest_dir), @@ -493,7 +553,12 @@ def main() -> None: os.chmod(tmp_bundle, 0o700) except OSError: pass - harvest(str(tmp_bundle), dangerous=bool(args.dangerous)) + harvest( + str(tmp_bundle), + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) ) @@ -512,7 +577,12 @@ def main() -> None: raise SystemExit( "error: --harvest is required unless --remote-host is set" ) - harvest(args.harvest, dangerous=bool(args.dangerous)) + harvest( + args.harvest, + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) manifest( args.harvest, args.out, diff --git a/enroll/diff.py b/enroll/diff.py index e2861c9..a2b7d91 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -196,6 +196,12 @@ def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, for mf in ul.get("managed_files", []) or []: yield str(ul_role), mf + # extra_paths + xp = state.get("extra_paths") or {} + xp_role = xp.get("role_name") or "extra_paths" + for mf in xp.get("managed_files", []) or []: + yield str(xp_role), mf + def _file_index(bundle_dir: Path, state: Dict[str, Any]) -> Dict[str, FileRec]: """Return mapping of absolute path -> FileRec. 
diff --git a/enroll/harvest.py b/enroll/harvest.py index 659bebc..48242d6 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -19,6 +19,7 @@ from .debian import ( stat_triplet, ) from .ignore import IgnorePolicy +from .pathfilter import PathFilter, expand_includes from .accounts import collect_non_system_users @@ -86,6 +87,16 @@ class UsrLocalCustomSnapshot: notes: List[str] +@dataclass +class ExtraPathsSnapshot: + role_name: str + include_patterns: List[str] + exclude_patterns: List[str] + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + ALLOWED_UNOWNED_EXTS = { ".conf", ".cfg", @@ -250,6 +261,8 @@ def harvest( policy: Optional[IgnorePolicy] = None, *, dangerous: bool = False, + include_paths: Optional[List[str]] = None, + exclude_paths: Optional[List[str]] = None, ) -> str: # If a policy is not supplied, build one. `--dangerous` relaxes secret # detection and deny-glob skipping. @@ -261,6 +274,10 @@ def harvest( policy.dangerous = True os.makedirs(bundle_dir, exist_ok=True) + # User-provided includes/excludes. Excludes apply to all harvesting; + # includes are harvested into an extra role. 
+ path_filter = PathFilter(include=include_paths or (), exclude=exclude_paths or ()) + if hasattr(os, "geteuid") and os.geteuid() != 0: print( "Warning: not running as root; harvest may miss files or metadata.", @@ -406,6 +423,9 @@ def harvest( ) for path, reason in sorted(candidates.items()): + if path_filter.is_excluded(path): + excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue deny = policy.deny_reason(path) if deny: excluded.append(ExcludedFile(path=path, reason=deny)) @@ -522,6 +542,9 @@ def harvest( candidates.setdefault(r, "custom_specific_path") for path, reason in sorted(candidates.items()): + if path_filter.is_excluded(path): + excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue deny = policy.deny_reason(path) if deny: excluded.append(ExcludedFile(path=path, reason=deny)) @@ -593,6 +616,9 @@ def harvest( # Copy only safe SSH public material: authorized_keys + *.pub for sf in u.ssh_files: + if path_filter.is_excluded(sf): + users_excluded.append(ExcludedFile(path=sf, reason="user_excluded")) + continue deny = policy.deny_reason(sf) if deny: users_excluded.append(ExcludedFile(path=sf, reason=deny)) @@ -665,6 +691,10 @@ def harvest( if not _is_confish(path): continue + if path_filter.is_excluded(path): + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + deny = policy.deny_reason(path) if deny: etc_excluded.append(ExcludedFile(path=path, reason=deny)) @@ -754,6 +784,10 @@ def harvest( ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) continue + if path_filter.is_excluded(path): + ul_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + deny = policy.deny_reason(path) if deny: ul_excluded.append(ExcludedFile(path=path, reason=deny)) @@ -806,6 +840,81 @@ def harvest( notes=ul_notes, ) + # ------------------------- + # extra_paths role (user-requested includes) + # ------------------------- + extra_notes: List[str] = [] + extra_excluded: 
List[ExcludedFile] = [] + extra_managed: List[ManagedFile] = [] + extra_role_name = "extra_paths" + + include_specs = list(include_paths or []) + exclude_specs = list(exclude_paths or []) + + if include_specs: + extra_notes.append("User include patterns:") + extra_notes.extend([f"- {p}" for p in include_specs]) + if exclude_specs: + extra_notes.append("User exclude patterns:") + extra_notes.extend([f"- {p}" for p in exclude_specs]) + + included_files: List[str] = [] + if include_specs: + files, inc_notes = expand_includes( + path_filter.iter_include_patterns(), + exclude=path_filter, + max_files=4000, + ) + included_files = files + extra_notes.extend(inc_notes) + + for path in included_files: + if path in already_all: + continue + + if path_filter.is_excluded(path): + extra_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + + deny = policy.deny_reason(path) + if deny: + extra_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + try: + owner, group, mode = stat_triplet(path) + except OSError: + extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, extra_role_name, path, src_rel) + except OSError: + extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + extra_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason="user_include", + ) + ) + already_all.add(path) + + extra_paths_snapshot = ExtraPathsSnapshot( + role_name=extra_role_name, + include_patterns=include_specs, + exclude_patterns=exclude_specs, + managed_files=extra_managed, + excluded=extra_excluded, + notes=extra_notes, + ) + state = { "host": {"hostname": os.uname().nodename, "os": "debian"}, "users": asdict(users_snapshot), @@ -815,6 +924,7 @@ def harvest( "package_roles": [asdict(p) for p in pkg_snaps], "etc_custom": asdict(etc_custom_snapshot), "usr_local_custom": 
asdict(usr_local_custom_snapshot), + "extra_paths": asdict(extra_paths_snapshot), } state_path = os.path.join(bundle_dir, "state.json") diff --git a/enroll/manifest.py b/enroll/manifest.py index 6909c5c..2f28eab 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -630,6 +630,7 @@ def _manifest_from_bundle_dir( users_snapshot: Dict[str, Any] = state.get("users", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) + extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) site_mode = fqdn is not None and fqdn != "" @@ -663,6 +664,7 @@ def _manifest_from_bundle_dir( manifested_users_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] manifested_usr_local_custom_roles: List[str] = [] + manifested_extra_paths_roles: List[str] = [] manifested_service_roles: List[str] = [] manifested_pkg_roles: List[str] = [] @@ -1098,6 +1100,118 @@ Unowned /etc config files not attributed to packages or services. 
manifested_usr_local_custom_roles.append(role) + # ------------------------- + # extra_paths role (user-requested includes) + # ------------------------- + if extra_paths_snapshot and extra_paths_snapshot.get("managed_files"): + role = extra_paths_snapshot.get("role_name", "extra_paths") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = extra_paths_snapshot.get("managed_files", []) + excluded = extra_paths_snapshot.get("excluded", []) + notes = extra_paths_snapshot.get("notes", []) + include_pats = extra_paths_snapshot.get("include_patterns", []) or [] + exclude_pats = extra_paths_snapshot.get("exclude_patterns", []) or [] + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = "---\n" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + with open( + os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\n") + + with 
open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\ndependencies: []\n") + + readme = ( + f"""# {role} + +User-requested extra file harvesting. + +## Include patterns +""" + + ("\n".join([f"- {p}" for p in include_pats]) or "- (none)") + + """\n +## Exclude patterns +""" + + ("\n".join([f"- {p}" for p in exclude_pats]) or "- (none)") + + """\n +## Managed files +""" + + ("\n".join([f"- {mf.get('path')}" for mf in managed_files]) or "- (none)") + + """\n +## Excluded +""" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + """\n +## Notes +""" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + """\n""" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_extra_paths_roles.append(role) + + manifested_usr_local_custom_roles.append(role) + # ------------------------- # ------------------------- @@ -1412,6 +1526,7 @@ Generated for package `{pkg}`. + manifested_service_roles + manifested_etc_custom_roles + manifested_usr_local_custom_roles + + manifested_extra_paths_roles + manifested_users_roles ) diff --git a/enroll/pathfilter.py b/enroll/pathfilter.py new file mode 100644 index 0000000..9df4afa --- /dev/null +++ b/enroll/pathfilter.py @@ -0,0 +1,293 @@ +from __future__ import annotations + +import glob +import os +import re +from dataclasses import dataclass +from pathlib import PurePosixPath +from typing import List, Optional, Sequence, Set, Tuple + + +_REGEX_PREFIXES = ("re:", "regex:") + + +def _has_glob_chars(s: str) -> bool: + return any(ch in s for ch in "*?[") + + +def _norm_abs(p: str) -> str: + """Normalise a path-ish string to an absolute POSIX path. + + We treat inputs that don't start with '/' as being relative to '/'. + """ + + p = p.strip() + if not p: + return "/" + if not p.startswith("/"): + p = "/" + p + # `normpath` keeps a leading '/' for absolute paths. 
+ return os.path.normpath(p) + + +def _posix_match(path: str, pattern: str) -> bool: + """Path matching with glob semantics. + + Uses PurePosixPath.match which: + - treats '/' as a segment separator + - supports '**' for recursive matching + + Both `path` and `pattern` are treated as absolute paths. + """ + + # PurePosixPath.match is anchored and works best on relative strings. + p = path.lstrip("/") + pat = pattern.lstrip("/") + try: + return PurePosixPath(p).match(pat) + except Exception: + # If the pattern is somehow invalid, fail closed. + return False + + +def _regex_literal_prefix(regex: str) -> str: + """Best-effort literal prefix extraction for a regex. + + This lets us pick a starting directory to walk when expanding regex-based + include patterns. + """ + + s = regex + if s.startswith("^"): + s = s[1:] + out: List[str] = [] + escaped = False + meta = set(".^$*+?{}[]\\|()") + for ch in s: + if escaped: + out.append(ch) + escaped = False + continue + if ch == "\\": + escaped = True + continue + if ch in meta: + break + out.append(ch) + return "".join(out) + + +@dataclass(frozen=True) +class CompiledPathPattern: + raw: str + kind: str # 'prefix' | 'glob' | 'regex' + value: str + regex: Optional[re.Pattern[str]] = None + + def matches(self, path: str) -> bool: + p = _norm_abs(path) + + if self.kind == "regex": + if not self.regex: + return False + # Search (not match) so users can write unanchored patterns. + return self.regex.search(p) is not None + + if self.kind == "glob": + return _posix_match(p, self.value) + + # prefix + pref = self.value.rstrip("/") + return p == pref or p.startswith(pref + "/") + + +def compile_path_pattern(raw: str) -> CompiledPathPattern: + s = raw.strip() + for pre in _REGEX_PREFIXES: + if s.startswith(pre): + rex = s[len(pre) :].strip() + try: + return CompiledPathPattern( + raw=raw, kind="regex", value=rex, regex=re.compile(rex) + ) + except re.error: + # Treat invalid regexes as non-matching. 
+ return CompiledPathPattern(raw=raw, kind="regex", value=rex, regex=None) + + # If the user explicitly says glob:, honour it. + if s.startswith("glob:"): + pat = s[len("glob:") :].strip() + return CompiledPathPattern(raw=raw, kind="glob", value=_norm_abs(pat)) + + # Heuristic: if it contains glob metacharacters, treat as a glob. + if _has_glob_chars(s) or "**" in s: + return CompiledPathPattern(raw=raw, kind="glob", value=_norm_abs(s)) + + # Otherwise treat as an exact path-or-prefix (dir subtree). + return CompiledPathPattern(raw=raw, kind="prefix", value=_norm_abs(s)) + + +@dataclass +class PathFilter: + """User-provided path filters. + + Semantics: + - exclude patterns always win + - include patterns are used only to expand *additional* files to harvest + (they do not restrict the default harvest set) + + Patterns: + - By default: glob-like (supports '**') + - Regex: prefix with 're:' or 'regex:' + - Force glob: prefix with 'glob:' + - A plain path without wildcards matches that path and everything under it + (directory-prefix behavior). + + Examples: + --exclude-path /usr/local/bin/docker-* + --include-path /home/*/.bashrc + --include-path 're:^/home/[^/]+/.config/myapp/.*$' + """ + + include: Sequence[str] = () + exclude: Sequence[str] = () + + def __post_init__(self) -> None: + self._include = [ + compile_path_pattern(p) for p in self.include if str(p).strip() + ] + self._exclude = [ + compile_path_pattern(p) for p in self.exclude if str(p).strip() + ] + + def is_excluded(self, path: str) -> bool: + for pat in self._exclude: + if pat.matches(path): + return True + return False + + def iter_include_patterns(self) -> List[CompiledPathPattern]: + return list(self._include) + + +def expand_includes( + patterns: Sequence[CompiledPathPattern], + *, + exclude: Optional[PathFilter] = None, + max_files: int = 4000, +) -> Tuple[List[str], List[str]]: + """Expand include patterns into concrete file paths. + + Returns (paths, notes). 
The returned paths are absolute paths. + + This function is intentionally conservative: + - symlinks are ignored (both dirs and files) + - the number of collected files is capped + + Regex patterns are expanded by walking a best-effort inferred root. + """ + + out: List[str] = [] + notes: List[str] = [] + seen: Set[str] = set() + + def _maybe_add_file(p: str) -> None: + if len(out) >= max_files: + return + p = _norm_abs(p) + if exclude and exclude.is_excluded(p): + return + if p in seen: + return + if not os.path.isfile(p) or os.path.islink(p): + return + seen.add(p) + out.append(p) + + def _walk_dir(root: str, match: Optional[CompiledPathPattern] = None) -> None: + root = _norm_abs(root) + if not os.path.isdir(root) or os.path.islink(root): + return + for dirpath, dirnames, filenames in os.walk(root, followlinks=False): + # Prune excluded directories early. + if exclude: + dirnames[:] = [ + d + for d in dirnames + if not exclude.is_excluded(os.path.join(dirpath, d)) + and not os.path.islink(os.path.join(dirpath, d)) + ] + for fn in filenames: + if len(out) >= max_files: + return + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + if exclude and exclude.is_excluded(p): + continue + if match is not None and not match.matches(p): + continue + if p in seen: + continue + seen.add(p) + out.append(_norm_abs(p)) + + for pat in patterns: + if len(out) >= max_files: + notes.append( + f"Include cap reached ({max_files}); some includes were not expanded." + ) + break + + matched_any = False + + if pat.kind == "prefix": + p = pat.value + if os.path.isfile(p) and not os.path.islink(p): + _maybe_add_file(p) + matched_any = True + elif os.path.isdir(p) and not os.path.islink(p): + before = len(out) + _walk_dir(p) + matched_any = len(out) > before + else: + # Still allow prefix patterns that don't exist now (e.g. remote different) + # by matching nothing rather than erroring. 
+ matched_any = False + + elif pat.kind == "glob": + # Use glob for expansion; also walk directories that match. + gpat = pat.value + hits = glob.glob(gpat, recursive=True) + for h in hits: + if len(out) >= max_files: + break + h = _norm_abs(h) + if exclude and exclude.is_excluded(h): + continue + if os.path.isdir(h) and not os.path.islink(h): + before = len(out) + _walk_dir(h) + if len(out) > before: + matched_any = True + elif os.path.isfile(h) and not os.path.islink(h): + _maybe_add_file(h) + matched_any = True + + else: # regex + rex = pat.value + prefix = _regex_literal_prefix(rex) + # Determine a walk root. If we can infer an absolute prefix, use its + # directory; otherwise fall back to '/'. + if prefix.startswith("/"): + root = os.path.dirname(prefix) or "/" + else: + root = "/" + before = len(out) + _walk_dir(root, match=pat) + matched_any = len(out) > before + + if not matched_any: + notes.append(f"Include pattern matched no files: {pat.raw!r}") + + return out, notes diff --git a/enroll/remote.py b/enroll/remote.py index 469248d..9618512 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import shlex import shutil import tarfile import tempfile @@ -97,6 +98,8 @@ def remote_harvest( remote_python: str = "python3", dangerous: bool = False, no_sudo: bool = False, + include_paths: Optional[list[str]] = None, + exclude_paths: Optional[list[str]] = None, ) -> Path: """Run enroll harvest on a remote host via SSH and pull the bundle locally. @@ -165,13 +168,25 @@ def remote_harvest( sftp.put(str(pyz), rapp) # Run remote harvest. 
- _cmd = f"{remote_python} {rapp} harvest --out {rbundle}" + argv: list[str] = [ + remote_python, + rapp, + "harvest", + "--out", + rbundle, + ] + if dangerous: + argv.append("--dangerous") + for p in include_paths or []: + argv.extend(["--include-path", str(p)]) + for p in exclude_paths or []: + argv.extend(["--exclude-path", str(p)]) + + _cmd = " ".join(shlex.quote(a) for a in argv) if not no_sudo: cmd = f"sudo {_cmd}" else: cmd = _cmd - if dangerous: - cmd += " --dangerous" rc, out, err = _ssh_run(ssh, cmd) if rc != 0: raise RuntimeError( diff --git a/tests/test_cli.py b/tests/test_cli.py index ca3bfa6..4477b24 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,9 +6,17 @@ import enroll.cli as cli def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path): called = {} - def fake_harvest(out: str, dangerous: bool = False): + def fake_harvest( + out: str, + dangerous: bool = False, + include_paths=None, + exclude_paths=None, + **_kwargs, + ): called["out"] = out called["dangerous"] = dangerous + called["include_paths"] = include_paths or [] + called["exclude_paths"] = exclude_paths or [] return str(tmp_path / "state.json") monkeypatch.setattr(cli, "harvest", fake_harvest) @@ -17,6 +25,8 @@ def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path): cli.main() assert called["out"] == str(tmp_path) assert called["dangerous"] is False + assert called["include_paths"] == [] + assert called["exclude_paths"] == [] captured = capsys.readouterr() assert str(tmp_path / "state.json") in captured.out @@ -55,8 +65,16 @@ def test_cli_manifest_subcommand_calls_manifest(monkeypatch, tmp_path): def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path): calls = [] - def fake_harvest(bundle_dir: str, dangerous: bool = False): - calls.append(("harvest", bundle_dir, dangerous)) + def fake_harvest( + bundle_dir: str, + dangerous: bool = False, + include_paths=None, + exclude_paths=None, + **_kwargs, + ): + 
calls.append( + ("harvest", bundle_dir, dangerous, include_paths or [], exclude_paths or []) + ) return str(tmp_path / "bundle" / "state.json") def fake_manifest(bundle_dir: str, out_dir: str, **kwargs): @@ -87,7 +105,7 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path) cli.main() assert calls == [ - ("harvest", str(tmp_path / "bundle"), False), + ("harvest", str(tmp_path / "bundle"), False, [], []), ("manifest", str(tmp_path / "bundle"), str(tmp_path / "ansible"), None, "auto"), ] @@ -95,9 +113,17 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path) def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path): called = {} - def fake_harvest(out: str, dangerous: bool = False): + def fake_harvest( + out: str, + dangerous: bool = False, + include_paths=None, + exclude_paths=None, + **_kwargs, + ): called["out"] = out called["dangerous"] = dangerous + called["include_paths"] = include_paths or [] + called["exclude_paths"] = exclude_paths or [] return str(tmp_path / "state.json") monkeypatch.setattr(cli, "harvest", fake_harvest) @@ -107,6 +133,8 @@ def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path): cli.main() assert called["dangerous"] is True + assert called["include_paths"] == [] + assert called["exclude_paths"] == [] def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( @@ -131,6 +159,9 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( remote_user, dangerous, no_sudo, + include_paths=None, + exclude_paths=None, + **_kwargs, ): called.update( { @@ -140,6 +171,8 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( "remote_user": remote_user, "dangerous": dangerous, "no_sudo": no_sudo, + "include_paths": include_paths or [], + "exclude_paths": exclude_paths or [], } ) return cache_dir / "state.json" @@ -169,6 +202,8 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( assert called["remote_user"] == 
"alice" assert called["dangerous"] is False assert called["no_sudo"] is False + assert called["include_paths"] == [] + assert called["exclude_paths"] == [] def test_cli_single_shot_remote_without_harvest_prints_state_path( From 9641637d4d27df0c1d524a20c63adae90ff424fa Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 20 Dec 2025 18:24:46 +1100 Subject: [PATCH 10/85] Add support for an enroll.ini config file to store arguments per subcommand, to avoid having to remember them all for repetitive executions. --- CHANGELOG.md | 2 + README.md | 56 ++++++++++ debian/changelog | 9 ++ enroll/cli.py | 264 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 330 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d8d6e4..90478e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` arguments. + * Add support for an enroll.ini config file to store arguments per subcommand, to avoid having to remember + them all for repetitive executions. # 0.1.2 diff --git a/README.md b/README.md index 84a6965..a5d2157 100644 --- a/README.md +++ b/README.md @@ -336,3 +336,59 @@ ansible-playbook -i "localhost," -c local /tmp/enroll-ansible/playbook.yml ```bash ansible-playbook /tmp/enroll-ansible/playbooks/"$(hostname -f)".yml ``` + +## Configuration file + +As can be seen above, there are a lot of powerful 'permutations' available to all four subcommands. + +Sometimes, it can be easier to store them in a config file so you don't have to remember them! + +Enroll supports reading an ini-style file of all the arguments for each subcommand. + +### Location of the config file + +The path the config file can be specified with `-c` or `--config` on the command-line. Otherwise, +Enroll will look for `./enroll.ini`, `./.enroll.ini` (in the current working directory), +``~/.config/enroll/enroll.ini` (or `$XDG_CONFIG_HOME/enroll/enroll.ini`). 
+ +You may also pass `--no-config` if you deliberately want to ignore the config file even if it existed. + +### Precedence + +Highest wins: + + * Explicit CLI flags + * INI config ([cmd], [enroll]) + * argparse defaults + +### Example config file + +Here is an example. + +Whenever an argument on the command-line has a 'hyphen' in it, just be sure to change it to an underscore in the ini file. + +```ini +[enroll] +# (future global flags may live here) + +[harvest] +dangerous = false +include_path = + /home/*/.bashrc + /home/*/.profile +exclude_path = /usr/local/bin/docker-*, /usr/local/bin/some-tool +# remote_host = yourserver.example.com +# remote_user = you +# remote_port = 2222 + +[manifest] +# you can set defaults here too, e.g. +no_jinjaturtle = true +sops = 00AE817C24A10C2540461A9C1D7CDE0234DB458D + +[single-shot] +# if you use single-shot, put its defaults here. +# It does not inherit those of the subsections above, so you +# may wish to repeat them here. +include_path = re:^/home/[^/]+/\.config/myapp/.*$ +``` diff --git a/debian/changelog b/debian/changelog index 0b16cfa..f6ba2f7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,12 @@ +enroll (0.1.3) unstable; urgency=medium + + * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` + arguments. + * Add support for an enroll.ini config file to store arguments per subcommand, to avoid having to remember + them all for repetitive executions. 
+ + -- Miguel Jacq Sat, 20 Dec 2025 18:24:00 +1100 + enroll (0.1.2) unstable; urgency=medium * Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or diff --git a/enroll/cli.py b/enroll/cli.py index f6efe11..e5f729d 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -1,7 +1,9 @@ from __future__ import annotations import argparse +import configparser import os +import sys import tarfile import tempfile from pathlib import Path @@ -15,6 +17,232 @@ from .remote import remote_harvest from .sopsutil import SopsError, encrypt_file_binary +def _discover_config_path(argv: list[str]) -> Optional[Path]: + """Return the config path to use, if any. + + Precedence: + 1) --no-config disables loading. + 2) --config PATH (or -c PATH) + 3) $ENROLL_CONFIG + 4) ./enroll.ini, ./.enroll.ini + 5) $XDG_CONFIG_HOME/enroll/enroll.ini (or ~/.config/enroll/enroll.ini) + + The config file is optional; if no file is found, returns None. + """ + + # Quick scan for explicit flags without needing to build the full parser. 
+ if "--no-config" in argv: + return None + + def _value_after(flag: str) -> Optional[str]: + try: + i = argv.index(flag) + except ValueError: + return None + if i + 1 >= len(argv): + return None + return argv[i + 1] + + p = _value_after("--config") or _value_after("-c") + if p: + return Path(p).expanduser() + + envp = os.environ.get("ENROLL_CONFIG") + if envp: + return Path(envp).expanduser() + + cwd = Path.cwd() + for name in ("enroll.ini", ".enroll.ini"): + cp = cwd / name + if cp.exists() and cp.is_file(): + return cp + + xdg = os.environ.get("XDG_CONFIG_HOME") + if xdg: + base = Path(xdg).expanduser() + else: + base = Path.home() / ".config" + cp = base / "enroll" / "enroll.ini" + if cp.exists() and cp.is_file(): + return cp + + return None + + +def _parse_bool(s: str) -> Optional[bool]: + v = str(s).strip().lower() + if v in {"1", "true", "yes", "y", "on"}: + return True + if v in {"0", "false", "no", "n", "off"}: + return False + return None + + +def _action_lookup(p: argparse.ArgumentParser) -> dict[str, argparse.Action]: + """Map config keys -> argparse actions for a parser. + + Accepts both dest names and long option names without leading dashes, + normalized with '-' -> '_'. + """ + + m: dict[str, argparse.Action] = {} + for a in p._actions: # noqa: SLF001 (argparse internal) + if not getattr(a, "dest", None): + continue + dest = str(a.dest).strip().lower() + if dest: + m[dest] = a + for opt in getattr(a, "option_strings", []) or []: + k = opt.lstrip("-").strip().lower() + if k: + m[k.replace("-", "_")] = a + m[k] = a + return m + + +def _choose_flag(a: argparse.Action) -> Optional[str]: + # Prefer a long flag if available (e.g. --dangerous over -d) + for s in getattr(a, "option_strings", []) or []: + if s.startswith("--"): + return s + for s in getattr(a, "option_strings", []) or []: + return s + return None + + +def _split_list_value(v: str) -> list[str]: + # Support comma-separated and/or multi-line lists. 
+ raw = str(v) + if "\n" in raw: + parts = [p.strip() for p in raw.splitlines()] + return [p for p in parts if p] + if "," in raw: + parts = [p.strip() for p in raw.split(",")] + return [p for p in parts if p] + raw = raw.strip() + return [raw] if raw else [] + + +def _section_to_argv( + p: argparse.ArgumentParser, cfg: configparser.ConfigParser, section: str +) -> list[str]: + """Translate an INI section into argv tokens for this parser.""" + if not cfg.has_section(section): + return [] + + lookup = _action_lookup(p) + out: list[str] = [] + + for k, v in cfg.items(section): + key = str(k).strip().lower().replace("-", "_") + # Avoid recursion / confusing self-configuration. + if key in {"config", "no_config"}: + continue + + a = lookup.get(key) + if not a: + # Unknown keys are ignored (but we try to be helpful). + print( + f"warning: config [{section}] contains unknown option '{k}' (ignored)", + file=sys.stderr, + ) + continue + + flag = _choose_flag(a) + if not flag: + continue + + # Boolean flags + if isinstance(a, argparse._StoreTrueAction): # noqa: SLF001 + b = _parse_bool(v) + if b is True: + out.append(flag) + continue + if isinstance(a, argparse._StoreFalseAction): # noqa: SLF001 + b = _parse_bool(v) + if b is False: + out.append(flag) + continue + + # Repeated options + if isinstance(a, argparse._AppendAction): # noqa: SLF001 + for item in _split_list_value(v): + out.extend([flag, item]) + continue + + # Count flags (rare, but easy to support) + if isinstance(a, argparse._CountAction): # noqa: SLF001 + b = _parse_bool(v) + if b is True: + out.append(flag) + else: + try: + n = int(str(v).strip()) + except ValueError: + n = 0 + out.extend([flag] * max(0, n)) + continue + + # Standard scalar options + sval = str(v).strip() + if sval: + out.extend([flag, sval]) + + return out + + +def _inject_config_argv( + argv: list[str], + *, + cfg_path: Optional[Path], + root_parser: argparse.ArgumentParser, + subparsers: dict[str, argparse.ArgumentParser], +) -> list[str]: 
+ """Return argv with config-derived tokens inserted. + + We insert: + - [enroll] options before the subcommand + - [] options immediately after the subcommand token + + CLI flags always win because they come later in argv. + """ + + if not cfg_path: + return argv + cfg_path = Path(cfg_path).expanduser() + if not (cfg_path.exists() and cfg_path.is_file()): + return argv + + cfg = configparser.ConfigParser() + try: + cfg.read(cfg_path, encoding="utf-8") + except (OSError, configparser.Error) as e: + raise SystemExit(f"error: failed to read config file {cfg_path}: {e}") + + global_tokens = _section_to_argv(root_parser, cfg, "enroll") + + # Find the subcommand token position. + cmd_pos: Optional[int] = None + cmd_name: Optional[str] = None + for i, tok in enumerate(argv): + if tok in subparsers: + cmd_pos = i + cmd_name = tok + break + if cmd_pos is None or cmd_name is None: + # No subcommand found (argparse will handle the error); only apply global. + return global_tokens + argv + + cmd_tokens = _section_to_argv(subparsers[cmd_name], cfg, cmd_name) + # Also accept section names with '_' in place of '-' (e.g. [single_shot]) + if "-" in cmd_name: + alt = cmd_name.replace("-", "_") + if alt != cmd_name: + cmd_tokens += _section_to_argv(subparsers[cmd_name], cfg, alt) + + return global_tokens + argv[: cmd_pos + 1] + cmd_tokens + argv[cmd_pos + 1 :] + + def _resolve_sops_out_file(out: Optional[str], *, hint: str) -> Path: """Resolve an output *file* path for --sops mode. @@ -95,6 +323,22 @@ def _add_remote_args(p: argparse.ArgumentParser) -> None: "--remote-host", help="SSH host to run harvesting on (if set, harvest runs remotely and is pulled locally).", ) + + +def _add_config_args(p: argparse.ArgumentParser) -> None: + p.add_argument( + "-c", + "--config", + help=( + "Path to an INI config file for default options. If omitted, enroll will look for " + "./enroll.ini, ./.enroll.ini, or ~/.config/enroll/enroll.ini (or $XDG_CONFIG_HOME/enroll/enroll.ini)." 
+ ), + ) + p.add_argument( + "--no-config", + action="store_true", + help="Do not load any INI config file (even if one would be auto-discovered).", + ) p.add_argument( "--remote-port", type=int, @@ -110,9 +354,11 @@ def _add_remote_args(p: argparse.ArgumentParser) -> None: def main() -> None: ap = argparse.ArgumentParser(prog="enroll") + _add_config_args(ap) sub = ap.add_subparsers(dest="cmd", required=True) h = sub.add_parser("harvest", help="Harvest service/package/config state") + _add_config_args(h) h.add_argument( "--out", help=( @@ -163,6 +409,7 @@ def main() -> None: _add_remote_args(h) m = sub.add_parser("manifest", help="Render Ansible roles from a harvest") + _add_config_args(m) m.add_argument( "--harvest", required=True, @@ -195,6 +442,7 @@ def main() -> None: s = sub.add_parser( "single-shot", help="Harvest state, then manifest Ansible code, in one shot" ) + _add_config_args(s) s.add_argument( "--harvest", help=( @@ -255,6 +503,7 @@ def main() -> None: _add_remote_args(s) d = sub.add_parser("diff", help="Compare two harvests and report differences") + _add_config_args(d) d.add_argument( "--old", required=True, @@ -338,7 +587,20 @@ def main() -> None: help="Environment variable containing SMTP password (optional).", ) - args = ap.parse_args() + argv = sys.argv[1:] + cfg_path = _discover_config_path(argv) + argv = _inject_config_argv( + argv, + cfg_path=cfg_path, + root_parser=ap, + subparsers={ + "harvest": h, + "manifest": m, + "single-shot": s, + "diff": d, + }, + ) + args = ap.parse_args(argv) remote_host: Optional[str] = getattr(args, "remote_host", None) From cf819f755a8ce200a2c6079f70a4ef14cc9efe06 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 20 Dec 2025 18:26:04 +1100 Subject: [PATCH 11/85] 0.1.3 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b5a07ab..541eded 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = 
"0.1.2" +version = "0.1.3" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" From 59239eb2d27d799628e1d1e890325ed2947e6b91 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 20 Dec 2025 18:38:05 +1100 Subject: [PATCH 12/85] Fix formatting in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a5d2157..00f9d98 100644 --- a/README.md +++ b/README.md @@ -349,7 +349,7 @@ Enroll supports reading an ini-style file of all the arguments for each subcomma The path the config file can be specified with `-c` or `--config` on the command-line. Otherwise, Enroll will look for `./enroll.ini`, `./.enroll.ini` (in the current working directory), -``~/.config/enroll/enroll.ini` (or `$XDG_CONFIG_HOME/enroll/enroll.ini`). +`~/.config/enroll/enroll.ini` (or `$XDG_CONFIG_HOME/enroll/enroll.ini`). You may also pass `--no-config` if you deliberately want to ignore the config file even if it existed. From 51196a0a2b1615b7c463aebf59d861080a2b0ff5 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 22 Dec 2025 17:28:10 +1100 Subject: [PATCH 13/85] Fix trivy exit code --- .forgejo/workflows/trivy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.forgejo/workflows/trivy.yml b/.forgejo/workflows/trivy.yml index fad2f6f..d5585f4 100644 --- a/.forgejo/workflows/trivy.yml +++ b/.forgejo/workflows/trivy.yml @@ -23,7 +23,7 @@ jobs: - name: Run trivy run: | - trivy fs --no-progress --ignore-unfixed --format table --disable-telemetry . + trivy fs --no-progress --ignore-unfixed --format table --disable-telemetry --skip-version-check --exit-code 1 . 
# Notify if any previous step in this job failed - name: Notify on failure From 8c478249d9b2f112ffbdc66e25274160bb7b37e9 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Tue, 23 Dec 2025 17:22:50 +1100 Subject: [PATCH 14/85] Add build-deb action workflow --- .forgejo/workflows/build-deb.yml | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 .forgejo/workflows/build-deb.yml diff --git a/.forgejo/workflows/build-deb.yml b/.forgejo/workflows/build-deb.yml new file mode 100644 index 0000000..28276df --- /dev/null +++ b/.forgejo/workflows/build-deb.yml @@ -0,0 +1,65 @@ +name: CI + +on: + push: + +jobs: + test: + runs-on: docker + + steps: + - name: Install system dependencies + run: | + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + build-essential \ + devscripts \ + debhelper \ + dh-python \ + pybuild-plugin-pyproject \ + python3-all \ + python3-poetry-core \ + python3-yaml \ + python3-paramiko \ + rsync \ + ca-certificates + + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Build deb + run: | + mkdir /out + + rsync -a --delete \ + --exclude '.git' \ + --exclude '.venv' \ + --exclude 'dist' \ + --exclude 'build' \ + --exclude '__pycache__' \ + --exclude '.pytest_cache' \ + --exclude '.mypy_cache' \ + ./ /out/ + + cd /out/ + export DEBEMAIL="mig@mig5.net" + export DEBFULLNAME="Miguel Jacq" + + dch --distribution "trixie" --local "~trixie" "CI build for trixie" + dpkg-buildpackage -us -uc -b + + # Notify if any previous step in this job failed + - name: Notify on failure + if: ${{ failure() }} + env: + WEBHOOK_URL: ${{ secrets.NODERED_WEBHOOK_URL }} + REPOSITORY: ${{ forgejo.repository }} + RUN_NUMBER: ${{ forgejo.run_number }} + SERVER_URL: ${{ forgejo.server_url }} + run: | + curl -X POST \ + -H "Content-Type: application/json" \ + -d 
"{\"repository\":\"$REPOSITORY\",\"run_number\":\"$RUN_NUMBER\",\"status\":\"failure\",\"url\":\"$SERVER_URL/$REPOSITORY/actions/runs/$RUN_NUMBER\"}" \ + "$WEBHOOK_URL" From 4d2250f974195c3f5dd300aacb2d43e7aa6f2d65 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 16:56:30 +1100 Subject: [PATCH 15/85] Add fedora rpm building --- Dockerfile.rpmbuild | 102 ++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +- poetry.lock | 2 +- pyproject.toml | 4 +- release.sh | 31 ++++++++++++++ rpm/enroll.spec | 47 ++++++++++++++++++++ 6 files changed, 185 insertions(+), 5 deletions(-) create mode 100644 Dockerfile.rpmbuild create mode 100644 rpm/enroll.spec diff --git a/Dockerfile.rpmbuild b/Dockerfile.rpmbuild new file mode 100644 index 0000000..c928cea --- /dev/null +++ b/Dockerfile.rpmbuild @@ -0,0 +1,102 @@ +# syntax=docker/dockerfile:1 +FROM fedora:42 + +RUN set -eux; \ + dnf -y update; \ + dnf -y install \ + rpm-build \ + rpmdevtools \ + redhat-rpm-config \ + gcc \ + make \ + findutils \ + tar \ + gzip \ + rsync \ + python3 \ + python3-devel \ + python3-setuptools \ + python3-wheel \ + pyproject-rpm-macros \ + python3-rpm-macros \ + python3-yaml \ + python3-paramiko \ + openssl-devel \ + python3-poetry-core ; \ + dnf -y clean all + +# Build runner script (copies repo, tars, runs rpmbuild) +RUN set -eux; cat > /usr/local/bin/build-rpm <<'EOF' +#!/usr/bin/env bash +set -euo pipefail + +SRC="${SRC:-/src}" +WORKROOT="${WORKROOT:-/work}" +OUT="${OUT:-/out}" +DEPS_DIR="${DEPS_DIR:-/deps}" + +# Install jinjaturtle from local rpm +# Filter out .src.rpm and debug* subpackages if present. 
+if [ -d "${DEPS_DIR}" ] && compgen -G "${DEPS_DIR}/*.rpm" > /dev/null; then + mapfile -t rpms < <(ls -1 "${DEPS_DIR}"/*.rpm | grep -vE '(\.src\.rpm$|-(debuginfo|debugsource)-)') + if [ "${#rpms[@]}" -gt 0 ]; then + echo "Installing dependency RPMs from ${DEPS_DIR}:" + printf ' - %s\n' "${rpms[@]}" + dnf -y install "${rpms[@]}" + dnf -y clean all + else + echo "NOTE: Only src/debug RPMs found in ${DEPS_DIR}; nothing installed." >&2 + fi +else + echo "NOTE: No RPMs found in ${DEPS_DIR}. If the build fails with missing python3dist(jinjaturtle)," >&2 + echo " mount your jinjaturtle RPM directory as -v :/deps" >&2 +fi + +mkdir -p "${WORKROOT}" "${OUT}" +WORK="${WORKROOT}/src" +rm -rf "${WORK}" +mkdir -p "${WORK}" + +rsync -a --delete \ + --exclude '.git' \ + --exclude '.venv' \ + --exclude 'dist' \ + --exclude 'build' \ + --exclude '__pycache__' \ + --exclude '.pytest_cache' \ + --exclude '.mypy_cache' \ + "${SRC}/" "${WORK}/" + +cd "${WORK}" + +# Determine version from pyproject.toml unless provided +if [ -n "${VERSION:-}" ]; then + ver="${VERSION}" +else + ver="$(grep -m1 '^version = ' pyproject.toml | sed -E 's/version = "([^"]+)".*/\1/')" +fi + +TOPDIR="${WORKROOT}/rpmbuild" +mkdir -p "${TOPDIR}"/{BUILD,BUILDROOT,RPMS,SOURCES,SPECS,SRPMS} + +tarball="${TOPDIR}/SOURCES/enroll-${ver}.tar.gz" +tar -czf "${tarball}" --transform "s#^#enroll/#" . 
+ +spec_src="rpm/enroll.spec" + +cp -v "${spec_src}" "${TOPDIR}/SPECS/enroll.spec" + +rpmbuild -ba "${TOPDIR}/SPECS/enroll.spec" \ + --define "_topdir ${TOPDIR}" \ + --define "upstream_version ${ver}" + +shopt -s nullglob +cp -v "${TOPDIR}"/RPMS/*/*.rpm "${OUT}/" || true +cp -v "${TOPDIR}"/SRPMS/*.src.rpm "${OUT}/" || true +echo "Artifacts copied to ${OUT}" +EOF + +RUN chmod +x /usr/local/bin/build-rpm + +WORKDIR /work +ENTRYPOINT ["/usr/local/bin/build-rpm"] diff --git a/README.md b/README.md index 00f9d98..5a0db91 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ **enroll** inspects a Linux machine (currently Debian-only) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. -It aims to be **optimistic and noninteractive**: - Detects packages that have been installed. - Detects Debian package ownership of `/etc` files using dpkg’s local database. - Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available). @@ -26,9 +25,10 @@ It aims to be **optimistic and noninteractive**: 1) **Harvest**: collect host facts + relevant files into a harvest bundle (`state.json` + harvested artifacts) 2) **Manifest**: turn that harvest into Ansible roles/playbooks (and optionally inventory) -Additionally: +Additionally, some other functionalities exist: - **Diff**: compare two harvests and report what changed (packages/services/users/files) since the previous snapshot. +- **Single-shot mode**: run both harvest and manifest at once. 
--- diff --git a/poetry.lock b/poetry.lock index 1f2948d..0a90711 100644 --- a/poetry.lock +++ b/poetry.lock @@ -923,4 +923,4 @@ zstd = ["backports-zstd (>=1.0.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "c3466a6595a9822763431a6dff0c7f835407a2591b92d5995592f8e6802c774a" +content-hash = "20623104a1a5f4c6d4aaa759f25b2591d5de345d1464e727eb4140a6ef9a5b6e" diff --git a/pyproject.toml b/pyproject.toml index 541eded..3079404 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,8 +10,8 @@ repository = "https://git.mig5.net/mig5/enroll" [tool.poetry.dependencies] python = "^3.10" -pyyaml = "^6.0.3" -paramiko = "^4.0.0" +pyyaml = "^6" +paramiko = ">=3.5" [tool.poetry.scripts] enroll = "enroll.cli:main" diff --git a/release.sh b/release.sh index fe99a52..fdbe771 100755 --- a/release.sh +++ b/release.sh @@ -42,3 +42,34 @@ for dist in ${DISTS[@]}; do debfile=$(ls -1 dist/${release}/*.deb) reprepro -b /home/user/git/repo includedeb "${release}" "${debfile}" done + +# RPM +sudo apt-get -y install createrepo-c rpm +docker build -f Dockerfile.rpmbuild -t enroll:f42 --progress=plain . +docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll:f42 +sudo chown -R "${USER}" "$PWD/dist" + +REPO_ROOT="${HOME}/git/repo_rpm" +RPM_REPO="${REPO_ROOT}/rpm/x86_64" +BUILD_OUTPUT="${HOME}/git/enroll/dist" +REMOTE="letessier.mig5.net:/opt/repo_rpm" +KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" + +echo "==> Updating RPM repo..." +mkdir -p "$RPM_REPO" + +for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do + rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" +done + +cp "${BUILD_OUTPUT}/rpm/"*.rpm "$RPM_REPO/" + +createrepo_c "$RPM_REPO" + +echo "==> Signing repomd.xml..." +qubes-gpg-client --local-user "$KEYID" --detach-sign --armor "$RPM_REPO/repodata/repomd.xml" > "$RPM_REPO/repodata/repomd.xml.asc" + +echo "==> Syncing repo to server..." +rsync -aHPvz --exclude=.git --delete "$REPO_ROOT/" "$REMOTE/" + +echo "Done!" 
diff --git a/rpm/enroll.spec b/rpm/enroll.spec new file mode 100644 index 0000000..403d6da --- /dev/null +++ b/rpm/enroll.spec @@ -0,0 +1,47 @@ +%global upstream_version 0.1.3 + +Name: enroll +Version: %{upstream_version} +Release: 1%{?dist}.enroll1 +Summary: Enroll a server's running state retrospectively into Ansible. + +License: GPL-3.0-or-later +URL: https://git.mig5.net/mig5/enroll +Source0: %{name}-%{version}.tar.gz + +BuildArch: noarch + +BuildRequires: pyproject-rpm-macros +BuildRequires: python3-devel +BuildRequires: python3-poetry-core + +Requires: python3-yaml +Requires: python3-paramiko + +# Make sure private repo dependency is pulled in by package name as well. +Recommends: jinjaturtle + +%description +Enroll a server's running state retrospectively into Ansible. + +%prep +%autosetup -n enroll + +%generate_buildrequires +%pyproject_buildrequires + +%build +%pyproject_wheel + +%install +%pyproject_install +%pyproject_save_files enroll + +%files -f %{pyproject_files} +%license LICENSE +%doc README.md CHANGELOG.md +%{_bindir}/enroll + +%changelog +* Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} +- Initial RPM packaging for Fedora 42 From 054a6192d170dcd1bf418263376287f711ff6dd6 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 19:02:22 +1100 Subject: [PATCH 16/85] Capture more singletons in /etc and avoid apt duplication --- CHANGELOG.md | 5 + debian/changelog | 7 + enroll/harvest.py | 367 +++++++++++++++++++++++++++++++++++++++++++--- enroll/ignore.py | 22 +++ enroll/systemd.py | 97 ++++++++++++ rpm/enroll.spec | 5 +- 6 files changed, 481 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90478e5..a51be14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.4 + + * Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers + * Avoid duplicate apt data in package-specific roles. 
+ # 0.1.3 * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` diff --git a/debian/changelog b/debian/changelog index f6ba2f7..17b8985 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.1.4) unstable; urgency=medium + + * Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers + * Avoid duplicate apt data in package-specific roles. + + -- Miguel Jacq Sat, 27 Dec 2025 19:00:00 +1100 + enroll (0.1.3) unstable; urgency=medium * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` diff --git a/enroll/harvest.py b/enroll/harvest.py index 48242d6..0543355 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -8,7 +8,13 @@ import shutil from dataclasses import dataclass, asdict from typing import Dict, List, Optional, Set -from .systemd import list_enabled_services, get_unit_info, UnitQueryError +from .systemd import ( + list_enabled_services, + list_enabled_timers, + get_unit_info, + get_timer_info, + UnitQueryError, +) from .debian import ( build_dpkg_etc_index, dpkg_owner, @@ -98,24 +104,24 @@ class ExtraPathsSnapshot: ALLOWED_UNOWNED_EXTS = { + ".cnf", ".conf", ".cfg", ".ini", - ".cnf", - ".yaml", - ".yml", ".json", - ".toml", + ".link", + ".mount", + ".netdev", + ".network", + ".path", ".rules", ".service", ".socket", - ".timer", ".target", - ".path", - ".mount", - ".network", - ".netdev", - ".link", + ".timer", + ".toml", + ".yaml", + ".yml", "", # allow extensionless (common in /etc/default and /etc/init.d) } @@ -123,23 +129,24 @@ MAX_UNOWNED_FILES_PER_ROLE = 400 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package. 
SHARED_ETC_TOPDIRS = { - "default", "apparmor.d", - "network", - "init.d", - "systemd", - "pam.d", - "ssh", - "ssl", - "sudoers.d", + "apt", "cron.d", "cron.daily", "cron.weekly", "cron.monthly", "cron.hourly", + "default", + "init.d", "logrotate.d", - "sysctl.d", "modprobe.d", + "network", + "pam.d", + "ssh", + "ssl", + "sudoers.d", + "sysctl.d", + "systemd", } @@ -256,6 +263,181 @@ def _topdirs_for_package(pkg: str, pkg_to_etc_paths: Dict[str, List[str]]) -> Se return topdirs +# ------------------------- +# System capture helpers +# ------------------------- + +_APT_SOURCE_GLOBS = [ + "/etc/apt/sources.list", + "/etc/apt/sources.list.d/*.list", + "/etc/apt/sources.list.d/*.sources", +] + +_APT_MISC_GLOBS = [ + "/etc/apt/apt.conf", + "/etc/apt/apt.conf.d/*", + "/etc/apt/preferences", + "/etc/apt/preferences.d/*", + "/etc/apt/auth.conf", + "/etc/apt/auth.conf.d/*", + "/etc/apt/trusted.gpg", + "/etc/apt/trusted.gpg.d/*", + "/etc/apt/keyrings/*", +] + +_SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ + # mounts + ("/etc/fstab", "system_mounts"), + ("/etc/crypttab", "system_mounts"), + # logrotate + ("/etc/logrotate.conf", "system_logrotate"), + ("/etc/logrotate.d/*", "system_logrotate"), + # sysctl / modules + ("/etc/sysctl.conf", "system_sysctl"), + ("/etc/sysctl.d/*", "system_sysctl"), + ("/etc/modprobe.d/*", "system_modprobe"), + ("/etc/modules", "system_modprobe"), + ("/etc/modules-load.d/*", "system_modprobe"), + # cron + ("/etc/crontab", "system_cron"), + ("/etc/cron.d/*", "system_cron"), + ("/etc/anacrontab", "system_cron"), + ("/etc/anacron/*", "system_cron"), + ("/var/spool/cron/crontabs/*", "system_cron"), + ("/var/spool/crontabs/*", "system_cron"), + # network + ("/etc/netplan/*", "system_network"), + ("/etc/systemd/network/*", "system_network"), + ("/etc/network/interfaces", "system_network"), + ("/etc/network/interfaces.d/*", "system_network"), + ("/etc/resolvconf.conf", "system_network"), + ("/etc/resolvconf/resolv.conf.d/*", "system_network"), + # 
firewall + ("/etc/nftables.conf", "system_firewall"), + ("/etc/nftables.d/*", "system_firewall"), + ("/etc/iptables/rules.v4", "system_firewall"), + ("/etc/iptables/rules.v6", "system_firewall"), + ("/etc/ufw/*", "system_firewall"), + ("/etc/default/ufw", "system_firewall"), + # other + ("/etc/rc.local", "system_rc"), +] + + +def _iter_matching_files(spec: str, *, cap: int = 2000) -> List[str]: + """Expand a glob spec and also walk directories to collect files.""" + out: List[str] = [] + for p in glob.glob(spec): + if len(out) >= cap: + break + if os.path.islink(p): + continue + if os.path.isfile(p): + out.append(p) + continue + if os.path.isdir(p): + for dirpath, _, filenames in os.walk(p): + for fn in filenames: + if len(out) >= cap: + break + fp = os.path.join(dirpath, fn) + if os.path.islink(fp) or not os.path.isfile(fp): + continue + out.append(fp) + if len(out) >= cap: + break + return out + + +def _parse_apt_signed_by(source_files: List[str]) -> Set[str]: + """Return absolute keyring paths referenced via signed-by / Signed-By.""" + out: Set[str] = set() + + # deb line: deb [signed-by=/usr/share/keyrings/foo.gpg] ... 
+ re_signed_by = re.compile(r"signed-by\s*=\s*([^\]\s]+)", re.IGNORECASE) + # deb822: Signed-By: /usr/share/keyrings/foo.gpg + re_signed_by_hdr = re.compile(r"^\s*Signed-By\s*:\s*(.+)$", re.IGNORECASE) + + for sf in source_files: + try: + with open(sf, "r", encoding="utf-8", errors="replace") as f: + for raw in f: + line = raw.strip() + if not line or line.startswith("#"): + continue + + m = re_signed_by_hdr.match(line) + if m: + val = m.group(1).strip() + if val.startswith("|"): + continue + toks = re.split(r"[\s,]+", val) + for t in toks: + if t.startswith("/"): + out.add(t) + continue + + # Try bracketed options first (common for .list files) + if "[" in line and "]" in line: + bracket = line.split("[", 1)[1].split("]", 1)[0] + for mm in re_signed_by.finditer(bracket): + val = mm.group(1).strip().strip("\"'") + for t in re.split(r"[\s,]+", val): + if t.startswith("/"): + out.add(t) + continue + + # Fallback: signed-by= in whole line + for mm in re_signed_by.finditer(line): + val = mm.group(1).strip().strip("\"'") + for t in re.split(r"[\s,]+", val): + if t.startswith("/"): + out.add(t) + except OSError: + continue + + return out + + +def _iter_system_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for essential system config/state.""" + out: List[tuple[str, str]] = [] + + # APT: capture sources and related config + apt_sources: List[str] = [] + for g in _APT_SOURCE_GLOBS: + apt_sources.extend(_iter_matching_files(g)) + for p in sorted(set(apt_sources)): + out.append((p, "system_apt_sources")) + + # APT: misc config files/dirs + for g in _APT_MISC_GLOBS: + for p in _iter_matching_files(g): + out.append((p, "system_apt_config")) + + # APT: referenced keyrings (may live outside /etc) + signed_by = _parse_apt_signed_by(sorted(set(apt_sources))) + for p in sorted(signed_by): + if os.path.islink(p) or not os.path.isfile(p): + continue + out.append((p, "system_apt_keyring")) + + # Other system config/state globs + for spec, reason in 
_SYSTEM_CAPTURE_GLOBS: + for p in _iter_matching_files(spec): + out.append((p, reason)) + + # De-dup while preserving first reason + seen: Set[str] = set() + uniq: List[tuple[str, str]] = [] + for p, r in out: + if p in seen: + continue + seen.add(p) + uniq.append((p, r)) + return uniq + + def harvest( bundle_dir: str, policy: Optional[IgnorePolicy] = None, @@ -467,6 +649,107 @@ def harvest( ) ) + # ------------------------- + # Enabled systemd timers + # + # Timers are typically related to a service/package, so we try to attribute + # timer unit overrides to their associated role rather than creating a + # standalone timer role. If we can't attribute a timer, it will fall back + # to etc_custom (if it's a custom /etc unit). + # ------------------------- + timer_extra_by_pkg: Dict[str, List[str]] = {} + try: + enabled_timers = list_enabled_timers() + except Exception: + enabled_timers = [] + + service_snap_by_unit: Dict[str, ServiceSnapshot] = { + s.unit: s for s in service_snaps + } + + for t in enabled_timers: + try: + ti = get_timer_info(t) + except Exception: # nosec + continue + + timer_paths: List[str] = [] + for pth in [ti.fragment_path, *ti.dropin_paths, *ti.env_files]: + if not pth: + continue + if not pth.startswith("/etc/"): + # Prefer capturing only custom/overridden units. 
+ continue + if os.path.islink(pth) or not os.path.isfile(pth): + continue + timer_paths.append(pth) + + if not timer_paths: + continue + + # Primary attribution: timer -> trigger service role + snap = None + if ti.trigger_unit: + snap = service_snap_by_unit.get(ti.trigger_unit) + + if snap is not None: + for path in timer_paths: + if path_filter.is_excluded(path): + snap.excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + continue + deny = policy.deny_reason(path) + if deny: + snap.excluded.append(ExcludedFile(path=path, reason=deny)) + continue + try: + owner, group, mode = stat_triplet(path) + except OSError: + snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, snap.role_name, path, src_rel) + except OSError: + snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + snap.managed_files.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason="related_timer", + ) + ) + continue + + # Secondary attribution: associate timer overrides with a package role + # (useful when a timer triggers a service that isn't enabled). 
+ pkgs: Set[str] = set() + if ti.fragment_path: + p = dpkg_owner(ti.fragment_path) + if p: + pkgs.add(p) + if ti.trigger_unit and ti.trigger_unit.endswith(".service"): + try: + ui = get_unit_info(ti.trigger_unit) + if ui.fragment_path: + p = dpkg_owner(ui.fragment_path) + if p: + pkgs.add(p) + for exe in ui.exec_paths: + p = dpkg_owner(exe) + if p: + pkgs.add(p) + except Exception: # nosec + pass + + for pkg in pkgs: + timer_extra_by_pkg.setdefault(pkg, []).extend(timer_paths) + # ------------------------- # Manually installed package roles # ------------------------- @@ -490,6 +773,9 @@ def harvest( managed: List[ManagedFile] = [] candidates: Dict[str, str] = {} + for tpath in timer_extra_by_pkg.get(pkg, []): + candidates.setdefault(tpath, "related_timer") + conff = conffiles_by_pkg.get(pkg, {}) md5sums = read_pkg_md5sums(pkg) @@ -677,7 +963,46 @@ def harvest( for mf in users_managed: already.add(mf.path) - # Walk /etc for unowned config-ish files + # Capture essential system config/state (even if package-owned). 
+ for path, reason in _iter_system_capture_paths(): + if path in already: + continue + + if path_filter.is_excluded(path): + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + + deny = policy.deny_reason(path) + if deny: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + try: + owner, group, mode = stat_triplet(path) + except OSError: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + except OSError: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + etc_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason, + ) + ) + already.add(path) + + # Walk /etc for remaining unowned config-ish files scanned = 0 for dirpath, _, filenames in os.walk("/etc"): for fn in filenames: diff --git a/enroll/ignore.py b/enroll/ignore.py index 93ba423..ab2cb96 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -30,6 +30,21 @@ DEFAULT_DENY_GLOBS = [ "/usr/local/etc/letsencrypt/*", ] + +# Allow a small set of binary config artifacts that are commonly required to +# reproduce system configuration (notably APT keyrings). These are still subject +# to size and readability limits, but are exempt from the "binary_like" denial. 
+DEFAULT_ALLOW_BINARY_GLOBS = [ + "/etc/apt/trusted.gpg", + "/etc/apt/trusted.gpg.d/*.gpg", + "/etc/apt/keyrings/*.gpg", + "/etc/apt/keyrings/*.pgp", + "/etc/apt/keyrings/*.asc", + "/usr/share/keyrings/*.gpg", + "/usr/share/keyrings/*.pgp", + "/usr/share/keyrings/*.asc", +] + SENSITIVE_CONTENT_PATTERNS = [ re.compile(rb"-----BEGIN (RSA |EC |OPENSSH |)PRIVATE KEY-----"), re.compile(rb"(?i)\bpassword\s*="), @@ -44,6 +59,7 @@ BLOCK_END = b"*/" @dataclass class IgnorePolicy: deny_globs: Optional[list[str]] = None + allow_binary_globs: Optional[list[str]] = None max_file_bytes: int = 256_000 sample_bytes: int = 64_000 # If True, be much less conservative about collecting potentially @@ -54,6 +70,8 @@ class IgnorePolicy: def __post_init__(self) -> None: if self.deny_globs is None: self.deny_globs = list(DEFAULT_DENY_GLOBS) + if self.allow_binary_globs is None: + self.allow_binary_globs = list(DEFAULT_ALLOW_BINARY_GLOBS) def iter_effective_lines(self, content: bytes): in_block = False @@ -105,6 +123,10 @@ class IgnorePolicy: return "unreadable" if b"\x00" in data: + for g in self.allow_binary_globs or []: + if fnmatch.fnmatch(path, g): + # Binary is acceptable for explicitly-allowed paths. 
+ return None return "binary_like" if not self.dangerous: diff --git a/enroll/systemd.py b/enroll/systemd.py index ae8ce8d..7081001 100644 --- a/enroll/systemd.py +++ b/enroll/systemd.py @@ -33,6 +33,19 @@ def _run(cmd: list[str]) -> str: return p.stdout +@dataclass +class TimerInfo: + name: str + fragment_path: Optional[str] + dropin_paths: List[str] + env_files: List[str] + trigger_unit: Optional[str] + active_state: Optional[str] + sub_state: Optional[str] + unit_file_state: Optional[str] + condition_result: Optional[str] + + def list_enabled_services() -> List[str]: out = _run( [ @@ -58,6 +71,31 @@ def list_enabled_services() -> List[str]: return sorted(set(units)) +def list_enabled_timers() -> List[str]: + out = _run( + [ + "systemctl", + "list-unit-files", + "--type=timer", + "--state=enabled", + "--no-legend", + ] + ) + units: List[str] = [] + for line in out.splitlines(): + parts = line.split() + if not parts: + continue + unit = parts[0].strip() + if not unit.endswith(".timer"): + continue + # Skip template units like "foo@.timer" + if unit.endswith("@.timer"): + continue + units.append(unit) + return sorted(set(units)) + + def get_unit_info(unit: str) -> UnitInfo: p = subprocess.run( [ @@ -117,3 +155,62 @@ def get_unit_info(unit: str) -> UnitInfo: unit_file_state=kv.get("UnitFileState") or None, condition_result=kv.get("ConditionResult") or None, ) + + +def get_timer_info(unit: str) -> TimerInfo: + p = subprocess.run( + [ + "systemctl", + "show", + unit, + "-p", + "FragmentPath", + "-p", + "DropInPaths", + "-p", + "EnvironmentFiles", + "-p", + "Unit", + "-p", + "ActiveState", + "-p", + "SubState", + "-p", + "UnitFileState", + "-p", + "ConditionResult", + ], + text=True, + capture_output=True, + ) # nosec + if p.returncode != 0: + raise RuntimeError(f"systemctl show failed for {unit}: {p.stderr}") + + kv: dict[str, str] = {} + for line in (p.stdout or "").splitlines(): + if "=" in line: + k, v = line.split("=", 1) + kv[k] = v.strip() + + fragment = 
kv.get("FragmentPath") or None + dropins = [pp for pp in (kv.get("DropInPaths", "") or "").split() if pp] + + env_files: List[str] = [] + for token in (kv.get("EnvironmentFiles", "") or "").split(): + token = token.lstrip("-") + if token: + env_files.append(token) + + trigger = kv.get("Unit") or None + + return TimerInfo( + name=unit, + fragment_path=fragment, + dropin_paths=dropins, + env_files=env_files, + trigger_unit=trigger, + active_state=kv.get("ActiveState") or None, + sub_state=kv.get("SubState") or None, + unit_file_state=kv.get("UnitFileState") or None, + condition_result=kv.get("ConditionResult") or None, + ) diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 403d6da..707dc10 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.3 +%global upstream_version 0.1.4 Name: enroll Version: %{upstream_version} @@ -44,4 +44,7 @@ Enroll a server's running state retrospectively into Ansible. %changelog * Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} +- Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers +- Avoid duplicate apt data in package-specific roles. 
+* Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} - Initial RPM packaging for Fedora 42 From 40aad9e798c4631c571608dbeeb1a2319440cdc0 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 19:04:00 +1100 Subject: [PATCH 17/85] 0.1.4 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3079404..f1f2420 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.3" +version = "0.1.4" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" From cae6246177581a0cc79e6aa3704298a164a154e3 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 19:14:01 +1100 Subject: [PATCH 18/85] Add Fedora install steps to README --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index 5a0db91..c6b8123 100644 --- a/README.md +++ b/README.md @@ -191,6 +191,25 @@ sudo apt update sudo apt install enroll ``` +### Fedora 42 + +```bash +sudo rpm --import https://mig5.net/static/mig5.asc + +sudo tee /etc/yum.repos.d/mig5.repo > /dev/null << 'EOF' +[mig5] +name=mig5 Repository +baseurl=https://rpm.mig5.net/rpm/$basearch +enabled=1 +gpgcheck=1 +repo_gpgcheck=1 +gpgkey=https://mig5.net/static/mig5.asc +EOF + +sudo dnf upgrade --refresh +sudo dnf install enroll +``` + ## AppImage Download it from my Releases page, then: From 303c1b0dd8b47fed40bb275845155a4c9daf4b38 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 09:30:21 +1100 Subject: [PATCH 19/85] Consolidate logrotate and cron files into their main service/package roles if they exist. 
Standardise on MAX_FILES_CAP in one place --- enroll/harvest.py | 233 ++++++++++++++++++++++++++++++++++++------- enroll/manifest.py | 30 ++---- enroll/pathfilter.py | 2 +- 3 files changed, 208 insertions(+), 57 deletions(-) diff --git a/enroll/harvest.py b/enroll/harvest.py index 0543355..d4cfacd 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -125,7 +125,9 @@ ALLOWED_UNOWNED_EXTS = { "", # allow extensionless (common in /etc/default and /etc/init.d) } -MAX_UNOWNED_FILES_PER_ROLE = 400 +MAX_FILES_CAP = 4000 + +MAX_UNOWNED_FILES_PER_ROLE = 500 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package. SHARED_ETC_TOPDIRS = { @@ -324,7 +326,7 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ] -def _iter_matching_files(spec: str, *, cap: int = 2000) -> List[str]: +def _iter_matching_files(spec: str, *, cap: int = MAX_FILES_CAP) -> List[str]: """Expand a glob spec and also walk directories to collect files.""" out: List[str] = [] for p in glob.glob(spec): @@ -963,43 +965,141 @@ def harvest( for mf in users_managed: already.add(mf.path) + # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. + svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} + pkg_by_role: Dict[str, PackageSnapshot] = {p.role_name: p for p in pkg_snaps} + + def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]: + """If `path` is a shared snippet, return (role_name, reason) to attach to.""" + base = os.path.basename(path) + + # Try full filename and stem (before first dot). + candidates: List[str] = [base] + if "." 
in base: + candidates.append(base.split(".", 1)[0]) + + seen: Set[str] = set() + uniq: List[str] = [] + for c in candidates: + if c and c not in seen: + seen.add(c) + uniq.append(c) + + if path.startswith("/etc/logrotate.d/"): + for c in uniq: + rn = _safe_name(c) + if rn in svc_by_role or rn in pkg_by_role: + return (rn, "logrotate_snippet") + return None + + if path.startswith("/etc/cron.d/"): + for c in uniq: + rn = _safe_name(c) + if rn in svc_by_role or rn in pkg_by_role: + return (rn, "cron_snippet") + return None + + return None + # Capture essential system config/state (even if package-owned). for path, reason in _iter_system_capture_paths(): if path in already: continue + target = _target_role_for_shared_snippet(path) + if path_filter.is_excluded(path): - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + already.add(path) continue deny = policy.deny_reason(path) if deny: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + already.add(path) continue try: owner, group, mode = stat_triplet(path) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, 
reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue src_rel = path.lstrip("/") + role_for_copy = etc_role_name + reason_for_role = reason + if target: + role_for_copy, reason_for_role = target + try: - _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue - etc_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + mf = ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason_for_role, ) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].managed_files.append(mf) + elif rn in pkg_by_role: + pkg_by_role[rn].managed_files.append(mf) + else: + etc_managed.append(mf) + already.add(path) # Walk /etc for remaining unowned config-ish files @@ -1016,45 +1116,106 @@ def harvest( if not _is_confish(path): continue + target = _target_role_for_shared_snippet(path) + if path_filter.is_excluded(path): - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + already.add(path) continue deny = policy.deny_reason(path) if 
deny: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + already.add(path) continue try: owner, group, mode = stat_triplet(path) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue src_rel = path.lstrip("/") + role_for_copy = etc_role_name + reason_for_role = "custom_unowned" + if target: + role_for_copy, reason_for_role = target + try: - _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue - etc_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason="custom_unowned", - ) + mf = ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason_for_role, ) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].managed_files.append(mf) + elif rn in pkg_by_role: + 
pkg_by_role[rn].managed_files.append(mf) + else: + etc_managed.append(mf) scanned += 1 - if scanned >= 2000: + if scanned >= MAX_FILES_CAP: etc_notes.append( - "Reached file cap (2000) while scanning /etc for unowned files." + f"Reached file cap ({MAX_FILES_CAP}) while scanning /etc for unowned files." ) break - if scanned >= 2000: + if scanned >= MAX_FILES_CAP: break etc_custom_snapshot = EtcCustomSnapshot( @@ -1146,7 +1307,7 @@ def harvest( _scan_usr_local_tree( "/usr/local/etc", require_executable=False, - cap=2000, + cap=MAX_FILES_CAP, reason="usr_local_etc_custom", ) @@ -1154,7 +1315,7 @@ def harvest( _scan_usr_local_tree( "/usr/local/bin", require_executable=True, - cap=2000, + cap=MAX_FILES_CAP, reason="usr_local_bin_script", ) @@ -1188,7 +1349,7 @@ def harvest( files, inc_notes = expand_includes( path_filter.iter_include_patterns(), exclude=path_filter, - max_files=4000, + max_files=MAX_FILES_CAP, ) included_files = files extra_notes.extend(inc_notes) diff --git a/enroll/manifest.py b/enroll/manifest.py index 2f28eab..d5ebff7 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -138,7 +138,6 @@ def _copy_artifacts( # If a file was successfully templatised by JinjaTurtle, do NOT # also materialise the raw copy in the destination files dir. - # (This keeps the output minimal and avoids redundant "raw" files.) 
if exclude_rels and rel in exclude_rels: try: if os.path.isfile(dst): @@ -165,7 +164,7 @@ def _write_role_scaffold(role_dir: str) -> None: def _write_playbook_all(path: str, roles: List[str]) -> None: pb_lines = [ "---", - "- name: Apply all roles on host", + "- name: Apply all roles on all hosts", " hosts: all", " become: true", " roles:", @@ -179,7 +178,7 @@ def _write_playbook_all(path: str, roles: List[str]) -> None: def _write_playbook_host(path: str, fqdn: str, roles: List[str]) -> None: pb_lines = [ "---", - f"- name: Apply enroll roles on {fqdn}", + f"- name: Apply all roles on {fqdn}", f" hosts: {fqdn}", " become: true", " roles:", @@ -390,9 +389,9 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - return f"""# Generated by enroll (data-driven tasks) + return f"""# Generated by enroll -- name: Deploy systemd unit files (templates) +- name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -406,7 +405,7 @@ def _render_generic_files_tasks( | list }}}} notify: "{{{{ item.notify | default([]) }}}}" -- name: Deploy systemd unit files (copies) +- name: Deploy any systemd unit files (raw files) vars: _enroll_ff: files: @@ -433,7 +432,7 @@ def _render_generic_files_tasks( | list | length) > 0 -- name: Deploy other managed files (templates) +- name: Deploy any other managed files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -447,7 +446,7 @@ def _render_generic_files_tasks( | list }}}} notify: "{{{{ item.notify | default([]) }}}}" -- name: Deploy other managed files (copies) +- name: Deploy any other managed files (raw files) vars: _enroll_ff: files: @@ -668,11 +667,6 @@ def _manifest_from_bundle_dir( manifested_service_roles: List[str] = [] manifested_pkg_roles: List[str] = [] - # In site_mode, raw harvested 
files are stored under host-specific inventory - # to avoid cross-host clobber while still sharing a role definition. - - # ------------------------- - # ------------------------- # Users role (non-system users) # ------------------------- @@ -793,7 +787,7 @@ def _manifest_from_bundle_dir( # tasks (data-driven) users_tasks = """--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Ensure groups exist ansible.builtin.group: @@ -893,8 +887,6 @@ Generated non-system user accounts and SSH public material. manifested_users_roles.append(role) - # ------------------------- - # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -1212,8 +1204,6 @@ User-requested extra file harvesting. manifested_usr_local_custom_roles.append(role) - # ------------------------- - # ------------------------- # Service roles # ------------------------- @@ -1315,7 +1305,7 @@ User-requested extra file harvesting. task_parts: List[str] = [] task_parts.append( f"""--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Install packages for {role} ansible.builtin.apt: @@ -1474,7 +1464,7 @@ Generated from `{unit}`. task_parts: List[str] = [] task_parts.append( f"""--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Install packages for {role} ansible.builtin.apt: diff --git a/enroll/pathfilter.py b/enroll/pathfilter.py index 9df4afa..6541ca9 100644 --- a/enroll/pathfilter.py +++ b/enroll/pathfilter.py @@ -174,7 +174,7 @@ def expand_includes( patterns: Sequence[CompiledPathPattern], *, exclude: Optional[PathFilter] = None, - max_files: int = 4000, + max_files: int, ) -> Tuple[List[str], List[str]]: """Expand include patterns into concrete file paths. 
From 8c6b51be3eb2ea949861937eddcffed74a439873 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 09:39:14 +1100 Subject: [PATCH 20/85] Manage apt stuff in its own role, not in etc_custom --- enroll/diff.py | 6 ++ enroll/harvest.py | 120 ++++++++++++++++++++++++++++++---- enroll/manifest.py | 157 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 270 insertions(+), 13 deletions(-) diff --git a/enroll/diff.py b/enroll/diff.py index a2b7d91..0110d17 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -184,6 +184,12 @@ def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, for mf in u.get("managed_files", []) or []: yield str(u_role), mf + # apt_config + ac = state.get("apt_config") or {} + ac_role = ac.get("role_name") or "apt_config" + for mf in ac.get("managed_files", []) or []: + yield str(ac_role), mf + # etc_custom ec = state.get("etc_custom") or {} ec_role = ec.get("role_name") or "etc_custom" diff --git a/enroll/harvest.py b/enroll/harvest.py index d4cfacd..c1a1986 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -77,6 +77,14 @@ class UsersSnapshot: notes: List[str] +@dataclass +class AptConfigSnapshot: + role_name: str + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + @dataclass class EtcCustomSnapshot: role_name: str @@ -126,7 +134,6 @@ ALLOWED_UNOWNED_EXTS = { } MAX_FILES_CAP = 4000 - MAX_UNOWNED_FILES_PER_ROLE = 500 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package. @@ -401,30 +408,61 @@ def _parse_apt_signed_by(source_files: List[str]) -> Set[str]: return out -def _iter_system_capture_paths() -> List[tuple[str, str]]: - """Return (path, reason) pairs for essential system config/state.""" - out: List[tuple[str, str]] = [] +def _iter_apt_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for APT configuration. 
- # APT: capture sources and related config + This captures the full /etc/apt tree (subject to IgnorePolicy at copy time), + plus any keyrings referenced via signed-by/Signed-By which may live outside + /etc (e.g. /usr/share/keyrings). + """ + reasons: Dict[str, str] = {} + + # Capture all regular files under /etc/apt (no symlinks). + if os.path.isdir("/etc/apt"): + for dirpath, _, filenames in os.walk("/etc/apt"): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, "apt_config") + + # Identify source files explicitly for nicer reasons and keyring discovery. apt_sources: List[str] = [] for g in _APT_SOURCE_GLOBS: apt_sources.extend(_iter_matching_files(g)) for p in sorted(set(apt_sources)): - out.append((p, "system_apt_sources")) + reasons[p] = "apt_source" - # APT: misc config files/dirs - for g in _APT_MISC_GLOBS: + # Keyrings in standard locations. + for g in ( + "/etc/apt/trusted.gpg", + "/etc/apt/trusted.gpg.d/*", + "/etc/apt/keyrings/*", + ): for p in _iter_matching_files(g): - out.append((p, "system_apt_config")) + reasons[p] = "apt_keyring" - # APT: referenced keyrings (may live outside /etc) + # Keyrings referenced by sources (may live outside /etc/apt). signed_by = _parse_apt_signed_by(sorted(set(apt_sources))) for p in sorted(signed_by): if os.path.islink(p) or not os.path.isfile(p): continue - out.append((p, "system_apt_keyring")) + if p.startswith("/etc/apt/"): + reasons[p] = "apt_keyring" + else: + reasons[p] = "apt_signed_by_keyring" + + # De-dup with stable ordering. 
+ uniq: List[tuple[str, str]] = [] + for p in sorted(reasons.keys()): + uniq.append((p, reasons[p])) + return uniq + + +def _iter_system_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for essential system config/state (non-APT).""" + out: List[tuple[str, str]] = [] - # Other system config/state globs for spec, reason in _SYSTEM_CAPTURE_GLOBS: for p in _iter_matching_files(spec): out.append((p, reason)) @@ -544,6 +582,8 @@ def harvest( for path in pkg_to_etc_paths.get(pkg, []): if not os.path.isfile(path) or os.path.islink(path): continue + if path.startswith("/etc/apt/"): + continue if path in conff: # Only capture conffiles when they differ from the package default. try: @@ -784,6 +824,8 @@ def harvest( for path in pkg_to_etc_paths.get(pkg, []): if not os.path.isfile(path) or os.path.islink(path): continue + if path.startswith("/etc/apt/"): + continue if path in conff: try: current = file_md5(path) @@ -946,6 +988,55 @@ def harvest( notes=users_notes, ) + # ------------------------- + # apt_config role (APT configuration and keyrings) + # ------------------------- + apt_notes: List[str] = [] + apt_excluded: List[ExcludedFile] = [] + apt_managed: List[ManagedFile] = [] + apt_role_name = "apt_config" + + for path, reason in _iter_apt_capture_paths(): + if path_filter.is_excluded(path): + apt_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + + deny = policy.deny_reason(path) + if deny: + apt_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + try: + owner, group, mode = stat_triplet(path) + except OSError: + apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, apt_role_name, path, src_rel) + except OSError: + apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + apt_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + 
reason=reason, + ) + ) + + apt_config_snapshot = AptConfigSnapshot( + role_name=apt_role_name, + managed_files=apt_managed, + excluded=apt_excluded, + notes=apt_notes, + ) + # ------------------------- # etc_custom role (unowned /etc files not already attributed elsewhere) # ------------------------- @@ -964,6 +1055,8 @@ def harvest( already.add(mf.path) for mf in users_managed: already.add(mf.path) + for mf in apt_managed: + already.add(mf.path) # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} @@ -1107,6 +1200,8 @@ def harvest( for dirpath, _, filenames in os.walk("/etc"): for fn in filenames: path = os.path.join(dirpath, fn) + if path.startswith("/etc/apt/"): + continue if path in already: continue if path in owned_etc: @@ -1408,6 +1503,7 @@ def harvest( "manual_packages": manual_pkgs, "manual_packages_skipped": manual_pkgs_skipped, "package_roles": [asdict(p) for p in pkg_snaps], + "apt_config": asdict(apt_config_snapshot), "etc_custom": asdict(etc_custom_snapshot), "usr_local_custom": asdict(usr_local_custom_snapshot), "extra_paths": asdict(extra_paths_snapshot), diff --git a/enroll/manifest.py b/enroll/manifest.py index d5ebff7..dbc2353 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -2,6 +2,7 @@ from __future__ import annotations import json import os +import re import shutil import stat import tarfile @@ -627,6 +628,7 @@ def _manifest_from_bundle_dir( services: List[Dict[str, Any]] = state.get("services", []) package_roles: List[Dict[str, Any]] = state.get("package_roles", []) users_snapshot: Dict[str, Any] = state.get("users", {}) + apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) @@ -661,6 +663,7 @@ def 
_manifest_from_bundle_dir( _ensure_ansible_cfg(os.path.join(out_dir, "ansible.cfg")) manifested_users_roles: List[str] = [] + manifested_apt_config_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] manifested_usr_local_custom_roles: List[str] = [] manifested_extra_paths_roles: List[str] = [] @@ -887,6 +890,157 @@ Generated non-system user accounts and SSH public material. manifested_users_roles.append(role) + # ------------------------- + # apt_config role (APT sources, pinning, and keyrings) + # ------------------------- + if apt_config_snapshot and apt_config_snapshot.get("managed_files"): + role = apt_config_snapshot.get("role_name", "apt_config") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = apt_config_snapshot.get("managed_files", []) + excluded = apt_config_snapshot.get("excluded", []) + notes = apt_config_snapshot.get("notes", []) + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + # Copy only the non-templated artifacts (templates live in the role). 
+ if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = """---\n""" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\ndependencies: []\n") + + # README: summarise repos and keyrings + source_paths: List[str] = [] + keyring_paths: List[str] = [] + repo_hosts: Set[str] = set() + + url_re = re.compile(r"(?:https?|ftp)://([^/\s]+)", re.IGNORECASE) + + for mf in managed_files: + p = str(mf.get("path") or "") + src_rel = str(mf.get("src_rel") or "") + if not p or not src_rel: + continue + + if p == "/etc/apt/sources.list" or p.startswith("/etc/apt/sources.list.d/"): + source_paths.append(p) + art_path = os.path.join(bundle_dir, "artifacts", role, src_rel) + try: + with open(art_path, "r", encoding="utf-8", errors="replace") as sf: + for line in sf: + line = line.strip() + if not line or line.startswith("#"): + continue + for m in url_re.finditer(line): + repo_hosts.add(m.group(1)) + except OSError: + pass # nosec + + if ( + p.startswith("/etc/apt/trusted.gpg") + or p.startswith("/etc/apt/keyrings/") + or 
p.startswith("/usr/share/keyrings/") + ): + keyring_paths.append(p) + + source_paths = sorted(set(source_paths)) + keyring_paths = sorted(set(keyring_paths)) + repos = sorted(repo_hosts) + + readme = ( + """# apt_config + +APT configuration harvested from the system (sources, pinning, and keyrings). + +## Repository hosts +""" + + ("\n".join([f"- {h}" for h in repos]) or "- (none)") + + """\n +## Source files +""" + + ("\n".join([f"- {p}" for p in source_paths]) or "- (none)") + + """\n +## Keyrings +""" + + ("\n".join([f"- {p}" for p in keyring_paths]) or "- (none)") + + """\n +## Managed files +""" + + ( + "\n".join( + [f"- {mf.get('path')} ({mf.get('reason')})" for mf in managed_files] + ) + or "- (none)" + ) + + """\n +## Excluded +""" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + """\n +## Notes +""" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + """\n""" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_apt_config_roles.append(role) + # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -1512,7 +1666,8 @@ Generated for package `{pkg}`. 
manifested_pkg_roles.append(role) all_roles = ( - manifested_pkg_roles + manifested_apt_config_roles + + manifested_pkg_roles + manifested_service_roles + manifested_etc_custom_roles + manifested_usr_local_custom_roles From 3fc5aec5fc53090ebfd0e315d9bfdd1442320e98 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 09:56:52 +1100 Subject: [PATCH 21/85] 0.1.5 --- CHANGELOG.md | 6 ++++++ debian/changelog | 8 ++++++++ pyproject.toml | 2 +- rpm/enroll.spec | 6 +++++- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a51be14..79e45cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# 0.1.5 + + * Consolidate logrotate and cron files into their main service/package roles if they exist. + * Standardise on MAX_FILES_CAP in one place + * Manage apt stuff in its own role, not in etc_custom + # 0.1.4 * Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers diff --git a/debian/changelog b/debian/changelog index 17b8985..5f3be58 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +enroll (0.1.5) unstable; urgency=medium + + * Consolidate logrotate and cron files into their main service/package roles if they exist. + * Standardise on MAX_FILES_CAP in one place + * Manage apt stuff in its own role, not in etc_custom + + -- Miguel Jacq Sun, 28 Dec 2025 10:00:00 +1100 + enroll (0.1.4) unstable; urgency=medium * Attempt to capture more stuff from /etc that might not be attributable to a specific package. 
This includes common singletons and systemd timers diff --git a/pyproject.toml b/pyproject.toml index f1f2420..3aa01d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.4" +version = "0.1.5" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 707dc10..ed0a3c9 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.4 +%global upstream_version 0.1.5 Name: enroll Version: %{upstream_version} @@ -43,6 +43,10 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} +- Consolidate logrotate and cron files into their main service/package roles if they exist. +- Standardise on MAX_FILES_CAP in one place +- Manage apt stuff in its own role, not in etc_custom * Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} - Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers - Avoid duplicate apt data in package-specific roles. 
From 921801caa632c894ac4228efb390061b64fd668b Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 15:32:40 +1100 Subject: [PATCH 22/85] 0.1.6 --- CHANGELOG.md | 5 + debian/changelog | 7 + enroll/cli.py | 2 +- enroll/debian.py | 4 +- enroll/harvest.py | 718 ++++++++++++++++---------------------- enroll/pathfilter.py | 2 +- pyproject.toml | 2 +- rpm/enroll.spec | 5 +- tests/test___main__.py | 18 + tests/test_accounts.py | 143 ++++++++ tests/test_debian.py | 154 ++++++++ tests/test_diff_bundle.py | 89 +++++ tests/test_pathfilter.py | 80 +++++ tests/test_remote.py | 175 ++++++++++ tests/test_systemd.py | 121 +++++++ 15 files changed, 1102 insertions(+), 423 deletions(-) create mode 100644 tests/test___main__.py create mode 100644 tests/test_accounts.py create mode 100644 tests/test_debian.py create mode 100644 tests/test_diff_bundle.py create mode 100644 tests/test_pathfilter.py create mode 100644 tests/test_remote.py create mode 100644 tests/test_systemd.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 79e45cd..2a4c39d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.6 + + * DRY up some code logic + * More test coverage + # 0.1.5 * Consolidate logrotate and cron files into their main service/package roles if they exist. diff --git a/debian/changelog b/debian/changelog index 5f3be58..a15c38a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.1.6) unstable; urgency=medium + + * DRY up some code logic + * More test coverage + + -- Miguel Jacq Sun, 28 Dec 2025 15:30:00 +1100 + enroll (0.1.5) unstable; urgency=medium * Consolidate logrotate and cron files into their main service/package roles if they exist. 
diff --git a/enroll/cli.py b/enroll/cli.py index e5f729d..ae9aba0 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -482,7 +482,7 @@ def main() -> None: metavar="GPG_FINGERPRINT", help=( "Encrypt the harvest as a SOPS-encrypted tarball, and bundle+encrypt the manifest output in --out " - "(same behavior as `harvest --sops` and `manifest --sops`)." + "(same behaviour as `harvest --sops` and `manifest --sops`)." ), ) s.add_argument( diff --git a/enroll/debian.py b/enroll/debian.py index 58569e5..0ddc1f3 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -154,7 +154,9 @@ def parse_status_conffiles( if ":" in line: k, v = line.split(":", 1) key = k - cur[key] = v.lstrip() + # Preserve leading spaces in continuation lines, but strip + # the trailing newline from the initial key line value. + cur[key] = v.lstrip().rstrip("\n") if cur: flush() diff --git a/enroll/harvest.py b/enroll/harvest.py index c1a1986..56e5aed 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -112,9 +112,9 @@ class ExtraPathsSnapshot: ALLOWED_UNOWNED_EXTS = { + ".cfg", ".cnf", ".conf", - ".cfg", ".ini", ".json", ".link", @@ -136,7 +136,9 @@ ALLOWED_UNOWNED_EXTS = { MAX_FILES_CAP = 4000 MAX_UNOWNED_FILES_PER_ROLE = 500 -# Directories that are shared across many packages; never attribute unowned files in these trees to a single package. +# Directories that are shared across many packages. +# Never attribute all unowned files in these trees +# to one single package. SHARED_ETC_TOPDIRS = { "apparmor.d", "apt", @@ -195,6 +197,82 @@ def _copy_into_bundle( shutil.copy2(abs_path, dst) +def _capture_file( + *, + bundle_dir: str, + role_name: str, + abs_path: str, + reason: str, + policy: IgnorePolicy, + path_filter: PathFilter, + managed_out: List[ManagedFile], + excluded_out: List[ExcludedFile], + seen_role: Optional[Set[str]] = None, + seen_global: Optional[Set[str]] = None, + metadata: Optional[tuple[str, str, str]] = None, +) -> bool: + """Try to capture a single file into the bundle. 
+ + Returns True if the file was copied (managed), False otherwise. + + * seen_role: de-dupe within a role (prevents duplicate tasks/records) + * seen_global: de-dupe across roles/stages (prevents multiple roles copying same path) + * metadata: optional (owner, group, mode) tuple to avoid re-statting + """ + + if seen_global is not None and abs_path in seen_global: + return False + if seen_role is not None and abs_path in seen_role: + return False + + def _mark_seen() -> None: + if seen_role is not None: + seen_role.add(abs_path) + if seen_global is not None: + seen_global.add(abs_path) + + if path_filter.is_excluded(abs_path): + excluded_out.append(ExcludedFile(path=abs_path, reason="user_excluded")) + _mark_seen() + return False + + deny = policy.deny_reason(abs_path) + if deny: + excluded_out.append(ExcludedFile(path=abs_path, reason=deny)) + _mark_seen() + return False + + try: + owner, group, mode = ( + metadata if metadata is not None else stat_triplet(abs_path) + ) + except OSError: + excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) + _mark_seen() + return False + + src_rel = abs_path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, role_name, abs_path, src_rel) + except OSError: + excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) + _mark_seen() + return False + + managed_out.append( + ManagedFile( + path=abs_path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason, + ) + ) + _mark_seen() + return True + + def _is_confish(path: str) -> bool: base = os.path.basename(path) _, ext = os.path.splitext(base) @@ -227,7 +305,6 @@ def _maybe_add_specific_paths(hints: Set[str]) -> List[str]: f"/etc/default/{h}", f"/etc/init.d/{h}", f"/etc/sysctl.d/{h}.conf", - f"/etc/logrotate.d/{h}", ] ) return paths @@ -492,7 +569,7 @@ def harvest( policy = IgnorePolicy(dangerous=dangerous) elif dangerous: # If callers explicitly provided a policy but also requested - # dangerous behavior, honour the CLI intent. 
+ # dangerous behaviour, honour the CLI intent. policy.dangerous = True os.makedirs(bundle_dir, exist_ok=True) @@ -513,12 +590,21 @@ def harvest( # Service roles # ------------------------- service_snaps: List[ServiceSnapshot] = [] + # Track alias strings (service names, package names, stems) that should map + # back to the service role for shared snippet attribution (cron.d/logrotate.d). + service_role_aliases: Dict[str, Set[str]] = {} + # De-dupe per-role captures (avoids duplicate tasks in manifest generation). + seen_by_role: Dict[str, Set[str]] = {} for unit in list_enabled_services(): role = _role_name_from_unit(unit) try: ui = get_unit_info(unit) except UnitQueryError as e: + # Even when we can't query the unit, keep a minimal alias mapping so + # shared snippets can still be attributed to this role by name. + service_role_aliases.setdefault(role, _hint_names(unit, set()) | {role}) + seen_by_role.setdefault(role, set()) service_snaps.append( ServiceSnapshot( unit=unit, @@ -567,6 +653,10 @@ def harvest( hints = _hint_names(unit, pkgs) _add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs) + # Keep a stable set of aliases for this service role. Include current + # packages as well, so that package-named snippets (e.g. cron.d or + # logrotate.d entries) can still be attributed back to this service. + service_role_aliases[role] = set(hints) | set(pkgs) | {role} for sp in _maybe_add_specific_paths(hints): if not os.path.exists(sp): @@ -610,7 +700,7 @@ def harvest( # key material under service directories (e.g. /etc/openvpn/*.crt). # # To avoid exploding output for shared trees (e.g. /etc/systemd), keep - # the older "config-ish only" behavior for known shared topdirs. + # the older "config-ish only" behaviour for known shared topdirs. any_roots: List[str] = [] confish_roots: List[str] = [] for h in hints: @@ -646,34 +736,20 @@ def harvest( "No packages or /etc candidates detected (unexpected for enabled service)." ) + # De-dupe within this role while capturing. 
This also avoids emitting + # duplicate Ansible tasks for the same destination path. + role_seen = seen_by_role.setdefault(role, set()) for path, reason in sorted(candidates.items()): - if path_filter.is_excluded(path): - excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - deny = policy.deny_reason(path) - if deny: - excluded.append(ExcludedFile(path=path, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(path) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, role, path, src_rel) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=role, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=managed, + excluded_out=excluded, + seen_role=role_seen, ) service_snaps.append( @@ -735,36 +811,18 @@ def harvest( snap = service_snap_by_unit.get(ti.trigger_unit) if snap is not None: + role_seen = seen_by_role.setdefault(snap.role_name, set()) for path in timer_paths: - if path_filter.is_excluded(path): - snap.excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - continue - deny = policy.deny_reason(path) - if deny: - snap.excluded.append(ExcludedFile(path=path, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(path) - except OSError: - snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, snap.role_name, path, src_rel) - except OSError: - snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - snap.managed_files.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - 
reason="related_timer", - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=snap.role_name, + abs_path=path, + reason="related_timer", + policy=policy, + path_filter=path_filter, + managed_out=snap.managed_files, + excluded_out=snap.excluded, + seen_role=role_seen, ) continue @@ -852,7 +910,6 @@ def harvest( roots.extend([f"/etc/{td}", f"/etc/{td}.d"]) roots.extend([f"/etc/default/{td}"]) roots.extend([f"/etc/init.d/{td}"]) - roots.extend([f"/etc/logrotate.d/{td}"]) roots.extend([f"/etc/sysctl.d/{td}.conf"]) # Capture any custom/unowned files under /etc/ for this @@ -871,34 +928,18 @@ def harvest( if r not in owned_etc and _is_confish(r): candidates.setdefault(r, "custom_specific_path") + role_seen = seen_by_role.setdefault(role, set()) for path, reason in sorted(candidates.items()): - if path_filter.is_excluded(path): - excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - deny = policy.deny_reason(path) - if deny: - excluded.append(ExcludedFile(path=path, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(path) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, role, path, src_rel) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=role, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=managed, + excluded_out=excluded, + seen_role=role_seen, ) if not pkg_to_etc_paths.get(pkg, []) and not managed: @@ -929,6 +970,7 @@ def harvest( users_notes.append(f"Failed to enumerate users: {e!r}") users_role_name = "users" + users_role_seen = seen_by_role.setdefault(users_role_name, set()) for u in user_records: users_list.append( @@ -946,38 +988,21 @@ def 
harvest( # Copy only safe SSH public material: authorized_keys + *.pub for sf in u.ssh_files: - if path_filter.is_excluded(sf): - users_excluded.append(ExcludedFile(path=sf, reason="user_excluded")) - continue - deny = policy.deny_reason(sf) - if deny: - users_excluded.append(ExcludedFile(path=sf, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(sf) - except OSError: - users_excluded.append(ExcludedFile(path=sf, reason="unreadable")) - continue - src_rel = sf.lstrip("/") - try: - _copy_into_bundle(bundle_dir, users_role_name, sf, src_rel) - except OSError: - users_excluded.append(ExcludedFile(path=sf, reason="unreadable")) - continue reason = ( "authorized_keys" if sf.endswith("/authorized_keys") else "ssh_public_key" ) - users_managed.append( - ManagedFile( - path=sf, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=users_role_name, + abs_path=sf, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=users_managed, + excluded_out=users_excluded, + seen_role=users_role_seen, ) users_snapshot = UsersSnapshot( @@ -995,39 +1020,19 @@ def harvest( apt_excluded: List[ExcludedFile] = [] apt_managed: List[ManagedFile] = [] apt_role_name = "apt_config" + apt_role_seen = seen_by_role.setdefault(apt_role_name, set()) for path, reason in _iter_apt_capture_paths(): - if path_filter.is_excluded(path): - apt_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - - deny = policy.deny_reason(path) - if deny: - apt_excluded.append(ExcludedFile(path=path, reason=deny)) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, apt_role_name, path, src_rel) - except OSError: - apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - 
apt_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=apt_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=apt_managed, + excluded_out=apt_excluded, + seen_role=apt_role_seen, ) apt_config_snapshot = AptConfigSnapshot( @@ -1062,11 +1067,58 @@ def harvest( svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} pkg_by_role: Dict[str, PackageSnapshot] = {p.role_name: p for p in pkg_snaps} - def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]: - """If `path` is a shared snippet, return (role_name, reason) to attach to.""" - base = os.path.basename(path) + # Package name -> role_name for manually-installed package roles. + pkg_name_to_role: Dict[str, str] = {p.package: p.role_name for p in pkg_snaps} - # Try full filename and stem (before first dot). + # Package name -> list of service role names that reference it. + pkg_to_service_roles: Dict[str, List[str]] = {} + for s in service_snaps: + for pkg in s.packages: + pkg_to_service_roles.setdefault(pkg, []).append(s.role_name) + + # Alias -> role mapping used as a fallback when dpkg ownership is missing. + # Prefer service roles over package roles when both would match. 
+ alias_ranked: Dict[str, tuple[int, str]] = {} + + def _add_alias(alias: str, role_name: str, *, priority: int) -> None: + key = _safe_name(alias) + if not key: + return + cur = alias_ranked.get(key) + if ( + cur is None + or priority < cur[0] + or (priority == cur[0] and role_name < cur[1]) + ): + alias_ranked[key] = (priority, role_name) + + for role_name, aliases in service_role_aliases.items(): + for a in aliases: + _add_alias(a, role_name, priority=0) + + for p in pkg_snaps: + _add_alias(p.package, p.role_name, priority=1) + + def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]: + """If `path` is a shared snippet, return (role_name, reason) to attach to. + + This is used primarily for /etc/logrotate.d/* and /etc/cron.d/* where + files are "owned" by many packages but people tend to reason about them + per service. + + Resolution order: + 1) dpkg owner -> service role (if any service references the package) + 2) dpkg owner -> package role (manual package role exists) + 3) basename/stem alias match -> preferred role + """ + if path.startswith("/etc/logrotate.d/"): + tag = "logrotate_snippet" + elif path.startswith("/etc/cron.d/"): + tag = "cron_snippet" + else: + return None + + base = os.path.basename(path) candidates: List[str] = [base] if "." in base: candidates.append(base.split(".", 1)[0]) @@ -1078,122 +1130,62 @@ def harvest( seen.add(c) uniq.append(c) - if path.startswith("/etc/logrotate.d/"): - for c in uniq: - rn = _safe_name(c) - if rn in svc_by_role or rn in pkg_by_role: - return (rn, "logrotate_snippet") - return None + pkg = dpkg_owner(path) + if pkg: + svc_roles = pkg_to_service_roles.get(pkg) + if svc_roles: + # Deterministic tie-break: lowest role name. 
+ return (sorted(set(svc_roles))[0], tag) + pkg_role = pkg_name_to_role.get(pkg) + if pkg_role: + return (pkg_role, tag) - if path.startswith("/etc/cron.d/"): - for c in uniq: - rn = _safe_name(c) - if rn in svc_by_role or rn in pkg_by_role: - return (rn, "cron_snippet") - return None + for c in uniq: + key = _safe_name(c) + hit = alias_ranked.get(key) + if hit is not None: + return (hit[1], tag) return None + def _lists_for_role(role_name: str) -> tuple[List[ManagedFile], List[ExcludedFile]]: + if role_name in svc_by_role: + snap = svc_by_role[role_name] + return (snap.managed_files, snap.excluded) + if role_name in pkg_by_role: + snap = pkg_by_role[role_name] + return (snap.managed_files, snap.excluded) + # Fallback (shouldn't normally happen): attribute to etc_custom. + return (etc_managed, etc_excluded) + # Capture essential system config/state (even if package-owned). + etc_role_seen = seen_by_role.setdefault(etc_role_name, set()) for path, reason in _iter_system_capture_paths(): if path in already: continue target = _target_role_for_shared_snippet(path) - - if path_filter.is_excluded(path): - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - already.add(path) - continue - - deny = policy.deny_reason(path) - if deny: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) - already.add(path) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - 
svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - src_rel = path.lstrip("/") - role_for_copy = etc_role_name - reason_for_role = reason - if target: + if target is not None: role_for_copy, reason_for_role = target - - try: - _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - mf = ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason_for_role, - ) - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].managed_files.append(mf) - elif rn in pkg_by_role: - pkg_by_role[rn].managed_files.append(mf) + managed_out, excluded_out = _lists_for_role(role_for_copy) + role_seen = seen_by_role.setdefault(role_for_copy, set()) else: - etc_managed.append(mf) + role_for_copy, reason_for_role = (etc_role_name, reason) + managed_out, excluded_out = (etc_managed, etc_excluded) + role_seen = etc_role_seen - already.add(path) + _capture_file( + bundle_dir=bundle_dir, + role_name=role_for_copy, + abs_path=path, + reason=reason_for_role, + policy=policy, + path_filter=path_filter, + managed_out=managed_out, + excluded_out=excluded_out, + seen_role=role_seen, + seen_global=already, + ) # Walk /etc for remaining unowned config-ish files scanned = 0 @@ -1212,99 +1204,28 @@ def harvest( continue target = _target_role_for_shared_snippet(path) - - if path_filter.is_excluded(path): - if target: - 
rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - already.add(path) - continue - - deny = policy.deny_reason(path) - if deny: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) - already.add(path) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - src_rel = path.lstrip("/") - role_for_copy = etc_role_name - reason_for_role = "custom_unowned" - if target: + if target is not None: role_for_copy, reason_for_role = target - - try: - _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - mf = ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason_for_role, - ) - if target: - rn, _ = target - if rn in svc_by_role: - 
svc_by_role[rn].managed_files.append(mf) - elif rn in pkg_by_role: - pkg_by_role[rn].managed_files.append(mf) + managed_out, excluded_out = _lists_for_role(role_for_copy) + role_seen = seen_by_role.setdefault(role_for_copy, set()) else: - etc_managed.append(mf) - scanned += 1 + role_for_copy, reason_for_role = (etc_role_name, "custom_unowned") + managed_out, excluded_out = (etc_managed, etc_excluded) + role_seen = etc_role_seen + + if _capture_file( + bundle_dir=bundle_dir, + role_name=role_for_copy, + abs_path=path, + reason=reason_for_role, + policy=policy, + path_filter=path_filter, + managed_out=managed_out, + excluded_out=excluded_out, + seen_role=role_seen, + seen_global=already, + ): + scanned += 1 if scanned >= MAX_FILES_CAP: etc_notes.append( f"Reached file cap ({MAX_FILES_CAP}) while scanning /etc for unowned files." @@ -1339,6 +1260,7 @@ def harvest( scanned = 0 if not os.path.isdir(root): return + role_seen = seen_by_role.setdefault(ul_role_name, set()) for dirpath, _, filenames in os.walk(root): for fn in filenames: path = os.path.join(dirpath, fn) @@ -1346,54 +1268,34 @@ def harvest( continue if not os.path.isfile(path) or os.path.islink(path): continue + try: + owner, group, mode = stat_triplet(path) + except OSError: + ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + if require_executable: - try: - owner, group, mode = stat_triplet(path) - except OSError: - ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue try: if (int(mode, 8) & 0o111) == 0: continue except ValueError: # If mode parsing fails, be conservative and skip. 
continue - else: - try: - owner, group, mode = stat_triplet(path) - except OSError: - ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - if path_filter.is_excluded(path): - ul_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - - deny = policy.deny_reason(path) - if deny: - ul_excluded.append(ExcludedFile(path=path, reason=deny)) - continue - - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, ul_role_name, path, src_rel) - except OSError: - ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - ul_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) - ) - - already_all.add(path) - scanned += 1 + if _capture_file( + bundle_dir=bundle_dir, + role_name=ul_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=ul_managed, + excluded_out=ul_excluded, + seen_role=role_seen, + metadata=(owner, group, mode), + ): + already_all.add(path) + scanned += 1 if scanned >= cap: ul_notes.append(f"Reached file cap ({cap}) while scanning {root}.") return @@ -1428,6 +1330,7 @@ def harvest( extra_excluded: List[ExcludedFile] = [] extra_managed: List[ManagedFile] = [] extra_role_name = "extra_paths" + extra_role_seen = seen_by_role.setdefault(extra_role_name, set()) include_specs = list(include_paths or []) exclude_specs = list(exclude_paths or []) @@ -1453,39 +1356,18 @@ def harvest( if path in already_all: continue - if path_filter.is_excluded(path): - extra_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - - deny = policy.deny_reason(path) - if deny: - extra_excluded.append(ExcludedFile(path=path, reason=deny)) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, 
extra_role_name, path, src_rel) - except OSError: - extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - extra_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason="user_include", - ) - ) - already_all.add(path) + if _capture_file( + bundle_dir=bundle_dir, + role_name=extra_role_name, + abs_path=path, + reason="user_include", + policy=policy, + path_filter=path_filter, + managed_out=extra_managed, + excluded_out=extra_excluded, + seen_role=extra_role_seen, + ): + already_all.add(path) extra_paths_snapshot = ExtraPathsSnapshot( role_name=extra_role_name, diff --git a/enroll/pathfilter.py b/enroll/pathfilter.py index 6541ca9..680d390 100644 --- a/enroll/pathfilter.py +++ b/enroll/pathfilter.py @@ -141,7 +141,7 @@ class PathFilter: - Regex: prefix with 're:' or 'regex:' - Force glob: prefix with 'glob:' - A plain path without wildcards matches that path and everything under it - (directory-prefix behavior). + (directory-prefix behaviour). Examples: --exclude-path /usr/local/bin/docker-* diff --git a/pyproject.toml b/pyproject.toml index 3aa01d0..c7356bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.5" +version = "0.1.6" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index ed0a3c9..637dee1 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.5 +%global upstream_version 0.1.6 Name: enroll Version: %{upstream_version} @@ -44,6 +44,9 @@ Enroll a server's running state retrospectively into Ansible. 
%changelog * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} +- DRY up some code logic +- More test coverage +* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} - Consolidate logrotate and cron files into their main service/package roles if they exist. - Standardise on MAX_FILES_CAP in one place - Manage apt stuff in its own role, not in etc_custom diff --git a/tests/test___main__.py b/tests/test___main__.py new file mode 100644 index 0000000..2e83ac1 --- /dev/null +++ b/tests/test___main__.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import runpy + + +def test_module_main_invokes_cli_main(monkeypatch): + import enroll.cli + + called = {"ok": False} + + def fake_main() -> None: + called["ok"] = True + + monkeypatch.setattr(enroll.cli, "main", fake_main) + + # Execute enroll.__main__ as if `python -m enroll`. + runpy.run_module("enroll.__main__", run_name="__main__") + assert called["ok"] is True diff --git a/tests/test_accounts.py b/tests/test_accounts.py new file mode 100644 index 0000000..d5cc267 --- /dev/null +++ b/tests/test_accounts.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import os +from pathlib import Path + + +def test_parse_login_defs_parses_known_keys(tmp_path: Path): + from enroll.accounts import parse_login_defs + + p = tmp_path / "login.defs" + p.write_text( + """ + # comment + UID_MIN 1000 + UID_MAX 60000 + SYS_UID_MIN 100 + SYS_UID_MAX 999 + UID_MIN not_an_int + OTHER 123 + """, + encoding="utf-8", + ) + + vals = parse_login_defs(str(p)) + assert vals["UID_MIN"] == 1000 + assert vals["UID_MAX"] == 60000 + assert vals["SYS_UID_MIN"] == 100 + assert vals["SYS_UID_MAX"] == 999 + assert "OTHER" not in vals + + +def test_parse_passwd_and_group_and_ssh_files(tmp_path: Path): + from enroll.accounts import find_user_ssh_files, parse_group, parse_passwd + + passwd = tmp_path / "passwd" + passwd.write_text( + "\n".join( + [ + "root:x:0:0:root:/root:/bin/bash", + "# comment", + 
"alice:x:1000:1000:Alice:/home/alice:/bin/bash", + "bob:x:1001:1000:Bob:/home/bob:/usr/sbin/nologin", + "badline", + "cathy:x:notint:1000:Cathy:/home/cathy:/bin/bash", + "", + ] + ), + encoding="utf-8", + ) + + group = tmp_path / "group" + group.write_text( + "\n".join( + [ + "root:x:0:", + "users:x:1000:alice,bob", + "admins:x:1002:alice", + "badgroup:x:notint:alice", + "", + ] + ), + encoding="utf-8", + ) + + rows = parse_passwd(str(passwd)) + assert ("alice", 1000, 1000, "Alice", "/home/alice", "/bin/bash") in rows + assert all(r[0] != "cathy" for r in rows) # skipped invalid UID + + gid_to_name, name_to_gid, members = parse_group(str(group)) + assert gid_to_name[1000] == "users" + assert name_to_gid["admins"] == 1002 + assert "alice" in members["admins"] + + # ssh discovery: only authorized_keys, no symlinks + home = tmp_path / "home" / "alice" + sshdir = home / ".ssh" + sshdir.mkdir(parents=True) + ak = sshdir / "authorized_keys" + ak.write_text("ssh-ed25519 AAA...", encoding="utf-8") + # a symlink should be ignored + (sshdir / "authorized_keys2").write_text("x", encoding="utf-8") + os.symlink(str(sshdir / "authorized_keys2"), str(sshdir / "authorized_keys_link")) + assert find_user_ssh_files(str(home)) == [str(ak)] + + +def test_collect_non_system_users(monkeypatch, tmp_path: Path): + import enroll.accounts as a + + orig_parse_login_defs = a.parse_login_defs + orig_parse_passwd = a.parse_passwd + orig_parse_group = a.parse_group + + # Provide controlled passwd/group/login.defs inputs via monkeypatch. 
+ passwd = tmp_path / "passwd" + passwd.write_text( + "\n".join( + [ + "root:x:0:0:root:/root:/bin/bash", + "nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin", + "alice:x:1000:1000:Alice:/home/alice:/bin/bash", + "sysuser:x:200:200:Sys:/home/sys:/bin/bash", + "bob:x:1001:1000:Bob:/home/bob:/bin/false", + "", + ] + ), + encoding="utf-8", + ) + group = tmp_path / "group" + group.write_text( + "\n".join( + [ + "users:x:1000:alice,bob", + "admins:x:1002:alice", + "", + ] + ), + encoding="utf-8", + ) + + defs = tmp_path / "login.defs" + defs.write_text("UID_MIN 1000\n", encoding="utf-8") + + monkeypatch.setattr( + a, "parse_login_defs", lambda path=str(defs): orig_parse_login_defs(path) + ) + monkeypatch.setattr( + a, "parse_passwd", lambda path=str(passwd): orig_parse_passwd(path) + ) + monkeypatch.setattr( + a, "parse_group", lambda path=str(group): orig_parse_group(path) + ) + + # Use a stable fake ssh discovery. + monkeypatch.setattr( + a, "find_user_ssh_files", lambda home: [f"{home}/.ssh/authorized_keys"] + ) + + users = a.collect_non_system_users() + assert [u.name for u in users] == ["alice"] + u = users[0] + assert u.primary_group == "users" + assert u.supplementary_groups == ["admins"] + assert u.ssh_files == ["/home/alice/.ssh/authorized_keys"] diff --git a/tests/test_debian.py b/tests/test_debian.py new file mode 100644 index 0000000..333afc1 --- /dev/null +++ b/tests/test_debian.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +import hashlib +from pathlib import Path + + +def test_dpkg_owner_parses_output(monkeypatch): + import enroll.debian as d + + class P: + def __init__(self, rc: int, out: str): + self.returncode = rc + self.stdout = out + self.stderr = "" + + def fake_run(cmd, text, capture_output): + assert cmd[:2] == ["dpkg", "-S"] + return P( + 0, + """ + diversion by foo from: /etc/something + nginx-common:amd64: /etc/nginx/nginx.conf + nginx-common, nginx: /etc/nginx/sites-enabled/default + """, + ) + + 
monkeypatch.setattr(d.subprocess, "run", fake_run) + assert d.dpkg_owner("/etc/nginx/nginx.conf") == "nginx-common" + + def fake_run_none(cmd, text, capture_output): + return P(1, "") + + monkeypatch.setattr(d.subprocess, "run", fake_run_none) + assert d.dpkg_owner("/missing") is None + + +def test_list_manual_packages_parses_and_sorts(monkeypatch): + import enroll.debian as d + + class P: + def __init__(self, rc: int, out: str): + self.returncode = rc + self.stdout = out + self.stderr = "" + + def fake_run(cmd, text, capture_output): + assert cmd == ["apt-mark", "showmanual"] + return P(0, "\n# comment\nnginx\nvim\nnginx\n") + + monkeypatch.setattr(d.subprocess, "run", fake_run) + assert d.list_manual_packages() == ["nginx", "vim"] + + +def test_build_dpkg_etc_index(tmp_path: Path): + import enroll.debian as d + + info = tmp_path / "info" + info.mkdir() + (info / "nginx.list").write_text( + "/etc/nginx/nginx.conf\n/etc/nginx/sites-enabled/default\n/usr/bin/nginx\n", + encoding="utf-8", + ) + (info / "vim:amd64.list").write_text( + "/etc/vim/vimrc\n/usr/bin/vim\n", + encoding="utf-8", + ) + + owned, owner_map, topdir_to_pkgs, pkg_to_etc = d.build_dpkg_etc_index(str(info)) + assert "/etc/nginx/nginx.conf" in owned + assert owner_map["/etc/nginx/nginx.conf"] == "nginx" + assert "nginx" in topdir_to_pkgs + assert topdir_to_pkgs["nginx"] == {"nginx"} + assert pkg_to_etc["vim"] == ["/etc/vim/vimrc"] + + +def test_parse_status_conffiles_handles_continuations(tmp_path: Path): + import enroll.debian as d + + status = tmp_path / "status" + status.write_text( + "\n".join( + [ + "Package: nginx", + "Version: 1", + "Conffiles:", + " /etc/nginx/nginx.conf abcdef", + " /etc/nginx/mime.types 123456", + "", + "Package: other", + "Version: 2", + "", + ] + ), + encoding="utf-8", + ) + m = d.parse_status_conffiles(str(status)) + assert m["nginx"]["/etc/nginx/nginx.conf"] == "abcdef" + assert m["nginx"]["/etc/nginx/mime.types"] == "123456" + assert "other" not in m + + +def 
test_read_pkg_md5sums_and_file_md5(tmp_path: Path, monkeypatch): + import enroll.debian as d + + # Patch /var/lib/dpkg/info/.md5sums lookup to a tmp file. + md5_file = tmp_path / "pkg.md5sums" + md5_file.write_text("0123456789abcdef etc/foo.conf\n", encoding="utf-8") + + def fake_exists(path: str) -> bool: + return path.endswith("/var/lib/dpkg/info/p1.md5sums") + + real_open = open + + def fake_open(path: str, *args, **kwargs): + if path.endswith("/var/lib/dpkg/info/p1.md5sums"): + return real_open(md5_file, *args, **kwargs) + return real_open(path, *args, **kwargs) + + monkeypatch.setattr(d.os.path, "exists", fake_exists) + monkeypatch.setattr("builtins.open", fake_open) + + m = d.read_pkg_md5sums("p1") + assert m == {"etc/foo.conf": "0123456789abcdef"} + + content = b"hello world\n" + p = tmp_path / "x" + p.write_bytes(content) + assert d.file_md5(str(p)) == hashlib.md5(content).hexdigest() + + +def test_stat_triplet_fallbacks(tmp_path: Path, monkeypatch): + import enroll.debian as d + import sys + + p = tmp_path / "f" + p.write_text("x", encoding="utf-8") + + class FakePwdMod: + @staticmethod + def getpwuid(_): # pragma: no cover + raise KeyError + + class FakeGrpMod: + @staticmethod + def getgrgid(_): # pragma: no cover + raise KeyError + + # stat_triplet imports pwd/grp inside the function, so patch sys.modules. 
+ monkeypatch.setitem(sys.modules, "pwd", FakePwdMod) + monkeypatch.setitem(sys.modules, "grp", FakeGrpMod) + owner, group, mode = d.stat_triplet(str(p)) + assert owner.isdigit() + assert group.isdigit() + assert mode.isdigit() and len(mode) == 4 diff --git a/tests/test_diff_bundle.py b/tests/test_diff_bundle.py new file mode 100644 index 0000000..66ef094 --- /dev/null +++ b/tests/test_diff_bundle.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import os +import tarfile +from pathlib import Path + +import pytest + + +def _make_bundle_dir(tmp_path: Path) -> Path: + b = tmp_path / "bundle" + (b / "artifacts").mkdir(parents=True) + (b / "state.json").write_text("{}\n", encoding="utf-8") + return b + + +def _tar_gz_of_dir(src: Path, out: Path) -> None: + with tarfile.open(out, mode="w:gz") as tf: + # tar -C src . semantics + for p in src.rglob("*"): + rel = p.relative_to(src) + tf.add(p, arcname=str(rel)) + + +def test_bundle_from_directory_and_statejson_path(tmp_path: Path): + import enroll.diff as d + + b = _make_bundle_dir(tmp_path) + + br1 = d._bundle_from_input(str(b), sops_mode=False) + assert br1.dir == b + assert br1.state_path.exists() + + br2 = d._bundle_from_input(str(b / "state.json"), sops_mode=False) + assert br2.dir == b + + +def test_bundle_from_tarball_extracts(tmp_path: Path): + import enroll.diff as d + + b = _make_bundle_dir(tmp_path) + tgz = tmp_path / "bundle.tgz" + _tar_gz_of_dir(b, tgz) + + br = d._bundle_from_input(str(tgz), sops_mode=False) + try: + assert br.dir.is_dir() + assert (br.dir / "state.json").exists() + finally: + if br.tempdir: + br.tempdir.cleanup() + + +def test_bundle_from_sops_like_file(monkeypatch, tmp_path: Path): + import enroll.diff as d + + b = _make_bundle_dir(tmp_path) + tgz = tmp_path / "bundle.tar.gz" + _tar_gz_of_dir(b, tgz) + + # Pretend the tarball is an encrypted bundle by giving it a .sops name. 
+ sops_path = tmp_path / "bundle.tar.gz.sops" + sops_path.write_bytes(tgz.read_bytes()) + + # Stub out sops machinery: "decrypt" just copies through. + monkeypatch.setattr(d, "require_sops_cmd", lambda: "sops") + + def fake_decrypt(src: Path, dest: Path, mode: int): + dest.write_bytes(Path(src).read_bytes()) + try: + os.chmod(dest, mode) + except OSError: + pass + + monkeypatch.setattr(d, "decrypt_file_binary_to", fake_decrypt) + + br = d._bundle_from_input(str(sops_path), sops_mode=False) + try: + assert (br.dir / "state.json").exists() + finally: + if br.tempdir: + br.tempdir.cleanup() + + +def test_bundle_from_input_missing_path(tmp_path: Path): + import enroll.diff as d + + with pytest.raises(RuntimeError, match="not found"): + d._bundle_from_input(str(tmp_path / "nope"), sops_mode=False) diff --git a/tests/test_pathfilter.py b/tests/test_pathfilter.py new file mode 100644 index 0000000..406b7e7 --- /dev/null +++ b/tests/test_pathfilter.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +import os +from pathlib import Path + + +def test_compile_and_match_prefix_glob_and_regex(tmp_path: Path): + from enroll.pathfilter import PathFilter, compile_path_pattern + + # prefix semantics: matches the exact path and subtree + p = compile_path_pattern("/etc/nginx") + assert p.kind == "prefix" + assert p.matches("/etc/nginx") + assert p.matches("/etc/nginx/nginx.conf") + assert not p.matches("/etc/nginx2/nginx.conf") + + # glob semantics + g = compile_path_pattern("/etc/**/*.conf") + assert g.kind == "glob" + assert g.matches("/etc/nginx/nginx.conf") + assert not g.matches("/var/etc/nginx.conf") + + # explicit glob + g2 = compile_path_pattern("glob:/home/*/.bashrc") + assert g2.kind == "glob" + assert g2.matches("/home/alice/.bashrc") + + # regex semantics (search, not match) + r = compile_path_pattern(r"re:/home/[^/]+/\.ssh/authorized_keys$") + assert r.kind == "regex" + assert r.matches("/home/alice/.ssh/authorized_keys") + assert not 
r.matches("/home/alice/.ssh/authorized_keys2") + + # invalid regex: never matches + bad = compile_path_pattern("re:[") + assert bad.kind == "regex" + assert not bad.matches("/etc/passwd") + + # exclude wins + pf = PathFilter(exclude=["/etc/nginx"], include=["/etc/nginx/nginx.conf"]) + assert pf.is_excluded("/etc/nginx/nginx.conf") + + +def test_expand_includes_respects_exclude_symlinks_and_caps(tmp_path: Path): + from enroll.pathfilter import PathFilter, compile_path_pattern, expand_includes + + root = tmp_path / "root" + (root / "a").mkdir(parents=True) + (root / "a" / "one.txt").write_text("1", encoding="utf-8") + (root / "a" / "two.txt").write_text("2", encoding="utf-8") + (root / "b").mkdir() + (root / "b" / "secret.txt").write_text("s", encoding="utf-8") + + # symlink file should be ignored + os.symlink(str(root / "a" / "one.txt"), str(root / "a" / "link.txt")) + + exclude = PathFilter(exclude=[str(root / "b")]) + + pats = [ + compile_path_pattern(str(root / "a")), + compile_path_pattern("glob:" + str(root / "**" / "*.txt")), + ] + + paths, notes = expand_includes(pats, exclude=exclude, max_files=2) + # cap should limit to 2 files + assert len(paths) == 2 + assert any("cap" in n.lower() for n in notes) + # excluded dir should not contribute + assert all("/b/" not in p for p in paths) + # symlink ignored + assert all(not p.endswith("link.txt") for p in paths) + + +def test_expand_includes_notes_on_no_matches(tmp_path: Path): + from enroll.pathfilter import compile_path_pattern, expand_includes + + pats = [compile_path_pattern(str(tmp_path / "does_not_exist"))] + paths, notes = expand_includes(pats, max_files=10) + assert paths == [] + assert any("matched no files" in n.lower() for n in notes) diff --git a/tests/test_remote.py b/tests/test_remote.py new file mode 100644 index 0000000..576c0b1 --- /dev/null +++ b/tests/test_remote.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import io +import tarfile +from pathlib import Path + +import pytest + + 
+def _make_tgz_bytes(files: dict[str, bytes]) -> bytes: + bio = io.BytesIO() + with tarfile.open(fileobj=bio, mode="w:gz") as tf: + for name, content in files.items(): + ti = tarfile.TarInfo(name=name) + ti.size = len(content) + tf.addfile(ti, io.BytesIO(content)) + return bio.getvalue() + + +def test_safe_extract_tar_rejects_path_traversal(tmp_path: Path): + from enroll.remote import _safe_extract_tar + + # Build an unsafe tar with ../ traversal + bio = io.BytesIO() + with tarfile.open(fileobj=bio, mode="w:gz") as tf: + ti = tarfile.TarInfo(name="../evil") + ti.size = 1 + tf.addfile(ti, io.BytesIO(b"x")) + + bio.seek(0) + with tarfile.open(fileobj=bio, mode="r:gz") as tf: + with pytest.raises(RuntimeError, match="Unsafe tar member path"): + _safe_extract_tar(tf, tmp_path) + + +def test_safe_extract_tar_rejects_symlinks(tmp_path: Path): + from enroll.remote import _safe_extract_tar + + bio = io.BytesIO() + with tarfile.open(fileobj=bio, mode="w:gz") as tf: + ti = tarfile.TarInfo(name="link") + ti.type = tarfile.SYMTYPE + ti.linkname = "/etc/passwd" + tf.addfile(ti) + + bio.seek(0) + with tarfile.open(fileobj=bio, mode="r:gz") as tf: + with pytest.raises(RuntimeError, match="Refusing to extract"): + _safe_extract_tar(tf, tmp_path) + + +def test_remote_harvest_happy_path(tmp_path: Path, monkeypatch): + import sys + + import enroll.remote as r + + # Avoid building a real zipapp; just create a file. + def fake_build(_td: Path) -> Path: + p = _td / "enroll.pyz" + p.write_bytes(b"PYZ") + return p + + monkeypatch.setattr(r, "_build_enroll_pyz", fake_build) + + # Prepare a tiny harvest bundle tar stream from the "remote". 
+ tgz = _make_tgz_bytes({"state.json": b'{"ok": true}\n'}) + + calls: list[str] = [] + + class _Chan: + def __init__(self, rc: int = 0): + self._rc = rc + + def recv_exit_status(self) -> int: + return self._rc + + class _Stdout: + def __init__(self, payload: bytes = b"", rc: int = 0): + self._bio = io.BytesIO(payload) + self.channel = _Chan(rc) + + def read(self, n: int = -1) -> bytes: + return self._bio.read(n) + + class _Stderr: + def __init__(self, payload: bytes = b""): + self._bio = io.BytesIO(payload) + + def read(self, n: int = -1) -> bytes: + return self._bio.read(n) + + class _SFTP: + def __init__(self): + self.put_calls: list[tuple[str, str]] = [] + + def put(self, local: str, remote: str) -> None: + self.put_calls.append((local, remote)) + + def close(self) -> None: + return + + class FakeSSH: + def __init__(self): + self._sftp = _SFTP() + + def load_system_host_keys(self): + return + + def set_missing_host_key_policy(self, _policy): + return + + def connect(self, **kwargs): + # Accept any connect parameters. + return + + def open_sftp(self): + return self._sftp + + def exec_command(self, cmd: str): + calls.append(cmd) + # The tar stream uses exec_command directly. 
+ if cmd.startswith("tar -cz -C"): + return (None, _Stdout(tgz, rc=0), _Stderr(b"")) + + # _ssh_run path: id -un, mktemp -d, chmod, sudo harvest, sudo chown, rm -rf + if cmd == "id -un": + return (None, _Stdout(b"alice\n"), _Stderr()) + if cmd == "mktemp -d": + return (None, _Stdout(b"/tmp/enroll-remote-123\n"), _Stderr()) + if cmd.startswith("chmod 700"): + return (None, _Stdout(b""), _Stderr()) + if " harvest " in cmd: + return (None, _Stdout(b""), _Stderr()) + if cmd.startswith("sudo chown -R"): + return (None, _Stdout(b""), _Stderr()) + if cmd.startswith("rm -rf"): + return (None, _Stdout(b""), _Stderr()) + + return (None, _Stdout(b""), _Stderr(b"unknown")) + + def close(self): + return + + import types + + class RejectPolicy: + pass + + FakeParamiko = types.SimpleNamespace(SSHClient=FakeSSH, RejectPolicy=RejectPolicy) + + # Provide a fake paramiko module. + monkeypatch.setitem(sys.modules, "paramiko", FakeParamiko) + + out_dir = tmp_path / "out" + state_path = r.remote_harvest( + local_out_dir=out_dir, + remote_host="example.com", + remote_port=2222, + remote_user=None, + include_paths=["/etc/nginx/nginx.conf"], + exclude_paths=["/etc/shadow"], + dangerous=True, + no_sudo=False, + ) + + assert state_path == out_dir / "state.json" + assert state_path.exists() + assert b"ok" in state_path.read_bytes() + + # Ensure we attempted remote harvest with sudo and passed include/exclude and dangerous. 
+ joined = "\n".join(calls) + assert "sudo" in joined + assert "--dangerous" in joined + assert "--include-path" in joined + assert "--exclude-path" in joined diff --git a/tests/test_systemd.py b/tests/test_systemd.py new file mode 100644 index 0000000..f351159 --- /dev/null +++ b/tests/test_systemd.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import pytest + + +def test_list_enabled_services_and_timers_filters_templates(monkeypatch): + import enroll.systemd as s + + def fake_run(cmd: list[str]) -> str: + if "--type=service" in cmd: + return "\n".join( + [ + "nginx.service enabled", + "getty@.service enabled", # template + "foo@bar.service enabled", # instance units are included + "ssh.service enabled", + ] + ) + if "--type=timer" in cmd: + return "\n".join( + [ + "apt-daily.timer enabled", + "foo@.timer enabled", # template + ] + ) + raise AssertionError("unexpected") + + monkeypatch.setattr(s, "_run", fake_run) + assert s.list_enabled_services() == [ + "foo@bar.service", + "nginx.service", + "ssh.service", + ] + assert s.list_enabled_timers() == ["apt-daily.timer"] + + +def test_get_unit_info_parses_fields(monkeypatch): + import enroll.systemd as s + + class P: + def __init__(self, rc: int, out: str, err: str = ""): + self.returncode = rc + self.stdout = out + self.stderr = err + + def fake_run(cmd, check, text, capture_output): + assert cmd[0:2] == ["systemctl", "show"] + return P( + 0, + "\n".join( + [ + "FragmentPath=/lib/systemd/system/nginx.service", + "DropInPaths=/etc/systemd/system/nginx.service.d/override.conf /etc/systemd/system/nginx.service.d/extra.conf", + "EnvironmentFiles=-/etc/default/nginx /etc/nginx/env", + "ExecStart={ path=/usr/sbin/nginx ; argv[]=/usr/sbin/nginx -g daemon off; }", + "ActiveState=active", + "SubState=running", + "UnitFileState=enabled", + "ConditionResult=yes", + ] + ), + ) + + monkeypatch.setattr(s.subprocess, "run", fake_run) + ui = s.get_unit_info("nginx.service") + assert ui.fragment_path == 
"/lib/systemd/system/nginx.service" + assert "/etc/default/nginx" in ui.env_files + assert "/etc/nginx/env" in ui.env_files + assert "/usr/sbin/nginx" in ui.exec_paths + assert ui.active_state == "active" + + +def test_get_unit_info_raises_unit_query_error(monkeypatch): + import enroll.systemd as s + + class P: + def __init__(self, rc: int, out: str, err: str): + self.returncode = rc + self.stdout = out + self.stderr = err + + def fake_run(cmd, check, text, capture_output): + return P(1, "", "no such unit") + + monkeypatch.setattr(s.subprocess, "run", fake_run) + with pytest.raises(s.UnitQueryError) as ei: + s.get_unit_info("missing.service") + assert "missing.service" in str(ei.value) + assert ei.value.unit == "missing.service" + + +def test_get_timer_info_parses_fields(monkeypatch): + import enroll.systemd as s + + class P: + def __init__(self, rc: int, out: str, err: str = ""): + self.returncode = rc + self.stdout = out + self.stderr = err + + def fake_run(cmd, text, capture_output): + return P( + 0, + "\n".join( + [ + "FragmentPath=/lib/systemd/system/apt-daily.timer", + "DropInPaths=", + "EnvironmentFiles=-/etc/default/apt", + "Unit=apt-daily.service", + "ActiveState=active", + "SubState=waiting", + "UnitFileState=enabled", + "ConditionResult=yes", + ] + ), + ) + + monkeypatch.setattr(s.subprocess, "run", fake_run) + ti = s.get_timer_info("apt-daily.timer") + assert ti.trigger_unit == "apt-daily.service" + assert "/etc/default/apt" in ti.env_files From 8c19473e18b388b95ac3a5f77942cd081c17e889 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 18:37:14 +1100 Subject: [PATCH 23/85] Fix an attribution bug for certain files ending up in the wrong package/role. 
--- CHANGELOG.md | 4 ++ debian/changelog | 6 +++ enroll/harvest.py | 44 +++++++++++++++-- pyproject.toml | 2 +- rpm/enroll.spec | 4 +- tests/test_harvest.py | 107 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 160 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a4c39d..f2cb109 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.1.7 + + * Fix an attribution bug for certain files ending up in the wrong package/role. + # 0.1.6 * DRY up some code logic diff --git a/debian/changelog b/debian/changelog index a15c38a..eabdefc 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +enroll (0.1.7) unstable; urgency=medium + + * Fix an attribution bug for certain files ending up in the wrong package/role. + + -- Miguel Jacq Sun, 28 Dec 2025 18:30:00 +1100 + enroll (0.1.6) unstable; urgency=medium * DRY up some code logic diff --git a/enroll/harvest.py b/enroll/harvest.py index 56e5aed..d678b89 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -292,9 +292,26 @@ def _hint_names(unit: str, pkgs: Set[str]) -> Set[str]: def _add_pkgs_from_etc_topdirs( hints: Set[str], topdir_to_pkgs: Dict[str, Set[str]], pkgs: Set[str] ) -> None: + """Expand a service's package set using dpkg-owned /etc top-level dirs. + + This is a heuristic: many Debian packages split a service across multiple + packages (e.g. nginx + nginx-common) while sharing a single /etc/ + tree. + + We intentionally *avoid* using shared trees (e.g. /etc/cron.d, /etc/ssl, + /etc/apparmor.d) to expand package sets, because many unrelated packages + legitimately install files there. + + We also consider the common ".d" variant (e.g. hint "apparmor" -> + topdir "apparmor.d") so we can explicitly skip known shared trees. 
+ """ + for h in hints: - for p in topdir_to_pkgs.get(h, set()): - pkgs.add(p) + for top in (h, f"{h}.d"): + if top in SHARED_ETC_TOPDIRS: + continue + for p in topdir_to_pkgs.get(top, set()): + pkgs.add(p) def _maybe_add_specific_paths(hints: Set[str]) -> List[str]: @@ -1132,10 +1149,27 @@ def harvest( pkg = dpkg_owner(path) if pkg: - svc_roles = pkg_to_service_roles.get(pkg) + svc_roles = sorted(set(pkg_to_service_roles.get(pkg, []))) if svc_roles: - # Deterministic tie-break: lowest role name. - return (sorted(set(svc_roles))[0], tag) + # If multiple service roles reference the same package, prefer + # the role that most closely matches the snippet name (basename + # or stem). This avoids surprising attributions such as an + # AppArmor loader role "claiming" a cron/logrotate snippet + # that is clearly named after another package/service. + if len(svc_roles) > 1: + # Direct role-name matches first. + for c in [pkg, *uniq]: + rn = _safe_name(c) + if rn in svc_roles: + return (rn, tag) + # Next, use the alias map if it points at one of the roles. + for c in [pkg, *uniq]: + hit = alias_ranked.get(_safe_name(c)) + if hit is not None and hit[1] in svc_roles: + return (hit[1], tag) + + # Deterministic fallback: lowest role name. 
+ return (svc_roles[0], tag) pkg_role = pkg_name_to_role.get(pkg) if pkg_role: return (pkg_role, tag) diff --git a/pyproject.toml b/pyproject.toml index c7356bc..ca875e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.6" +version = "0.1.7" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 637dee1..f63a12c 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.6 +%global upstream_version 0.1.7 Name: enroll Version: %{upstream_version} @@ -44,6 +44,8 @@ Enroll a server's running state retrospectively into Ansible. %changelog * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} +- Fix an attribution bug for certain files ending up in the wrong package/role. +* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} - DRY up some code logic - More test coverage * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} diff --git a/tests/test_harvest.py b/tests/test_harvest.py index a832c81..fa796f0 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -176,3 +176,110 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( assert any(mf["path"] == "/usr/local/etc/myapp.conf" for mf in ul["managed_files"]) assert any(mf["path"] == "/usr/local/bin/myscript" for mf in ul["managed_files"]) assert all(mf["path"] != "/usr/local/bin/readme.txt" for mf in ul["managed_files"]) + + +def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( + monkeypatch, tmp_path: Path +): + """Regression test for shared snippet routing. + + When multiple service roles reference the same owning package, we prefer the + role whose name matches the snippet/package (e.g. ntpsec) rather than a + lexicographic tie-break that could incorrectly pick another role. 
+ """ + + bundle = tmp_path / "bundle" + + files = {"/etc/cron.d/ntpsec": b"# cron\n"} + dirs = {"/etc", "/etc/cron.d"} + + monkeypatch.setattr(h.os.path, "isfile", lambda p: p in files) + monkeypatch.setattr(h.os.path, "islink", lambda p: False) + monkeypatch.setattr(h.os.path, "isdir", lambda p: p in dirs) + monkeypatch.setattr(h.os.path, "exists", lambda p: p in files or p in dirs) + monkeypatch.setattr(h.os, "walk", lambda root: [("/etc/cron.d", [], ["ntpsec"])]) + + # Only include the cron snippet in the system capture set. + monkeypatch.setattr( + h, "_iter_system_capture_paths", lambda: [("/etc/cron.d/ntpsec", "system_cron")] + ) + + monkeypatch.setattr( + h, "list_enabled_services", lambda: ["apparmor.service", "ntpsec.service"] + ) + + def fake_unit_info(unit: str) -> UnitInfo: + if unit == "apparmor.service": + return UnitInfo( + name=unit, + fragment_path="/lib/systemd/system/apparmor.service", + dropin_paths=[], + env_files=[], + exec_paths=["/usr/sbin/apparmor"], + active_state="active", + sub_state="running", + unit_file_state="enabled", + condition_result=None, + ) + return UnitInfo( + name=unit, + fragment_path="/lib/systemd/system/ntpsec.service", + dropin_paths=[], + env_files=[], + exec_paths=["/usr/sbin/ntpd"], + active_state="active", + sub_state="running", + unit_file_state="enabled", + condition_result=None, + ) + + monkeypatch.setattr(h, "get_unit_info", fake_unit_info) + + # Dpkg /etc index: no owned /etc paths needed for this test. + monkeypatch.setattr( + h, + "build_dpkg_etc_index", + lambda: (set(), {}, {}, {}), + ) + monkeypatch.setattr(h, "parse_status_conffiles", lambda: {}) + monkeypatch.setattr(h, "read_pkg_md5sums", lambda pkg: {}) + monkeypatch.setattr(h, "file_md5", lambda path: "x") + monkeypatch.setattr(h, "list_manual_packages", lambda: []) + monkeypatch.setattr(h, "collect_non_system_users", lambda: []) + + # Make apparmor *also* claim the ntpsec package (simulates overly-broad + # package inference). 
The snippet routing should still prefer role 'ntpsec'. + def fake_dpkg_owner(p: str): + if p == "/etc/cron.d/ntpsec": + return "ntpsec" + if "apparmor" in p: + return "ntpsec" # intentionally misleading + if "ntpsec" in p or "ntpd" in p: + return "ntpsec" + return None + + monkeypatch.setattr(h, "dpkg_owner", fake_dpkg_owner) + monkeypatch.setattr(h, "stat_triplet", lambda p: ("root", "root", "0644")) + + def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): + dst = Path(bundle_dir) / "artifacts" / role_name / src_rel + dst.parent.mkdir(parents=True, exist_ok=True) + dst.write_bytes(files[abs_path]) + + monkeypatch.setattr(h, "_copy_into_bundle", fake_copy) + + class AllowAll: + def deny_reason(self, path: str): + return None + + state_path = h.harvest(str(bundle), policy=AllowAll()) + st = json.loads(Path(state_path).read_text(encoding="utf-8")) + + # Cron snippet should end up attached to the ntpsec role, not apparmor. + svc_ntpsec = next(s for s in st["services"] if s["role_name"] == "ntpsec") + assert any(mf["path"] == "/etc/cron.d/ntpsec" for mf in svc_ntpsec["managed_files"]) + + svc_apparmor = next(s for s in st["services"] if s["role_name"] == "apparmor") + assert all( + mf["path"] != "/etc/cron.d/ntpsec" for mf in svc_apparmor["managed_files"] + ) From ad2abed6127989e62a639874f861acbfaf2e9915 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 14:29:11 +1100 Subject: [PATCH 24/85] Add version CLI arg --- CHANGELOG.md | 4 ++++ enroll/cli.py | 61 ++++++++++++++++++++++++++++------------------- enroll/version.py | 32 +++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 25 deletions(-) create mode 100644 enroll/version.py diff --git a/CHANGELOG.md b/CHANGELOG.md index f2cb109..e07f57b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.2.0 + + * Add version CLI arg + # 0.1.7 * Fix an attribution bug for certain files ending up in the wrong package/role. 
diff --git a/enroll/cli.py b/enroll/cli.py index ae9aba0..bb4d3f1 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -15,6 +15,7 @@ from .harvest import harvest from .manifest import manifest from .remote import remote_harvest from .sopsutil import SopsError, encrypt_file_binary +from .version import get_enroll_version def _discover_config_path(argv: list[str]) -> Optional[Path]: @@ -318,13 +319,6 @@ def _jt_mode(args: argparse.Namespace) -> str: return "auto" -def _add_remote_args(p: argparse.ArgumentParser) -> None: - p.add_argument( - "--remote-host", - help="SSH host to run harvesting on (if set, harvest runs remotely and is pulled locally).", - ) - - def _add_config_args(p: argparse.ArgumentParser) -> None: p.add_argument( "-c", @@ -339,6 +333,13 @@ def _add_config_args(p: argparse.ArgumentParser) -> None: action="store_true", help="Do not load any INI config file (even if one would be auto-discovered).", ) + + +def _add_remote_args(p: argparse.ArgumentParser) -> None: + p.add_argument( + "--remote-host", + help="SSH host to run harvesting on (if set, harvest runs remotely and is pulled locally).", + ) p.add_argument( "--remote-port", type=int, @@ -354,11 +355,18 @@ def _add_config_args(p: argparse.ArgumentParser) -> None: def main() -> None: ap = argparse.ArgumentParser(prog="enroll") + ap.add_argument( + "-v", + "--version", + action="version", + version=f"{get_enroll_version()}", + ) _add_config_args(ap) sub = ap.add_subparsers(dest="cmd", required=True) h = sub.add_parser("harvest", help="Harvest service/package/config state") _add_config_args(h) + _add_remote_args(h) h.add_argument( "--out", help=( @@ -406,7 +414,6 @@ def main() -> None: action="store_true", help="Don't use sudo on the remote host (when using --remote options). 
This may result in a limited harvest due to permission restrictions.", ) - _add_remote_args(h) m = sub.add_parser("manifest", help="Render Ansible roles from a harvest") _add_config_args(m) @@ -443,6 +450,7 @@ def main() -> None: "single-shot", help="Harvest state, then manifest Ansible code, in one shot" ) _add_config_args(s) + _add_remote_args(s) s.add_argument( "--harvest", help=( @@ -500,7 +508,6 @@ def main() -> None: ), ) _add_common_manifest_args(s) - _add_remote_args(s) d = sub.add_parser("diff", help="Compare two harvests and report differences") _add_config_args(d) @@ -602,14 +609,12 @@ def main() -> None: ) args = ap.parse_args(argv) - remote_host: Optional[str] = getattr(args, "remote_host", None) - try: if args.cmd == "harvest": sops_fps = getattr(args, "sops", None) - if remote_host: + if args.remote_host: if sops_fps: - out_file = _resolve_sops_out_file(args.out, hint=remote_host) + out_file = _resolve_sops_out_file(args.out, hint=args.remote_host) with tempfile.TemporaryDirectory(prefix="enroll-harvest-") as td: tmp_bundle = Path(td) / "bundle" tmp_bundle.mkdir(parents=True, exist_ok=True) @@ -619,7 +624,7 @@ def main() -> None: pass remote_harvest( local_out_dir=tmp_bundle, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), @@ -635,11 +640,11 @@ def main() -> None: out_dir = ( Path(args.out) if args.out - else new_harvest_cache_dir(hint=remote_host).dir + else new_harvest_cache_dir(hint=args.remote_host).dir ) state = remote_harvest( local_out_dir=out_dir, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), @@ -669,12 +674,16 @@ def main() -> None: ) print(str(out_file)) else: - if not args.out: - raise SystemExit( - "error: --out is required unless --remote-host is set" + if args.out: + out_dir = args.out + else: + out_dir = ( + 
Path(args.out) + if args.out + else new_harvest_cache_dir(hint=args.remote_host).dir ) path = harvest( - args.out, + out_dir, dangerous=bool(args.dangerous), include_paths=list(getattr(args, "include_path", []) or []), exclude_paths=list(getattr(args, "exclude_path", []) or []), @@ -747,9 +756,11 @@ def main() -> None: raise SystemExit(2) elif args.cmd == "single-shot": sops_fps = getattr(args, "sops", None) - if remote_host: + if args.remote_host: if sops_fps: - out_file = _resolve_sops_out_file(args.harvest, hint=remote_host) + out_file = _resolve_sops_out_file( + args.harvest, hint=args.remote_host + ) with tempfile.TemporaryDirectory(prefix="enroll-harvest-") as td: tmp_bundle = Path(td) / "bundle" tmp_bundle.mkdir(parents=True, exist_ok=True) @@ -759,7 +770,7 @@ def main() -> None: pass remote_harvest( local_out_dir=tmp_bundle, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), @@ -784,11 +795,11 @@ def main() -> None: harvest_dir = ( Path(args.harvest) if args.harvest - else new_harvest_cache_dir(hint=remote_host).dir + else new_harvest_cache_dir(hint=args.remote_host).dir ) remote_harvest( local_out_dir=harvest_dir, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), diff --git a/enroll/version.py b/enroll/version.py new file mode 100644 index 0000000..bbe78b6 --- /dev/null +++ b/enroll/version.py @@ -0,0 +1,32 @@ +from __future__ import annotations + + +def get_enroll_version() -> str: + """ + Best-effort version lookup that works when installed via: + - poetry/pip/wheel + - deb/rpm system packages + Falls back to "0+unknown" when running from an unpacked source tree. 
+ """ + try: + from importlib.metadata import ( + packages_distributions, + version, + ) + except Exception: + # Very old Python or unusual environment + return "unknown" + + # Map import package -> dist(s) + dist_names = [] + try: + dist_names = (packages_distributions() or {}).get("enroll", []) or [] + except Exception: + dist_names = [] + + # Try mapped dists first, then a reasonable default + for dist in [*dist_names, "enroll"]: + try: + return version(dist) + except Exception: + return "unknown" From 984b0fa81b5b224951816c4dc46a74734b950d07 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 14:59:34 +1100 Subject: [PATCH 25/85] Add ability to enroll RH-style systems (DNF5/DNF/RPM) --- CHANGELOG.md | 1 + README.md | 18 +-- enroll/debian.py | 26 ---- enroll/fsutil.py | 40 ++++++ enroll/harvest.py | 272 ++++++++++++++++++++++++++--------------- enroll/ignore.py | 1 + enroll/manifest.py | 229 ++++++++++++++++++++++++++++++---- enroll/platform.py | 261 +++++++++++++++++++++++++++++++++++++++ enroll/rpm.py | 266 ++++++++++++++++++++++++++++++++++++++++ tests/test_debian.py | 56 --------- tests/test_fsutil.py | 25 ++++ tests/test_harvest.py | 142 +++++++++++++++------ tests/test_manifest.py | 93 ++++++++++++++ tests/test_platform.py | 93 ++++++++++++++ tests/test_rpm.py | 131 ++++++++++++++++++++ 15 files changed, 1400 insertions(+), 254 deletions(-) create mode 100644 enroll/fsutil.py create mode 100644 enroll/platform.py create mode 100644 enroll/rpm.py create mode 100644 tests/test_fsutil.py create mode 100644 tests/test_platform.py create mode 100644 tests/test_rpm.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e07f57b..f92e0b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # 0.2.0 * Add version CLI arg + * Add ability to enroll RH-style systems (DNF5/DNF/RPM) # 0.1.7 diff --git a/README.md b/README.md index c6b8123..d075951 100644 --- a/README.md +++ b/README.md @@ -4,15 +4,15 @@ Enroll logo -**enroll** inspects a Linux machine 
(currently Debian-only) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. +**enroll** inspects a Linux machine (Debian-like or RedHat-like) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. - Detects packages that have been installed. -- Detects Debian package ownership of `/etc` files using dpkg’s local database. -- Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available). +- Detects package ownership of `/etc` files where possible +- Captures config that has **changed from packaged defaults** where possible (e.g dpkg conffile hashes + package md5sums when available). - Also captures **service-relevant custom/unowned files** under `/etc//...` (e.g. drop-in config includes). - Defensively excludes likely secrets (path denylist + content sniff + size caps). - Captures non-system users and their SSH public keys. -- Captures miscellaneous `/etc` files it can’t attribute to a package and installs them in an `etc_custom` role. +- Captures miscellaneous `/etc` files it can't attribute to a package and installs them in an `etc_custom` role. - Ditto for /usr/local/bin (for non-binary files) and /usr/local/etc - Avoids trying to start systemd services that were detected as inactive during harvest. @@ -41,8 +41,8 @@ Use when enrolling **one server** (or generating a “golden” role set you int **Characteristics** - Roles are more self-contained. -- Raw config files live in the role’s `files/`. -- Template variables live in the role’s `defaults/main.yml`. +- Raw config files live in the role's `files/`. +- Template variables live in the role's `defaults/main.yml`. ### Multi-site mode (`--fqdn`) Use when enrolling **several existing servers** quickly, especially if they differ. @@ -68,13 +68,13 @@ Harvest state about a host and write a harvest bundle. 
- “Manual” packages - Changed-from-default config (plus related custom/unowned files under service dirs) - Non-system users + SSH public keys -- Misc `/etc` that can’t be attributed to a package (`etc_custom` role) +- Misc `/etc` that can't be attributed to a package (`etc_custom` role) - Optional user-specified extra files/dirs via `--include-path` (emitted as an `extra_paths` role at manifest time) **Common flags** - Remote harvesting: - `--remote-host`, `--remote-user`, `--remote-port` - - `--no-sudo` (if you don’t want/need sudo) + - `--no-sudo` (if you don't want/need sudo) - Sensitive-data behaviour: - default: tries to avoid likely secrets - `--dangerous`: disables secret-safety checks (see “Sensitive data” below) @@ -233,7 +233,7 @@ poetry run enroll --help ## Found a bug / have a suggestion? -My Forgejo doesn’t currently support federation, so I haven’t opened registration/login for issues. +My Forgejo doesn't currently support federation, so I haven't opened registration/login for issues. 
Instead, email me (see `pyproject.toml`) or contact me on the Fediverse: diff --git a/enroll/debian.py b/enroll/debian.py index 0ddc1f3..7e1ee2d 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -1,7 +1,6 @@ from __future__ import annotations import glob -import hashlib import os import subprocess # nosec from typing import Dict, List, Optional, Set, Tuple @@ -180,28 +179,3 @@ def read_pkg_md5sums(pkg: str) -> Dict[str, str]: md5, rel = line.split(None, 1) m[rel.strip()] = md5.strip() return m - - -def file_md5(path: str) -> str: - h = hashlib.md5() # nosec - with open(path, "rb") as f: - for chunk in iter(lambda: f.read(1024 * 1024), b""): - h.update(chunk) - return h.hexdigest() - - -def stat_triplet(path: str) -> Tuple[str, str, str]: - st = os.stat(path, follow_symlinks=True) - mode = oct(st.st_mode & 0o777)[2:].zfill(4) - - import pwd, grp - - try: - owner = pwd.getpwuid(st.st_uid).pw_name - except KeyError: - owner = str(st.st_uid) - try: - group = grp.getgrgid(st.st_gid).gr_name - except KeyError: - group = str(st.st_gid) - return owner, group, mode diff --git a/enroll/fsutil.py b/enroll/fsutil.py new file mode 100644 index 0000000..3d18df6 --- /dev/null +++ b/enroll/fsutil.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import hashlib +import os +from typing import Tuple + + +def file_md5(path: str) -> str: + """Return hex MD5 of a file. + + Used for Debian dpkg baseline comparisons. + """ + h = hashlib.md5() # nosec + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(1024 * 1024), b""): + h.update(chunk) + return h.hexdigest() + + +def stat_triplet(path: str) -> Tuple[str, str, str]: + """Return (owner, group, mode) for a path. + + owner/group are usernames/group names when resolvable, otherwise numeric ids. + mode is a zero-padded octal string (e.g. "0644"). 
+ """ + st = os.stat(path, follow_symlinks=True) + mode = oct(st.st_mode & 0o777)[2:].zfill(4) + + import grp + import pwd + + try: + owner = pwd.getpwuid(st.st_uid).pw_name + except KeyError: + owner = str(st.st_uid) + try: + group = grp.getgrgid(st.st_gid).gr_name + except KeyError: + group = str(st.st_gid) + return owner, group, mode diff --git a/enroll/harvest.py b/enroll/harvest.py index d678b89..bb706b1 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -15,18 +15,12 @@ from .systemd import ( get_timer_info, UnitQueryError, ) -from .debian import ( - build_dpkg_etc_index, - dpkg_owner, - file_md5, - list_manual_packages, - parse_status_conffiles, - read_pkg_md5sums, - stat_triplet, -) +from .fsutil import stat_triplet +from .platform import detect_platform, get_backend from .ignore import IgnorePolicy from .pathfilter import PathFilter, expand_includes from .accounts import collect_non_system_users +from .version import get_enroll_version @dataclass @@ -85,6 +79,14 @@ class AptConfigSnapshot: notes: List[str] +@dataclass +class DnfConfigSnapshot: + role_name: str + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + @dataclass class EtcCustomSnapshot: role_name: str @@ -158,6 +160,13 @@ SHARED_ETC_TOPDIRS = { "sudoers.d", "sysctl.d", "systemd", + # RPM-family shared trees + "dnf", + "yum", + "yum.repos.d", + "sysconfig", + "pki", + "firewalld", } @@ -314,17 +323,23 @@ def _add_pkgs_from_etc_topdirs( pkgs.add(p) -def _maybe_add_specific_paths(hints: Set[str]) -> List[str]: - paths: List[str] = [] - for h in hints: - paths.extend( - [ - f"/etc/default/{h}", - f"/etc/init.d/{h}", - f"/etc/sysctl.d/{h}.conf", - ] - ) - return paths +def _maybe_add_specific_paths(hints: Set[str], backend) -> List[str]: + # Delegate to backend-specific conventions (e.g. /etc/default on Debian, + # /etc/sysconfig on Fedora/RHEL). Always include sysctl.d. 
+ try: + return backend.specific_paths_for_hints(hints) + except Exception: + # Best-effort fallback (Debian-ish). + paths: List[str] = [] + for h in hints: + paths.extend( + [ + f"/etc/default/{h}", + f"/etc/init.d/{h}", + f"/etc/sysctl.d/{h}.conf", + ] + ) + return paths def _scan_unowned_under_roots( @@ -408,6 +423,7 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ("/etc/anacron/*", "system_cron"), ("/var/spool/cron/crontabs/*", "system_cron"), ("/var/spool/crontabs/*", "system_cron"), + ("/var/spool/cron/*", "system_cron"), # network ("/etc/netplan/*", "system_network"), ("/etc/systemd/network/*", "system_network"), @@ -415,6 +431,9 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ("/etc/network/interfaces.d/*", "system_network"), ("/etc/resolvconf.conf", "system_network"), ("/etc/resolvconf/resolv.conf.d/*", "system_network"), + ("/etc/NetworkManager/system-connections/*", "system_network"), + ("/etc/sysconfig/network*", "system_network"), + ("/etc/sysconfig/network-scripts/*", "system_network"), # firewall ("/etc/nftables.conf", "system_firewall"), ("/etc/nftables.d/*", "system_firewall"), @@ -422,6 +441,10 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ("/etc/iptables/rules.v6", "system_firewall"), ("/etc/ufw/*", "system_firewall"), ("/etc/default/ufw", "system_firewall"), + ("/etc/firewalld/*", "system_firewall"), + ("/etc/firewalld/zones/*", "system_firewall"), + # SELinux + ("/etc/selinux/config", "system_security"), # other ("/etc/rc.local", "system_rc"), ] @@ -553,6 +576,51 @@ def _iter_apt_capture_paths() -> List[tuple[str, str]]: return uniq +def _iter_dnf_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for DNF/YUM configuration on RPM systems. 
+ + Captures: + - /etc/dnf/* (dnf.conf, vars, plugins, modules, automatic) + - /etc/yum.conf (legacy) + - /etc/yum.repos.d/*.repo + - /etc/pki/rpm-gpg/* (GPG key files) + """ + reasons: Dict[str, str] = {} + + for root, tag in ( + ("/etc/dnf", "dnf_config"), + ("/etc/yum", "yum_config"), + ): + if os.path.isdir(root): + for dirpath, _, filenames in os.walk(root): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, tag) + + # Legacy yum.conf. + if os.path.isfile("/etc/yum.conf") and not os.path.islink("/etc/yum.conf"): + reasons.setdefault("/etc/yum.conf", "yum_conf") + + # Repositories. + if os.path.isdir("/etc/yum.repos.d"): + for p in _iter_matching_files("/etc/yum.repos.d/*.repo"): + reasons[p] = "yum_repo" + + # RPM GPG keys. + if os.path.isdir("/etc/pki/rpm-gpg"): + for dirpath, _, filenames in os.walk("/etc/pki/rpm-gpg"): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, "rpm_gpg_key") + + # Stable ordering. 
+ return [(p, reasons[p]) for p in sorted(reasons.keys())] + + def _iter_system_capture_paths() -> List[tuple[str, str]]: """Return (path, reason) pairs for essential system config/state (non-APT).""" out: List[tuple[str, str]] = [] @@ -600,8 +668,12 @@ def harvest( flush=True, ) - owned_etc, etc_owner_map, topdir_to_pkgs, pkg_to_etc_paths = build_dpkg_etc_index() - conffiles_by_pkg = parse_status_conffiles() + platform = detect_platform() + backend = get_backend(platform) + + owned_etc, etc_owner_map, topdir_to_pkgs, pkg_to_etc_paths = ( + backend.build_etc_index() + ) # ------------------------- # Service roles @@ -645,12 +717,12 @@ def harvest( candidates: Dict[str, str] = {} if ui.fragment_path: - p = dpkg_owner(ui.fragment_path) + p = backend.owner_of_path(ui.fragment_path) if p: pkgs.add(p) for exe in ui.exec_paths: - p = dpkg_owner(exe) + p = backend.owner_of_path(exe) if p: pkgs.add(p) @@ -675,7 +747,7 @@ def harvest( # logrotate.d entries) can still be attributed back to this service. service_role_aliases[role] = set(hints) | set(pkgs) | {role} - for sp in _maybe_add_specific_paths(hints): + for sp in _maybe_add_specific_paths(hints, backend): if not os.path.exists(sp): continue if sp in etc_owner_map: @@ -684,31 +756,13 @@ def harvest( candidates.setdefault(sp, "custom_specific_path") for pkg in sorted(pkgs): - conff = conffiles_by_pkg.get(pkg, {}) - md5sums = read_pkg_md5sums(pkg) - for path in pkg_to_etc_paths.get(pkg, []): + etc_paths = pkg_to_etc_paths.get(pkg, []) + for path, reason in backend.modified_paths(pkg, etc_paths).items(): if not os.path.isfile(path) or os.path.islink(path): continue - if path.startswith("/etc/apt/"): + if backend.is_pkg_config_path(path): continue - if path in conff: - # Only capture conffiles when they differ from the package default. 
- try: - current = file_md5(path) - except OSError: - continue - if current != conff[path]: - candidates.setdefault(path, "modified_conffile") - continue - rel = path.lstrip("/") - baseline = md5sums.get(rel) - if baseline: - try: - current = file_md5(path) - except OSError: - continue - if current != baseline: - candidates.setdefault(path, "modified_packaged_file") + candidates.setdefault(path, reason) # Capture custom/unowned files living under /etc/ for this service. # @@ -847,18 +901,18 @@ def harvest( # (useful when a timer triggers a service that isn't enabled). pkgs: Set[str] = set() if ti.fragment_path: - p = dpkg_owner(ti.fragment_path) + p = backend.owner_of_path(ti.fragment_path) if p: pkgs.add(p) if ti.trigger_unit and ti.trigger_unit.endswith(".service"): try: ui = get_unit_info(ti.trigger_unit) if ui.fragment_path: - p = dpkg_owner(ui.fragment_path) + p = backend.owner_of_path(ui.fragment_path) if p: pkgs.add(p) for exe in ui.exec_paths: - p = dpkg_owner(exe) + p = backend.owner_of_path(exe) if p: pkgs.add(p) except Exception: # nosec @@ -870,7 +924,7 @@ def harvest( # ------------------------- # Manually installed package roles # ------------------------- - manual_pkgs = list_manual_packages() + manual_pkgs = backend.list_manual_packages() # Avoid duplicate roles: if a manual package is already managed by any service role, skip its pkg_ role. 
covered_by_services: Set[str] = set() for s in service_snaps: @@ -893,41 +947,26 @@ def harvest( for tpath in timer_extra_by_pkg.get(pkg, []): candidates.setdefault(tpath, "related_timer") - conff = conffiles_by_pkg.get(pkg, {}) - md5sums = read_pkg_md5sums(pkg) - - for path in pkg_to_etc_paths.get(pkg, []): + etc_paths = pkg_to_etc_paths.get(pkg, []) + for path, reason in backend.modified_paths(pkg, etc_paths).items(): if not os.path.isfile(path) or os.path.islink(path): continue - if path.startswith("/etc/apt/"): + if backend.is_pkg_config_path(path): continue - if path in conff: - try: - current = file_md5(path) - except OSError: - continue - if current != conff[path]: - candidates.setdefault(path, "modified_conffile") - continue - rel = path.lstrip("/") - baseline = md5sums.get(rel) - if baseline: - try: - current = file_md5(path) - except OSError: - continue - if current != baseline: - candidates.setdefault(path, "modified_packaged_file") + candidates.setdefault(path, reason) topdirs = _topdirs_for_package(pkg, pkg_to_etc_paths) roots: List[str] = [] + # Collect candidate directories plus backend-specific common files. for td in sorted(topdirs): if td in SHARED_ETC_TOPDIRS: continue + if backend.is_pkg_config_path(f"/etc/{td}/") or backend.is_pkg_config_path( + f"/etc/{td}" + ): + continue roots.extend([f"/etc/{td}", f"/etc/{td}.d"]) - roots.extend([f"/etc/default/{td}"]) - roots.extend([f"/etc/init.d/{td}"]) - roots.extend([f"/etc/sysctl.d/{td}.conf"]) + roots.extend(_maybe_add_specific_paths(set(topdirs), backend)) # Capture any custom/unowned files under /etc/ for this # manually-installed package. 
This may include runtime-generated @@ -1031,26 +1070,48 @@ def harvest( ) # ------------------------- - # apt_config role (APT configuration and keyrings) + # Package manager config role + # - Debian: apt_config + # - Fedora/RHEL-like: dnf_config # ------------------------- apt_notes: List[str] = [] apt_excluded: List[ExcludedFile] = [] apt_managed: List[ManagedFile] = [] - apt_role_name = "apt_config" - apt_role_seen = seen_by_role.setdefault(apt_role_name, set()) + dnf_notes: List[str] = [] + dnf_excluded: List[ExcludedFile] = [] + dnf_managed: List[ManagedFile] = [] - for path, reason in _iter_apt_capture_paths(): - _capture_file( - bundle_dir=bundle_dir, - role_name=apt_role_name, - abs_path=path, - reason=reason, - policy=policy, - path_filter=path_filter, - managed_out=apt_managed, - excluded_out=apt_excluded, - seen_role=apt_role_seen, - ) + apt_role_name = "apt_config" + dnf_role_name = "dnf_config" + + if backend.name == "dpkg": + apt_role_seen = seen_by_role.setdefault(apt_role_name, set()) + for path, reason in _iter_apt_capture_paths(): + _capture_file( + bundle_dir=bundle_dir, + role_name=apt_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=apt_managed, + excluded_out=apt_excluded, + seen_role=apt_role_seen, + ) + elif backend.name == "rpm": + dnf_role_seen = seen_by_role.setdefault(dnf_role_name, set()) + for path, reason in _iter_dnf_capture_paths(): + _capture_file( + bundle_dir=bundle_dir, + role_name=dnf_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=dnf_managed, + excluded_out=dnf_excluded, + seen_role=dnf_role_seen, + ) apt_config_snapshot = AptConfigSnapshot( role_name=apt_role_name, @@ -1058,6 +1119,12 @@ def harvest( excluded=apt_excluded, notes=apt_notes, ) + dnf_config_snapshot = DnfConfigSnapshot( + role_name=dnf_role_name, + managed_files=dnf_managed, + excluded=dnf_excluded, + notes=dnf_notes, + ) # ------------------------- # 
etc_custom role (unowned /etc files not already attributed elsewhere) @@ -1079,6 +1146,8 @@ def harvest( already.add(mf.path) for mf in apt_managed: already.add(mf.path) + for mf in dnf_managed: + already.add(mf.path) # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} @@ -1093,7 +1162,7 @@ def harvest( for pkg in s.packages: pkg_to_service_roles.setdefault(pkg, []).append(s.role_name) - # Alias -> role mapping used as a fallback when dpkg ownership is missing. + # Alias -> role mapping used as a fallback when package ownership is missing. # Prefer service roles over package roles when both would match. alias_ranked: Dict[str, tuple[int, str]] = {} @@ -1124,8 +1193,8 @@ def harvest( per service. Resolution order: - 1) dpkg owner -> service role (if any service references the package) - 2) dpkg owner -> package role (manual package role exists) + 1) package owner -> service role (if any service references the package) + 2) package owner -> package role (manual package role exists) 3) basename/stem alias match -> preferred role """ if path.startswith("/etc/logrotate.d/"): @@ -1147,7 +1216,7 @@ def harvest( seen.add(c) uniq.append(c) - pkg = dpkg_owner(path) + pkg = backend.owner_of_path(path) if pkg: svc_roles = sorted(set(pkg_to_service_roles.get(pkg, []))) if svc_roles: @@ -1226,7 +1295,7 @@ def harvest( for dirpath, _, filenames in os.walk("/etc"): for fn in filenames: path = os.path.join(dirpath, fn) - if path.startswith("/etc/apt/"): + if backend.is_pkg_config_path(path): continue if path in already: continue @@ -1413,13 +1482,22 @@ def harvest( ) state = { - "host": {"hostname": os.uname().nodename, "os": "debian"}, + "enroll": { + "version": get_enroll_version(), + }, + "host": { + "hostname": os.uname().nodename, + "os": platform.os_family, + "pkg_backend": backend.name, + "os_release": platform.os_release, + }, "users": asdict(users_snapshot), 
"services": [asdict(s) for s in service_snaps], "manual_packages": manual_pkgs, "manual_packages_skipped": manual_pkgs_skipped, "package_roles": [asdict(p) for p in pkg_snaps], "apt_config": asdict(apt_config_snapshot), + "dnf_config": asdict(dnf_config_snapshot), "etc_custom": asdict(etc_custom_snapshot), "usr_local_custom": asdict(usr_local_custom_snapshot), "extra_paths": asdict(extra_paths_snapshot), diff --git a/enroll/ignore.py b/enroll/ignore.py index ab2cb96..904997f 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -43,6 +43,7 @@ DEFAULT_ALLOW_BINARY_GLOBS = [ "/usr/share/keyrings/*.gpg", "/usr/share/keyrings/*.pgp", "/usr/share/keyrings/*.asc", + "/etc/pki/rpm-gpg/*", ] SENSITIVE_CONTENT_PATTERNS = [ diff --git a/enroll/manifest.py b/enroll/manifest.py index dbc2353..923040f 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -166,6 +166,7 @@ def _write_playbook_all(path: str, roles: List[str]) -> None: pb_lines = [ "---", "- name: Apply all roles on all hosts", + " gather_facts: true", " hosts: all", " become: true", " roles:", @@ -181,6 +182,7 @@ def _write_playbook_host(path: str, fqdn: str, roles: List[str]) -> None: "---", f"- name: Apply all roles on {fqdn}", f" hosts: {fqdn}", + " gather_facts: true", " become: true", " roles:", ] @@ -468,6 +470,51 @@ def _render_generic_files_tasks( """ +def _render_install_packages_tasks(role: str, var_prefix: str) -> str: + """Render cross-distro package installation tasks. + + We generate conditional tasks for apt/dnf/yum, falling back to the + generic `package` module. This keeps generated roles usable on both + Debian-like and RPM-like systems. 
+ """ + return f"""# Generated by enroll + +- name: Install packages for {role} (APT) + ansible.builtin.apt: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + update_cache: true + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') == 'apt' + +- name: Install packages for {role} (DNF5) + ansible.builtin.dnf5: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') == 'dnf5' + +- name: Install packages for {role} (DNF/YUM) + ansible.builtin.dnf: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') in ['dnf', 'yum'] + +- name: Install packages for {role} (generic fallback) + ansible.builtin.package: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') not in ['apt', 'dnf', 'dnf5', 'yum'] + +""" + + def _prepare_bundle_dir( bundle: str, *, @@ -629,6 +676,7 @@ def _manifest_from_bundle_dir( package_roles: List[Dict[str, Any]] = state.get("package_roles", []) users_snapshot: Dict[str, Any] = state.get("users", {}) apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) + dnf_config_snapshot: Dict[str, Any] = state.get("dnf_config", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) @@ -664,6 +712,7 @@ def _manifest_from_bundle_dir( manifested_users_roles: List[str] = [] manifested_apt_config_roles: List[str] = [] + manifested_dnf_config_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] manifested_usr_local_custom_roles: List[str] = [] 
manifested_extra_paths_roles: List[str] = [] @@ -1041,6 +1090,157 @@ APT configuration harvested from the system (sources, pinning, and keyrings). manifested_apt_config_roles.append(role) + # ------------------------- + # dnf_config role (DNF/YUM repos, config, and RPM GPG keys) + # ------------------------- + if dnf_config_snapshot and dnf_config_snapshot.get("managed_files"): + role = dnf_config_snapshot.get("role_name", "dnf_config") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = dnf_config_snapshot.get("managed_files", []) + excluded = dnf_config_snapshot.get("excluded", []) + notes = dnf_config_snapshot.get("notes", []) + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = "---\n" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\ndependencies: 
[]\n") + + # README: summarise repos and GPG key material + repo_paths: List[str] = [] + key_paths: List[str] = [] + repo_hosts: Set[str] = set() + + url_re = re.compile(r"(?:https?|ftp)://([^/\s]+)", re.IGNORECASE) + file_url_re = re.compile(r"file://(/[^\s]+)") + + for mf in managed_files: + p = str(mf.get("path") or "") + src_rel = str(mf.get("src_rel") or "") + if not p or not src_rel: + continue + + if p.startswith("/etc/yum.repos.d/") and p.endswith(".repo"): + repo_paths.append(p) + art_path = os.path.join(bundle_dir, "artifacts", role, src_rel) + try: + with open(art_path, "r", encoding="utf-8", errors="replace") as rf: + for line in rf: + s = line.strip() + if not s or s.startswith("#") or s.startswith(";"): + continue + # Collect hostnames from URLs (baseurl, mirrorlist, metalink, gpgkey...) + for m in url_re.finditer(s): + repo_hosts.add(m.group(1)) + # Collect local gpgkey file paths referenced as file:///... + for m in file_url_re.finditer(s): + key_paths.append(m.group(1)) + except OSError: + pass # nosec + + if p.startswith("/etc/pki/rpm-gpg/"): + key_paths.append(p) + + repo_paths = sorted(set(repo_paths)) + key_paths = sorted(set(key_paths)) + repos = sorted(repo_hosts) + + readme = ( + """# dnf_config + +DNF/YUM configuration harvested from the system (repos, config files, and RPM GPG keys). 
+ +## Repository hosts +""" + + ("\n".join([f"- {h}" for h in repos]) or "- (none)") + + """\n +## Repo files +""" + + ("\n".join([f"- {p}" for p in repo_paths]) or "- (none)") + + """\n +## GPG keys +""" + + ("\n".join([f"- {p}" for p in key_paths]) or "- (none)") + + """\n +## Managed files +""" + + ( + "\n".join( + [f"- {mf.get('path')} ({mf.get('reason')})" for mf in managed_files] + ) + or "- (none)" + ) + + """\n +## Excluded +""" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + """\n +## Notes +""" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + """\n""" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_dnf_config_roles.append(role) + # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -1457,19 +1657,7 @@ User-requested extra file harvesting. f.write(handlers) task_parts: List[str] = [] - task_parts.append( - f"""--- -# Generated by enroll - -- name: Install packages for {role} - ansible.builtin.apt: - name: "{{{{ {var_prefix}_packages | default([]) }}}}" - state: present - update_cache: true - when: ({var_prefix}_packages | default([])) | length > 0 - -""" - ) + task_parts.append("---\n" + _render_install_packages_tasks(role, var_prefix)) task_parts.append( _render_generic_files_tasks(var_prefix, include_restart_notify=True) @@ -1616,19 +1804,7 @@ Generated from `{unit}`. 
f.write(handlers) task_parts: List[str] = [] - task_parts.append( - f"""--- -# Generated by enroll - -- name: Install packages for {role} - ansible.builtin.apt: - name: "{{{{ {var_prefix}_packages | default([]) }}}}" - state: present - update_cache: true - when: ({var_prefix}_packages | default([])) | length > 0 - -""" - ) + task_parts.append("---\n" + _render_install_packages_tasks(role, var_prefix)) task_parts.append( _render_generic_files_tasks(var_prefix, include_restart_notify=False) ) @@ -1667,6 +1843,7 @@ Generated for package `{pkg}`. manifested_pkg_roles.append(role) all_roles = ( manifested_apt_config_roles + + manifested_dnf_config_roles + manifested_pkg_roles + manifested_service_roles + manifested_etc_custom_roles diff --git a/enroll/platform.py b/enroll/platform.py new file mode 100644 index 0000000..998b83d --- /dev/null +++ b/enroll/platform.py @@ -0,0 +1,261 @@ +from __future__ import annotations + +import shutil +from dataclasses import dataclass +from typing import Dict, List, Optional, Set, Tuple + +from .fsutil import file_md5 + + +def _read_os_release(path: str = "/etc/os-release") -> Dict[str, str]: + out: Dict[str, str] = {} + try: + with open(path, "r", encoding="utf-8", errors="replace") as f: + for raw in f: + line = raw.strip() + if not line or line.startswith("#") or "=" not in line: + continue + k, v = line.split("=", 1) + k = k.strip() + v = v.strip().strip('"') + out[k] = v + except OSError: + return {} + return out + + +@dataclass +class PlatformInfo: + os_family: str # debian|redhat|unknown + pkg_backend: str # dpkg|rpm|unknown + os_release: Dict[str, str] + + +def detect_platform() -> PlatformInfo: + """Detect platform family and package backend. + + Uses /etc/os-release when available, with a conservative fallback to + checking for dpkg/rpm binaries. 
+ """ + + osr = _read_os_release() + os_id = (osr.get("ID") or "").strip().lower() + likes = (osr.get("ID_LIKE") or "").strip().lower().split() + + deb_ids = {"debian", "ubuntu", "linuxmint", "raspbian", "kali"} + rhel_ids = { + "fedora", + "rhel", + "centos", + "rocky", + "almalinux", + "ol", + "oracle", + "scientific", + } + + if os_id in deb_ids or "debian" in likes: + return PlatformInfo(os_family="debian", pkg_backend="dpkg", os_release=osr) + if os_id in rhel_ids or any( + x in likes for x in ("rhel", "fedora", "centos", "redhat") + ): + return PlatformInfo(os_family="redhat", pkg_backend="rpm", os_release=osr) + + # Fallback heuristics. + if shutil.which("dpkg"): + return PlatformInfo(os_family="debian", pkg_backend="dpkg", os_release=osr) + if shutil.which("rpm"): + return PlatformInfo(os_family="redhat", pkg_backend="rpm", os_release=osr) + return PlatformInfo(os_family="unknown", pkg_backend="unknown", os_release=osr) + + +class PackageBackend: + """Backend abstraction for package ownership, config detection, and manual package lists.""" + + name: str + pkg_config_prefixes: Tuple[str, ...] 
+ + def owner_of_path(self, path: str) -> Optional[str]: # pragma: no cover + raise NotImplementedError + + def list_manual_packages(self) -> List[str]: # pragma: no cover + raise NotImplementedError + + def build_etc_index( + self, + ) -> Tuple[ + Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]] + ]: # pragma: no cover + raise NotImplementedError + + def specific_paths_for_hints(self, hints: Set[str]) -> List[str]: + return [] + + def is_pkg_config_path(self, path: str) -> bool: + for pfx in self.pkg_config_prefixes: + if path == pfx or path.startswith(pfx): + return True + return False + + def modified_paths(self, pkg: str, etc_paths: List[str]) -> Dict[str, str]: + """Return a mapping of modified file paths -> reason label.""" + return {} + + +class DpkgBackend(PackageBackend): + name = "dpkg" + pkg_config_prefixes = ("/etc/apt/",) + + def __init__(self) -> None: + from .debian import parse_status_conffiles + + self._conffiles_by_pkg = parse_status_conffiles() + + def owner_of_path(self, path: str) -> Optional[str]: + from .debian import dpkg_owner + + return dpkg_owner(path) + + def list_manual_packages(self) -> List[str]: + from .debian import list_manual_packages + + return list_manual_packages() + + def build_etc_index(self): + from .debian import build_dpkg_etc_index + + return build_dpkg_etc_index() + + def specific_paths_for_hints(self, hints: Set[str]) -> List[str]: + paths: List[str] = [] + for h in hints: + paths.extend( + [ + f"/etc/default/{h}", + f"/etc/init.d/{h}", + f"/etc/sysctl.d/{h}.conf", + ] + ) + return paths + + def modified_paths(self, pkg: str, etc_paths: List[str]) -> Dict[str, str]: + from .debian import read_pkg_md5sums + + out: Dict[str, str] = {} + conff = self._conffiles_by_pkg.get(pkg, {}) + md5sums = read_pkg_md5sums(pkg) + + for path in etc_paths: + if not path.startswith("/etc/"): + continue + if self.is_pkg_config_path(path): + continue + if path in conff: + try: + current = file_md5(path) + except OSError: + 
continue + if current != conff[path]: + out[path] = "modified_conffile" + continue + + rel = path.lstrip("/") + baseline = md5sums.get(rel) + if baseline: + try: + current = file_md5(path) + except OSError: + continue + if current != baseline: + out[path] = "modified_packaged_file" + return out + + +class RpmBackend(PackageBackend): + name = "rpm" + pkg_config_prefixes = ( + "/etc/dnf/", + "/etc/yum/", + "/etc/yum.repos.d/", + "/etc/yum.conf", + ) + + def __init__(self) -> None: + self._modified_cache: Dict[str, Set[str]] = {} + self._config_cache: Dict[str, Set[str]] = {} + + def owner_of_path(self, path: str) -> Optional[str]: + from .rpm import rpm_owner + + return rpm_owner(path) + + def list_manual_packages(self) -> List[str]: + from .rpm import list_manual_packages + + return list_manual_packages() + + def build_etc_index(self): + from .rpm import build_rpm_etc_index + + return build_rpm_etc_index() + + def specific_paths_for_hints(self, hints: Set[str]) -> List[str]: + paths: List[str] = [] + for h in hints: + paths.extend( + [ + f"/etc/sysconfig/{h}", + f"/etc/sysconfig/{h}.conf", + f"/etc/sysctl.d/{h}.conf", + ] + ) + return paths + + def _config_files(self, pkg: str) -> Set[str]: + if pkg in self._config_cache: + return self._config_cache[pkg] + from .rpm import rpm_config_files + + s = rpm_config_files(pkg) + self._config_cache[pkg] = s + return s + + def _modified_files(self, pkg: str) -> Set[str]: + if pkg in self._modified_cache: + return self._modified_cache[pkg] + from .rpm import rpm_modified_files + + s = rpm_modified_files(pkg) + self._modified_cache[pkg] = s + return s + + def modified_paths(self, pkg: str, etc_paths: List[str]) -> Dict[str, str]: + out: Dict[str, str] = {} + modified = self._modified_files(pkg) + if not modified: + return out + config = self._config_files(pkg) + + for path in etc_paths: + if not path.startswith("/etc/"): + continue + if self.is_pkg_config_path(path): + continue + if path not in modified: + continue + out[path] 
= ( + "modified_conffile" if path in config else "modified_packaged_file" + ) + return out + + +def get_backend(info: Optional[PlatformInfo] = None) -> PackageBackend: + info = info or detect_platform() + if info.pkg_backend == "dpkg": + return DpkgBackend() + if info.pkg_backend == "rpm": + return RpmBackend() + # Unknown: be conservative and use an rpm backend if rpm exists, otherwise dpkg. + if shutil.which("rpm"): + return RpmBackend() + return DpkgBackend() diff --git a/enroll/rpm.py b/enroll/rpm.py new file mode 100644 index 0000000..947617c --- /dev/null +++ b/enroll/rpm.py @@ -0,0 +1,266 @@ +from __future__ import annotations + +import os +import re +import shutil +import subprocess # nosec +from typing import Dict, List, Optional, Set, Tuple + + +def _run( + cmd: list[str], *, allow_fail: bool = False, merge_err: bool = False +) -> tuple[int, str]: + """Run a command and return (rc, stdout). + + If merge_err is True, stderr is merged into stdout to preserve ordering. + """ + p = subprocess.run( + cmd, + check=False, + text=True, + stdout=subprocess.PIPE, + stderr=(subprocess.STDOUT if merge_err else subprocess.PIPE), + ) # nosec + out = p.stdout or "" + if (not allow_fail) and p.returncode != 0: + err = "" if merge_err else (p.stderr or "") + raise RuntimeError(f"Command failed: {cmd}\n{err}{out}") + return p.returncode, out + + +def rpm_owner(path: str) -> Optional[str]: + """Return owning package name for a path, or None if unowned.""" + if not path: + return None + rc, out = _run( + ["rpm", "-qf", "--qf", "%{NAME}\n", path], allow_fail=True, merge_err=True + ) + if rc != 0: + return None + for line in out.splitlines(): + line = line.strip() + if not line: + continue + if "is not owned" in line: + return None + # With --qf we expect just the package name. + if re.match(r"^[A-Za-z0-9_.+:-]+$", line): + # Strip any accidental epoch/name-version-release output. 
+ return line.split(":", 1)[-1].strip() if line else None + return None + + +_ARCH_SUFFIXES = { + "noarch", + "x86_64", + "i686", + "aarch64", + "armv7hl", + "ppc64le", + "s390x", + "riscv64", +} + + +def _strip_arch(token: str) -> str: + """Strip a trailing .ARCH from a yum/dnf package token.""" + t = token.strip() + if "." not in t: + return t + head, tail = t.rsplit(".", 1) + if tail in _ARCH_SUFFIXES: + return head + return t + + +def list_manual_packages() -> List[str]: + """Return packages considered "user-installed" on RPM-based systems. + + Best-effort: + 1) dnf repoquery --userinstalled + 2) dnf history userinstalled + 3) yum history userinstalled + + If none are available, returns an empty list. + """ + + def _dedupe(pkgs: List[str]) -> List[str]: + return sorted({p for p in (pkgs or []) if p}) + + if shutil.which("dnf"): + # Prefer a machine-friendly output. + for cmd in ( + ["dnf", "-q", "repoquery", "--userinstalled", "--qf", "%{name}\n"], + ["dnf", "-q", "repoquery", "--userinstalled"], + ): + rc, out = _run(cmd, allow_fail=True, merge_err=True) + if rc == 0 and out.strip(): + pkgs = [] + for line in out.splitlines(): + line = line.strip() + if not line or line.startswith("Loaded plugins"): + continue + pkgs.append(_strip_arch(line.split()[0])) + if pkgs: + return _dedupe(pkgs) + + # Fallback: human-oriented output. 
+ rc, out = _run( + ["dnf", "-q", "history", "userinstalled"], allow_fail=True, merge_err=True + ) + if rc == 0 and out.strip(): + pkgs = [] + for line in out.splitlines(): + line = line.strip() + if not line or line.startswith("Installed") or line.startswith("Last"): + continue + # Often: "vim-enhanced.x86_64" + tok = line.split()[0] + pkgs.append(_strip_arch(tok)) + if pkgs: + return _dedupe(pkgs) + + if shutil.which("yum"): + rc, out = _run( + ["yum", "-q", "history", "userinstalled"], allow_fail=True, merge_err=True + ) + if rc == 0 and out.strip(): + pkgs = [] + for line in out.splitlines(): + line = line.strip() + if ( + not line + or line.startswith("Installed") + or line.startswith("Loaded") + ): + continue + tok = line.split()[0] + pkgs.append(_strip_arch(tok)) + if pkgs: + return _dedupe(pkgs) + + return [] + + +def _walk_etc_files() -> List[str]: + out: List[str] = [] + for dirpath, _, filenames in os.walk("/etc"): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + out.append(p) + return out + + +def build_rpm_etc_index() -> ( + Tuple[Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]]] +): + """Best-effort equivalent of build_dpkg_etc_index for RPM systems. + + This builds indexes by walking the live /etc tree and querying RPM ownership + for each file. + + Returns: + owned_etc_paths: set of /etc paths owned by rpm + etc_owner_map: /etc/path -> pkg + topdir_to_pkgs: "nginx" -> {"nginx", ...} based on /etc//... + pkg_to_etc_paths: pkg -> list of owned /etc paths + """ + + owned: Set[str] = set() + owner: Dict[str, str] = {} + topdir_to_pkgs: Dict[str, Set[str]] = {} + pkg_to_etc: Dict[str, List[str]] = {} + + paths = _walk_etc_files() + + # Query in chunks to avoid excessive process spawns. 
+ chunk_size = 250 + + not_owned_re = re.compile( + r"^file\s+(?P.+?)\s+is\s+not\s+owned\s+by\s+any\s+package", re.IGNORECASE + ) + + for i in range(0, len(paths), chunk_size): + chunk = paths[i : i + chunk_size] + rc, out = _run( + ["rpm", "-qf", "--qf", "%{NAME}\n", *chunk], + allow_fail=True, + merge_err=True, + ) + + lines = [ln.strip() for ln in out.splitlines() if ln.strip()] + # Heuristic: rpm prints one output line per input path. If that isn't + # true (warnings/errors), fall back to per-file queries for this chunk. + if len(lines) != len(chunk): + for p in chunk: + pkg = rpm_owner(p) + if not pkg: + continue + owned.add(p) + owner.setdefault(p, pkg) + pkg_to_etc.setdefault(pkg, []).append(p) + parts = p.split("/", 3) + if len(parts) >= 3 and parts[2]: + topdir_to_pkgs.setdefault(parts[2], set()).add(pkg) + continue + + for pth, line in zip(chunk, lines): + if not line: + continue + if not_owned_re.match(line) or "is not owned" in line: + continue + pkg = line.split()[0].strip() + if not pkg: + continue + owned.add(pth) + owner.setdefault(pth, pkg) + pkg_to_etc.setdefault(pkg, []).append(pth) + parts = pth.split("/", 3) + if len(parts) >= 3 and parts[2]: + topdir_to_pkgs.setdefault(parts[2], set()).add(pkg) + + for k, v in list(pkg_to_etc.items()): + pkg_to_etc[k] = sorted(set(v)) + + return owned, owner, topdir_to_pkgs, pkg_to_etc + + +def rpm_config_files(pkg: str) -> Set[str]: + """Return config files for a package (rpm -qc).""" + rc, out = _run(["rpm", "-qc", pkg], allow_fail=True, merge_err=True) + if rc != 0: + return set() + files: Set[str] = set() + for line in out.splitlines(): + line = line.strip() + if line.startswith("/"): + files.add(line) + return files + + +def rpm_modified_files(pkg: str) -> Set[str]: + """Return files reported as modified by rpm verification (rpm -V). + + rpm -V only prints lines for differences/missing files. 
+ """ + rc, out = _run(["rpm", "-V", pkg], allow_fail=True, merge_err=True) + # rc is non-zero when there are differences; we still want the output. + files: Set[str] = set() + for raw in out.splitlines(): + line = raw.strip() + if not line: + continue + # Typical forms: + # S.5....T. c /etc/foo.conf + # missing /etc/bar + m = re.search(r"\s(/\S+)$", line) + if m: + files.add(m.group(1)) + continue + if line.startswith("missing"): + parts = line.split() + if parts and parts[-1].startswith("/"): + files.add(parts[-1]) + return files diff --git a/tests/test_debian.py b/tests/test_debian.py index 333afc1..abad361 100644 --- a/tests/test_debian.py +++ b/tests/test_debian.py @@ -1,6 +1,5 @@ from __future__ import annotations -import hashlib from pathlib import Path @@ -97,58 +96,3 @@ def test_parse_status_conffiles_handles_continuations(tmp_path: Path): assert m["nginx"]["/etc/nginx/nginx.conf"] == "abcdef" assert m["nginx"]["/etc/nginx/mime.types"] == "123456" assert "other" not in m - - -def test_read_pkg_md5sums_and_file_md5(tmp_path: Path, monkeypatch): - import enroll.debian as d - - # Patch /var/lib/dpkg/info/.md5sums lookup to a tmp file. 
- md5_file = tmp_path / "pkg.md5sums" - md5_file.write_text("0123456789abcdef etc/foo.conf\n", encoding="utf-8") - - def fake_exists(path: str) -> bool: - return path.endswith("/var/lib/dpkg/info/p1.md5sums") - - real_open = open - - def fake_open(path: str, *args, **kwargs): - if path.endswith("/var/lib/dpkg/info/p1.md5sums"): - return real_open(md5_file, *args, **kwargs) - return real_open(path, *args, **kwargs) - - monkeypatch.setattr(d.os.path, "exists", fake_exists) - monkeypatch.setattr("builtins.open", fake_open) - - m = d.read_pkg_md5sums("p1") - assert m == {"etc/foo.conf": "0123456789abcdef"} - - content = b"hello world\n" - p = tmp_path / "x" - p.write_bytes(content) - assert d.file_md5(str(p)) == hashlib.md5(content).hexdigest() - - -def test_stat_triplet_fallbacks(tmp_path: Path, monkeypatch): - import enroll.debian as d - import sys - - p = tmp_path / "f" - p.write_text("x", encoding="utf-8") - - class FakePwdMod: - @staticmethod - def getpwuid(_): # pragma: no cover - raise KeyError - - class FakeGrpMod: - @staticmethod - def getgrgid(_): # pragma: no cover - raise KeyError - - # stat_triplet imports pwd/grp inside the function, so patch sys.modules. 
- monkeypatch.setitem(sys.modules, "pwd", FakePwdMod) - monkeypatch.setitem(sys.modules, "grp", FakeGrpMod) - owner, group, mode = d.stat_triplet(str(p)) - assert owner.isdigit() - assert group.isdigit() - assert mode.isdigit() and len(mode) == 4 diff --git a/tests/test_fsutil.py b/tests/test_fsutil.py new file mode 100644 index 0000000..ebe2224 --- /dev/null +++ b/tests/test_fsutil.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import hashlib +import os +from pathlib import Path + +from enroll.fsutil import file_md5, stat_triplet + + +def test_file_md5_matches_hashlib(tmp_path: Path): + p = tmp_path / "x" + p.write_bytes(b"hello world") + expected = hashlib.md5(b"hello world").hexdigest() # nosec + assert file_md5(str(p)) == expected + + +def test_stat_triplet_reports_mode(tmp_path: Path): + p = tmp_path / "x" + p.write_text("x", encoding="utf-8") + os.chmod(p, 0o600) + + owner, group, mode = stat_triplet(str(p)) + assert mode == "0600" + assert owner # non-empty string + assert group # non-empty string diff --git a/tests/test_harvest.py b/tests/test_harvest.py index fa796f0..a0d22ec 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -2,6 +2,7 @@ import json from pathlib import Path import enroll.harvest as h +from enroll.platform import PlatformInfo from enroll.systemd import UnitInfo @@ -10,6 +11,64 @@ class AllowAllPolicy: return None +class FakeBackend: + """Minimal backend stub for harvest tests. + + The real backends (dpkg/rpm) enumerate the live system (dpkg status, rpm + databases, etc). These tests instead control all backend behaviour. + """ + + def __init__( + self, + *, + name: str, + owned_etc: set[str], + etc_owner_map: dict[str, str], + topdir_to_pkgs: dict[str, set[str]], + pkg_to_etc_paths: dict[str, list[str]], + manual_pkgs: list[str], + owner_fn, + modified_by_pkg: dict[str, dict[str, str]] | None = None, + pkg_config_prefixes: tuple[str, ...] 
= ("/etc/apt/",), + ): + self.name = name + self.pkg_config_prefixes = pkg_config_prefixes + self._owned_etc = owned_etc + self._etc_owner_map = etc_owner_map + self._topdir_to_pkgs = topdir_to_pkgs + self._pkg_to_etc_paths = pkg_to_etc_paths + self._manual = manual_pkgs + self._owner_fn = owner_fn + self._modified_by_pkg = modified_by_pkg or {} + + def build_etc_index(self): + return ( + self._owned_etc, + self._etc_owner_map, + self._topdir_to_pkgs, + self._pkg_to_etc_paths, + ) + + def owner_of_path(self, path: str): + return self._owner_fn(path) + + def list_manual_packages(self): + return list(self._manual) + + def specific_paths_for_hints(self, hints: set[str]): + return [] + + def is_pkg_config_path(self, path: str) -> bool: + for pfx in self.pkg_config_prefixes: + if path == pfx or path.startswith(pfx): + return True + return False + + def modified_paths(self, pkg: str, etc_paths: list[str]): + # Test-controlled; ignore etc_paths. + return dict(self._modified_by_pkg.get(pkg, {})) + + def test_harvest_dedup_manual_packages_and_builds_etc_custom( monkeypatch, tmp_path: Path ): @@ -22,7 +81,7 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( real_exists = os.path.exists real_islink = os.path.islink - # Fake filesystem: two /etc files exist, only one is dpkg-owned. + # Fake filesystem: two /etc files exist, only one is package-owned. # Also include some /usr/local files to populate usr_local_custom. files = { "/etc/openvpn/server.conf": b"server", @@ -93,6 +152,7 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( # Avoid real system access monkeypatch.setattr(h, "list_enabled_services", lambda: ["openvpn.service"]) + monkeypatch.setattr(h, "list_enabled_timers", lambda: []) monkeypatch.setattr( h, "get_unit_info", @@ -109,29 +169,30 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( ), ) - # Debian package index: openvpn owns /etc/openvpn/server.conf; keyboard is unowned. 
- def fake_build_index(): - owned_etc = {"/etc/openvpn/server.conf"} - etc_owner_map = {"/etc/openvpn/server.conf": "openvpn"} - topdir_to_pkgs = {"openvpn": {"openvpn"}} - pkg_to_etc_paths = {"openvpn": ["/etc/openvpn/server.conf"], "curl": []} - return owned_etc, etc_owner_map, topdir_to_pkgs, pkg_to_etc_paths + # Package index: openvpn owns /etc/openvpn/server.conf; keyboard is unowned. + owned_etc = {"/etc/openvpn/server.conf"} + etc_owner_map = {"/etc/openvpn/server.conf": "openvpn"} + topdir_to_pkgs = {"openvpn": {"openvpn"}} + pkg_to_etc_paths = {"openvpn": ["/etc/openvpn/server.conf"], "curl": []} - monkeypatch.setattr(h, "build_dpkg_etc_index", fake_build_index) - - # openvpn conffile hash mismatch => should be captured under service role - monkeypatch.setattr( - h, - "parse_status_conffiles", - lambda: {"openvpn": {"/etc/openvpn/server.conf": "old"}}, + backend = FakeBackend( + name="dpkg", + owned_etc=owned_etc, + etc_owner_map=etc_owner_map, + topdir_to_pkgs=topdir_to_pkgs, + pkg_to_etc_paths=pkg_to_etc_paths, + manual_pkgs=["openvpn", "curl"], + owner_fn=lambda p: "openvpn" if "openvpn" in (p or "") else None, + modified_by_pkg={ + "openvpn": {"/etc/openvpn/server.conf": "modified_conffile"}, + }, ) - monkeypatch.setattr(h, "read_pkg_md5sums", lambda pkg: {}) - monkeypatch.setattr(h, "file_md5", lambda path: "new") monkeypatch.setattr( - h, "dpkg_owner", lambda p: "openvpn" if "openvpn" in p else None + h, "detect_platform", lambda: PlatformInfo("debian", "dpkg", {}) ) - monkeypatch.setattr(h, "list_manual_packages", lambda: ["openvpn", "curl"]) + monkeypatch.setattr(h, "get_backend", lambda info=None: backend) + monkeypatch.setattr(h, "collect_non_system_users", lambda: []) def fake_stat_triplet(p: str): @@ -207,6 +268,7 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( monkeypatch.setattr( h, "list_enabled_services", lambda: ["apparmor.service", "ntpsec.service"] ) + monkeypatch.setattr(h, "list_enabled_timers", lambda: []) 
def fake_unit_info(unit: str) -> UnitInfo: if unit == "apparmor.service": @@ -235,31 +297,35 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( monkeypatch.setattr(h, "get_unit_info", fake_unit_info) - # Dpkg /etc index: no owned /etc paths needed for this test. - monkeypatch.setattr( - h, - "build_dpkg_etc_index", - lambda: (set(), {}, {}, {}), - ) - monkeypatch.setattr(h, "parse_status_conffiles", lambda: {}) - monkeypatch.setattr(h, "read_pkg_md5sums", lambda pkg: {}) - monkeypatch.setattr(h, "file_md5", lambda path: "x") - monkeypatch.setattr(h, "list_manual_packages", lambda: []) - monkeypatch.setattr(h, "collect_non_system_users", lambda: []) - # Make apparmor *also* claim the ntpsec package (simulates overly-broad # package inference). The snippet routing should still prefer role 'ntpsec'. - def fake_dpkg_owner(p: str): + def fake_owner(p: str): if p == "/etc/cron.d/ntpsec": return "ntpsec" - if "apparmor" in p: + if "apparmor" in (p or ""): return "ntpsec" # intentionally misleading - if "ntpsec" in p or "ntpd" in p: + if "ntpsec" in (p or "") or "ntpd" in (p or ""): return "ntpsec" return None - monkeypatch.setattr(h, "dpkg_owner", fake_dpkg_owner) + backend = FakeBackend( + name="dpkg", + owned_etc=set(), + etc_owner_map={}, + topdir_to_pkgs={}, + pkg_to_etc_paths={}, + manual_pkgs=[], + owner_fn=fake_owner, + modified_by_pkg={}, + ) + + monkeypatch.setattr( + h, "detect_platform", lambda: PlatformInfo("debian", "dpkg", {}) + ) + monkeypatch.setattr(h, "get_backend", lambda info=None: backend) + monkeypatch.setattr(h, "stat_triplet", lambda p: ("root", "root", "0644")) + monkeypatch.setattr(h, "collect_non_system_users", lambda: []) def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): dst = Path(bundle_dir) / "artifacts" / role_name / src_rel @@ -268,11 +334,7 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( monkeypatch.setattr(h, "_copy_into_bundle", fake_copy) - class AllowAll: - 
def deny_reason(self, path: str): - return None - - state_path = h.harvest(str(bundle), policy=AllowAll()) + state_path = h.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) # Cron snippet should end up attached to the ntpsec role, not apparmor. diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 92c3dfc..cbfc208 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -322,3 +322,96 @@ def test_copy2_replace_overwrites_readonly_destination(tmp_path: Path): assert dst.read_text(encoding="utf-8") == "new" mode = stat.S_IMODE(dst.stat().st_mode) assert mode & stat.S_IWUSR # destination should remain mergeable + + +def test_manifest_includes_dnf_config_role_when_present(tmp_path: Path): + bundle = tmp_path / "bundle" + out = tmp_path / "ansible" + + # Create a dnf_config artifact. + (bundle / "artifacts" / "dnf_config" / "etc" / "dnf").mkdir( + parents=True, exist_ok=True + ) + (bundle / "artifacts" / "dnf_config" / "etc" / "dnf" / "dnf.conf").write_text( + "[main]\n", encoding="utf-8" + ) + + state = { + "host": {"hostname": "test", "os": "redhat", "pkg_backend": "rpm"}, + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "package_roles": [], + "manual_packages": [], + "manual_packages_skipped": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [ + { + "path": "/etc/dnf/dnf.conf", + "src_rel": "etc/dnf/dnf.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "dnf_config", + } + ], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": 
{ + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + } + + bundle.mkdir(parents=True, exist_ok=True) + (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + + manifest(str(bundle), str(out)) + + pb = (out / "playbook.yml").read_text(encoding="utf-8") + assert "- dnf_config" in pb + + tasks = (out / "roles" / "dnf_config" / "tasks" / "main.yml").read_text( + encoding="utf-8" + ) + # Ensure the role exists and contains some file deployment logic. + assert "Deploy any other managed files" in tasks + + +def test_render_install_packages_tasks_contains_dnf_branch(): + from enroll.manifest import _render_install_packages_tasks + + txt = _render_install_packages_tasks("role", "role") + assert "ansible.builtin.apt" in txt + assert "ansible.builtin.dnf" in txt + assert "ansible.builtin.package" in txt + assert "pkg_mgr" in txt diff --git a/tests/test_platform.py b/tests/test_platform.py new file mode 100644 index 0000000..7ff66c6 --- /dev/null +++ b/tests/test_platform.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from pathlib import Path + +import enroll.platform as platform + + +def test_read_os_release_parses_kv_and_strips_quotes(tmp_path: Path): + p = tmp_path / "os-release" + p.write_text( + """ +# comment +ID=fedora +ID_LIKE=\"rhel centos\" +NAME=\"Fedora Linux\" +EMPTY= +NOEQUALS +""", + encoding="utf-8", + ) + + osr = platform._read_os_release(str(p)) + assert osr["ID"] == "fedora" + assert osr["ID_LIKE"] == "rhel centos" + assert osr["NAME"] == "Fedora Linux" + assert osr["EMPTY"] == "" + assert "NOEQUALS" not in osr + + +def test_detect_platform_prefers_os_release(monkeypatch): + monkeypatch.setattr( + platform, + "_read_os_release", + lambda path="/etc/os-release": {"ID": "fedora", "ID_LIKE": "rhel"}, + ) + # If os-release is decisive we shouldn't need which() + monkeypatch.setattr(platform.shutil, "which", lambda exe: None) + + 
info = platform.detect_platform() + assert info.os_family == "redhat" + assert info.pkg_backend == "rpm" + + +def test_detect_platform_fallbacks_to_dpkg_when_unknown(monkeypatch): + monkeypatch.setattr(platform, "_read_os_release", lambda path="/etc/os-release": {}) + monkeypatch.setattr( + platform.shutil, "which", lambda exe: "/usr/bin/dpkg" if exe == "dpkg" else None + ) + + info = platform.detect_platform() + assert info.os_family == "debian" + assert info.pkg_backend == "dpkg" + + +def test_get_backend_unknown_prefers_rpm_if_present(monkeypatch): + monkeypatch.setattr( + platform.shutil, "which", lambda exe: "/usr/bin/rpm" if exe == "rpm" else None + ) + + b = platform.get_backend( + platform.PlatformInfo(os_family="unknown", pkg_backend="unknown", os_release={}) + ) + assert isinstance(b, platform.RpmBackend) + + +def test_rpm_backend_modified_paths_labels_conffiles(monkeypatch): + b = platform.RpmBackend() + + # Pretend rpm -V says both files changed, but only one is a config file. + monkeypatch.setattr(b, "_modified_files", lambda pkg: {"/etc/foo.conf", "/etc/bar"}) + monkeypatch.setattr(b, "_config_files", lambda pkg: {"/etc/foo.conf"}) + + out = b.modified_paths("mypkg", ["/etc/foo.conf", "/etc/bar", "/etc/dnf/dnf.conf"]) + assert out["/etc/foo.conf"] == "modified_conffile" + assert out["/etc/bar"] == "modified_packaged_file" + # Package-manager config paths are excluded. 
+ assert "/etc/dnf/dnf.conf" not in out + + +def test_specific_paths_for_hints_differs_between_backends(): + # We can exercise this without instantiating DpkgBackend (which reads dpkg status) + class Dummy(platform.PackageBackend): + name = "dummy" + pkg_config_prefixes = ("/etc/apt/",) + + d = Dummy() + assert d.is_pkg_config_path("/etc/apt/sources.list") + assert not d.is_pkg_config_path("/etc/ssh/sshd_config") + + r = platform.RpmBackend() + paths = set(r.specific_paths_for_hints({"nginx"})) + assert "/etc/sysconfig/nginx" in paths + assert "/etc/sysconfig/nginx.conf" in paths diff --git a/tests/test_rpm.py b/tests/test_rpm.py new file mode 100644 index 0000000..ea97c12 --- /dev/null +++ b/tests/test_rpm.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +import enroll.rpm as rpm + + +def test_rpm_owner_returns_none_when_unowned(monkeypatch): + monkeypatch.setattr( + rpm, + "_run", + lambda cmd, allow_fail=False, merge_err=False: ( + 1, + "file /etc/x is not owned by any package\n", + ), + ) + assert rpm.rpm_owner("/etc/x") is None + + +def test_rpm_owner_parses_name(monkeypatch): + monkeypatch.setattr( + rpm, "_run", lambda cmd, allow_fail=False, merge_err=False: (0, "bash\n") + ) + assert rpm.rpm_owner("/bin/bash") == "bash" + + +def test_strip_arch_strips_known_arches(): + assert rpm._strip_arch("vim-enhanced.x86_64") == "vim-enhanced" + assert rpm._strip_arch("foo.noarch") == "foo" + assert rpm._strip_arch("weird.token") == "weird.token" + + +def test_list_manual_packages_prefers_dnf_repoquery(monkeypatch): + monkeypatch.setattr( + rpm.shutil, "which", lambda exe: "/usr/bin/dnf" if exe == "dnf" else None + ) + + def fake_run(cmd, allow_fail=False, merge_err=False): + # First repoquery form returns usable output. 
+ if cmd[:3] == ["dnf", "-q", "repoquery"]: + return 0, "vim-enhanced.x86_64\nhtop\nvim-enhanced.x86_64\n" + raise AssertionError(f"unexpected cmd: {cmd}") + + monkeypatch.setattr(rpm, "_run", fake_run) + + pkgs = rpm.list_manual_packages() + assert pkgs == ["htop", "vim-enhanced"] + + +def test_list_manual_packages_falls_back_to_history(monkeypatch): + monkeypatch.setattr( + rpm.shutil, "which", lambda exe: "/usr/bin/dnf" if exe == "dnf" else None + ) + + def fake_run(cmd, allow_fail=False, merge_err=False): + # repoquery fails + if cmd[:3] == ["dnf", "-q", "repoquery"]: + return 1, "" + if cmd[:3] == ["dnf", "-q", "history"]: + return ( + 0, + "Installed Packages\nvim-enhanced.x86_64\nLast metadata expiration check: 0:01:00 ago\n", + ) + raise AssertionError(f"unexpected cmd: {cmd}") + + monkeypatch.setattr(rpm, "_run", fake_run) + + pkgs = rpm.list_manual_packages() + assert pkgs == ["vim-enhanced"] + + +def test_build_rpm_etc_index_uses_fallback_when_rpm_output_mismatches(monkeypatch): + # Two files in /etc, one owned, one unowned. + monkeypatch.setattr( + rpm, "_walk_etc_files", lambda: ["/etc/owned.conf", "/etc/unowned.conf"] + ) + + # Simulate chunk query producing unexpected extra line (mismatch) -> triggers per-file fallback. 
+ monkeypatch.setattr( + rpm, + "_run", + lambda cmd, allow_fail=False, merge_err=False: (0, "ownedpkg\nEXTRA\nTHIRD\n"), + ) + monkeypatch.setattr( + rpm, "rpm_owner", lambda p: "ownedpkg" if p == "/etc/owned.conf" else None + ) + + owned, owner_map, topdir_to_pkgs, pkg_to_etc = rpm.build_rpm_etc_index() + + assert owned == {"/etc/owned.conf"} + assert owner_map["/etc/owned.conf"] == "ownedpkg" + assert "owned.conf" in topdir_to_pkgs + assert pkg_to_etc["ownedpkg"] == ["/etc/owned.conf"] + + +def test_build_rpm_etc_index_parses_chunk_output(monkeypatch): + monkeypatch.setattr( + rpm, "_walk_etc_files", lambda: ["/etc/ssh/sshd_config", "/etc/notowned"] + ) + + def fake_run(cmd, allow_fail=False, merge_err=False): + # One output line per input path. + return 0, "openssh-server\nfile /etc/notowned is not owned by any package\n" + + monkeypatch.setattr(rpm, "_run", fake_run) + + owned, owner_map, topdir_to_pkgs, pkg_to_etc = rpm.build_rpm_etc_index() + + assert "/etc/ssh/sshd_config" in owned + assert "/etc/notowned" not in owned + assert owner_map["/etc/ssh/sshd_config"] == "openssh-server" + assert "ssh" in topdir_to_pkgs + assert "openssh-server" in topdir_to_pkgs["ssh"] + assert pkg_to_etc["openssh-server"] == ["/etc/ssh/sshd_config"] + + +def test_rpm_config_files_and_modified_files_parsing(monkeypatch): + monkeypatch.setattr( + rpm, + "_run", + lambda cmd, allow_fail=False, merge_err=False: ( + 0, + "/etc/foo.conf\n/usr/bin/tool\n", + ), + ) + assert rpm.rpm_config_files("mypkg") == {"/etc/foo.conf", "/usr/bin/tool"} + + # rpm -V returns only changed/missing files + out = "S.5....T. 
c /etc/foo.conf\nmissing /etc/bar\n" + monkeypatch.setattr( + rpm, "_run", lambda cmd, allow_fail=False, merge_err=False: (1, out) + ) + assert rpm.rpm_modified_files("mypkg") == {"/etc/foo.conf", "/etc/bar"} From 043802e80034b998c84a711e77ae8c69c8e0d137 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 16:10:27 +1100 Subject: [PATCH 26/85] Refactor state structure and capture versions of packages --- enroll/debian.py | 44 ++++++++++++++++++ enroll/diff.py | 112 +++++++++++++++++++++++++++++++++++++-------- enroll/harvest.py | 75 ++++++++++++++++++++++++++---- enroll/manifest.py | 27 ++++++----- enroll/platform.py | 21 +++++++++ enroll/rpm.py | 57 +++++++++++++++++++++++ 6 files changed, 294 insertions(+), 42 deletions(-) diff --git a/enroll/debian.py b/enroll/debian.py index 7e1ee2d..9bf847e 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -63,6 +63,50 @@ def list_manual_packages() -> List[str]: return sorted(set(pkgs)) +def list_installed_packages() -> Dict[str, List[Dict[str, str]]]: + """Return mapping of installed package name -> installed instances. + + Uses dpkg-query and is expected to work on Debian/Ubuntu-like systems. + + Output format: + {"pkg": [{"version": "...", "arch": "..."}, ...], ...} + """ + + try: + p = subprocess.run( + [ + "dpkg-query", + "-W", + "-f=${Package}\t${Version}\t${Architecture}\n", + ], + text=True, + capture_output=True, + check=False, + ) # nosec + except Exception: + return {} + + out: Dict[str, List[Dict[str, str]]] = {} + for raw in (p.stdout or "").splitlines(): + line = raw.strip("\n") + if not line: + continue + parts = line.split("\t") + if len(parts) < 3: + continue + name, ver, arch = parts[0].strip(), parts[1].strip(), parts[2].strip() + if not name: + continue + out.setdefault(name, []).append({"version": ver, "arch": arch}) + + # Stable ordering for deterministic JSON dumps. 
+ for k in list(out.keys()): + out[k] = sorted( + out[k], key=lambda x: (x.get("arch") or "", x.get("version") or "") + ) + return out + + def build_dpkg_etc_index( info_dir: str = "/var/lib/dpkg/info", ) -> Tuple[Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]]]: diff --git a/enroll/diff.py b/enroll/diff.py index 0110d17..5ad0eac 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -126,18 +126,62 @@ def _load_state(bundle_dir: Path) -> Dict[str, Any]: return json.load(f) +def _packages_inventory(state: Dict[str, Any]) -> Dict[str, Any]: + return (state.get("inventory") or {}).get("packages") or {} + + def _all_packages(state: Dict[str, Any]) -> List[str]: - pkgs = set(state.get("manual_packages", []) or []) - pkgs |= set(state.get("manual_packages_skipped", []) or []) - for s in state.get("services", []) or []: - for p in s.get("packages", []) or []: - pkgs.add(p) - return sorted(pkgs) + return sorted(_packages_inventory(state).keys()) + + +def _roles(state: Dict[str, Any]) -> Dict[str, Any]: + return state.get("roles") or {} + + +def _pkg_version_key(entry: Dict[str, Any]) -> Optional[str]: + """Return a stable string used for version comparison.""" + installs = entry.get("installations") or [] + if isinstance(installs, list) and installs: + parts: List[str] = [] + for inst in installs: + if not isinstance(inst, dict): + continue + arch = str(inst.get("arch") or "") + ver = str(inst.get("version") or "") + if not ver: + continue + parts.append(f"{arch}:{ver}" if arch else ver) + if parts: + return "|".join(sorted(parts)) + v = entry.get("version") + if v: + return str(v) + return None + + +def _pkg_version_display(entry: Dict[str, Any]) -> Optional[str]: + v = entry.get("version") + if v: + return str(v) + installs = entry.get("installations") or [] + if isinstance(installs, list) and installs: + parts: List[str] = [] + for inst in installs: + if not isinstance(inst, dict): + continue + arch = str(inst.get("arch") or "") + ver = 
str(inst.get("version") or "") + if not ver: + continue + parts.append(f"{ver} ({arch})" if arch else ver) + if parts: + return ", ".join(sorted(parts)) + return None def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: out: Dict[str, Dict[str, Any]] = {} - for s in state.get("services", []) or []: + for s in _roles(state).get("services") or []: unit = s.get("unit") if unit: out[str(unit)] = s @@ -145,7 +189,7 @@ def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: def _users_by_name(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: - users = (state.get("users") or {}).get("users") or [] + users = (_roles(state).get("users") or {}).get("users") or [] out: Dict[str, Dict[str, Any]] = {} for u in users: name = u.get("name") @@ -167,43 +211,43 @@ class FileRec: def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, Any]]]: # Services - for s in state.get("services", []) or []: + for s in _roles(state).get("services") or []: role = s.get("role_name") or "unknown" for mf in s.get("managed_files", []) or []: yield str(role), mf # Package roles - for p in state.get("package_roles", []) or []: + for p in _roles(state).get("packages") or []: role = p.get("role_name") or "unknown" for mf in p.get("managed_files", []) or []: yield str(role), mf # Users - u = state.get("users") or {} + u = _roles(state).get("users") or {} u_role = u.get("role_name") or "users" for mf in u.get("managed_files", []) or []: yield str(u_role), mf # apt_config - ac = state.get("apt_config") or {} + ac = _roles(state).get("apt_config") or {} ac_role = ac.get("role_name") or "apt_config" for mf in ac.get("managed_files", []) or []: yield str(ac_role), mf # etc_custom - ec = state.get("etc_custom") or {} + ec = _roles(state).get("etc_custom") or {} ec_role = ec.get("role_name") or "etc_custom" for mf in ec.get("managed_files", []) or []: yield str(ec_role), mf # usr_local_custom - ul = state.get("usr_local_custom") or {} + ul = 
_roles(state).get("usr_local_custom") or {} ul_role = ul.get("role_name") or "usr_local_custom" for mf in ul.get("managed_files", []) or []: yield str(ul_role), mf # extra_paths - xp = state.get("extra_paths") or {} + xp = _roles(state).get("extra_paths") or {} xp_role = xp.get("role_name") or "extra_paths" for mf in xp.get("managed_files", []) or []: yield str(xp_role), mf @@ -261,12 +305,28 @@ def compare_harvests( old_state = _load_state(old_b.dir) new_state = _load_state(new_b.dir) - old_pkgs = set(_all_packages(old_state)) - new_pkgs = set(_all_packages(new_state)) + old_inv = _packages_inventory(old_state) + new_inv = _packages_inventory(new_state) + + old_pkgs = set(old_inv.keys()) + new_pkgs = set(new_inv.keys()) pkgs_added = sorted(new_pkgs - old_pkgs) pkgs_removed = sorted(old_pkgs - new_pkgs) + pkgs_version_changed: List[Dict[str, Any]] = [] + for pkg in sorted(old_pkgs & new_pkgs): + a = old_inv.get(pkg) or {} + b = new_inv.get(pkg) or {} + if _pkg_version_key(a) != _pkg_version_key(b): + pkgs_version_changed.append( + { + "package": pkg, + "old": _pkg_version_display(a), + "new": _pkg_version_display(b), + } + ) + old_units = _service_units(old_state) new_units = _service_units(new_state) units_added = sorted(set(new_units) - set(old_units)) @@ -380,6 +440,7 @@ def compare_harvests( [ pkgs_added, pkgs_removed, + pkgs_version_changed, units_added, units_removed, units_changed, @@ -413,7 +474,11 @@ def compare_harvests( "state_mtime": _mtime_iso(new_b.state_path), "host": (new_state.get("host") or {}).get("hostname"), }, - "packages": {"added": pkgs_added, "removed": pkgs_removed}, + "packages": { + "added": pkgs_added, + "removed": pkgs_removed, + "version_changed": pkgs_version_changed, + }, "services": { "enabled_added": units_added, "enabled_removed": units_removed, @@ -471,10 +536,13 @@ def _report_text(report: Dict[str, Any]) -> str: lines.append("\nPackages") lines.append(f" added: {len(pk.get('added', []) or [])}") lines.append(f" removed: 
{len(pk.get('removed', []) or [])}") + lines.append(f" version_changed: {len(pk.get('version_changed', []) or [])}") for p in pk.get("added", []) or []: lines.append(f" + {p}") for p in pk.get("removed", []) or []: lines.append(f" - {p}") + for ch in pk.get("version_changed", []) or []: + lines.append(f" ~ {ch.get('package')}: {ch.get('old')} -> {ch.get('new')}") sv = report.get("services", {}) lines.append("\nServices (enabled systemd units)") @@ -542,6 +610,7 @@ def _report_text(report: Dict[str, Any]) -> str: [ (pk.get("added") or []), (pk.get("removed") or []), + (pk.get("version_changed") or []), (sv.get("enabled_added") or []), (sv.get("enabled_removed") or []), (sv.get("changed") or []), @@ -578,6 +647,12 @@ def _report_markdown(report: Dict[str, Any]) -> str: for p in pk.get("removed", []) or []: out.append(f" - `- {p}`\n") + out.append(f"- Version changed: {len(pk.get('version_changed', []) or [])}\n") + for ch in pk.get("version_changed", []) or []: + out.append( + f" - `~ {ch.get('package')}`: `{ch.get('old')}` → `{ch.get('new')}`\n" + ) + sv = report.get("services", {}) out.append("## Services (enabled systemd units)\n") if sv.get("enabled_added"): @@ -672,6 +747,7 @@ def _report_markdown(report: Dict[str, Any]) -> str: [ (pk.get("added") or []), (pk.get("removed") or []), + (pk.get("version_changed") or []), (sv.get("enabled_added") or []), (sv.get("enabled_removed") or []), (sv.get("changed") or []), diff --git a/enroll/harvest.py b/enroll/harvest.py index bb706b1..4ca3984 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -5,6 +5,7 @@ import json import os import re import shutil +import time from dataclasses import dataclass, asdict from typing import Dict, List, Optional, Set @@ -1481,9 +1482,60 @@ def harvest( notes=extra_notes, ) + # ------------------------- + # Inventory: packages (SBOM-ish) + # ------------------------- + installed = backend.installed_packages() or {} + + manual_set: Set[str] = set(manual_pkgs or []) + + pkg_units: 
Dict[str, Set[str]] = {} + pkg_roles_map: Dict[str, Set[str]] = {} + + for svc in service_snaps: + for p in svc.packages: + pkg_units.setdefault(p, set()).add(svc.unit) + pkg_roles_map.setdefault(p, set()).add(svc.role_name) + + pkg_role_names: Dict[str, List[str]] = {} + for ps in pkg_snaps: + pkg_roles_map.setdefault(ps.package, set()).add(ps.role_name) + pkg_role_names.setdefault(ps.package, []).append(ps.role_name) + + pkg_names: Set[str] = set() + pkg_names |= manual_set + pkg_names |= set(pkg_units.keys()) + pkg_names |= {ps.package for ps in pkg_snaps} + + packages_inventory: Dict[str, Dict[str, object]] = {} + for pkg in sorted(pkg_names): + installs = installed.get(pkg, []) or [] + arches = sorted({i.get("arch") for i in installs if i.get("arch")}) + vers = sorted({i.get("version") for i in installs if i.get("version")}) + version: Optional[str] = vers[0] if len(vers) == 1 else None + + observed: List[Dict[str, str]] = [] + if pkg in manual_set: + observed.append({"kind": "user_installed"}) + for unit in sorted(pkg_units.get(pkg, set())): + observed.append({"kind": "systemd_unit", "ref": unit}) + for rn in sorted(set(pkg_role_names.get(pkg, []))): + observed.append({"kind": "package_role", "ref": rn}) + + roles = sorted(pkg_roles_map.get(pkg, set())) + + packages_inventory[pkg] = { + "version": version, + "arches": arches, + "installations": installs, + "observed_via": observed, + "roles": roles, + } + state = { "enroll": { "version": get_enroll_version(), + "harvest_time": time.time_ns(), }, "host": { "hostname": os.uname().nodename, @@ -1491,16 +1543,19 @@ def harvest( "pkg_backend": backend.name, "os_release": platform.os_release, }, - "users": asdict(users_snapshot), - "services": [asdict(s) for s in service_snaps], - "manual_packages": manual_pkgs, - "manual_packages_skipped": manual_pkgs_skipped, - "package_roles": [asdict(p) for p in pkg_snaps], - "apt_config": asdict(apt_config_snapshot), - "dnf_config": asdict(dnf_config_snapshot), - "etc_custom": 
asdict(etc_custom_snapshot), - "usr_local_custom": asdict(usr_local_custom_snapshot), - "extra_paths": asdict(extra_paths_snapshot), + "inventory": { + "packages": packages_inventory, + }, + "roles": { + "users": asdict(users_snapshot), + "services": [asdict(s) for s in service_snaps], + "packages": [asdict(p) for p in pkg_snaps], + "apt_config": asdict(apt_config_snapshot), + "dnf_config": asdict(dnf_config_snapshot), + "etc_custom": asdict(etc_custom_snapshot), + "usr_local_custom": asdict(usr_local_custom_snapshot), + "extra_paths": asdict(extra_paths_snapshot), + }, } state_path = os.path.join(bundle_dir, "state.json") diff --git a/enroll/manifest.py b/enroll/manifest.py index 923040f..8b4008b 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -271,9 +271,7 @@ def _write_hostvars(site_root: str, fqdn: str, role: str, data: Dict[str, Any]) merged = _merge_mappings_overwrite(existing_map, data) - out = "# Generated by enroll (host-specific vars)\n---\n" + _yaml_dump_mapping( - merged, sort_keys=True - ) + out = "---\n" + _yaml_dump_mapping(merged, sort_keys=True) with open(path, "w", encoding="utf-8") as f: f.write(out) @@ -392,7 +390,7 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars/<fqdn>/<role>/.files/... # - non-site: roles/<role>/files/... - return f"""# Generated by enroll + return f""" - name: Deploy any systemd unit files (templates) ansible.builtin.template: @@ -477,7 +475,7 @@ def _render_install_packages_tasks(role: str, var_prefix: str) -> str: generic `package` module. This keeps generated roles usable on both Debian-like and RPM-like systems.
""" - return f"""# Generated by enroll + return f""" - name: Install packages for {role} (APT) ansible.builtin.apt: @@ -672,14 +670,16 @@ def _manifest_from_bundle_dir( with open(state_path, "r", encoding="utf-8") as f: state = json.load(f) - services: List[Dict[str, Any]] = state.get("services", []) - package_roles: List[Dict[str, Any]] = state.get("package_roles", []) - users_snapshot: Dict[str, Any] = state.get("users", {}) - apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) - dnf_config_snapshot: Dict[str, Any] = state.get("dnf_config", {}) - etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) - usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) - extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) + roles: Dict[str, Any] = state.get("roles") or {} + + services: List[Dict[str, Any]] = roles.get("services", []) + package_roles: List[Dict[str, Any]] = roles.get("packages", []) + users_snapshot: Dict[str, Any] = roles.get("users", {}) + apt_config_snapshot: Dict[str, Any] = roles.get("apt_config", {}) + dnf_config_snapshot: Dict[str, Any] = roles.get("dnf_config", {}) + etc_custom_snapshot: Dict[str, Any] = roles.get("etc_custom", {}) + usr_local_custom_snapshot: Dict[str, Any] = roles.get("usr_local_custom", {}) + extra_paths_snapshot: Dict[str, Any] = roles.get("extra_paths", {}) site_mode = fqdn is not None and fqdn != "" @@ -839,7 +839,6 @@ def _manifest_from_bundle_dir( # tasks (data-driven) users_tasks = """--- -# Generated by enroll - name: Ensure groups exist ansible.builtin.group: diff --git a/enroll/platform.py b/enroll/platform.py index 998b83d..3c1904b 100644 --- a/enroll/platform.py +++ b/enroll/platform.py @@ -81,6 +81,17 @@ class PackageBackend: def list_manual_packages(self) -> List[str]: # pragma: no cover raise NotImplementedError + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: # pragma: no cover + """Return mapping of package name -> installed instances. 
+ + Each instance is a dict with at least: + - version: package version string + - arch: architecture string + + Backends should be best-effort and return an empty mapping on failure. + """ + raise NotImplementedError + def build_etc_index( self, ) -> Tuple[ @@ -121,6 +132,11 @@ class DpkgBackend(PackageBackend): return list_manual_packages() + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: + from .debian import list_installed_packages + + return list_installed_packages() + def build_etc_index(self): from .debian import build_dpkg_etc_index @@ -194,6 +210,11 @@ class RpmBackend(PackageBackend): return list_manual_packages() + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: + from .rpm import list_installed_packages + + return list_installed_packages() + def build_etc_index(self): from .rpm import build_rpm_etc_index diff --git a/enroll/rpm.py b/enroll/rpm.py index 947617c..9e2892f 100644 --- a/enroll/rpm.py +++ b/enroll/rpm.py @@ -142,6 +142,63 @@ def list_manual_packages() -> List[str]: return [] +def list_installed_packages() -> Dict[str, List[Dict[str, str]]]: + """Return mapping of installed package name -> installed instances. + + Uses `rpm -qa` and is expected to work on RHEL/Fedora-like systems. + + Output format: + {"pkg": [{"version": "...", "arch": "..."}, ...], ...} + + The version string is formatted as: + - "<version>-<release>" for typical packages + - "<epoch>:<version>-<release>" if a non-zero epoch is present + """ + + try: + _, out = _run( + [ + "rpm", + "-qa", + "--qf", + "%{NAME}\t%{EPOCHNUM}\t%{VERSION}\t%{RELEASE}\t%{ARCH}\n", + ], + allow_fail=False, + merge_err=True, + ) + except Exception: + return {} + + pkgs: Dict[str, List[Dict[str, str]]] = {} + for raw in (out or "").splitlines(): + line = raw.strip("\n") + if not line: + continue + parts = line.split("\t") + if len(parts) < 5: + continue + name, epoch, ver, rel, arch = [p.strip() for p in parts[:5]] + if not name or not ver: + continue + + # Normalise epoch. 
+ epoch = epoch.strip() + if epoch.lower() in ("(none)", "none", ""): + epoch = "0" + + v = f"{ver}-{rel}" if rel else ver + if epoch and epoch.isdigit() and epoch != "0": + v = f"{epoch}:{v}" + + pkgs.setdefault(name, []).append({"version": v, "arch": arch}) + + for k in list(pkgs.keys()): + pkgs[k] = sorted( + pkgs[k], key=lambda x: (x.get("arch") or "", x.get("version") or "") + ) + return pkgs + + def _walk_etc_files() -> List[str]: out: List[str] = [] for dirpath, _, filenames in os.walk("/etc"): From 081739fd19ba4983fa00b28c9d6969e40bef712d Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 16:35:21 +1100 Subject: [PATCH 27/85] Fix tests --- enroll/manifest.py | 15 +- enroll/rpm.py | 2 +- tests/test_diff_usr_local_custom.py | 147 ++++++---- tests/test_harvest.py | 42 ++- tests/test_jinjaturtle.py | 104 ++++--- tests/test_manifest.py | 425 +++++++++++++++++----------- 6 files changed, 457 insertions(+), 278 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index 8b4008b..bc629bb 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -390,9 +390,7 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars/<fqdn>/<role>/.files/... # - non-site: roles/<role>/files/... - return f""" - -- name: Deploy any systemd unit files (templates) + return f"""- name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -475,7 +473,7 @@ def _render_install_packages_tasks(role: str, var_prefix: str) -> str: generic `package` module. This keeps generated roles usable on both Debian-like and RPM-like systems. """ - return f""" - -- name: Install packages for {role} (APT) + return f"""- name: Install packages for {role} (APT) ansible.builtin.apt: name: "{{{{ {var_prefix}_packages | default([]) }}}}" state: present @@ -995,7 +991,7 @@ Generated non-system user accounts and SSH public material. 
else: _write_role_defaults(role_dir, vars_map) - tasks = """---\n""" + _render_generic_files_tasks( + tasks = "---\n" + _render_generic_files_tasks( var_prefix, include_restart_notify=False ) with open( @@ -1297,7 +1293,7 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP else: _write_role_defaults(role_dir, vars_map) - tasks = """---\n""" + _render_generic_files_tasks( + tasks = "---\n" + _render_generic_files_tasks( var_prefix, include_restart_notify=False ) with open( @@ -1663,8 +1659,7 @@ User-requested extra file harvesting. ) task_parts.append( - f""" -- name: Probe whether systemd unit exists and is manageable + f"""- name: Probe whether systemd unit exists and is manageable ansible.builtin.systemd: name: "{{{{ {var_prefix}_unit_name }}}}" check_mode: true diff --git a/enroll/rpm.py b/enroll/rpm.py index 9e2892f..0314670 100644 --- a/enroll/rpm.py +++ b/enroll/rpm.py @@ -104,7 +104,7 @@ def list_manual_packages() -> List[str]: if pkgs: return _dedupe(pkgs) - # Fallback: human-oriented output. 
+ # Fallback rc, out = _run( ["dnf", "-q", "history", "userinstalled"], allow_fail=True, merge_err=True ) diff --git a/tests/test_diff_usr_local_custom.py b/tests/test_diff_usr_local_custom.py index 88d594f..28ec57c 100644 --- a/tests/test_diff_usr_local_custom.py +++ b/tests/test_diff_usr_local_custom.py @@ -18,65 +18,106 @@ def test_diff_includes_usr_local_custom_files(tmp_path: Path): new = tmp_path / "new" old_state = { - "host": {"hostname": "h1", "os": "debian"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], - }, - "services": [], - "package_roles": [], - "manual_packages": ["curl"], - "manual_packages_skipped": [], - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", + "schema_version": 3, + "host": {"hostname": "h1", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "curl": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "user_installed"}], + "roles": [], } - ], - "excluded": [], - "notes": [], + } + }, + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + 
"reason": "usr_local_etc_custom", + } + ], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, }, } + new_state = { **old_state, - "manual_packages": ["curl", "htop"], - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", + "inventory": { + "packages": { + **old_state["inventory"]["packages"], + "htop": { + "version": "3.0", + "arches": [], + "installations": [{"version": "3.0", "arch": "amd64"}], + "observed_via": [{"kind": "user_installed"}], + "roles": [], }, - { - "path": "/usr/local/bin/myscript", - "src_rel": "usr/local/bin/myscript", - "owner": "root", - "group": "root", - "mode": "0755", - "reason": "usr_local_bin_script", - }, - ], - "excluded": [], - "notes": [], + } + }, + "roles": { + **old_state["roles"], + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + }, + { + "path": "/usr/local/bin/myscript", + "src_rel": "usr/local/bin/myscript", + "owner": "root", + "group": "root", + "mode": "0755", + "reason": "usr_local_bin_script", + }, + ], + "excluded": [], + "notes": [], + }, }, } diff --git a/tests/test_harvest.py b/tests/test_harvest.py index a0d22ec..1b884aa 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -30,6 +30,7 @@ class FakeBackend: owner_fn, modified_by_pkg: dict[str, dict[str, str]] | None = None, pkg_config_prefixes: tuple[str, ...] 
= ("/etc/apt/",), + installed: dict[str, list[dict[str, str]]] | None = None, ): self.name = name self.pkg_config_prefixes = pkg_config_prefixes @@ -40,6 +41,7 @@ class FakeBackend: self._manual = manual_pkgs self._owner_fn = owner_fn self._modified_by_pkg = modified_by_pkg or {} + self._installed = installed or {} def build_etc_index(self): return ( @@ -55,6 +57,14 @@ class FakeBackend: def list_manual_packages(self): return list(self._manual) + def installed_packages(self): + """Return mapping package -> installations. + + The real backends return: + {"pkg": [{"version": "...", "arch": "..."}, ...]} + """ + return dict(self._installed) + def specific_paths_for_hints(self, hints: set[str]): return [] @@ -214,26 +224,36 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( state_path = h.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) - assert "openvpn" in st["manual_packages"] - assert "curl" in st["manual_packages"] - assert "openvpn" in st["manual_packages_skipped"] - assert all(pr["package"] != "openvpn" for pr in st["package_roles"]) - assert any(pr["package"] == "curl" for pr in st["package_roles"]) + inv = st["inventory"]["packages"] + assert "openvpn" in inv + assert "curl" in inv + + # openvpn is managed by the service role, so it should NOT appear as a package role. + pkg_roles = st["roles"]["packages"] + assert all(pr["package"] != "openvpn" for pr in pkg_roles) + assert any(pr["package"] == "curl" for pr in pkg_roles) + + # Inventory provenance: openvpn should be observed via systemd unit. 
+ openvpn_obs = inv["openvpn"]["observed_via"] + assert any( + o.get("kind") == "systemd_unit" and o.get("ref") == "openvpn.service" + for o in openvpn_obs + ) # Service role captured modified conffile - svc = st["services"][0] + svc = st["roles"]["services"][0] assert svc["unit"] == "openvpn.service" assert "openvpn" in svc["packages"] assert any(mf["path"] == "/etc/openvpn/server.conf" for mf in svc["managed_files"]) # Unowned /etc/default/keyboard is attributed to etc_custom only - etc_custom = st["etc_custom"] + etc_custom = st["roles"]["etc_custom"] assert any( mf["path"] == "/etc/default/keyboard" for mf in etc_custom["managed_files"] ) # /usr/local content is attributed to usr_local_custom - ul = st["usr_local_custom"] + ul = st["roles"]["usr_local_custom"] assert any(mf["path"] == "/usr/local/etc/myapp.conf" for mf in ul["managed_files"]) assert any(mf["path"] == "/usr/local/bin/myscript" for mf in ul["managed_files"]) assert all(mf["path"] != "/usr/local/bin/readme.txt" for mf in ul["managed_files"]) @@ -338,10 +358,12 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( st = json.loads(Path(state_path).read_text(encoding="utf-8")) # Cron snippet should end up attached to the ntpsec role, not apparmor. 
- svc_ntpsec = next(s for s in st["services"] if s["role_name"] == "ntpsec") + svc_ntpsec = next(s for s in st["roles"]["services"] if s["role_name"] == "ntpsec") assert any(mf["path"] == "/etc/cron.d/ntpsec" for mf in svc_ntpsec["managed_files"]) - svc_apparmor = next(s for s in st["services"] if s["role_name"] == "apparmor") + svc_apparmor = next( + s for s in st["roles"]["services"] if s["role_name"] == "apparmor" + ) assert all( mf["path"] != "/etc/cron.d/ntpsec" for mf in svc_apparmor["managed_files"] ) diff --git a/tests/test_jinjaturtle.py b/tests/test_jinjaturtle.py index 68bb04c..c0447b1 100644 --- a/tests/test_jinjaturtle.py +++ b/tests/test_jinjaturtle.py @@ -24,44 +24,78 @@ def test_manifest_uses_jinjaturtle_templates_and_does_not_copy_raw( ) state = { - "host": {"hostname": "test", "os": "debian"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], + "schema_version": 3, + "host": {"hostname": "test", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "foo": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "systemd_unit", "ref": "foo.service"}], + "roles": ["foo"], + } + } }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "services": [ - { - "unit": "foo.service", - "role_name": "foo", - "packages": ["foo"], - "active_state": "inactive", - "sub_state": "dead", - "unit_file_state": "disabled", - "condition_result": "no", - "managed_files": [ - { - "path": "/etc/foo.ini", - "src_rel": "etc/foo.ini", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "modified_conffile", - } - ], + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], "excluded": [], "notes": [], - } - ], - "package_roles": [], + }, + "services": [ + { + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + 
"active_state": "inactive", + "sub_state": "dead", + "unit_file_state": "disabled", + "condition_result": "no", + "managed_files": [ + { + "path": "/etc/foo.ini", + "src_rel": "etc/foo.ini", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": [], + "notes": [], + } + ], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + }, } bundle.mkdir(parents=True, exist_ok=True) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index cbfc208..fec9cc3 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -13,95 +13,136 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): ) state = { - "host": {"hostname": "test", "os": "debian"}, - "users": { - "role_name": "users", - "users": [ + "schema_version": 3, + "host": {"hostname": "test", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "foo": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "systemd_unit", "ref": "foo.service"}], + "roles": ["foo"], + }, + "curl": { + "version": "8.0", + "arches": [], + "installations": [{"version": "8.0", "arch": "amd64"}], + "observed_via": [{"kind": "package_role", "ref": "curl"}], + "roles": ["curl"], + }, + } + }, + "roles": { + "users": { + "role_name": "users", + "users": [ + { + "name": "alice", + "uid": 1000, + "gid": 1000, + "gecos": "Alice", + "home": "/home/alice", + "shell": "/bin/bash", + "primary_group": "alice", + 
"supplementary_groups": ["docker", "qubes"], + } + ], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [ { - "name": "alice", - "uid": 1000, - "gid": 1000, - "gecos": "Alice", - "home": "/home/alice", - "shell": "/bin/bash", - "primary_group": "alice", - "supplementary_groups": ["docker", "qubes"], + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "active_state": "inactive", + "sub_state": "dead", + "unit_file_state": "enabled", + "condition_result": "no", + "managed_files": [ + { + "path": "/etc/foo.conf", + "src_rel": "etc/foo.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": [], + "notes": [], } ], - "managed_files": [], - "excluded": [], - "notes": [], - }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [ + "packages": [ { - "path": "/etc/default/keyboard", - "src_rel": "etc/default/keyboard", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "custom_unowned", + "package": "curl", + "role_name": "curl", + "managed_files": [], + "excluded": [], + "notes": [], } ], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", - }, - { - "path": "/usr/local/bin/myscript", - "src_rel": "usr/local/bin/myscript", - "owner": "root", - "group": "root", - "mode": "0755", - "reason": "usr_local_bin_script", - }, - ], - "excluded": [], - "notes": [], - }, - "services": [ - { - "unit": "foo.service", - "role_name": "foo", - "packages": ["foo"], - "active_state": "inactive", - "sub_state": "dead", - "unit_file_state": "enabled", - "condition_result": "no", + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": 
"dnf_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", "managed_files": [ { - "path": "/etc/foo.conf", - "src_rel": "etc/foo.conf", + "path": "/etc/default/keyboard", + "src_rel": "etc/default/keyboard", "owner": "root", "group": "root", "mode": "0644", - "reason": "modified_conffile", + "reason": "custom_unowned", } ], "excluded": [], "notes": [], - } - ], - "package_roles": [ - { - "package": "curl", - "role_name": "curl", + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + }, + { + "path": "/usr/local/bin/myscript", + "src_rel": "usr/local/bin/myscript", + "owner": "root", + "group": "root", + "mode": "0755", + "reason": "usr_local_bin_script", + }, + ], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], "managed_files": [], "excluded": [], "notes": [], - } - ], + }, + }, } bundle.mkdir(parents=True, exist_ok=True) @@ -189,68 +230,102 @@ def test_manifest_site_mode_creates_host_inventory_and_raw_files(tmp_path: Path) ) state = { - "host": {"hostname": "test", "os": "debian"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], + "schema_version": 3, + "host": {"hostname": "test", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "foo": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "systemd_unit", "ref": "foo.service"}], + "roles": ["foo"], + } + } }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [ + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + 
"services": [ { - "path": "/etc/default/keyboard", - "src_rel": "etc/default/keyboard", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "custom_unowned", + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "active_state": "active", + "sub_state": "running", + "unit_file_state": "enabled", + "condition_result": "yes", + "managed_files": [ + { + "path": "/etc/foo.conf", + "src_rel": "etc/foo.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": [], + "notes": [], } ], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", - } - ], - "excluded": [], - "notes": [], - }, - "services": [ - { - "unit": "foo.service", - "role_name": "foo", - "packages": ["foo"], - "active_state": "active", - "sub_state": "running", - "unit_file_state": "enabled", - "condition_result": "yes", + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", "managed_files": [ { - "path": "/etc/foo.conf", - "src_rel": "etc/foo.conf", + "path": "/etc/default/keyboard", + "src_rel": "etc/default/keyboard", "owner": "root", "group": "root", "mode": "0644", - "reason": "modified_conffile", + "reason": "custom_unowned", } ], "excluded": [], "notes": [], - } - ], - "package_roles": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + } + 
], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + }, } bundle.mkdir(parents=True, exist_ok=True) @@ -337,58 +412,70 @@ def test_manifest_includes_dnf_config_role_when_present(tmp_path: Path): ) state = { + "schema_version": 3, "host": {"hostname": "test", "os": "redhat", "pkg_backend": "rpm"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], - }, - "services": [], - "package_roles": [], - "manual_packages": [], - "manual_packages_skipped": [], - "apt_config": { - "role_name": "apt_config", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "dnf_config": { - "role_name": "dnf_config", - "managed_files": [ - { - "path": "/etc/dnf/dnf.conf", - "src_rel": "etc/dnf/dnf.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "dnf_config", + "inventory": { + "packages": { + "dnf": { + "version": "4.0", + "arches": [], + "installations": [{"version": "4.0", "arch": "x86_64"}], + "observed_via": [{"kind": "dnf_config"}], + "roles": [], } - ], - "excluded": [], - "notes": [], + } }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "extra_paths": { - "role_name": "extra_paths", - "include_patterns": [], - "exclude_patterns": [], - "managed_files": [], - "excluded": [], - "notes": [], + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [ + { + "path": "/etc/dnf/dnf.conf", + 
"src_rel": "etc/dnf/dnf.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "dnf_config", + } + ], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, }, } From f01603dac484ab5c2d835d60e3edf510577cb6d9 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 17:19:59 +1100 Subject: [PATCH 28/85] Better attribution of config files to parent service/role (not systemd helpers) --- enroll/harvest.py | 108 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 28 deletions(-) diff --git a/enroll/harvest.py b/enroll/harvest.py index 4ca3984..74ac516 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -676,6 +676,10 @@ def harvest( backend.build_etc_index() ) + # Global de-duplication across roles: each absolute path is captured at most once. + # This avoids multiple Ansible roles managing the same destination file. + captured_global: Set[str] = set() + # ------------------------- # Service roles # ------------------------- @@ -685,8 +689,45 @@ def harvest( service_role_aliases: Dict[str, Set[str]] = {} # De-dupe per-role captures (avoids duplicate tasks in manifest generation). seen_by_role: Dict[str, Set[str]] = {} - for unit in list_enabled_services(): + # Managed/excluded lists keyed by role so helper services can attribute shared + # configuration to their parent service role. 
+ managed_by_role: Dict[str, List[ManagedFile]] = {} + excluded_by_role: Dict[str, List[ExcludedFile]] = {} + + enabled_services = list_enabled_services() + enabled_set = set(enabled_services) + + def _service_sort_key(unit: str) -> tuple[int, str, str]: + # Prefer "parent" services over helpers (e.g. NetworkManager.service before + # NetworkManager-dispatcher.service) so shared config lands in the main role. + base = unit.removesuffix(".service") + base = base.split("@", 1)[0] + return (base.count("-"), base.lower(), unit.lower()) + + def _parent_service_unit(unit: str) -> Optional[str]: + # If unit name contains '-' segments, treat dashed prefixes as potential parents. + # Example: NetworkManager-dispatcher.service -> NetworkManager.service (if enabled). + if not unit.endswith(".service"): + return None + base = unit.removesuffix(".service") + base = base.split("@", 1)[0] + parts = base.split("-") + for i in range(len(parts) - 1, 0, -1): + cand = "-".join(parts[:i]) + ".service" + if cand in enabled_set: + return cand + return None + + parent_unit_for: Dict[str, str] = {} + for u in enabled_services: + pu = _parent_service_unit(u) + if pu: + parent_unit_for[u] = pu + + for unit in sorted(enabled_services, key=_service_sort_key): role = _role_name_from_unit(unit) + parent_unit = parent_unit_for.get(unit) + parent_role = _role_name_from_unit(parent_unit) if parent_unit else None try: ui = get_unit_info(unit) @@ -695,6 +736,8 @@ def harvest( # shared snippets can still be attributed to this role by name. 
service_role_aliases.setdefault(role, _hint_names(unit, set()) | {role}) seen_by_role.setdefault(role, set()) + managed = managed_by_role.setdefault(role, []) + excluded = excluded_by_role.setdefault(role, []) service_snaps.append( ServiceSnapshot( unit=unit, @@ -704,8 +747,8 @@ def harvest( sub_state=None, unit_file_state=None, condition_result=None, - managed_files=[], - excluded=[], + managed_files=managed, + excluded=excluded, notes=[str(e)], ) ) @@ -713,8 +756,8 @@ def harvest( pkgs: Set[str] = set() notes: List[str] = [] - excluded: List[ExcludedFile] = [] - managed: List[ManagedFile] = [] + excluded = excluded_by_role.setdefault(role, []) + managed = managed_by_role.setdefault(role, []) candidates: Dict[str, str] = {} if ui.fragment_path: @@ -810,18 +853,31 @@ def harvest( # De-dupe within this role while capturing. This also avoids emitting # duplicate Ansible tasks for the same destination path. - role_seen = seen_by_role.setdefault(role, set()) + # Attribute shared /etc config to the parent service role when this unit looks + # like a helper (e.g. NetworkManager-dispatcher.service -> NetworkManager.service). 
for path, reason in sorted(candidates.items()): + dest_role = role + if ( + parent_role + and path.startswith("/etc/") + and reason not in ("systemd_dropin", "systemd_envfile") + ): + dest_role = parent_role + + dest_managed = managed_by_role.setdefault(dest_role, []) + dest_excluded = excluded_by_role.setdefault(dest_role, []) + dest_seen = seen_by_role.setdefault(dest_role, set()) _capture_file( bundle_dir=bundle_dir, - role_name=role, + role_name=dest_role, abs_path=path, reason=reason, policy=policy, path_filter=path_filter, - managed_out=managed, - excluded_out=excluded, - seen_role=role_seen, + managed_out=dest_managed, + excluded_out=dest_excluded, + seen_role=dest_seen, + seen_global=captured_global, ) service_snaps.append( @@ -857,7 +913,7 @@ def harvest( s.unit: s for s in service_snaps } - for t in enabled_timers: + for t in sorted(enabled_timers): try: ti = get_timer_info(t) except Exception: # nosec @@ -895,6 +951,7 @@ def harvest( managed_out=snap.managed_files, excluded_out=snap.excluded, seen_role=role_seen, + seen_global=captured_global, ) continue @@ -935,7 +992,7 @@ def harvest( manual_pkgs_skipped: List[str] = [] pkg_snaps: List[PackageSnapshot] = [] - for pkg in manual_pkgs: + for pkg in sorted(manual_pkgs): if pkg in covered_by_services: manual_pkgs_skipped.append(pkg) continue @@ -997,6 +1054,7 @@ def harvest( managed_out=managed, excluded_out=excluded, seen_role=role_seen, + seen_global=captured_global, ) if not pkg_to_etc_paths.get(pkg, []) and not managed: @@ -1060,6 +1118,7 @@ def harvest( managed_out=users_managed, excluded_out=users_excluded, seen_role=users_role_seen, + seen_global=captured_global, ) users_snapshot = UsersSnapshot( @@ -1098,6 +1157,7 @@ def harvest( managed_out=apt_managed, excluded_out=apt_excluded, seen_role=apt_role_seen, + seen_global=captured_global, ) elif backend.name == "rpm": dnf_role_seen = seen_by_role.setdefault(dnf_role_name, set()) @@ -1112,6 +1172,7 @@ def harvest( managed_out=dnf_managed, 
excluded_out=dnf_excluded, seen_role=dnf_role_seen, + seen_global=captured_global, ) apt_config_snapshot = AptConfigSnapshot( @@ -1135,20 +1196,9 @@ def harvest( etc_managed: List[ManagedFile] = [] etc_role_name = "etc_custom" - # Build a set of files already captured by other roles. - already: Set[str] = set() - for s in service_snaps: - for mf in s.managed_files: - already.add(mf.path) - for p in pkg_snaps: - for mf in p.managed_files: - already.add(mf.path) - for mf in users_managed: - already.add(mf.path) - for mf in apt_managed: - already.add(mf.path) - for mf in dnf_managed: - already.add(mf.path) + # Files already captured by earlier roles. Use the global set so we never + # end up with the same destination path managed by multiple roles. + already: Set[str] = captured_global # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} @@ -1288,7 +1338,7 @@ def harvest( managed_out=managed_out, excluded_out=excluded_out, seen_role=role_seen, - seen_global=already, + seen_global=captured_global, ) # Walk /etc for remaining unowned config-ish files @@ -1327,7 +1377,7 @@ def harvest( managed_out=managed_out, excluded_out=excluded_out, seen_role=role_seen, - seen_global=already, + seen_global=captured_global, ): scanned += 1 if scanned >= MAX_FILES_CAP: @@ -1396,6 +1446,7 @@ def harvest( managed_out=ul_managed, excluded_out=ul_excluded, seen_role=role_seen, + seen_global=captured_global, metadata=(owner, group, mode), ): already_all.add(path) @@ -1470,6 +1521,7 @@ def harvest( managed_out=extra_managed, excluded_out=extra_excluded, seen_role=extra_role_seen, + seen_global=captured_global, ): already_all.add(path) From e44e4aaf3aa554daf5128d0cfa4720cd9d0f7f03 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 17:39:39 +1100 Subject: [PATCH 29/85] 0.2.0 --- CHANGELOG.md | 1 + debian/changelog | 9 ++++++++- pyproject.toml | 2 +- rpm/enroll.spec | 6 
+++++- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f92e0b7..49217f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ * Add version CLI arg * Add ability to enroll RH-style systems (DNF5/DNF/RPM) + * Refactor harvest state to track package versions # 0.1.7 diff --git a/debian/changelog b/debian/changelog index eabdefc..f050e7f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,12 @@ -enroll (0.1.7) unstable; urgency=medium +enroll (0.2.0) unstable; urgency=medium + * Add version CLI arg + * Add ability to enroll RH-style systems (DNF5/DNF/RPM) + * Refactor harvest state to track package versions + + -- Miguel Jacq Mon, 29 Dec 2025 17:30:00 +1100 + +enroll (0.1.7) unstable; urgency=medium * Fix an attribution bug for certain files ending up in the wrong package/role. -- Miguel Jacq Sun, 28 Dec 2025 18:30:00 +1100 diff --git a/pyproject.toml b/pyproject.toml index ca875e8..683a9b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.7" +version = "0.2.0" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index f63a12c..3beac03 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.7 +%global upstream_version 0.2.0 Name: enroll Version: %{upstream_version} @@ -43,6 +43,10 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Mon Dec 29 2025 Miguel Jacq - %{version}-%{release} +- Add version CLI arg +- Add ability to enroll RH-style systems (DNF5/DNF/RPM) +- Refactor harvest state to track package versions * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} - Fix an attribution bug for certain files ending up in the wrong package/role. 
* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} From e4887b7add36f3e926f7362e3e159fd9c523beeb Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 1 Jan 2026 11:02:30 +1100 Subject: [PATCH 30/85] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d075951..f4920b5 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ sudo apt update sudo apt install enroll ``` -### Fedora 42 +## Fedora ```bash sudo rpm --import https://mig5.net/static/mig5.asc From 09438246ae0557185c3343c0db6e0101f2d75385 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 1 Jan 2026 15:24:21 +1100 Subject: [PATCH 31/85] Build for Fedora 43 --- Dockerfile.rpmbuild | 8 +++++--- release.sh | 45 ++++++++++++++++++++++++++++++--------------- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/Dockerfile.rpmbuild b/Dockerfile.rpmbuild index c928cea..f76a673 100644 --- a/Dockerfile.rpmbuild +++ b/Dockerfile.rpmbuild @@ -1,5 +1,6 @@ # syntax=docker/dockerfile:1 -FROM fedora:42 +ARG BASE_IMAGE=fedora:42 +FROM ${BASE_IMAGE} RUN set -eux; \ dnf -y update; \ @@ -34,11 +35,12 @@ SRC="${SRC:-/src}" WORKROOT="${WORKROOT:-/work}" OUT="${OUT:-/out}" DEPS_DIR="${DEPS_DIR:-/deps}" - +VERSION_ID="$(grep VERSION_ID /etc/os-release | cut -d= -f2)" +echo "Version ID is ${VERSION_ID}" # Install jinjaturtle from local rpm # Filter out .src.rpm and debug* subpackages if present. 
if [ -d "${DEPS_DIR}" ] && compgen -G "${DEPS_DIR}/*.rpm" > /dev/null; then - mapfile -t rpms < <(ls -1 "${DEPS_DIR}"/*.rpm | grep -vE '(\.src\.rpm$|-(debuginfo|debugsource)-)') + mapfile -t rpms < <(ls -1 "${DEPS_DIR}"/*.rpm | grep -vE '(\.src\.rpm$|-(debuginfo|debugsource)-)' | grep "${VERSION_ID}") if [ "${#rpms[@]}" -gt 0 ]; then echo "Installing dependency RPMs from ${DEPS_DIR}:" printf ' - %s\n' "${rpms[@]}" diff --git a/release.sh b/release.sh index fdbe771..0a052c7 100755 --- a/release.sh +++ b/release.sh @@ -44,31 +44,46 @@ for dist in ${DISTS[@]}; do done # RPM -sudo apt-get -y install createrepo-c rpm -docker build -f Dockerfile.rpmbuild -t enroll:f42 --progress=plain . -docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll:f42 -sudo chown -R "${USER}" "$PWD/dist" - REPO_ROOT="${HOME}/git/repo_rpm" RPM_REPO="${REPO_ROOT}/rpm/x86_64" BUILD_OUTPUT="${HOME}/git/enroll/dist" REMOTE="letessier.mig5.net:/opt/repo_rpm" KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" -echo "==> Updating RPM repo..." mkdir -p "$RPM_REPO" +sudo apt-get -y install createrepo-c rpm -for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do - rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" +DISTS=( + fedora:43 + fedora:42 +) + +for dist in ${DISTS[@]}; do + release=$(echo ${dist} | cut -d: -f2) + docker build \ + --no-cache \ + -f Dockerfile.rpmbuild \ + -t enroll-rpm:${release} \ + --progress=plain \ + --build-arg BASE_IMAGE=${dist} \ + . + + docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll-rpm:${release} + sudo chown -R "${USER}" "$PWD/dist" + + echo "==> Updating RPM repo..." + for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do + rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" + done + + cp "${BUILD_OUTPUT}/rpm/"*.rpm "$RPM_REPO/" + + createrepo_c "$RPM_REPO" + + echo "==> Signing repomd.xml..." 
+ qubes-gpg-client --local-user "$KEYID" --detach-sign --armor "$RPM_REPO/repodata/repomd.xml" > "$RPM_REPO/repodata/repomd.xml.asc" done -cp "${BUILD_OUTPUT}/rpm/"*.rpm "$RPM_REPO/" - -createrepo_c "$RPM_REPO" - -echo "==> Signing repomd.xml..." -qubes-gpg-client --local-user "$KEYID" --detach-sign --armor "$RPM_REPO/repodata/repomd.xml" > "$RPM_REPO/repodata/repomd.xml.asc" - echo "==> Syncing repo to server..." rsync -aHPvz --exclude=.git --delete "$REPO_ROOT/" "$REMOTE/" From 781efef4678d4ee1d176a264d62423aefe6680b6 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 20:19:47 +1100 Subject: [PATCH 32/85] Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook --- CHANGELOG.md | 4 ++++ enroll/manifest.py | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49217f0..8283b5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.2.1 + + * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook + # 0.2.0 * Add version CLI arg diff --git a/enroll/manifest.py b/enroll/manifest.py index bc629bb..839ebab 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -1551,8 +1551,6 @@ User-requested extra file harvesting. 
manifested_extra_paths_roles.append(role) - manifested_usr_local_custom_roles.append(role) - # ------------------------- # Service roles # ------------------------- From c88405ef01510b554846b55a5d3dd9593bb46352 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 21:10:32 +1100 Subject: [PATCH 33/85] Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files --- CHANGELOG.md | 1 + enroll/fsutil.py | 2 +- enroll/harvest.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++ enroll/ignore.py | 30 ++++++++++++++++++ enroll/manifest.py | 65 +++++++++++++++++++++++++++++++++++--- 5 files changed, 170 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8283b5b..3c41210 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # 0.2.1 * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook + * Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files # 0.2.0 diff --git a/enroll/fsutil.py b/enroll/fsutil.py index 3d18df6..c852b9e 100644 --- a/enroll/fsutil.py +++ b/enroll/fsutil.py @@ -24,7 +24,7 @@ def stat_triplet(path: str) -> Tuple[str, str, str]: mode is a zero-padded octal string (e.g. "0644"). 
""" st = os.stat(path, follow_symlinks=True) - mode = oct(st.st_mode & 0o777)[2:].zfill(4) + mode = oct(st.st_mode & 0o7777)[2:].zfill(4) import grp import pwd diff --git a/enroll/harvest.py b/enroll/harvest.py index 74ac516..98e1404 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -34,6 +34,15 @@ class ManagedFile: reason: str +@dataclass +class ManagedDir: + path: str + owner: str + group: str + mode: str + reason: str + + @dataclass class ExcludedFile: path: str @@ -109,6 +118,7 @@ class ExtraPathsSnapshot: role_name: str include_patterns: List[str] exclude_patterns: List[str] + managed_dirs: List[ManagedDir] managed_files: List[ManagedFile] excluded: List[ExcludedFile] notes: List[str] @@ -1484,12 +1494,78 @@ def harvest( extra_notes: List[str] = [] extra_excluded: List[ExcludedFile] = [] extra_managed: List[ManagedFile] = [] + extra_managed_dirs: List[ManagedDir] = [] + extra_dir_seen: Set[str] = set() + + def _walk_and_capture_dirs(root: str) -> None: + root = os.path.normpath(root) + if not root.startswith("/"): + root = "/" + root + if not os.path.isdir(root) or os.path.islink(root): + return + for dirpath, dirnames, _ in os.walk(root, followlinks=False): + if len(extra_managed_dirs) >= MAX_FILES_CAP: + extra_notes.append( + f"Reached directory cap ({MAX_FILES_CAP}) while scanning {root}." + ) + return + dirpath = os.path.normpath(dirpath) + if not dirpath.startswith("/"): + dirpath = "/" + dirpath + if path_filter.is_excluded(dirpath): + # Prune excluded subtrees. 
+ dirnames[:] = [] + continue + if os.path.islink(dirpath) or not os.path.isdir(dirpath): + dirnames[:] = [] + continue + + if dirpath not in extra_dir_seen: + deny = policy.deny_reason_dir(dirpath) + if not deny: + try: + owner, group, mode = stat_triplet(dirpath) + extra_managed_dirs.append( + ManagedDir( + path=dirpath, + owner=owner, + group=group, + mode=mode, + reason="user_include_dir", + ) + ) + except OSError: + pass + extra_dir_seen.add(dirpath) + + # Prune excluded dirs and symlinks early. + pruned: List[str] = [] + for d in dirnames: + p = os.path.join(dirpath, d) + if os.path.islink(p) or path_filter.is_excluded(p): + continue + pruned.append(d) + dirnames[:] = pruned + extra_role_name = "extra_paths" extra_role_seen = seen_by_role.setdefault(extra_role_name, set()) include_specs = list(include_paths or []) exclude_specs = list(exclude_paths or []) + # If any include pattern points at a directory, capture that directory tree's + # ownership/mode so the manifest can recreate it accurately. 
+ include_pats = path_filter.iter_include_patterns() + for pat in include_pats: + if pat.kind == "prefix": + p = pat.value + if os.path.isdir(p) and not os.path.islink(p): + _walk_and_capture_dirs(p) + elif pat.kind == "glob": + for h in glob.glob(pat.value, recursive=True): + if os.path.isdir(h) and not os.path.islink(h): + _walk_and_capture_dirs(h) + if include_specs: extra_notes.append("User include patterns:") extra_notes.extend([f"- {p}" for p in include_specs]) @@ -1529,6 +1605,7 @@ def harvest( role_name=extra_role_name, include_patterns=include_specs, exclude_patterns=exclude_specs, + managed_dirs=extra_managed_dirs, managed_files=extra_managed, excluded=extra_excluded, notes=extra_notes, diff --git a/enroll/ignore.py b/enroll/ignore.py index 904997f..895c030 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -137,3 +137,33 @@ class IgnorePolicy: return "sensitive_content" return None + + def deny_reason_dir(self, path: str) -> Optional[str]: + """Directory-specific deny logic. + + deny_reason() is file-oriented (it rejects directories as "not_regular_file"). + For directory metadata capture (so roles can recreate directory trees), we need + a lighter-weight check: + - apply deny_globs (unless dangerous) + - require the path to be a real directory (no symlink) + - ensure it's stat'able/readable + + No size checks or content scanning are performed for directories. 
+ """ + if not self.dangerous: + for g in self.deny_globs or []: + if fnmatch.fnmatch(path, g): + return "denied_path" + + try: + os.stat(path, follow_symlinks=True) + except OSError: + return "unreadable" + + if os.path.islink(path): + return "symlink" + + if not os.path.isdir(path): + return "not_directory" + + return None diff --git a/enroll/manifest.py b/enroll/manifest.py index 839ebab..a373773 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -344,6 +344,29 @@ def _write_role_defaults(role_dir: str, mapping: Dict[str, Any]) -> None: f.write(out) +def _build_managed_dirs_var( + managed_dirs: List[Dict[str, Any]], +) -> List[Dict[str, Any]]: + """Convert enroll managed_dirs into an Ansible-friendly list of dicts. + + Each dict drives a role task loop and is safe across hosts. + """ + out: List[Dict[str, Any]] = [] + for d in managed_dirs: + dest = d.get("path") or "" + if not dest: + continue + out.append( + { + "dest": dest, + "owner": d.get("owner") or "root", + "group": d.get("group") or "root", + "mode": d.get("mode") or "0755", + } + ) + return out + + def _build_managed_files_var( managed_files: List[Dict[str, Any]], templated_src_rels: Set[str], @@ -390,7 +413,22 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... 
- return f"""- name: Deploy any systemd unit files (templates) + return f"""- name: Ensure managed directories exist (preserve owner/group/mode) + ansible.builtin.file: + path: "{{{{ item.dest }}}}" + state: directory + owner: "{{{{ item.owner }}}}" + group: "{{{{ item.group }}}}" + mode: "{{{{ item.mode }}}}" + loop: "{{{{ {var_prefix}_managed_dirs | default([]) }}}}" + +- name: Ensure destination directories exist + ansible.builtin.file: + path: "{{{{ item.dest | dirname }}}}" + state: directory + loop: "{{{{ {var_prefix}_managed_files | default([]) }}}}" + +- name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -1444,13 +1482,17 @@ Unowned /etc config files not attributed to packages or services. # ------------------------- # extra_paths role (user-requested includes) # ------------------------- - if extra_paths_snapshot and extra_paths_snapshot.get("managed_files"): + if extra_paths_snapshot and ( + extra_paths_snapshot.get("managed_files") + or extra_paths_snapshot.get("managed_dirs") + ): role = extra_paths_snapshot.get("role_name", "extra_paths") role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) var_prefix = role + managed_dirs = extra_paths_snapshot.get("managed_dirs", []) or [] managed_files = extra_paths_snapshot.get("managed_files", []) excluded = extra_paths_snapshot.get("excluded", []) notes = extra_paths_snapshot.get("notes", []) @@ -1489,12 +1531,23 @@ Unowned /etc config files not attributed to packages or services. 
notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_dirs": dirs_var, + f"{var_prefix}_managed_files": files_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + { + f"{var_prefix}_managed_dirs": [], + f"{var_prefix}_managed_files": [], + }, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1530,6 +1583,10 @@ User-requested extra file harvesting. """ + ("\n".join([f"- {p}" for p in exclude_pats]) or "- (none)") + """\n +## Managed directories +""" + + ("\n".join([f"- {d.get('path')}" for d in managed_dirs]) or "- (none)") + + """\n ## Managed files """ + ("\n".join([f"- {mf.get('path')}" for mf in managed_files]) or "- (none)") From 29b52d451d4d477ea2f9d05fdc5c85fe8f8ecd16 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 21:29:16 +1100 Subject: [PATCH 34/85] 0.2.1 --- debian/changelog | 7 +++++++ pyproject.toml | 2 +- rpm/enroll.spec | 5 ++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index f050e7f..dbc7548 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.2.1) unstable; urgency=medium + + * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook + * Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files + + -- Miguel Jacq Fri, 01 Jan 2026 21:30:00 +1100 + enroll (0.2.0) unstable; urgency=medium * Add version CLI arg diff --git a/pyproject.toml b/pyproject.toml index 683a9b2..34f411e 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.2.0" +version = "0.2.1" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 3beac03..8fc8cac 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.2.0 +%global upstream_version 0.2.1 Name: enroll Version: %{upstream_version} @@ -43,6 +43,9 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Fri Jan 01 2026 Miguel Jacq - %{version}-%{release} +- Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook +- Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files * Mon Dec 29 2025 Miguel Jacq - %{version}-%{release} - Add version CLI arg - Add ability to enroll RH-style systems (DNF5/DNF/RPM) From 824010b2ab15865b0c1845d8cc9e67a80c7accf2 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 3 Jan 2026 11:39:57 +1100 Subject: [PATCH 35/85] Several bug fixes and prep for 0.2.2 - Fix stat() of parent directory so that we set directory perms correct on --include paths. 
- Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty --- CHANGELOG.md | 5 ++ README.md | 2 +- debian/changelog | 7 ++ enroll/harvest.py | 171 +++++++++++++++++++++++++++++++++++++++------ enroll/manifest.py | 68 ++++++++++++++---- enroll/remote.py | 34 +++++---- pyproject.toml | 2 +- release.sh | 16 ++--- rpm/enroll.spec | 5 +- 9 files changed, 249 insertions(+), 61 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c41210..0740cb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.2.2 + + * Fix stat() of parent directory so that we set directory perms correct on --include paths. + * Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty + # 0.2.1 * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook diff --git a/README.md b/README.md index f4920b5..e399633 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ sudo rpm --import https://mig5.net/static/mig5.asc sudo tee /etc/yum.repos.d/mig5.repo > /dev/null << 'EOF' [mig5] name=mig5 Repository -baseurl=https://rpm.mig5.net/rpm/$basearch +baseurl=https://rpm.mig5.net/rpm/$releasever/$basearch enabled=1 gpgcheck=1 repo_gpgcheck=1 diff --git a/debian/changelog b/debian/changelog index dbc7548..8c2f4b9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.2.2) unstable; urgency=medium + + * Fix stat() of parent directory so that we set directory perms correct on --include paths. 
+ * Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty + + -- Miguel Jacq Sat, 02 Jan 2026 09:56:00 +1100 + enroll (0.2.1) unstable; urgency=medium * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook diff --git a/enroll/harvest.py b/enroll/harvest.py index 98e1404..7aba7c6 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -6,7 +6,7 @@ import os import re import shutil import time -from dataclasses import dataclass, asdict +from dataclasses import dataclass, asdict, field from typing import Dict, List, Optional, Set from .systemd import ( @@ -58,59 +58,66 @@ class ServiceSnapshot: sub_state: Optional[str] unit_file_state: Optional[str] condition_result: Optional[str] - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class PackageSnapshot: package: str role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class UsersSnapshot: role_name: str users: List[dict] - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class AptConfigSnapshot: role_name: str - managed_files: List[ManagedFile] - 
excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class DnfConfigSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class EtcCustomSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class UsrLocalCustomSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass @@ -149,6 +156,71 @@ ALLOWED_UNOWNED_EXTS = { MAX_FILES_CAP = 4000 MAX_UNOWNED_FILES_PER_ROLE = 500 + +def _merge_parent_dirs( + existing_dirs: List[ManagedDir], + managed_files: List[ManagedFile], + *, + policy: IgnorePolicy, +) -> List[ManagedDir]: + """Ensure parent directories for managed_files are present in managed_dirs. + + This is used so the Ansible manifest can create destination directories with + explicit owner/group/mode (ansible-lint friendly) without needing a separate + "mkdir without perms" task. 
+ + We only add the immediate parent directory for each managed file. For + explicit directory includes (extra_paths), existing_dirs will already + contain the walked directory tree. + """ + by_path: Dict[str, ManagedDir] = { + d.path: d for d in (existing_dirs or []) if d.path + } + + for mf in managed_files or []: + p = str(mf.path or "").rstrip("/") + if not p: + continue + dpath = os.path.dirname(p) + if not dpath or dpath == "/": + continue + if dpath in by_path: + continue + + # Directory-deny logic: newer IgnorePolicy implementations provide + # deny_reason_dir(). Older/simple policies (including unit tests) may + # only implement deny_reason(), which is file-oriented and may return + # "not_regular_file" for directories. + deny = None + deny_dir = getattr(policy, "deny_reason_dir", None) + if callable(deny_dir): + deny = deny_dir(dpath) + else: + deny = policy.deny_reason(dpath) + if deny in ("not_regular_file", "not_file", "not_regular"): + deny = None + if deny: + # If the file itself was captured, its parent directory is likely safe, + # but still respect deny globs for directories to avoid managing + # sensitive/forbidden trees. + continue + + try: + owner, group, mode = stat_triplet(dpath) + except OSError: + continue + + by_path[dpath] = ManagedDir( + path=dpath, + owner=owner, + group=group, + mode=mode, + reason="parent_of_managed_file", + ) + + return [by_path[k] for k in sorted(by_path)] + + # Directories that are shared across many packages. # Never attribute all unowned files in these trees # to one single package. 
@@ -1521,7 +1593,14 @@ def harvest( continue if dirpath not in extra_dir_seen: - deny = policy.deny_reason_dir(dirpath) + deny = None + deny_dir = getattr(policy, "deny_reason_dir", None) + if callable(deny_dir): + deny = deny_dir(dirpath) + else: + deny = policy.deny_reason(dirpath) + if deny in ("not_regular_file", "not_file", "not_regular"): + deny = None if not deny: try: owner, group, mode = stat_triplet(dirpath) @@ -1661,6 +1740,52 @@ def harvest( "roles": roles, } + # Ensure every role has explicit managed_dirs for parent directories of managed files. + # This lets the manifest create directories with owner/group/mode (ansible-lint friendly) + # without a separate "mkdir without perms" task. + users_snapshot.managed_dirs = _merge_parent_dirs( + users_snapshot.managed_dirs, users_snapshot.managed_files, policy=policy + ) + for s in service_snaps: + s.managed_dirs = _merge_parent_dirs( + s.managed_dirs, s.managed_files, policy=policy + ) + for p in pkg_snaps: + p.managed_dirs = _merge_parent_dirs( + p.managed_dirs, p.managed_files, policy=policy + ) + + if apt_config_snapshot: + apt_config_snapshot.managed_dirs = _merge_parent_dirs( + apt_config_snapshot.managed_dirs, + apt_config_snapshot.managed_files, + policy=policy, + ) + if dnf_config_snapshot: + dnf_config_snapshot.managed_dirs = _merge_parent_dirs( + dnf_config_snapshot.managed_dirs, + dnf_config_snapshot.managed_files, + policy=policy, + ) + if etc_custom_snapshot: + etc_custom_snapshot.managed_dirs = _merge_parent_dirs( + etc_custom_snapshot.managed_dirs, + etc_custom_snapshot.managed_files, + policy=policy, + ) + if usr_local_custom_snapshot: + usr_local_custom_snapshot.managed_dirs = _merge_parent_dirs( + usr_local_custom_snapshot.managed_dirs, + usr_local_custom_snapshot.managed_files, + policy=policy, + ) + if extra_paths_snapshot: + extra_paths_snapshot.managed_dirs = _merge_parent_dirs( + extra_paths_snapshot.managed_dirs, + extra_paths_snapshot.managed_files, + policy=policy, + ) + state = { 
"enroll": { "version": get_enroll_version(), diff --git a/enroll/manifest.py b/enroll/manifest.py index a373773..f30e5f3 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -422,12 +422,6 @@ def _render_generic_files_tasks( mode: "{{{{ item.mode }}}}" loop: "{{{{ {var_prefix}_managed_dirs | default([]) }}}}" -- name: Ensure destination directories exist - ansible.builtin.file: - path: "{{{{ item.dest | dirname }}}}" - state: directory - loop: "{{{{ {var_prefix}_managed_files | default([]) }}}}" - - name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" @@ -983,6 +977,7 @@ Generated non-system user accounts and SSH public material. var_prefix = role managed_files = apt_config_snapshot.get("managed_files", []) + managed_dirs = apt_config_snapshot.get("managed_dirs", []) or [] excluded = apt_config_snapshot.get("excluded", []) notes = apt_config_snapshot.get("notes", []) @@ -1019,12 +1014,20 @@ Generated non-system user accounts and SSH public material. notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1134,6 +1137,7 @@ APT configuration harvested from the system (sources, pinning, and keyrings). 
var_prefix = role managed_files = dnf_config_snapshot.get("managed_files", []) + managed_dirs = dnf_config_snapshot.get("managed_dirs", []) or [] excluded = dnf_config_snapshot.get("excluded", []) notes = dnf_config_snapshot.get("notes", []) @@ -1169,12 +1173,20 @@ APT configuration harvested from the system (sources, pinning, and keyrings). notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1285,6 +1297,7 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP var_prefix = role managed_files = etc_custom_snapshot.get("managed_files", []) + managed_dirs = etc_custom_snapshot.get("managed_dirs", []) or [] excluded = etc_custom_snapshot.get("excluded", []) notes = etc_custom_snapshot.get("notes", []) @@ -1321,12 +1334,20 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + 
role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1395,6 +1416,7 @@ Unowned /etc config files not attributed to packages or services. var_prefix = role managed_files = usr_local_custom_snapshot.get("managed_files", []) + managed_dirs = usr_local_custom_snapshot.get("managed_dirs", []) or [] excluded = usr_local_custom_snapshot.get("excluded", []) notes = usr_local_custom_snapshot.get("notes", []) @@ -1431,12 +1453,20 @@ Unowned /etc config files not attributed to packages or services. notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1616,6 +1646,7 @@ User-requested extra file harvesting. unit = svc["unit"] pkgs = svc.get("packages", []) or [] managed_files = svc.get("managed_files", []) or [] + managed_dirs = svc.get("managed_dirs", []) or [] role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) @@ -1660,11 +1691,14 @@ User-requested extra file harvesting. 
notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} base_vars: Dict[str, Any] = { f"{var_prefix}_unit_name": unit, f"{var_prefix}_packages": pkgs, f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, f"{var_prefix}_manage_unit": True, f"{var_prefix}_systemd_enabled": bool(enabled_at_harvest), f"{var_prefix}_systemd_state": desired_state, @@ -1679,6 +1713,7 @@ User-requested extra file harvesting. f"{var_prefix}_unit_name": unit, f"{var_prefix}_packages": [], f"{var_prefix}_managed_files": [], + f"{var_prefix}_managed_dirs": [], f"{var_prefix}_manage_unit": False, f"{var_prefix}_systemd_enabled": False, f"{var_prefix}_systemd_state": "stopped", @@ -1782,6 +1817,7 @@ Generated from `{unit}`. role = pr["role_name"] pkg = pr.get("package") or "" managed_files = pr.get("managed_files", []) or [] + managed_dirs = pr.get("managed_dirs", []) or [] role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) @@ -1823,10 +1859,13 @@ Generated from `{unit}`. notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} base_vars: Dict[str, Any] = { f"{var_prefix}_packages": pkgs, f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, } base_vars = _merge_mappings_overwrite(base_vars, jt_map) @@ -1836,6 +1875,7 @@ Generated from `{unit}`. { f"{var_prefix}_packages": [], f"{var_prefix}_managed_files": [], + f"{var_prefix}_managed_dirs": [], }, ) _write_hostvars(out_dir, fqdn or "", role, base_vars) diff --git a/enroll/remote.py b/enroll/remote.py index 9618512..b86cd08 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -16,7 +16,6 @@ def _safe_extract_tar(tar: tarfile.TarFile, dest: Path) -> None: Protects against path traversal (e.g. entries containing ../). 
""" - # Note: tar member names use POSIX separators regardless of platform. dest = dest.resolve() @@ -80,9 +79,18 @@ def _build_enroll_pyz(tmpdir: Path) -> Path: return pyz_path -def _ssh_run(ssh, cmd: str) -> tuple[int, str, str]: - """Run a command over a Paramiko SSHClient.""" - _stdin, stdout, stderr = ssh.exec_command(cmd) +def _ssh_run(ssh, cmd: str, *, get_pty: bool = False) -> tuple[int, str, str]: + """Run a command over a Paramiko SSHClient. + + Paramiko's exec_command runs commands without a TTY by default. + Some hosts have sudoers "requiretty" enabled, which causes sudo to + fail even when passwordless sudo is configured. For those commands, + request a PTY. + + We do not request a PTY for commands that stream binary data + (e.g. tar/gzip output), as a PTY can corrupt the byte stream. + """ + _stdin, stdout, stderr = ssh.exec_command(cmd, get_pty=get_pty) out = stdout.read().decode("utf-8", errors="replace") err = stderr.read().decode("utf-8", errors="replace") rc = stdout.channel.recv_exit_status() @@ -105,7 +113,6 @@ def remote_harvest( Returns the local path to state.json inside local_out_dir. """ - try: import paramiko # type: ignore except Exception as e: @@ -182,34 +189,35 @@ def remote_harvest( for p in exclude_paths or []: argv.extend(["--exclude-path", str(p)]) - _cmd = " ".join(shlex.quote(a) for a in argv) - if not no_sudo: - cmd = f"sudo {_cmd}" - else: - cmd = _cmd - rc, out, err = _ssh_run(ssh, cmd) + _cmd = " ".join(map(shlex.quote, argv)) + cmd = f"sudo {_cmd}" if not no_sudo else _cmd + + # PTY for sudo commands (helps sudoers requiretty). + rc, out, err = _ssh_run(ssh, cmd, get_pty=(not no_sudo)) if rc != 0: raise RuntimeError( "Remote harvest failed.\n" f"Command: {cmd}\n" f"Exit code: {rc}\n" + f"Stdout: {out.strip()}\n" f"Stderr: {err.strip()}" ) if not no_sudo: - # Ensure user can read the files, before we tar it + # Ensure user can read the files, before we tar it. 
if not resolved_user: raise RuntimeError( "Unable to determine remote username for chown. " "Pass --remote-user explicitly or use --no-sudo." ) cmd = f"sudo chown -R {resolved_user} {rbundle}" - rc, out, err = _ssh_run(ssh, cmd) + rc, out, err = _ssh_run(ssh, cmd, get_pty=True) if rc != 0: raise RuntimeError( "chown of harvest failed.\n" f"Command: {cmd}\n" f"Exit code: {rc}\n" + f"Stdout: {out.strip()}\n" f"Stderr: {err.strip()}" ) diff --git a/pyproject.toml b/pyproject.toml index 34f411e..72dd732 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.2.1" +version = "0.2.2" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/release.sh b/release.sh index 0a052c7..db3f27b 100755 --- a/release.sh +++ b/release.sh @@ -44,14 +44,11 @@ for dist in ${DISTS[@]}; do done # RPM -REPO_ROOT="${HOME}/git/repo_rpm" -RPM_REPO="${REPO_ROOT}/rpm/x86_64" -BUILD_OUTPUT="${HOME}/git/enroll/dist" -REMOTE="letessier.mig5.net:/opt/repo_rpm" -KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" - -mkdir -p "$RPM_REPO" sudo apt-get -y install createrepo-c rpm +BUILD_OUTPUT="${HOME}/git/enroll/dist" +KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" +REPO_ROOT="${HOME}/git/repo_rpm" +REMOTE="letessier.mig5.net:/opt/repo_rpm" DISTS=( fedora:43 @@ -60,6 +57,10 @@ DISTS=( for dist in ${DISTS[@]}; do release=$(echo ${dist} | cut -d: -f2) + REPO_RELEASE_ROOT="${REPO_ROOT}/fc${release}" + RPM_REPO="${REPO_RELEASE_ROOT}/rpm/x86_64" + mkdir -p "$RPM_REPO" + docker build \ --no-cache \ -f Dockerfile.rpmbuild \ @@ -71,7 +72,6 @@ for dist in ${DISTS[@]}; do docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll-rpm:${release} sudo chown -R "${USER}" "$PWD/dist" - echo "==> Updating RPM repo..." 
for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" done diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 8fc8cac..12286fa 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.2.1 +%global upstream_version 0.2.2 Name: enroll Version: %{upstream_version} @@ -43,6 +43,9 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Sat Jan 02 2026 Miguel Jacq - %{version}-%{release} +- Fix stat() of parent directory so that we set directory perms correct on --include paths. +- Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty * Fri Jan 01 2026 Miguel Jacq - %{version}-%{release} - Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook - Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files From 6c3275b44a9ca1ebeac4caec02cb650e996837c5 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 3 Jan 2026 11:46:40 +1100 Subject: [PATCH 36/85] Fix tests --- tests/test_cli_config_and_sops.py | 189 +++++++++++++++++ tests/test_more_coverage.py | 323 ++++++++++++++++++++++++++++++ tests/test_remote.py | 18 +- 3 files changed, 525 insertions(+), 5 deletions(-) create mode 100644 tests/test_cli_config_and_sops.py create mode 100644 tests/test_more_coverage.py diff --git a/tests/test_cli_config_and_sops.py b/tests/test_cli_config_and_sops.py new file mode 100644 index 0000000..7e3fe5b --- /dev/null +++ b/tests/test_cli_config_and_sops.py @@ -0,0 +1,189 @@ +from __future__ import annotations + +import argparse +import configparser +import tarfile +from pathlib import Path + + +def test_discover_config_path_precedence(monkeypatch, tmp_path: Path): + from enroll.cli import _discover_config_path + + cfg = tmp_path / "cfg.ini" + 
cfg.write_text("[enroll]\n", encoding="utf-8") + + # --no-config always wins + monkeypatch.setenv("ENROLL_CONFIG", str(cfg)) + assert _discover_config_path(["--no-config", "harvest"]) is None + + # explicit --config wins + assert _discover_config_path(["--config", str(cfg), "harvest"]) == cfg + + # env var used when present + assert _discover_config_path(["harvest"]) == cfg + + +def test_discover_config_path_finds_local_and_xdg(monkeypatch, tmp_path: Path): + from enroll.cli import _discover_config_path + + # local file in cwd + cwd = tmp_path / "cwd" + cwd.mkdir() + local = cwd / "enroll.ini" + local.write_text("[enroll]\n", encoding="utf-8") + + monkeypatch.chdir(cwd) + monkeypatch.delenv("ENROLL_CONFIG", raising=False) + monkeypatch.delenv("XDG_CONFIG_HOME", raising=False) + assert _discover_config_path(["harvest"]) == local + + # xdg config fallback + monkeypatch.chdir(tmp_path) + xdg = tmp_path / "xdg" + (xdg / "enroll").mkdir(parents=True) + xcfg = xdg / "enroll" / "enroll.ini" + xcfg.write_text("[enroll]\n", encoding="utf-8") + monkeypatch.setenv("XDG_CONFIG_HOME", str(xdg)) + assert _discover_config_path(["harvest"]) == xcfg + + +def test_section_to_argv_supports_bool_append_count_and_unknown(monkeypatch, capsys): + from enroll.cli import _section_to_argv + + ap = argparse.ArgumentParser(add_help=False) + ap.add_argument("--flag", action="store_true") + ap.add_argument("--no-flag", action="store_false", dest="flag2") + ap.add_argument("--item", action="append", default=[]) + ap.add_argument("-v", action="count", default=0) + + cfg = configparser.ConfigParser() + cfg.read_dict( + { + "enroll": { + "flag": "true", + "no_flag": "false", + "item": "a,b", + "v": "2", + "unknown_key": "zzz", + } + } + ) + + argv = _section_to_argv(ap, cfg, "enroll") + + # bools set + assert "--flag" in argv + assert "--no-flag" in argv + + # append expanded + assert argv.count("--item") == 2 + assert "a" in argv and "b" in argv + + # count flag expanded + assert argv.count("-v") 
== 2 + + # unknown key prints warning + err = capsys.readouterr().err + assert "unknown option" in err + + +def test_inject_config_argv_inserts_global_and_command_tokens(tmp_path: Path): + from enroll.cli import _inject_config_argv + + root = argparse.ArgumentParser(add_help=False) + root.add_argument("--root-flag", action="store_true") + sub = root.add_subparsers(dest="cmd", required=True) + p_h = sub.add_parser("harvest", add_help=False) + p_h.add_argument("--dangerous", action="store_true") + p_h.add_argument("--include-path", action="append", default=[]) + + cfg_path = tmp_path / "enroll.ini" + cfg_path.write_text( + """[enroll] +root-flag = true + +[harvest] +dangerous = true +include-path = /etc/one,/etc/two +""", + encoding="utf-8", + ) + + argv = ["harvest", "--include-path", "/etc/cli"] + injected = _inject_config_argv( + argv, + cfg_path=cfg_path, + root_parser=root, + subparsers={"harvest": p_h}, + ) + + # global inserted before cmd, subcommand tokens right after cmd + assert injected[:2] == ["--root-flag", "harvest"] + # include-path from config inserted before CLI include-path (CLI wins later if duplicates) + joined = " ".join(injected) + assert "--include-path /etc/one" in joined + assert "--include-path /etc/cli" in joined + + +def test_resolve_sops_out_file_and_encrypt_path(monkeypatch, tmp_path: Path): + from enroll import cli + + # directory output should yield harvest.tar.gz.sops inside + out_dir = tmp_path / "o" + out_dir.mkdir() + assert ( + cli._resolve_sops_out_file(str(out_dir), hint="h").name == "harvest.tar.gz.sops" + ) + + # file-like output retained + out_file = tmp_path / "x.sops" + assert cli._resolve_sops_out_file(str(out_file), hint="h") == out_file + + # None uses cache dir + class HC: + def __init__(self, d: Path): + self.dir = d + + monkeypatch.setattr( + cli, "new_harvest_cache_dir", lambda hint: HC(tmp_path / "cache") + ) + p = cli._resolve_sops_out_file(None, hint="h") + assert str(p).endswith("harvest.tar.gz.sops") + + # Cover 
_tar_dir_to quickly (writes a tarball) + bundle = tmp_path / "bundle" + bundle.mkdir() + (bundle / "state.json").write_text("{}", encoding="utf-8") + tar_path = tmp_path / "b.tar.gz" + cli._tar_dir_to(bundle, tar_path) + assert tar_path.exists() + with tarfile.open(tar_path, "r:gz") as tf: + names = tf.getnames() + assert "state.json" in names or "./state.json" in names + + +def test_encrypt_harvest_dir_to_sops_cleans_up_tmp_tgz(monkeypatch, tmp_path: Path): + from enroll.cli import _encrypt_harvest_dir_to_sops + + bundle = tmp_path / "bundle" + bundle.mkdir() + (bundle / "state.json").write_text("{}", encoding="utf-8") + out_file = tmp_path / "out.sops" + + seen = {} + + def fake_encrypt(src: Path, dst: Path, pgp_fingerprints, mode): # noqa: ARG001 + # write something so we can see output created + seen["src"] = src + dst.write_bytes(b"enc") + + monkeypatch.setattr("enroll.cli.encrypt_file_binary", fake_encrypt) + + # Make os.unlink raise FileNotFoundError to hit the except branch in finally. + monkeypatch.setattr( + "enroll.cli.os.unlink", lambda p: (_ for _ in ()).throw(FileNotFoundError()) + ) + + res = _encrypt_harvest_dir_to_sops(bundle, out_file, fps=["ABC"]) + assert res == out_file + assert out_file.read_bytes() == b"enc" diff --git a/tests/test_more_coverage.py b/tests/test_more_coverage.py new file mode 100644 index 0000000..2c6693a --- /dev/null +++ b/tests/test_more_coverage.py @@ -0,0 +1,323 @@ +from __future__ import annotations + +import json +import os +import subprocess +import sys +import types +from pathlib import Path +from types import SimpleNamespace + +import pytest + + +def test_cache_dir_defaults_to_home_cache(monkeypatch, tmp_path: Path): + # Ensure default path uses ~/.cache when XDG_CACHE_HOME is unset. 
+ from enroll.cache import enroll_cache_dir + + monkeypatch.delenv("XDG_CACHE_HOME", raising=False) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + + p = enroll_cache_dir() + assert str(p).startswith(str(tmp_path)) + assert p.name == "enroll" + + +def test_harvest_cache_state_json_property(tmp_path: Path): + from enroll.cache import HarvestCache + + hc = HarvestCache(tmp_path / "h1") + assert hc.state_json == hc.dir / "state.json" + + +def test_cache_dir_security_rejects_symlink(tmp_path: Path): + from enroll.cache import _ensure_dir_secure + + real = tmp_path / "real" + real.mkdir() + link = tmp_path / "link" + link.symlink_to(real, target_is_directory=True) + + with pytest.raises(RuntimeError, match="Refusing to use symlink"): + _ensure_dir_secure(link) + + +def test_cache_dir_chmod_failures_are_ignored(monkeypatch, tmp_path: Path): + from enroll import cache + + # Make the cache base path deterministic and writable. + monkeypatch.setattr(cache, "enroll_cache_dir", lambda: tmp_path) + + # Force os.chmod to fail to cover the "except OSError: pass" paths. + monkeypatch.setattr( + os, "chmod", lambda *a, **k: (_ for _ in ()).throw(OSError("nope")) + ) + + hc = cache.new_harvest_cache_dir() + assert hc.dir.exists() + assert hc.dir.is_dir() + + +def test_stat_triplet_falls_back_to_numeric_ids(monkeypatch, tmp_path: Path): + from enroll.fsutil import stat_triplet + import pwd + import grp + + p = tmp_path / "x" + p.write_text("x", encoding="utf-8") + + # Force username/group resolution failures. 
+ monkeypatch.setattr( + pwd, "getpwuid", lambda _uid: (_ for _ in ()).throw(KeyError("no user")) + ) + monkeypatch.setattr( + grp, "getgrgid", lambda _gid: (_ for _ in ()).throw(KeyError("no group")) + ) + + owner, group, mode = stat_triplet(str(p)) + assert owner.isdigit() + assert group.isdigit() + assert len(mode) == 4 + + +def test_ignore_policy_iter_effective_lines_removes_block_comments(): + from enroll.ignore import IgnorePolicy + + pol = IgnorePolicy() + data = b"""keep1 +/* +drop me +*/ +keep2 +""" + assert list(pol.iter_effective_lines(data)) == [b"keep1", b"keep2"] + + +def test_ignore_policy_deny_reason_dir_variants(tmp_path: Path): + from enroll.ignore import IgnorePolicy + + pol = IgnorePolicy() + + # denied by glob + assert pol.deny_reason_dir("/etc/shadow") == "denied_path" + + # symlink rejected + d = tmp_path / "d" + d.mkdir() + link = tmp_path / "l" + link.symlink_to(d, target_is_directory=True) + assert pol.deny_reason_dir(str(link)) == "symlink" + + # not a directory + f = tmp_path / "f" + f.write_text("x", encoding="utf-8") + assert pol.deny_reason_dir(str(f)) == "not_directory" + + # ok + assert pol.deny_reason_dir(str(d)) is None + + +def test_run_jinjaturtle_parses_outputs(monkeypatch, tmp_path: Path): + # Fully unit-test enroll.jinjaturtle.run_jinjaturtle by stubbing subprocess.run. + from enroll.jinjaturtle import run_jinjaturtle + + def fake_run(cmd, **kwargs): # noqa: ARG001 + # cmd includes "-d -t