From 68e797a0544a173c1c3e01a134eddda3c40e6f38 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 11:53:26 +1100 Subject: [PATCH 001/115] Needs systemctl --- .forgejo/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.forgejo/workflows/ci.yml b/.forgejo/workflows/ci.yml index af14c07..a99bcb6 100644 --- a/.forgejo/workflows/ci.yml +++ b/.forgejo/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: run: | apt-get update DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - ansible ansible-lint python3-venv pipx + ansible ansible-lint python3-venv pipx systemctl - name: Install Poetry run: | From d50f1505bb6497893e3a7182124d38e967c8a13f Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 11:56:38 +1100 Subject: [PATCH 002/115] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 95e57d9..3f05dff 100644 --- a/README.md +++ b/README.md @@ -45,24 +45,24 @@ poetry run enroll --help ## Usage -On the host (root recommended): +On the host (root recommended to harvest as much data as possible): ### 1. Harvest state/information about the host ```bash -sudo poetry run enroll harvest --out /tmp/enroll-harvest +enroll harvest --out /tmp/enroll-harvest ``` ### 2. 
Generate Ansible manifests (roles/playbook) from that harvest ```bash -sudo poetry run enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible +enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible ``` ### Alternatively, do both steps in one shot: ```bash -sudo poetry run enroll enroll --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible +enroll enroll --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible ``` Then run: From 4710231c91ad3458a17e319e30b9c2c5daa7ad8d Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 11:57:33 +1100 Subject: [PATCH 003/115] Remove sudo in tests --- tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests.sh b/tests.sh index 68842f4..f8d246c 100755 --- a/tests.sh +++ b/tests.sh @@ -21,4 +21,4 @@ builtin cd "${ANSIBLE_DIR}" ansible-lint "${ANSIBLE_DIR}" # Run -sudo ansible-playbook playbook.yml -i "localhost," -c local --check --diff +ansible-playbook playbook.yml -i "localhost," -c local --check --diff From 809f21dd359b822263299767328d2f9fc8158203 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 11:58:23 +1100 Subject: [PATCH 004/115] CHANGELOG is markdown --- CHANGELOG.txt => CHANGELOG.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename CHANGELOG.txt => CHANGELOG.md (100%) diff --git a/CHANGELOG.txt b/CHANGELOG.md similarity index 100% rename from CHANGELOG.txt rename to CHANGELOG.md From 2c5e901450dc14e90d7657fc87a2667120c698ba Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 12:01:01 +1100 Subject: [PATCH 005/115] needs python3-apt for tests --- .forgejo/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.forgejo/workflows/ci.yml b/.forgejo/workflows/ci.yml index a99bcb6..630af44 100644 --- a/.forgejo/workflows/ci.yml +++ b/.forgejo/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: run: | apt-get update DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - ansible 
ansible-lint python3-venv pipx systemctl + ansible ansible-lint python3-venv pipx systemctl python3-apt - name: Install Poetry run: | From 2eecb73a49d332ce9b8d35737dc61d0c91d9a70e Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 12:18:26 +1100 Subject: [PATCH 006/115] Ensure we only try to enable service if the unit file existed --- enroll/manifest.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index 0fb6fae..f6a3d57 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -379,16 +379,26 @@ Unowned /etc config files not attributed to packages or services. task_parts.append(copy_task(mf, "[Restart service]")) task_parts.append( - f"""- name: Ensure {unit} is enabled (preserve running state) + f""" +- name: Check if unit exists + ansible.builtin.command: systemctl cat "{{ unit_name }}" + register: _unit_exists + changed_when: false + failed_when: false + +- name: Ensure {unit} is enabled (preserve running state) ansible.builtin.service: - name: "{{{{ unit_name }}}}" + name: "{{ unit_name }}" enabled: true + when: _unit_exists.rc == 0 - name: Start {unit} if it was active at harvest time ansible.builtin.service: name: "{{{{ unit_name }}}}" state: started - when: {var_prefix}_start | bool + when: + - {var_prefix}_start | bool + - _unit_exists.rc == 0 """ ) From 283f3286f20501d982de14f0bbf256ff1cd4293c Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 12:22:59 +1100 Subject: [PATCH 007/115] Fix test --- tests/test_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 71f2030..7696657 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -96,7 +96,7 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): # Service role: conditional start must be a clean Ansible expression tasks = (out / "roles" / "foo" / "tasks" / 
"main.yml").read_text(encoding="utf-8") - assert "when: foo_start | bool" in tasks + assert "when:\n - foo_start | bool\n - _unit_exists.rc == 0\n" in tasks # Ensure we didn't emit deprecated/broken '{{ }}' delimiters in when: for line in tasks.splitlines(): if line.lstrip().startswith("when:"): From ac0c884c3994238d00c8d114a215b44f581a3ada Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 12:28:21 +1100 Subject: [PATCH 008/115] Another fix for systemd unit file --- enroll/manifest.py | 26 ++++++++++++++------------ tests/test_manifest.py | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index f6a3d57..b51aa74 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -380,25 +380,27 @@ Unowned /etc config files not attributed to packages or services. task_parts.append( f""" -- name: Check if unit exists - ansible.builtin.command: systemctl cat "{{ unit_name }}" - register: _unit_exists - changed_when: false +- name: Probe whether systemd unit exists and is manageable + ansible.builtin.systemd: + name: "{{ unit_name }}" + check_mode: true + register: _unit_probe failed_when: false + changed_when: false -- name: Ensure {unit} is enabled (preserve running state) - ansible.builtin.service: +- name: Ensure {{ unit_name }} is enabled (preserve running state) + ansible.builtin.systemd: name: "{{ unit_name }}" enabled: true - when: _unit_exists.rc == 0 + when: _unit_probe is succeeded -- name: Start {unit} if it was active at harvest time - ansible.builtin.service: - name: "{{{{ unit_name }}}}" +- name: Start {{ unit_name }} if it was active at harvest time + ansible.builtin.systemd: + name: "{{ unit_name }}" state: started when: - - {var_prefix}_start | bool - - _unit_exists.rc == 0 + - _unit_probe is succeeded + - {{ var_prefix }}_start | bool """ ) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 7696657..8759e69 100644 --- a/tests/test_manifest.py +++ 
b/tests/test_manifest.py @@ -96,7 +96,7 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): # Service role: conditional start must be a clean Ansible expression tasks = (out / "roles" / "foo" / "tasks" / "main.yml").read_text(encoding="utf-8") - assert "when:\n - foo_start | bool\n - _unit_exists.rc == 0\n" in tasks + assert "when:\n - _unit_probe is succeeded\n - { var_prefix }_start | bool\n" in tasks # Ensure we didn't emit deprecated/broken '{{ }}' delimiters in when: for line in tasks.splitlines(): if line.lstrip().startswith("when:"): From c6f174dd55bd81dbe34d47a2896852511991a8d7 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 12:34:26 +1100 Subject: [PATCH 009/115] fix... --- enroll/manifest.py | 2 +- tests/test_manifest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index b51aa74..8fb3dd2 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -400,7 +400,7 @@ Unowned /etc config files not attributed to packages or services. 
state: started when: - _unit_probe is succeeded - - {{ var_prefix }}_start | bool + - {var_prefix}_start | bool """ ) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 8759e69..09c66e1 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -96,7 +96,7 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): # Service role: conditional start must be a clean Ansible expression tasks = (out / "roles" / "foo" / "tasks" / "main.yml").read_text(encoding="utf-8") - assert "when:\n - _unit_probe is succeeded\n - { var_prefix }_start | bool\n" in tasks + assert "when:\n - _unit_probe is succeeded\n - foo_start | bool\n" in tasks # Ensure we didn't emit deprecated/broken '{{ }}' delimiters in when: for line in tasks.splitlines(): if line.lstrip().startswith("when:"): From 019f6bf6f396538d995eb999e55a3becafb6d0bd Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 12:49:50 +1100 Subject: [PATCH 010/115] Attempt fix for unit name --- enroll/manifest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index 8fb3dd2..4e4d6be 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -388,13 +388,13 @@ Unowned /etc config files not attributed to packages or services. 
failed_when: false changed_when: false -- name: Ensure {{ unit_name }} is enabled (preserve running state) +- name: Ensure unit is enabled (preserve running state) ansible.builtin.systemd: name: "{{ unit_name }}" enabled: true when: _unit_probe is succeeded -- name: Start {{ unit_name }} if it was active at harvest time +- name: Start unit if it was active at harvest time ansible.builtin.systemd: name: "{{ unit_name }}" state: started From d8fb33f0d0d3c1aa75ad1868e1a172a456f52fc5 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 13:23:11 +1100 Subject: [PATCH 011/115] hmm --- enroll/manifest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index 4e4d6be..9f7aea6 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -382,7 +382,7 @@ Unowned /etc config files not attributed to packages or services. f""" - name: Probe whether systemd unit exists and is manageable ansible.builtin.systemd: - name: "{{ unit_name }}" + name: "{{{{ unit_name }}}}" check_mode: true register: _unit_probe failed_when: false @@ -390,13 +390,13 @@ Unowned /etc config files not attributed to packages or services. 
- name: Ensure unit is enabled (preserve running state) ansible.builtin.systemd: - name: "{{ unit_name }}" + name: "{{{{ unit_name }}}}" enabled: true when: _unit_probe is succeeded - name: Start unit if it was active at harvest time ansible.builtin.systemd: - name: "{{ unit_name }}" + name: "{{{{ unit_name }}}}" state: started when: - _unit_probe is succeeded From 82dc8702133786c734d66ec643814945e0614f91 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 13:33:56 +1100 Subject: [PATCH 012/115] Debian packaging --- .forgejo/workflows/ci.yml | 2 +- Dockerfile.debbuild | 77 +++++++++++++++++++++++++++++++++++++++ README.md | 12 +++++- debian/changelog | 5 +++ debian/control | 19 ++++++++++ debian/rules | 6 +++ debian/source/format | 1 + debian/source/options | 6 +++ pyproject.toml | 15 ++++---- release.sh | 28 ++++++++++++++ 10 files changed, 161 insertions(+), 10 deletions(-) create mode 100644 Dockerfile.debbuild create mode 100644 debian/changelog create mode 100644 debian/control create mode 100755 debian/rules create mode 100644 debian/source/format create mode 100644 debian/source/options diff --git a/.forgejo/workflows/ci.yml b/.forgejo/workflows/ci.yml index 630af44..41efa55 100644 --- a/.forgejo/workflows/ci.yml +++ b/.forgejo/workflows/ci.yml @@ -25,7 +25,7 @@ jobs: - name: Install project deps (including test extras) run: | - poetry install --with test + poetry install --with dev - name: Run test script run: | diff --git a/Dockerfile.debbuild b/Dockerfile.debbuild new file mode 100644 index 0000000..873018e --- /dev/null +++ b/Dockerfile.debbuild @@ -0,0 +1,77 @@ +# syntax=docker/dockerfile:1 +ARG BASE_IMAGE=debian:bookworm +FROM ${BASE_IMAGE} + +ENV DEBIAN_FRONTEND=noninteractive + +# If Ubuntu, ensure Universe is enabled. +RUN set -eux; \ + . 
/etc/os-release; \ + if [ "${ID:-}" = "ubuntu" ]; then \ + apt-get update; \ + apt-get install -y --no-install-recommends software-properties-common ca-certificates; \ + add-apt-repository -y universe; \ + fi + +# Build deps +RUN set -eux; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + build-essential \ + devscripts \ + debhelper \ + dh-python \ + pybuild-plugin-pyproject \ + python3-all \ + python3-poetry-core \ + rsync \ + ca-certificates \ + ; \ + rm -rf /var/lib/apt/lists/* + +# Build runner script +RUN set -eux; \ + cat > /usr/local/bin/build-deb <<'EOF' +#!/usr/bin/env bash +set -euo pipefail + +SRC="${SRC:-/src}" +WORKROOT="${WORKROOT:-/work}" +WORK="${WORKROOT}/src" +OUT="${OUT:-/out}" + +mkdir -p "$WORK" "$OUT" + +rsync -a --delete \ + --exclude '.git' \ + --exclude '.venv' \ + --exclude 'dist' \ + --exclude 'build' \ + --exclude '__pycache__' \ + --exclude '.pytest_cache' \ + --exclude '.mypy_cache' \ + "${SRC}/" "${WORK}/" + +cd "${WORK}" +if [ -n "${SUITE:-}" ]; then + export DEBEMAIL="mig@mig5.net" + export DEBFULLNAME="Miguel Jacq" + + dch --distribution "$SUITE" --local "~${SUITE}" "CI build for $SUITE" +fi +dpkg-buildpackage -us -uc -b + +shopt -s nullglob +cp -v "${WORKROOT}"/*.deb \ + "${WORKROOT}"/*.changes \ + "${WORKROOT}"/*.buildinfo \ + "${WORKROOT}"/*.dsc \ + "${WORKROOT}"/*.tar.* \ + "${OUT}/" || true + +echo "Artifacts copied to ${OUT}" +EOF +RUN chmod +x /usr/local/bin/build-deb + +WORKDIR /work +ENTRYPOINT ["/usr/local/bin/build-deb"] diff --git a/README.md b/README.md index 3f05dff..33fe901 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,16 @@ It aims to be **optimistic and noninteractive**: ## Install +### Ubuntu/Debian apt repository + +```bash +sudo mkdir -p /usr/share/keyrings +curl -fsSL https://mig5.net/static/mig5.asc | sudo gpg --dearmor -o /usr/share/keyrings/mig5.gpg +echo "deb [arch=amd64 signed-by=/usr/share/keyrings/mig5.gpg] https://apt.mig5.net $(lsb_release -cs) main" | sudo tee 
/etc/apt/sources.list.d/mig5.list +sudo apt update +sudo apt install enroll +``` + ### AppImage Download the AppImage file from the Releases page (verify with GPG if you wish, my fingerprint is [here](https://mig5.net/static/mig5.asc), @@ -28,7 +38,7 @@ chmod +x Enroll.AppImage ./Enroll.AppImage ``` -### Pip +### Pip/PipX ```bash pip install enroll diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..b502374 --- /dev/null +++ b/debian/changelog @@ -0,0 +1,5 @@ +enroll (0.0.2) unstable; urgency=medium + + * Initial package + + -- Miguel Jacq Mon, 15 Dec 2025 12:00:00 +0000 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..372f83f --- /dev/null +++ b/debian/control @@ -0,0 +1,19 @@ +Source: enroll +Section: admin +Priority: optional +Maintainer: Miguel Jacq +Rules-Requires-Root: no +Build-Depends: + debhelper-compat (= 13), + dh-python, + pybuild-plugin-pyproject, + python3-all, + python3-poetry-core +Standards-Version: 4.6.2 +Homepage: https://git.mig5.net/mig5/enroll + +Package: enroll +Architecture: all +Depends: ${misc:Depends}, ${python3:Depends} +Description: Harvest a host into Ansible roles + A tool that inspects a system and emits Ansible roles/playbooks to reproduce it. 
diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..ed5a9f4 --- /dev/null +++ b/debian/rules @@ -0,0 +1,6 @@ +#!/usr/bin/make -f +export PYBUILD_NAME=enroll +export PYBUILD_SYSTEM=pyproject + +%: + dh $@ --with python3 --buildsystem=pybuild diff --git a/debian/source/format b/debian/source/format new file mode 100644 index 0000000..163aaf8 --- /dev/null +++ b/debian/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/debian/source/options b/debian/source/options new file mode 100644 index 0000000..c32a8c1 --- /dev/null +++ b/debian/source/options @@ -0,0 +1,6 @@ +tar-ignore = ".git" +tar-ignore = ".venv" +tar-ignore = "__pycache__" +tar-ignore = ".pytest_cache" +tar-ignore = "dist" +tar-ignore = "build" diff --git a/pyproject.toml b/pyproject.toml index 89acb38..f48e71d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,14 +14,6 @@ python = "^3.10" [tool.poetry.scripts] enroll = "enroll.cli:main" -[tool.poetry.group.test.dependencies] -pytest = "^9.0.2" -pytest-cov = "^7.0.0" - - -[tool.poetry.group.dev.dependencies] -pyproject-appimage = "^4.2" - [build-system] requires = ["poetry-core>=1.8.0"] build-backend = "poetry.core.masonry.api" @@ -29,3 +21,10 @@ build-backend = "poetry.core.masonry.api" [tool.pyproject-appimage] script = "enroll" output = "Enroll.AppImage" + +[tool.poetry.dev-dependencies] +pytest = "^8" +pytest-cov = "^5" +ansible = "^9" +ansible-lint = "^24" +pyproject-appimage = "^4.2" diff --git a/release.sh b/release.sh index 95e6412..915c41a 100755 --- a/release.sh +++ b/release.sh @@ -15,3 +15,31 @@ mv Enroll.AppImage dist/ # Sign packages for file in `ls -1 dist/`; do qubes-gpg-client --batch --armor --detach-sign dist/$file > dist/$file.asc; done + +# Deb stuff +DISTS=( + debian:bookworm + debian:trixie + ubuntu:jammy + ubuntu:noble +) + +for dist in ${DISTS[@]}; do + release=$(echo ${dist} | cut -d: -f2) + mkdir -p dist/${release} + + docker build -f Dockerfile.debbuild -t enroll-deb:${release} \ + 
--progress=plain \ + --build-arg BASE_IMAGE=${dist} . + + docker run --rm \ + -e SUITE="${release}" \ + -v "$PWD":/src \ + -v "$PWD/dist/${release}":/out \ + enroll-deb:${release} + + # rename the file + debfile=$(ls -1 dist/${release}/*.deb) + + reprepro -b /home/user/git/repo includedeb "${release}" "${debfile}" +done From 2f5a65b737a7f3eb9c8ca76a638de94ff5cdea47 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 13:34:20 +1100 Subject: [PATCH 013/115] Bump version --- CHANGELOG.md | 5 +++++ pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b6678d..011e83d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.0.3 + + * various bug fixes + * Add debian packaging + # 0.0.2 * Merge pkg_ and roles created based on file/service detection diff --git a/pyproject.toml b/pyproject.toml index f48e71d..d8c2513 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.0.2" +version = "0.0.3" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" From 18ca83c255f5425e8434ab512e3dc9c94ab8d40d Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 13:37:41 +1100 Subject: [PATCH 014/115] remove ansible from dev dependencies --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d8c2513..dd76bd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,4 @@ output = "Enroll.AppImage" [tool.poetry.dev-dependencies] pytest = "^8" pytest-cov = "^5" -ansible = "^9" -ansible-lint = "^24" pyproject-appimage = "^4.2" From 25550c21bfa076b36d1b90b6eca97e7d5d0d1741 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 13:37:49 +1100 Subject: [PATCH 015/115] poetry lock --- poetry.lock | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git 
a/poetry.lock b/poetry.lock index 2cd66d4..a056de4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -365,20 +365,20 @@ tomli = {version = "*", markers = "python_version < \"3.11\""} [[package]] name = "pytest" -version = "9.0.2" +version = "8.4.2" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.10" +python-versions = ">=3.9" files = [ - {file = "pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b"}, - {file = "pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11"}, + {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"}, + {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"}, ] [package.dependencies] colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""} -iniconfig = ">=1.0.1" -packaging = ">=22" +iniconfig = ">=1" +packaging = ">=20" pluggy = ">=1.5,<2" pygments = ">=2.7.2" tomli = {version = ">=1", markers = "python_version < \"3.11\""} @@ -388,22 +388,21 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests [[package]] name = "pytest-cov" -version = "7.0.0" +version = "5.0.0" description = "Pytest plugin for measuring coverage." 
optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" files = [ - {file = "pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861"}, - {file = "pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1"}, + {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, + {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, ] [package.dependencies] -coverage = {version = ">=7.10.6", extras = ["toml"]} -pluggy = ">=1.2" -pytest = ">=7" +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" [package.extras] -testing = ["process-tests", "pytest-xdist", "virtualenv"] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] [[package]] name = "requests" @@ -508,4 +507,4 @@ zstd = ["backports-zstd (>=1.0.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "47927c9a0ec1b9be8dad6b4428cabaacc50a840e117c7e5f397d97ead83d5b76" +content-hash = "84c06974dfe822257ef324807672e51d71c0a6197e037fa56e92d8369c40d341" From 9edbfb676a12d6d375ba15c391983672cde4d943 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 15:04:43 +1100 Subject: [PATCH 016/115] Remove obsolete comment in release.sh --- release.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/release.sh b/release.sh index 915c41a..523ad9e 100755 --- a/release.sh +++ b/release.sh @@ -38,8 +38,6 @@ for dist in ${DISTS[@]}; do -v "$PWD/dist/${release}":/out \ enroll-deb:${release} - # rename the file debfile=$(ls -1 dist/${release}/*.deb) - reprepro -b /home/user/git/repo includedeb "${release}" "${debfile}" done From 883e9f963f2229812655f12f692bf22b0a589c9b Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 15:05:09 +1100 Subject: [PATCH 017/115] use no-cache --- release.sh | 1 + 1 
file changed, 1 insertion(+) diff --git a/release.sh b/release.sh index 523ad9e..fe99a52 100755 --- a/release.sh +++ b/release.sh @@ -29,6 +29,7 @@ for dist in ${DISTS[@]}; do mkdir -p dist/${release} docker build -f Dockerfile.debbuild -t enroll-deb:${release} \ + --no-cache \ --progress=plain \ --build-arg BASE_IMAGE=${dist} . From b7ffc4e7c241e894a1976a7cce1d2d5cebe3d14e Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 15:10:13 +1100 Subject: [PATCH 018/115] 0.0.3 --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index b502374..41c527b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -enroll (0.0.2) unstable; urgency=medium +enroll (0.0.3) unstable; urgency=medium * Initial package From 651549b9492464114834672f70a396915785b31a Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 16:28:10 +1100 Subject: [PATCH 019/115] Change message about whether it is a meta package or not --- enroll/harvest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enroll/harvest.py b/enroll/harvest.py index 306d8af..c07c0ca 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -488,7 +488,7 @@ def harvest(bundle_dir: str, policy: Optional[SecretPolicy] = None) -> str: if not pkg_to_etc_paths.get(pkg, []) and not managed: notes.append( - "No /etc files detected for this package (may be a meta package)." + "No /etc files detected for this package." 
) pkg_snaps.append( From 4882ddff49ae68bb0138bf46b9e76329588f01b9 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 16:46:39 +1100 Subject: [PATCH 020/115] Add custom_etc and users last --- enroll/harvest.py | 2 +- enroll/manifest.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/enroll/harvest.py b/enroll/harvest.py index c07c0ca..62f130c 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -392,7 +392,7 @@ def harvest(bundle_dir: str, policy: Optional[SecretPolicy] = None) -> str: ) # ------------------------- - # Manual package roles + # Manually installed package roles # ------------------------- manual_pkgs = list_manual_packages() # Avoid duplicate roles: if a manual package is already managed by any service role, skip its pkg_ role. diff --git a/enroll/manifest.py b/enroll/manifest.py index 9f7aea6..b92f59c 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -149,7 +149,7 @@ users_accounts: lines.append(f" group: {u.get('primary_group')}") lines.append(" mode: '0700'") - # Copy harvested SSH public material (authorized_keys + *.pub) + # Copy harvested SSH public material (authorized_keys) for mf in managed_files: dest = mf["path"] src = mf["src_rel"] @@ -225,7 +225,7 @@ Generated non-system user accounts and SSH public material. manifested_users_roles.append(role) - # ------------------------- + # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- if etc_custom_snapshot and etc_custom_snapshot.get("managed_files"): @@ -439,7 +439,7 @@ Generated from `{unit}`. manifested_service_roles.append(role) # ------------------------- - # Manual package roles + # Manually installed package roles # ------------------------- for pr in package_roles: role = pr["role_name"] @@ -490,7 +490,7 @@ Generated from `{unit}`. 
task_parts: List[str] = [] task_parts.append( f"""--- -- name: Install manual package {pkg} +- name: Install package {pkg} ansible.builtin.apt: name: "{{{{ {var_prefix}_packages }}}}" state: present @@ -525,7 +525,7 @@ Generated from `{unit}`. notes = pr.get("notes", []) readme = f"""# {role} -Generated for manual package `{pkg}`. +Generated for package `{pkg}`. ## Managed files {os.linesep.join("- " + mf["path"] + " (" + mf["reason"] + ")" for mf in managed_files) or "- (none)"} @@ -536,7 +536,7 @@ Generated for manual package `{pkg}`. ## Notes {os.linesep.join("- " + n for n in notes) or "- (none)"} -> Note: package roles do not attempt to restart or enable services automatically. +> Note: package roles (those not discovered via a systemd service) do not attempt to restart or enable services automatically. """ with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: f.write(readme) @@ -546,8 +546,8 @@ Generated for manual package `{pkg}`. # Playbooks _write_playbook( os.path.join(out_dir, "playbook.yml"), - manifested_users_roles + manifested_pkg_roles + + manifested_service_roles + manifested_etc_custom_roles - + manifested_pkg_roles - + manifested_service_roles, + + manifested_users_roles, ) From e4be7f5975c73a6f3a2a86e4861ede73f1a03c10 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 17:03:28 +1100 Subject: [PATCH 021/115] Rename secrets to ignore as it does more than secrets --- CHANGELOG.md | 5 ++++ debian/changelog | 9 ++++++- enroll/debian.py | 31 ++++++++++++++++++++--- enroll/harvest.py | 10 +++----- enroll/{secrets.py => ignore.py} | 2 +- pyproject.toml | 2 +- tests/{test_secrets.py => test_ignore.py} | 7 ++--- 7 files changed, 51 insertions(+), 15 deletions(-) rename enroll/{secrets.py => ignore.py} (98%) rename tests/{test_secrets.py => test_ignore.py} (51%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 011e83d..d30a65a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.0.4 + + * Fix dash package 
detection issue + * Reorder which roles install first + # 0.0.3 * various bug fixes diff --git a/debian/changelog b/debian/changelog index 41c527b..86ae088 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,12 @@ +enroll (0.0.4) unstable; urgency=medium + + * Fix dash package detection issue + * Reorder which roles install first + + -- Miguel Jacq Mon, 15 Dec 2025 17:00:00 +1100 + enroll (0.0.3) unstable; urgency=medium * Initial package - -- Miguel Jacq Mon, 15 Dec 2025 12:00:00 +0000 + -- Miguel Jacq Mon, 15 Dec 2025 12:00:00 +1100 diff --git a/enroll/debian.py b/enroll/debian.py index d3f1563..58569e5 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -6,6 +6,8 @@ import os import subprocess # nosec from typing import Dict, List, Optional, Set, Tuple +_DIVERSION_PREFIX = "diversion by " + def _run(cmd: list[str]) -> str: p = subprocess.run(cmd, check=False, text=True, capture_output=True) # nosec @@ -18,9 +20,32 @@ def dpkg_owner(path: str) -> Optional[str]: p = subprocess.run(["dpkg", "-S", path], text=True, capture_output=True) # nosec if p.returncode != 0: return None - left = p.stdout.split(":", 1)[0].strip() - pkg = left.split(":", 1)[0].strip() - return pkg or None + + for raw in (p.stdout or "").splitlines(): + line = raw.strip() + if not line: + continue + + # dpkg diversion chatter; not an ownership line + if line.startswith(_DIVERSION_PREFIX): + continue + + # Expected: "[, ...][:]: " + if ":" not in line: + continue + + left, _ = line.split(":", 1) + + # If multiple pkgs listed, pick the first (common case is just one) + left = left.split(",", 1)[0].strip() + + # Strip any ":arch" suffix from left side + pkg = left.split(":", 1)[0].strip() + + if pkg and not pkg.startswith(_DIVERSION_PREFIX): + return pkg + + return None def list_manual_packages() -> List[str]: diff --git a/enroll/harvest.py b/enroll/harvest.py index 62f130c..78f7d1f 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -18,7 +18,7 @@ from .debian import ( 
read_pkg_md5sums, stat_triplet, ) -from .secrets import SecretPolicy +from .ignore import IgnorePolicy from .accounts import collect_non_system_users @@ -233,8 +233,8 @@ def _topdirs_for_package(pkg: str, pkg_to_etc_paths: Dict[str, List[str]]) -> Se return topdirs -def harvest(bundle_dir: str, policy: Optional[SecretPolicy] = None) -> str: - policy = policy or SecretPolicy() +def harvest(bundle_dir: str, policy: Optional[IgnorePolicy] = None) -> str: + policy = policy or IgnorePolicy() os.makedirs(bundle_dir, exist_ok=True) if hasattr(os, "geteuid") and os.geteuid() != 0: @@ -487,9 +487,7 @@ def harvest(bundle_dir: str, policy: Optional[SecretPolicy] = None) -> str: ) if not pkg_to_etc_paths.get(pkg, []) and not managed: - notes.append( - "No /etc files detected for this package." - ) + notes.append("No /etc files detected for this package.") pkg_snaps.append( PackageSnapshot( diff --git a/enroll/secrets.py b/enroll/ignore.py similarity index 98% rename from enroll/secrets.py rename to enroll/ignore.py index 06514e5..217497f 100644 --- a/enroll/secrets.py +++ b/enroll/ignore.py @@ -33,7 +33,7 @@ SENSITIVE_CONTENT_PATTERNS = [ @dataclass -class SecretPolicy: +class IgnorePolicy: deny_globs: list[str] = None max_file_bytes: int = 256_000 sample_bytes: int = 64_000 diff --git a/pyproject.toml b/pyproject.toml index dd76bd0..6b4d1b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.0.3" +version = "0.0.4" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/tests/test_secrets.py b/tests/test_ignore.py similarity index 51% rename from tests/test_secrets.py rename to tests/test_ignore.py index f66d4cb..bba9f06 100644 --- a/tests/test_secrets.py +++ b/tests/test_ignore.py @@ -1,8 +1,9 @@ -from enroll.secrets import SecretPolicy +from enroll.ignore import IgnorePolicy -def test_secret_policy_denies_common_backup_files(): - 
pol = SecretPolicy() +def test_ignore_policy_denies_common_backup_files(): + pol = IgnorePolicy() assert pol.deny_reason("/etc/shadow-") == "denied_path" assert pol.deny_reason("/etc/passwd-") == "denied_path" assert pol.deny_reason("/etc/group-") == "denied_path" + assert pol.deny_reason("/foobar") == "unreadable" From 576649a49c42c6b6ca472d0744b2f3099c951d37 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 15 Dec 2025 17:13:06 +1100 Subject: [PATCH 022/115] README.md adjustment --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 33fe901..1839ad9 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ sudo apt install enroll ### AppImage -Download the AppImage file from the Releases page (verify with GPG if you wish, my fingerprint is [here](https://mig5.net/static/mig5.asc), +Download the AppImage file from the Releases page (verify with GPG if you wish, my fingerprint is [here](https://mig5.net/static/mig5.asc)), then make it executable and run it: ```bash @@ -75,7 +75,7 @@ enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible enroll enroll --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible ``` -Then run: +Then run Ansible however way you wish, for example (local execution): ```bash ansible-playbook -i "localhost," -c local /tmp/enroll-ansible/playbook.yml From f255ba566c59067e909e883088abef6e8187442f Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Tue, 16 Dec 2025 20:14:20 +1100 Subject: [PATCH 023/115] biiiiig refactor to support jinjaturtle and multi site mode --- CHANGELOG.md | 13 + Dockerfile.debbuild | 1 + README.md | 180 ++++++- debian/changelog | 9 + debian/control | 3 +- enroll/cli.py | 37 +- enroll/ignore.py | 33 +- enroll/jinjaturtle.py | 105 ++++ enroll/manifest.py | 1140 +++++++++++++++++++++++++++++++---------- poetry.lock | 105 +++- pyproject.toml | 3 +- 11 files changed, 1331 insertions(+), 298 deletions(-) create mode 100644 enroll/jinjaturtle.py diff --git 
a/CHANGELOG.md b/CHANGELOG.md index d30a65a..76d737d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +# 0.0.5 + + * Use JinjaTurtle to generate dynamic template/inventory if it's on the PATH + * Support --fqdn flag for site-specific inventory and an inventory hosts file. + This radically re-architects the roles to loop through abstract inventory + because otherwise different servers can collide with each other through use + of the same role. Use 'single site' mode (no `--fqdn`) if you want more readable, + self-contained roles (in which case, store each manifested output in its own + repo per server) + * Generate an ansible.cfg if not present, to support host_vars plugin and other params, + when using `--fqdn` mode + * Be more permissive with files that we previously thought contained secrets (ignore commented lines) + # 0.0.4 * Fix dash package detection issue diff --git a/Dockerfile.debbuild b/Dockerfile.debbuild index 873018e..9009b41 100644 --- a/Dockerfile.debbuild +++ b/Dockerfile.debbuild @@ -24,6 +24,7 @@ RUN set -eux; \ pybuild-plugin-pyproject \ python3-all \ python3-poetry-core \ + python3-yaml \ rsync \ ca-certificates \ ; \ diff --git a/README.md b/README.md index 1839ad9..b9375dd 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Enroll +# Enroll
Enroll logo @@ -16,9 +16,127 @@ It aims to be **optimistic and noninteractive**: - Captures miscellaneous `/etc` files that it can't attribute to a package, and installs it in an `etc_custom` role - Avoids trying to start systemd services that were detected as being Inactive during harvest -## Install +--- -### Ubuntu/Debian apt repository +# Two modes: single-site vs multi-site (`--fqdn`) + +**enroll** has two distinct ways to generate Ansible: + +## 1) Single-site mode (default: *no* `--fqdn`) +Use this when you’re enrolling **one server** (or you’re generating a “golden” role set you intend to reuse). + +**What you get** +- Config, templates, and defaults are primarily **contained inside each role**. +- Raw config files (when not templated) live in the role’s `files/`. +- Template variables (when templated) live in the role’s `defaults/main.yml`. + +**Pros** +- Roles are more **self-contained** and easier to understand. +- Better starting point for **provisioning new servers**, because the role contains most of what it needs. +- Less inventory abstraction/duplication. + +**Cons** +- Less convenient for quickly enrolling multiple hosts with divergent configs (you’ll do more manual work to make roles flexible across hosts). + +## 2) Multi-site mode (`--fqdn`) +Use this when you want to enroll **several existing servers** quickly, especially if they differ. + +**What you get** +- Roles are **shared** across hosts, but host-specific data lives in inventory. +- Host inventory drives what’s managed: + - which files to deploy for that host + - which packages are relevant for that host + - which services should be enabled/started for that host +- For non-templated config, raw files live in host-specific inventory under `.files/` (per role). + +**Pros** +- Fastest way to retrofit **multiple servers** into config management. +- Avoids shared-role “host A breaks host B” problems by keeping host-specific state in inventory. 
+- Better fit when you already have a fleet and want to capture/reflect reality first. + +**Cons** +- More abstraction: roles become more “data-driven”. +- Potential duplication: raw files may exist per-host in inventory (even if identical). +- Harder to use the roles to **provision a brand-new server** without also building an inventory for that new host, because multi-site output assumes the server already exists and is being retrofitted. + +**Rule of thumb** +- If your goal is *“make this one server reproducible / provisionable”* → start with **single-site**. +- If your goal is *“get several already-running servers under management quickly”* → use **multi-site**. + +--- + +# Key concepts + +## Harvest + +**enroll** begins by 'harvesting' known state about your host. This includes detecting what running services exist, what packages have been installed 'manually' (that is, stuff that doesn't come out of the box with the OS), and anything 'custom' in `/etc` that it can't attribute to a specific package. + +It also detects if any config files have been *changed* from their packaged defaults. If they have, it will attempt to 'harvest' them. If the config file is identical to how it comes with the package, then it doesn't bother harvesting it, because there's little value in config-managing it if it's identical to what you get by simply installing the package! + +The harvest writes a state.json file explaining all the data it harvested and, if it chose not to harvest something, explanations as to why that is the case (see below: sensitive data). + +## Sensitive data + +**enroll** doesn't make any assumptions about how you might handle sensitive data from your config files, in Ansible. Some people might use SOPS, others might use Vault, others might do something else entirely. 
+ +For this reason, **enroll** will attempt to read config files, and if it detects data that looks like a sensitive SSH/SSL private key, or password, or API key, etc, then it won't harvest it for config management. + +This inevitably means that it will deliberately miss some important config files that you probably *want* to manage in Ansible. + +Nonetheless, in the Harvest 'state' file, there should be an explanation of 'excluded files'. You can parse or inspect this file to find what it chose to ignore, and then you know what you might want to augment the results with later, once you 'manifest' the harvest into Ansible configuration. + +## Manifest + +The 'manifest' subcommand expects to be given a path to the 'harvest' obtained in the first step. It will then attempt to generate Ansible roles and playbooks (and potentially 'inventory') from that harvest. + +Manifesting is the most complex step because a lot of people will have opinions on how Ansible roles and inventory should work. No solution is perfect for everyone. However, **enroll** tries to strike a reasonable balance. + +Remember, the purpose of this tool is to save **time** getting your systems into a decently-managed state. It's still up to you to wrangle it into a form that works for you on an ongoing basis. + +--- + +# Single-shot mode for the impatient sysadmin + +**enroll** has a 'single-shot' subcommand which combines the two other phases (harvest and manifest) into one. Use it to generate both the harvest and then manifest ansible from that harvest all in one go. Perfect if you're in a hurry! + +--- + +# JinjaTurtle integration (both modes) + +If you also have my other tool [JinjaTurtle](https://git.mig5.net/mig5/jinjaturtle) installed, **enroll** will attempt to create Jinja2 templates for any ini/json/xml/toml style configuration that it finds. 
+ +- Templates live in the **role** (`roles//templates/...`) +- Variables live in: + - **single-site**: `roles//defaults/main.yml` + - **multi-site** (`--fqdn`): `inventory/host_vars//.yml` + +JinjaTurtle will be used automatically if it is detected on the `$PATH`. You can also be explicit and pass `--jinjaturtle`, but this will throw an error if JinjaTurtle is not on the `$PATH`. + +If you *do* have JinjaTurtle installed, but *don't* wish to make use of it, you can use `--no-jinjaturtle`, in which case all config files will be kept as 'raw' files. + +--- + +# How multi-site avoids “shared role breaks a host” + +In multi-site mode, **roles are data-driven**. The role contains generic tasks like: + +- “deploy all files listed for this host” +- “install packages listed for this host” +- “apply systemd enable/start state listed for this host” + +The host inventory is what decides which files/packages/services apply to that host. This prevents the classic failure mode where host2 adds a config file to a shared role and host1 then fails trying to deploy a file it never had. + +Raw non-templated files are stored under: + +- `inventory/host_vars///.files/...` + +…and the host’s role variables describe which of those files should be deployed. + +--- + +# Install + +## Ubuntu/Debian apt repository ```bash sudo mkdir -p /usr/share/keyrings @@ -28,7 +146,7 @@ sudo apt update sudo apt install enroll ``` -### AppImage +## AppImage Download the AppImage file from the Releases page (verify with GPG if you wish, my fingerprint is [here](https://mig5.net/static/mig5.asc)), then make it executable and run it: @@ -53,49 +171,69 @@ poetry install poetry run enroll --help ``` -## Usage +--- + +# Usage + +## 1. Harvest state/information about the host On the host (root recommended to harvest as much data as possible): -### 1. Harvest state/information about the host - ```bash enroll harvest --out /tmp/enroll-harvest ``` -### 2. 
Generate Ansible manifests (roles/playbook) from that harvest +## 2. Generate Ansible manifests (roles/playbook) from that harvest + +### Single-site (default: no --fqdn) + +Good for one server, or for producing roles you want to reuse to provision new machines: ```bash enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible ``` -### Alternatively, do both steps in one shot: +### Multi-site (--fqdn) + +Best when enrolling multiple already-running servers into one repo: ```bash -enroll enroll --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible +enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "$(hostname -f)" ``` -Then run Ansible however way you wish, for example (local execution): +## Single-shot + +Alternatively, do both steps in one shot: + +```bash +enroll single-shot --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "$(hostname -f)" +``` + +## 3. Run Ansible + +### Single-site + +You can run it however you prefer (local connection or your own inventory). Example: ```bash ansible-playbook -i "localhost," -c local /tmp/enroll-ansible/playbook.yml ``` +### Multi-site (--fqdn) -## Notes / Safety +In multi-site mode, enroll generates an ansible.cfg, `host_vars` inventory, and a host-specific playbook: -- enroll **skips** common sensitive locations like `/etc/ssl/private/*`, `/etc/ssh/ssh_host_*`, and files that look like private keys/tokens. -- It also skips symlinks, binary-ish files, and large files by default. -- Review each generated role’s README before committing it anywhere. -- It only stores the raw config files. If you want to turn these into Jinja2 templates with dynamic inventory, see my other tool https://git.mig5.net/mig5/jinjaturtle . +```bash +ansible-playbook /tmp/enroll-ansible/playbooks/"$(hostname -f)".yml +``` +--- -## Troubleshooting +# Found a bug, have a suggestion? -- Run as root for the most complete harvest (`sudo ...`). 
+My Forgejo doesn't yet support proper federation, and for that reason I've not opened up registration/login to use the issue queue. -## Found a bug, have a suggestion? - -You can e-mail me (see the pyproject.toml for details) or contact me on the Fediverse: +Instead, you can e-mail me (see the pyproject.toml for details) or contact me on the Fediverse: https://goto.mig5.net/@mig5 + diff --git a/debian/changelog b/debian/changelog index 86ae088..b889be7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,12 @@ +enroll (0.0.5) unstable; urgency=medium + + * Use JinjaTurtle to generate dynamic template/inventory if it's on the PATH + * Support --fqdn flag for site-specific inventory and an inventory hosts file + * Generate an ansible.cfg if not present, to support host_vars plugin and other params + * Be more permissive with files that we previously thought contained secrets (ignore commented lines) + + -- Miguel Jacq Tue, 16 Dec 2025 12:00:00 +1100 + enroll (0.0.4) unstable; urgency=medium * Fix dash package detection issue diff --git a/debian/control b/debian/control index 372f83f..71b5661 100644 --- a/debian/control +++ b/debian/control @@ -8,12 +8,13 @@ Build-Depends: dh-python, pybuild-plugin-pyproject, python3-all, + python3-yaml, python3-poetry-core Standards-Version: 4.6.2 Homepage: https://git.mig5.net/mig5/enroll Package: enroll Architecture: all -Depends: ${misc:Depends}, ${python3:Depends} +Depends: ${misc:Depends}, ${python3:Depends}, python3-yaml Description: Harvest a host into Ansible roles A tool that inspects a system and emits Ansible roles/playbooks to reproduce it. 
diff --git a/enroll/cli.py b/enroll/cli.py index 530a388..0511b54 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -1,10 +1,37 @@ from __future__ import annotations import argparse + from .harvest import harvest from .manifest import manifest +def _add_common_manifest_args(p: argparse.ArgumentParser) -> None: + p.add_argument( + "--fqdn", + help="Host FQDN/name for site-mode output (creates inventory/, inventory/host_vars/, playbooks/).", + ) + g = p.add_mutually_exclusive_group() + g.add_argument( + "--jinjaturtle", + action="store_true", + help="Attempt jinjaturtle template integration (it will error if jinjaturtle is not found on PATH).", + ) + g.add_argument( + "--no-jinjaturtle", + action="store_true", + help="Do not use jinjaturtle integration, even if it is installed.", + ) + + +def _jt_mode(args: argparse.Namespace) -> str: + if getattr(args, "jinjaturtle", False): + return "on" + if getattr(args, "no_jinjaturtle", False): + return "off" + return "auto" + + def main() -> None: ap = argparse.ArgumentParser(prog="enroll") sub = ap.add_subparsers(dest="cmd", required=True) @@ -23,9 +50,10 @@ def main() -> None: required=True, help="Output directory for generated roles/playbook Ansible manifest", ) + _add_common_manifest_args(r) e = sub.add_parser( - "enroll", help="Harvest state, then manifest Ansible code, in one shot" + "single-shot", help="Harvest state, then manifest Ansible code, in one shot" ) e.add_argument( "--harvest", required=True, help="Path to the directory to place the harvest in" @@ -35,6 +63,7 @@ def main() -> None: required=True, help="Output directory for generated roles/playbook Ansible manifest", ) + _add_common_manifest_args(e) args = ap.parse_args() @@ -42,7 +71,7 @@ def main() -> None: path = harvest(args.out) print(path) elif args.cmd == "manifest": - manifest(args.harvest, args.out) - elif args.cmd == "enroll": + manifest(args.harvest, args.out, fqdn=args.fqdn, jinjaturtle=_jt_mode(args)) + elif args.cmd == "single-shot": 
harvest(args.harvest) - manifest(args.harvest, args.out) + manifest(args.harvest, args.out, fqdn=args.fqdn, jinjaturtle=_jt_mode(args)) diff --git a/enroll/ignore.py b/enroll/ignore.py index 217497f..9a9ecf2 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -31,6 +31,10 @@ SENSITIVE_CONTENT_PATTERNS = [ re.compile(rb"(?i)\b(pass|passwd|token|secret|api[_-]?key)\b"), ] +COMMENT_PREFIXES = (b"#", b";", b"//") +BLOCK_START = b"/*" +BLOCK_END = b"*/" + @dataclass class IgnorePolicy: @@ -42,6 +46,28 @@ class IgnorePolicy: if self.deny_globs is None: self.deny_globs = list(DEFAULT_DENY_GLOBS) + def iter_effective_lines(self, content: bytes): + in_block = False + for raw in content.splitlines(): + line = raw.lstrip() + + if in_block: + if BLOCK_END in line: + in_block = False + continue + + if not line: + continue + + if line.startswith(BLOCK_START): + in_block = True + continue + + if line.startswith(COMMENT_PREFIXES) or line.startswith(b"*"): + continue + + yield raw + def deny_reason(self, path: str) -> Optional[str]: for g in self.deny_globs: if fnmatch.fnmatch(path, g): @@ -67,8 +93,9 @@ class IgnorePolicy: if b"\x00" in data: return "binary_like" - for pat in SENSITIVE_CONTENT_PATTERNS: - if pat.search(data): - return "sensitive_content" + for line in self.iter_effective_lines(data): + for pat in SENSITIVE_CONTENT_PATTERNS: + if pat.search(line): + return "sensitive_content" return None diff --git a/enroll/jinjaturtle.py b/enroll/jinjaturtle.py new file mode 100644 index 0000000..f894f04 --- /dev/null +++ b/enroll/jinjaturtle.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import re +import shutil +import subprocess # nosec +import tempfile +from dataclasses import dataclass +from pathlib import Path +from typing import Optional + + +SUPPORTED_EXTS = {".ini", ".json", ".toml", ".yaml", ".yml", ".xml"} + + +@dataclass(frozen=True) +class JinjifyResult: + template_text: str + vars_text: str # YAML mapping text (no leading --- expected) + + +def 
find_jinjaturtle_cmd() -> Optional[str]: + """Return the executable path for jinjaturtle if found on PATH.""" + return shutil.which("jinjaturtle") + + +def can_jinjify_path(path: str) -> bool: + p = Path(path) + return p.suffix.lower() in SUPPORTED_EXTS + + +def run_jinjaturtle( + jt_exe: str, + src_path: str, + *, + role_name: str, + force_format: Optional[str] = None, +) -> JinjifyResult: + """ + Run jinjaturtle against src_path and return (template, defaults-yaml). + Uses tempfiles and captures outputs. + + jinjaturtle CLI: + jinjaturtle -r [-f ] [-d ] [-t ] + """ + src = Path(src_path) + if not src.is_file(): + raise FileNotFoundError(src_path) + + with tempfile.TemporaryDirectory(prefix="enroll-jt-") as td: + td_path = Path(td) + defaults_out = td_path / "defaults.yml" + template_out = td_path / "template.j2" + + cmd = [ + jt_exe, + str(src), + "-r", + role_name, + "-d", + str(defaults_out), + "-t", + str(template_out), + ] + if force_format: + cmd.extend(["-f", force_format]) + + p = subprocess.run(cmd, text=True, capture_output=True) # nosec + if p.returncode != 0: + raise RuntimeError( + "jinjaturtle failed for %s (role=%s)\ncmd=%r\nstdout=%s\nstderr=%s" + % (src_path, role_name, cmd, p.stdout, p.stderr) + ) + + vars_text = defaults_out.read_text(encoding="utf-8").strip() + template_text = template_out.read_text(encoding="utf-8") + + # jinjaturtle outputs a YAML mapping; strip leading document marker if present + if vars_text.startswith("---"): + vars_text = "\n".join(vars_text.splitlines()[1:]).lstrip() + + return JinjifyResult( + template_text=template_text, vars_text=vars_text.rstrip() + "\n" + ) + + +def replace_or_append_block( + base_text: str, + *, + begin: str, + end: str, + block_body: str, +) -> str: + """Replace a marked block if present; else append it.""" + pattern = re.compile( + re.escape(begin) + r".*?" 
+ re.escape(end), + flags=re.DOTALL, + ) + new_block = f"{begin}\n{block_body.rstrip()}\n{end}" + if pattern.search(base_text): + return pattern.sub(new_block, base_text).rstrip() + "\n" + # ensure base ends with newline + bt = base_text.rstrip() + "\n" + if not bt.endswith("\n"): + bt += "\n" + return bt + "\n" + new_block + "\n" diff --git a/enroll/manifest.py b/enroll/manifest.py index b92f59c..b772bcd 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -3,7 +3,136 @@ from __future__ import annotations import json import os import shutil -from typing import Any, Dict, List +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Tuple + +from .jinjaturtle import ( + find_jinjaturtle_cmd, + can_jinjify_path, + run_jinjaturtle, +) + + +JINJATURTLE_BEGIN = "# BEGIN JINJATURTLE (generated by enroll)" +JINJATURTLE_END = "# END JINJATURTLE" + + +def _try_yaml(): + try: + import yaml # type: ignore + except Exception: + return None + return yaml + + +def _yaml_load_mapping(text: str) -> Dict[str, Any]: + yaml = _try_yaml() + if yaml is None: + return {} + try: + obj = yaml.safe_load(text) + except Exception: + return {} + if obj is None: + return {} + if isinstance(obj, dict): + return obj + return {} + + +def _yaml_dump_mapping(obj: Dict[str, Any], *, sort_keys: bool = True) -> str: + yaml = _try_yaml() + if yaml is None: + # fall back to a naive key: value dump (best-effort) + lines: List[str] = [] + for k, v in sorted(obj.items()) if sort_keys else obj.items(): + lines.append(f"{k}: {v!r}") + return "\n".join(lines).rstrip() + "\n" + return ( + yaml.safe_dump(obj, default_flow_style=False, sort_keys=sort_keys).rstrip() + + "\n" + ) + + +def _merge_list_keep_order(existing: List[Any], new: List[Any]) -> List[Any]: + out = list(existing) + seen = set(existing) + for item in new: + if item not in seen: + out.append(item) + seen.add(item) + return out + + +def _merge_mappings_preserve( + existing: Dict[str, Any], incoming: Dict[str, Any] +) 
-> Dict[str, Any]: + """Merge incoming into existing: + - lists: union (preserve existing order) + - scalars/dicts: only set if missing (do not overwrite) + """ + merged = dict(existing) + for k, v in incoming.items(): + if k in merged: + if isinstance(merged[k], list) and isinstance(v, list): + merged[k] = _merge_list_keep_order(merged[k], v) + else: + # keep existing value (non-overwriting) + continue + else: + merged[k] = v + return merged + + +def _merge_mappings_overwrite( + existing: Dict[str, Any], incoming: Dict[str, Any] +) -> Dict[str, Any]: + """Merge incoming into existing with overwrite. + + NOTE: Unlike role defaults merging, host_vars should reflect the current + harvest for a host. Therefore lists are replaced rather than unioned. + """ + merged = dict(existing) + merged.update(incoming) + return merged + + +def _write_role_defaults_merge(role_dir: str, incoming: Dict[str, Any]) -> None: + """Write/merge role defaults without clobbering existing values. + Used in site mode to keep roles reusable across hosts. 
+ """ + defaults_path = os.path.join(role_dir, "defaults", "main.yml") + existing: Dict[str, Any] = {} + if os.path.exists(defaults_path): + try: + existing_text = Path(defaults_path).read_text(encoding="utf-8") + existing = _yaml_load_mapping(existing_text) + except Exception: + existing = {} + merged = _merge_mappings_preserve(existing, incoming) + body = "---\n" + _yaml_dump_mapping(merged, sort_keys=True) + with open(defaults_path, "w", encoding="utf-8") as f: + f.write(body) + + +def _extract_jinjaturtle_block(text: str) -> str: + """Return YAML text inside JINJATURTLE_BEGIN/END markers, or the whole text if no markers.""" + if JINJATURTLE_BEGIN in text and JINJATURTLE_END in text: + start = text.split(JINJATURTLE_BEGIN, 1)[1] + inner = start.split(JINJATURTLE_END, 1)[0] + return inner.strip() + "\n" + return text.strip() + "\n" + + +def _normalize_jinjaturtle_vars_text(vars_text: str) -> str: + """Deduplicate keys in a vars fragment by parsing as YAML and dumping it back.""" + m = _yaml_load_mapping(vars_text) + if not m: + # if YAML isn't available or parsing failed, return raw text (best-effort) + return vars_text.rstrip() + ( + "\n" if vars_text and not vars_text.endswith("\n") else "" + ) + return _yaml_dump_mapping(m, sort_keys=True) def _yaml_list(items: List[str], indent: int = 2) -> str: @@ -13,7 +142,20 @@ def _yaml_list(items: List[str], indent: int = 2) -> str: return "\n".join(f"{pad}- {x}" for x in items) -def _copy_artifacts(bundle_dir: str, role: str, role_dir: str) -> None: +def _copy_artifacts( + bundle_dir: str, + role: str, + dst_files_dir: str, + *, + preserve_existing: bool = False, + exclude_rels: Optional[Set[str]] = None, +) -> None: + """Copy harvested artifacts for a role into a destination *files* directory. + + In non --fqdn mode, this is usually /files. 
+ In --fqdn site mode, this is usually: + inventory/host_vars///.files + """ artifacts_dir = os.path.join(bundle_dir, "artifacts", role) if not os.path.isdir(artifacts_dir): return @@ -21,7 +163,21 @@ def _copy_artifacts(bundle_dir: str, role: str, role_dir: str) -> None: for fn in files: src = os.path.join(root, fn) rel = os.path.relpath(src, artifacts_dir) - dst = os.path.join(role_dir, "files", rel) + dst = os.path.join(dst_files_dir, rel) + + # If a file was successfully templatised by JinjaTurtle, do NOT + # also materialize the raw copy in the destination files dir. + # (This keeps the output minimal and avoids redundant "raw" files.) + if exclude_rels and rel in exclude_rels: + try: + if os.path.isfile(dst): + os.remove(dst) + except Exception: + pass + continue + + if preserve_existing and os.path.exists(dst): + continue os.makedirs(os.path.dirname(dst), exist_ok=True) shutil.copy2(src, dst) @@ -32,9 +188,10 @@ def _write_role_scaffold(role_dir: str) -> None: os.makedirs(os.path.join(role_dir, "defaults"), exist_ok=True) os.makedirs(os.path.join(role_dir, "meta"), exist_ok=True) os.makedirs(os.path.join(role_dir, "files"), exist_ok=True) + os.makedirs(os.path.join(role_dir, "templates"), exist_ok=True) -def _write_playbook(path: str, roles: List[str]) -> None: +def _write_playbook_all(path: str, roles: List[str]) -> None: pb_lines = [ "---", "- name: Apply all roles on host", @@ -48,7 +205,306 @@ def _write_playbook(path: str, roles: List[str]) -> None: f.write("\n".join(pb_lines) + "\n") -def manifest(bundle_dir: str, out_dir: str) -> None: +def _write_playbook_host(path: str, fqdn: str, roles: List[str]) -> None: + pb_lines = [ + "---", + f"- name: Apply enroll roles on {fqdn}", + f" hosts: {fqdn}", + " become: true", + " roles:", + ] + for r in roles: + pb_lines.append(f" - {r}") + with open(path, "w", encoding="utf-8") as f: + f.write("\n".join(pb_lines) + "\n") + + +def _ensure_ansible_cfg(cfg_path: str) -> None: + if not os.path.exists(cfg_path): + 
with open(cfg_path, "w", encoding="utf-8") as f: + f.write("[defaults]\n") + f.write("roles_path = roles\n") + f.write("interpreter_python=/usr/bin/python3\n") + f.write("inventory = inventory\n") + f.write("stdout_callback = unixy\n") + f.write("force_color = 1\n") + f.write("vars_plugins_enabled = host_group_vars\n") + f.write("fact_caching = jsonfile\n") + f.write("fact_caching_connection = .enroll_cached_facts\n") + f.write("forks = 30\n") + f.write("remote_tmp = /tmp/ansible-${USER}\n") + f.write("timeout = 12\n") + f.write("[ssh_connection]\n") + f.write("pipelining = True\n") + f.write("scp_if_ssh = True\n") + return + + +def _ensure_inventory_host(inv_path: str, fqdn: str) -> None: + os.makedirs(os.path.dirname(inv_path), exist_ok=True) + if not os.path.exists(inv_path): + with open(inv_path, "w", encoding="utf-8") as f: + f.write("[all]\n") + f.write(fqdn + "\n") + return + + with open(inv_path, "r", encoding="utf-8") as f: + lines = [ln.rstrip("\n") for ln in f.readlines()] + + # ensure there is an [all] group; if not, create it at top + if not any(ln.strip() == "[all]" for ln in lines): + lines = ["[all]"] + lines + + # check if fqdn already present (exact match, ignoring whitespace) + if any(ln.strip() == fqdn for ln in lines): + return + + # append at end + lines.append(fqdn) + with open(inv_path, "w", encoding="utf-8") as f: + f.write("\n".join(lines) + "\n") + + +def _hostvars_path(site_root: str, fqdn: str, role: str) -> str: + return os.path.join(site_root, "inventory", "host_vars", fqdn, f"{role}.yml") + + +def _host_role_files_dir(site_root: str, fqdn: str, role: str) -> str: + """Host-specific files dir for a given role. + + Layout: + inventory/host_vars///.files/ + """ + return os.path.join(site_root, "inventory", "host_vars", fqdn, role, ".files") + + +def _write_hostvars(site_root: str, fqdn: str, role: str, data: Dict[str, Any]) -> None: + """Write host_vars YAML for a role for a specific host. 
+ + This is host-specific state and should track the current harvest output. + Existing keys not mentioned in `data` are preserved, but keys in `data` + are overwritten (including list values). + """ + path = _hostvars_path(site_root, fqdn, role) + os.makedirs(os.path.dirname(path), exist_ok=True) + + existing_map: Dict[str, Any] = {} + if os.path.exists(path): + try: + existing_text = Path(path).read_text(encoding="utf-8") + existing_map = _yaml_load_mapping(existing_text) + except Exception: + existing_map = {} + + merged = _merge_mappings_overwrite(existing_map, data) + + out = "# Generated by enroll (host-specific vars)\n---\n" + _yaml_dump_mapping( + merged, sort_keys=True + ) + with open(path, "w", encoding="utf-8") as f: + f.write(out) + + +def _jinjify_managed_files( + bundle_dir: str, + role: str, + role_dir: str, + managed_files: List[Dict[str, Any]], + *, + jt_exe: Optional[str], + jt_enabled: bool, + overwrite_templates: bool, +) -> Tuple[Set[str], str]: + """ + Return (templated_src_rels, combined_vars_text). + combined_vars_text is a YAML mapping fragment (no leading ---). + """ + templated: Set[str] = set() + vars_map: Dict[str, Any] = {} + + if not (jt_enabled and jt_exe): + return templated, "" + + for mf in managed_files: + dest_path = mf.get("path", "") + src_rel = mf.get("src_rel", "") + if not dest_path or not src_rel: + continue + if not can_jinjify_path(dest_path): + continue + + artifact_path = os.path.join(bundle_dir, "artifacts", role, src_rel) + if not os.path.isfile(artifact_path): + continue + + try: + res = run_jinjaturtle(jt_exe, artifact_path, role_name=role) + except Exception: + # If jinjaturtle cannot process a file for any reason, skip silently. + # (Enroll's core promise is to be optimistic and non-interactive.) 
+ continue + + tmpl_rel = src_rel + ".j2" + tmpl_dst = os.path.join(role_dir, "templates", tmpl_rel) + if overwrite_templates or not os.path.exists(tmpl_dst): + os.makedirs(os.path.dirname(tmpl_dst), exist_ok=True) + with open(tmpl_dst, "w", encoding="utf-8") as f: + f.write(res.template_text) + + templated.add(src_rel) + if res.vars_text.strip(): + # merge YAML mappings; last wins (avoids duplicate keys) + chunk = _yaml_load_mapping(res.vars_text) + if chunk: + vars_map = _merge_mappings_overwrite(vars_map, chunk) + + if vars_map: + combined = _yaml_dump_mapping(vars_map, sort_keys=True) + return templated, combined + return templated, "" + + +def _hostvars_only_jinjaturtle(vars_text: str) -> str: + # keep as valid YAML file + return _defaults_with_jinjaturtle("---\n", vars_text) + + +def _defaults_with_jinjaturtle(base_defaults: str, vars_text: str) -> str: + if not vars_text.strip(): + return base_defaults.rstrip() + "\n" + vars_text = _normalize_jinjaturtle_vars_text(vars_text) + # Always regenerate the block (we regenerate whole defaults files anyway) + return ( + base_defaults.rstrip() + + "\n\n" + + JINJATURTLE_BEGIN + + "\n" + + vars_text.rstrip() + + "\n" + + JINJATURTLE_END + + "\n" + ) + + + +def _write_role_defaults(role_dir: str, mapping: Dict[str, Any]) -> None: + """Overwrite role defaults/main.yml with the provided mapping.""" + defaults_path = os.path.join(role_dir, "defaults", "main.yml") + os.makedirs(os.path.dirname(defaults_path), exist_ok=True) + out = "---\n" + _yaml_dump_mapping(mapping, sort_keys=True) + with open(defaults_path, "w", encoding="utf-8") as f: + f.write(out) + + +def _build_managed_files_var( + managed_files: List[Dict[str, Any]], + templated_src_rels: Set[str], + *, + notify_other: Optional[str] = None, + notify_systemd: Optional[str] = None, +) -> List[Dict[str, Any]]: + """Convert enroll managed_files into an Ansible-friendly list of dicts. + + Each dict drives a role task loop and is safe across hosts. 
+ """ + out: List[Dict[str, Any]] = [] + for mf in managed_files: + dest = mf.get("path") or "" + src_rel = mf.get("src_rel") or "" + if not dest or not src_rel: + continue + is_unit = str(dest).startswith("/etc/systemd/system/") + kind = "template" if src_rel in templated_src_rels else "copy" + notify: List[str] = [] + if is_unit and notify_systemd: + notify.append(notify_systemd) + if (not is_unit) and notify_other: + notify.append(notify_other) + out.append( + { + "dest": dest, + "src_rel": src_rel, + "owner": mf.get("owner") or "root", + "group": mf.get("group") or "root", + "mode": mf.get("mode") or "0644", + "kind": kind, + "is_systemd_unit": bool(is_unit), + "notify": notify, + } + ) + return out + + +def _render_generic_files_tasks(var_prefix: str, *, include_restart_notify: bool) -> str: + """Render generic tasks to deploy _managed_files safely.""" + # Using first_found makes roles work in both modes: + # - site-mode: inventory/host_vars///.files/... + # - non-site: roles//files/... 
+ restart_notify = "Restart service" if include_restart_notify else "" + return f"""# Generated by enroll (data-driven tasks) + +- name: Deploy systemd unit files (templates) + ansible.builtin.template: + src: "{{{{ item.src_rel }}}}.j2" + dest: "{{{{ item.dest }}}}" + owner: "{{{{ item.owner }}}}" + group: "{{{{ item.group }}}}" + mode: "{{{{ item.mode }}}}" + loop: "{{{{ {var_prefix}_managed_files | default([]) | selectattr('is_systemd_unit','equalto', true) | selectattr('kind','equalto','template') | list }}}}" + notify: "{{{{ item.notify | default([]) }}}}" + +- name: Deploy systemd unit files (copies) + vars: + _enroll_ff: + files: + - "{{{{ inventory_dir }}}}/host_vars/{{{{ inventory_hostname }}}}/{{{{ role_name }}}}/.files/{{{{ item.src_rel }}}}" + - "{{{{ role_path }}}}/files/{{{{ item.src_rel }}}}" + ansible.builtin.copy: + src: "{{{{ lookup('ansible.builtin.first_found', _enroll_ff) }}}}" + dest: "{{{{ item.dest }}}}" + owner: "{{{{ item.owner }}}}" + group: "{{{{ item.group }}}}" + mode: "{{{{ item.mode }}}}" + loop: "{{{{ {var_prefix}_managed_files | default([]) | selectattr('is_systemd_unit','equalto', true) | selectattr('kind','equalto','copy') | list }}}}" + notify: "{{{{ item.notify | default([]) }}}}" + +- name: Reload systemd to pick up unit changes + ansible.builtin.meta: flush_handlers + when: "({var_prefix}_managed_files | default([]) | selectattr('is_systemd_unit','equalto', true) | list | length) > 0" + +- name: Deploy other managed files (templates) + ansible.builtin.template: + src: "{{{{ item.src_rel }}}}.j2" + dest: "{{{{ item.dest }}}}" + owner: "{{{{ item.owner }}}}" + group: "{{{{ item.group }}}}" + mode: "{{{{ item.mode }}}}" + loop: "{{{{ {var_prefix}_managed_files | default([]) | selectattr('is_systemd_unit','equalto', false) | selectattr('kind','equalto','template') | list }}}}" + notify: "{{{{ item.notify | default([]) }}}}" + +- name: Deploy other managed files (copies) + vars: + _enroll_ff: + files: + - "{{{{ inventory_dir 
}}}}/host_vars/{{{{ inventory_hostname }}}}/{{{{ role_name }}}}/.files/{{{{ item.src_rel }}}}" + - "{{{{ role_path }}}}/files/{{{{ item.src_rel }}}}" + ansible.builtin.copy: + src: "{{{{ lookup('ansible.builtin.first_found', _enroll_ff) }}}}" + dest: "{{{{ item.dest }}}}" + owner: "{{{{ item.owner }}}}" + group: "{{{{ item.group }}}}" + mode: "{{{{ item.mode }}}}" + loop: "{{{{ {var_prefix}_managed_files | default([]) | selectattr('is_systemd_unit','equalto', false) | selectattr('kind','equalto','copy') | list }}}}" + notify: "{{{{ item.notify | default([]) }}}}" +""" + +def manifest( + bundle_dir: str, + out_dir: str, + *, + fqdn: Optional[str] = None, + jinjaturtle: str = "auto", # auto|on|off +) -> None: state_path = os.path.join(bundle_dir, "state.json") with open(state_path, "r", encoding="utf-8") as f: state = json.load(f) @@ -58,15 +514,45 @@ def manifest(bundle_dir: str, out_dir: str) -> None: users_snapshot: Dict[str, Any] = state.get("users", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) + site_mode = fqdn is not None and fqdn != "" + + jt_exe = find_jinjaturtle_cmd() + jt_enabled = False + if jinjaturtle not in ("auto", "on", "off"): + raise ValueError("jinjaturtle must be one of: auto, on, off") + if jinjaturtle == "on": + if not jt_exe: + raise RuntimeError("jinjaturtle requested but not found on PATH") + jt_enabled = True + elif jinjaturtle == "auto": + jt_enabled = jt_exe is not None + else: + jt_enabled = False + os.makedirs(out_dir, exist_ok=True) roles_root = os.path.join(out_dir, "roles") os.makedirs(roles_root, exist_ok=True) + # Site-mode scaffolding + if site_mode: + os.makedirs(os.path.join(out_dir, "inventory"), exist_ok=True) + os.makedirs(os.path.join(out_dir, "inventory", "host_vars"), exist_ok=True) + os.makedirs(os.path.join(out_dir, "playbooks"), exist_ok=True) + _ensure_inventory_host( + os.path.join(out_dir, "inventory", "hosts.ini"), fqdn or "" + ) + _ensure_ansible_cfg(os.path.join(out_dir, "ansible.cfg")) + 
manifested_users_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] manifested_service_roles: List[str] = [] manifested_pkg_roles: List[str] = [] + # In site_mode, raw harvested files are stored under host-specific inventory + # to avoid cross-host clobber while still sharing a role definition. + + # ------------------------- + # ------------------------- # Users role (non-system users) # ------------------------- @@ -74,146 +560,191 @@ def manifest(bundle_dir: str, out_dir: str) -> None: role = users_snapshot.get("role_name", "users") role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) - _copy_artifacts(bundle_dir, role, role_dir) + + # Users role includes harvested SSH-related files; in site mode keep them + # host-specific to avoid cross-host clobber. + if site_mode: + _copy_artifacts( + bundle_dir, role, _host_role_files_dir(out_dir, fqdn or "", role) + ) + else: + _copy_artifacts(bundle_dir, role, os.path.join(role_dir, "files")) users = users_snapshot.get("users", []) managed_files = users_snapshot.get("managed_files", []) excluded = users_snapshot.get("excluded", []) notes = users_snapshot.get("notes", []) - # Build group set from users - group_names = set() + # Build groups list and a simplified user dict list suitable for loops + group_names: List[str] = [] + group_set = set() + users_data: List[Dict[str, Any]] = [] for u in users: - pg = u.get("primary_group") - if pg: - group_names.add(pg) - for g in u.get("supplementary_groups", []) or []: - group_names.add(g) - group_names = sorted(group_names) - - # defaults: store users list (handy for later), but tasks are explicit for readability - defaults = """--- -users_accounts: -""" + ( - "\n".join([f" - name: {u.get('name')}" for u in users]) + "\n" - ) - with open( - os.path.join(role_dir, "defaults", "main.yml"), "w", encoding="utf-8" - ) as f: - f.write(defaults) - - with open( - os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" - ) as f: - 
f.write("---\ndependencies: []\n") - - # tasks - lines: List[str] = ["---"] - # groups first (idempotent; safe even if already present) - for g in group_names: - lines.append(f"- name: Ensure group {g} exists") - lines.append(" ansible.builtin.group:") - lines.append(f" name: {g}") - lines.append(" state: present") - - # users - for u in users: - name = u["name"] - lines.append(f"- name: Ensure user {name} exists") - lines.append(" ansible.builtin.user:") - lines.append(f" name: {name}") - lines.append(f" uid: {u.get('uid')}") - lines.append(f" group: {u.get('primary_group')}") - supp = u.get("supplementary_groups") or [] - if supp: - lines.append(" groups: " + ",".join(sorted(supp))) - lines.append(" append: true") - lines.append(f" home: {u.get('home')}") - lines.append(" create_home: true") - if u.get("shell"): - lines.append(f" shell: {u.get('shell')}") - if u.get("gecos"): - # quote to avoid YAML surprises - gec = u.get("gecos").replace('"', '"') - lines.append(f' comment: "{gec}"') - lines.append(" state: present") - - # Ensure ~/.ssh + name = u.get("name") + if not name: + continue + pg = u.get("primary_group") or name home = u.get("home") or f"/home/{name}" sshdir = home.rstrip("/") + "/.ssh" - lines.append(f"- name: Ensure {name} .ssh directory exists") - lines.append(" ansible.builtin.file:") - lines.append(f" path: {sshdir}") - lines.append(" state: directory") - lines.append(f" owner: {name}") - lines.append(f" group: {u.get('primary_group')}") - lines.append(" mode: '0700'") + supp = u.get("supplementary_groups") or [] + if pg: + group_set.add(pg) + for g in supp: + if g: + group_set.add(g) - # Copy harvested SSH public material (authorized_keys) + users_data.append( + { + "name": name, + "uid": u.get("uid"), + "primary_group": pg, + "home": home, + "ssh_dir": sshdir, + "shell": u.get("shell"), + "gecos": u.get("gecos"), + "supplementary_groups": sorted(set(supp)), + } + ) + + group_names = sorted(group_set) + + # SSH-related files (authorized_keys, 
known_hosts, config, etc.) + ssh_files: List[Dict[str, Any]] = [] for mf in managed_files: - dest = mf["path"] - src = mf["src_rel"] - # Determine file owner from dest path: /home//... - owner = None - for u in users: - if dest.startswith((u.get("home") or "").rstrip("/") + "/"): - owner = u["name"] - group = u.get("primary_group") + dest = mf.get("path") or "" + src_rel = mf.get("src_rel") or "" + if not dest or not src_rel: + continue + + owner = "root" + group = "root" + for u in users_data: + home_prefix = (u.get("home") or "").rstrip("/") + "/" + if home_prefix and dest.startswith(home_prefix): + owner = str(u.get("name") or "root") + group = str(u.get("primary_group") or owner) break - if owner is None: - # fallback: try /home// - parts = dest.split("/") - owner = parts[2] if len(parts) > 2 and parts[1] == "home" else "root" - group = owner mode = "0600" if mf.get("reason") == "authorized_keys" else "0644" - lines.append(f"- name: Deploy {dest}") - lines.append(" ansible.builtin.copy:") - lines.append(f" src: {src}") - lines.append(f" dest: {dest}") - lines.append(f" owner: {owner}") - lines.append(f" group: {group}") - lines.append(f" mode: '{mode}'") + ssh_files.append( + { + "dest": dest, + "src_rel": src_rel, + "owner": owner, + "group": group, + "mode": mode, + } + ) - tasks = "\n".join(lines).rstrip() + "\n" - with open( - os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" - ) as f: - f.write(tasks) + # Variables are host-specific in site mode; in non-site mode they live in role defaults. 
+ if site_mode: + _write_role_defaults( + role_dir, + { + "users_groups": [], + "users_users": [], + "users_ssh_files": [], + }, + ) + _write_hostvars( + out_dir, + fqdn or "", + role, + { + "users_groups": group_names, + "users_users": users_data, + "users_ssh_files": ssh_files, + }, + ) + else: + _write_role_defaults( + role_dir, + { + "users_groups": group_names, + "users_users": users_data, + "users_ssh_files": ssh_files, + }, + ) - # handlers (none needed) - with open( - os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" - ) as f: + with open(os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8") as f: + f.write("---\ndependencies: []\n") + + # tasks (data-driven) + users_tasks = """--- +# Generated by enroll (data-driven tasks) + +- name: Ensure groups exist + ansible.builtin.group: + name: "{{ item }}" + state: present + loop: "{{ users_groups | default([]) }}" + +- name: Ensure users exist + ansible.builtin.user: + name: "{{ item.name }}" + uid: "{{ item.uid | default(omit) }}" + group: "{{ item.primary_group }}" + home: "{{ item.home }}" + create_home: true + shell: "{{ item.shell | default(omit) }}" + comment: "{{ item.gecos | default(omit) }}" + state: present + loop: "{{ users_users | default([]) }}" + +- name: Ensure users supplementary groups + ansible.builtin.user: + name: "{{ item.name }}" + groups: "{{ item.supplementary_groups | default([]) | join(',') }}" + append: true + loop: "{{ users_users | default([]) }}" + when: (item.supplementary_groups | default([])) | length > 0 + +- name: Ensure .ssh directories exist + ansible.builtin.file: + path: "{{ item.ssh_dir }}" + state: directory + owner: "{{ item.name }}" + group: "{{ item.primary_group }}" + mode: "0700" + loop: "{{ users_users | default([]) }}" + +- name: Deploy SSH-related files + vars: + _enroll_ff: + files: + - "{{ inventory_dir }}/host_vars/{{ inventory_hostname }}/{{ role_name }}/.files/{{ item.src_rel }}" + - "{{ role_path }}/files/{{ item.src_rel }}" + 
ansible.builtin.copy: + src: "{{ lookup('ansible.builtin.first_found', _enroll_ff) }}" + dest: "{{ item.dest }}" + owner: "{{ item.owner }}" + group: "{{ item.group }}" + mode: "{{ item.mode }}" + loop: "{{ users_ssh_files | default([]) }}" +""" + + with open(os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8") as f: + f.write(users_tasks) + + with open(os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8") as f: f.write("---\n") readme = ( - """# users + f"""# users Generated non-system user accounts and SSH public material. ## Users """ - + ( - "\n".join([f"- {u.get('name')} (uid {u.get('uid')})" for u in users]) - or "- (none)" - ) + + ("\n".join([f"- {u.get('name')} (uid {u.get('uid')})" for u in users]) or "- (none)") + """\n ## Included SSH files """ - + ( - "\n".join( - [f"- {mf.get('path')} ({mf.get('reason')})" for mf in managed_files] - ) - or "- (none)" - ) + + ("\n".join([f"- {mf.get('path')} ({mf.get('reason')})" for mf in managed_files]) or "- (none)") + """\n ## Excluded """ - + ( - "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) - or "- (none)" - ) + + ("\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) or "- (none)") + """\n ## Notes """ @@ -225,6 +756,8 @@ Generated non-system user accounts and SSH public material. manifested_users_roles.append(role) + # ------------------------- + # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -232,38 +765,69 @@ Generated non-system user accounts and SSH public material. 
role = etc_custom_snapshot.get("role_name", "etc_custom") role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) - _copy_artifacts(bundle_dir, role, role_dir) + + var_prefix = role managed_files = etc_custom_snapshot.get("managed_files", []) excluded = etc_custom_snapshot.get("excluded", []) notes = etc_custom_snapshot.get("notes", []) - # tasks: just deploy files (no restarts) - lines: List[str] = ["---"] - for mf in managed_files: - dest = mf["path"] - src = mf["src_rel"] - lines.append(f"- name: Deploy {dest}") - lines.append(" ansible.builtin.copy:") - lines.append(f" src: {src}") - lines.append(f" dest: {dest}") - lines.append(f" owner: {mf.get('owner')}") - lines.append(f" group: {mf.get('group')}") - lines.append(f" mode: '{mf.get('mode')}'") + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) - tasks = "\n".join(lines).rstrip() + "\n" - with open( - os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" - ) as f: - f.write(tasks) + # Copy only the non-templated artifacts (templates live in the role). 
+ if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) - with open( - os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" - ) as f: - f.write("---\n") - with open( - os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" - ) as f: + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd="Run systemd daemon-reload", + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = """---\n""" + _render_generic_files_tasks(var_prefix, include_restart_notify=False) + with open(os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8") as f: + f.write(tasks.rstrip() + "\n") + + handlers = """--- +- name: Run systemd daemon-reload + ansible.builtin.systemd: + daemon_reload: true +""" + with open(os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8") as f: + f.write(handlers) + + with open(os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8") as f: f.write("---\ndependencies: []\n") readme = ( @@ -277,10 +841,7 @@ Unowned /etc config files not attributed to packages or services. + """\n ## Excluded """ - + ( - "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) - or "- (none)" - ) + + ("\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) or "- (none)") + """\n ## Notes """ @@ -292,127 +853,155 @@ Unowned /etc config files not attributed to packages or services. 
manifested_etc_custom_roles.append(role) + # ------------------------- + # ------------------------- # Service roles # ------------------------- for svc in services: role = svc["role_name"] unit = svc["unit"] - pkgs = svc["packages"] - managed_files = svc["managed_files"] + pkgs = svc.get("packages", []) or [] + managed_files = svc.get("managed_files", []) or [] role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) - _copy_artifacts(bundle_dir, role, role_dir) var_prefix = role was_active = svc.get("active_state") == "active" - defaults = f"""--- -{var_prefix}_packages: -{_yaml_list(pkgs, indent=2)} -{var_prefix}_active_state_at_harvest: "{svc.get("active_state")}" -{var_prefix}_start: {"true" if was_active else "false"} -""" - with open( - os.path.join(role_dir, "defaults", "main.yml"), "w", encoding="utf-8" - ) as f: - f.write(defaults) + unit_file_state = str(svc.get("unit_file_state") or "") + enabled_at_harvest = unit_file_state in ("enabled", "enabled-runtime") + desired_state = "started" if was_active else "stopped" - handlers = """--- + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + # Copy only the non-templated artifacts. 
+ if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other="Restart service", + notify_systemd="Run systemd daemon-reload", + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + base_vars: Dict[str, Any] = { + f"{var_prefix}_unit_name": unit, + f"{var_prefix}_packages": pkgs, + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_manage_unit": True, + f"{var_prefix}_systemd_enabled": bool(enabled_at_harvest), + f"{var_prefix}_systemd_state": desired_state, + } + base_vars = _merge_mappings_overwrite(base_vars, jt_map) + + if site_mode: + # Role defaults are host-agnostic/safe; all harvested state is in host_vars. + _write_role_defaults( + role_dir, + { + f"{var_prefix}_unit_name": unit, + f"{var_prefix}_packages": [], + f"{var_prefix}_managed_files": [], + f"{var_prefix}_manage_unit": False, + f"{var_prefix}_systemd_enabled": False, + f"{var_prefix}_systemd_state": "stopped", + }, + ) + _write_hostvars(out_dir, fqdn or "", role, base_vars) + else: + _write_role_defaults(role_dir, base_vars) + + handlers = f"""--- - name: Run systemd daemon-reload ansible.builtin.systemd: daemon_reload: true - name: Restart service ansible.builtin.service: - name: "{{ unit_name }}" + name: "{{{{ {var_prefix}_unit_name }}}}" state: restarted + when: + - {var_prefix}_manage_unit | default(false) + - ({var_prefix}_systemd_state | default('stopped')) == 'started' """ - with open( - os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" - ) as f: + with open(os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8") as f: f.write(handlers) - systemd_files = [ - mf for mf in managed_files if mf["path"].startswith("/etc/systemd/system/") - ] - other_files = [mf 
for mf in managed_files if mf not in systemd_files] - - def copy_task(mf: Dict[str, Any], notify: str | None) -> str: - notify_line = f" notify: {notify}\n" if notify else "" - return f"""- name: Deploy {mf["path"]} - ansible.builtin.copy: - src: "{mf["src_rel"]}" - dest: "{mf["path"]}" - owner: "{mf["owner"]}" - group: "{mf["group"]}" - mode: "{mf["mode"]}" -{notify_line}""" - task_parts: List[str] = [] task_parts.append( f"""--- -- name: Set unit name - ansible.builtin.set_fact: - unit_name: "{unit}" +# Generated by enroll (data-driven tasks) - name: Install packages for {role} ansible.builtin.apt: - name: "{{{{ {var_prefix}_packages }}}}" + name: "{{{{ {var_prefix}_packages | default([]) }}}}" state: present update_cache: true - when: {var_prefix}_packages | length > 0 + when: ({var_prefix}_packages | default([])) | length > 0 + """ ) - if systemd_files: - for mf in systemd_files: - task_parts.append(copy_task(mf, "[systemd daemon-reload]")) - task_parts.append( - """- name: Reload systemd to pick up unit changes - ansible.builtin.meta: flush_handlers -""" - ) - - for mf in other_files: - task_parts.append(copy_task(mf, "[Restart service]")) + task_parts.append(_render_generic_files_tasks(var_prefix, include_restart_notify=True)) task_parts.append( f""" - name: Probe whether systemd unit exists and is manageable ansible.builtin.systemd: - name: "{{{{ unit_name }}}}" + name: "{{{{ {var_prefix}_unit_name }}}}" check_mode: true register: _unit_probe failed_when: false changed_when: false + when: {var_prefix}_manage_unit | default(false) -- name: Ensure unit is enabled (preserve running state) +- name: Ensure unit enablement matches harvest ansible.builtin.systemd: - name: "{{{{ unit_name }}}}" - enabled: true - when: _unit_probe is succeeded - -- name: Start unit if it was active at harvest time - ansible.builtin.systemd: - name: "{{{{ unit_name }}}}" - state: started + name: "{{{{ {var_prefix}_unit_name }}}}" + enabled: "{{{{ {var_prefix}_systemd_enabled | bool 
}}}}" when: + - {var_prefix}_manage_unit | default(false) + - _unit_probe is succeeded + +- name: Ensure unit running state matches harvest + ansible.builtin.systemd: + name: "{{{{ {var_prefix}_unit_name }}}}" + state: "{{{{ {var_prefix}_systemd_state }}}}" + when: + - {var_prefix}_manage_unit | default(false) - _unit_probe is succeeded - - {var_prefix}_start | bool """ ) tasks = "\n".join(task_parts).rstrip() + "\n" - with open( - os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" - ) as f: + with open(os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8") as f: f.write(tasks) - with open( - os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" - ) as f: + with open(os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8") as f: f.write("---\ndependencies: []\n") excluded = svc.get("excluded", []) @@ -443,82 +1032,97 @@ Generated from `{unit}`. # ------------------------- for pr in package_roles: role = pr["role_name"] - pkg = pr["package"] - managed_files = pr["managed_files"] + pkg = pr.get("package") or "" + managed_files = pr.get("managed_files", []) or [] role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) - _copy_artifacts(bundle_dir, role, role_dir) var_prefix = role - defaults = f"""--- -{var_prefix}_packages: - - {pkg} -""" - with open( - os.path.join(role_dir, "defaults", "main.yml"), "w", encoding="utf-8" - ) as f: - f.write(defaults) + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + # Copy only the non-templated artifacts. 
+ if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + pkgs = [pkg] if pkg else [] + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd="Run systemd daemon-reload", + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + base_vars: Dict[str, Any] = { + f"{var_prefix}_packages": pkgs, + f"{var_prefix}_managed_files": files_var, + } + base_vars = _merge_mappings_overwrite(base_vars, jt_map) + + if site_mode: + _write_role_defaults( + role_dir, + { + f"{var_prefix}_packages": [], + f"{var_prefix}_managed_files": [], + }, + ) + _write_hostvars(out_dir, fqdn or "", role, base_vars) + else: + _write_role_defaults(role_dir, base_vars) handlers = """--- - name: Run systemd daemon-reload ansible.builtin.systemd: daemon_reload: true """ - with open( - os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" - ) as f: + with open(os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8") as f: f.write(handlers) - systemd_files = [ - mf for mf in managed_files if mf["path"].startswith("/etc/systemd/system/") - ] - other_files = [mf for mf in managed_files if mf not in systemd_files] - - def copy_task(mf: Dict[str, Any], notify: str | None) -> str: - notify_line = f" notify: {notify}\n" if notify else "" - return f"""- name: Deploy {mf["path"]} - ansible.builtin.copy: - src: "{mf["src_rel"]}" - dest: "{mf["path"]}" - owner: "{mf["owner"]}" - group: "{mf["group"]}" - mode: "{mf["mode"]}" -{notify_line}""" - task_parts: List[str] = [] task_parts.append( f"""--- -- name: Install package {pkg} +# Generated by enroll (data-driven tasks) + +- name: Install packages for {role} ansible.builtin.apt: - name: "{{{{ {var_prefix}_packages }}}}" + name: "{{{{ {var_prefix}_packages | default([]) 
}}}}" state: present update_cache: true + when: ({var_prefix}_packages | default([])) | length > 0 + """ ) - - if systemd_files: - for mf in systemd_files: - task_parts.append(copy_task(mf, "[systemd daemon-reload]")) - task_parts.append( - """- name: Reload systemd to pick up unit changes - ansible.builtin.meta: flush_handlers -""" - ) - - for mf in other_files: - task_parts.append(copy_task(mf, None)) + task_parts.append(_render_generic_files_tasks(var_prefix, include_restart_notify=False)) tasks = "\n".join(task_parts).rstrip() + "\n" - with open( - os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" - ) as f: + with open(os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8") as f: f.write(tasks) - with open( - os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" - ) as f: + with open(os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8") as f: f.write("---\ndependencies: []\n") excluded = pr.get("excluded", []) @@ -542,12 +1146,16 @@ Generated for package `{pkg}`. f.write(readme) manifested_pkg_roles.append(role) - - # Playbooks - _write_playbook( - os.path.join(out_dir, "playbook.yml"), + all_roles = ( manifested_pkg_roles + manifested_service_roles + manifested_etc_custom_roles - + manifested_users_roles, + + manifested_users_roles ) + + if site_mode: + _write_playbook_host( + os.path.join(out_dir, "playbooks", f"{fqdn}.yml"), fqdn or "", all_roles + ) + else: + _write_playbook_all(os.path.join(out_dir, "playbook.yml"), all_roles) diff --git a/poetry.lock b/poetry.lock index a056de4..d594a1a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "certifi" version = "2025.11.12" description = "Python package for providing Mozilla's CA Bundle." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -15,6 +16,7 @@ files = [ name = "charset-normalizer" version = "3.4.4" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -137,6 +139,7 @@ files = [ name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." +category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -148,6 +151,7 @@ files = [ name = "coverage" version = "7.13.0" description = "Code coverage measurement for Python" +category = "dev" optional = false python-versions = ">=3.10" files = [ @@ -255,6 +259,7 @@ toml = ["tomli"] name = "desktop-entry-lib" version = "5.0" description = "A library for working with .desktop files" +category = "dev" optional = false python-versions = ">=3.10" files = [ @@ -269,6 +274,7 @@ xdg-desktop-portal = ["jeepney"] name = "exceptiongroup" version = "1.3.1" description = "Backport of PEP 654 (exception groups)" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -286,6 +292,7 @@ test = ["pytest (>=6)"] name = "idna" version = "3.11" description = "Internationalized Domain Names in Applications (IDNA)" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -300,6 +307,7 @@ all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2 name = "iniconfig" version = "2.3.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.10" files = [ @@ -311,6 +319,7 @@ files = [ name = "packaging" version = "25.0" description = "Core utilities for Python packages" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -322,6 +331,7 @@ files = [ name = "pluggy" version = "1.6.0" description = "plugin and hook calling mechanisms for python" 
+category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -337,6 +347,7 @@ testing = ["coverage", "pytest", "pytest-benchmark"] name = "pygments" version = "2.19.2" description = "Pygments is a syntax highlighting package written in Python." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -351,6 +362,7 @@ windows-terminal = ["colorama (>=0.4.6)"] name = "pyproject-appimage" version = "4.2" description = "Generate AppImages from your Python projects" +category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -367,6 +379,7 @@ tomli = {version = "*", markers = "python_version < \"3.11\""} name = "pytest" version = "8.4.2" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -390,6 +403,7 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests name = "pytest-cov" version = "5.0.0" description = "Pytest plugin for measuring coverage." 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -404,10 +418,94 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] +[[package]] +name = "pyyaml" +version = "6.0.3" +description = "YAML parser and emitter for Python" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "PyYAML-6.0.3-cp38-cp38-macosx_10_13_x86_64.whl", hash = "sha256:c2514fceb77bc5e7a2f7adfaa1feb2fb311607c9cb518dbc378688ec73d8292f"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c57bb8c96f6d1808c030b1687b9b5fb476abaa47f0db9c0101f5e9f394e97f4"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efd7b85f94a6f21e4932043973a7ba2613b059c4a000551892ac9f1d11f5baf3"}, + {file = "PyYAML-6.0.3-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:22ba7cfcad58ef3ecddc7ed1db3409af68d023b7f940da23c6c2a1890976eda6"}, + {file = "PyYAML-6.0.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6344df0d5755a2c9a276d4473ae6b90647e216ab4757f8426893b5dd2ac3f369"}, + {file = "PyYAML-6.0.3-cp38-cp38-win32.whl", hash = "sha256:3ff07ec89bae51176c0549bc4c63aa6202991da2d9a6129d7aef7f1407d3f295"}, + {file = "PyYAML-6.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:5cf4e27da7e3fbed4d6c3d8e797387aaad68102272f8f9752883bc32d61cb87b"}, + {file = "pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b"}, + {file = "pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8"}, + {file = 
"pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198"}, + {file = "pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b"}, + {file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0"}, + {file = "pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69"}, + {file = "pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e"}, + {file = "pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c"}, + {file = "pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e"}, + {file = "pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:850774a7879607d3a6f50d36d04f00ee69e7fc816450e5f7e58d7f17f1ae5c00"}, + {file = "pyyaml-6.0.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8bb0864c5a28024fac8a632c443c87c5aa6f215c0b126c449ae1a150412f31d"}, + {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37d57ad971609cf3c53ba6a7e365e40660e3be0e5175fa9f2365a379d6095a"}, + {file = "pyyaml-6.0.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash 
= "sha256:37503bfbfc9d2c40b344d06b2199cf0e96e97957ab1c1b546fd4f87e53e5d3e4"}, + {file = "pyyaml-6.0.3-cp311-cp311-win32.whl", hash = "sha256:8098f252adfa6c80ab48096053f512f2321f0b998f98150cea9bd23d83e1467b"}, + {file = "pyyaml-6.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f3bfb4965eb874431221a3ff3fdcddc7e74e3b07799e0e84ca4a0f867d449bf"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea"}, + {file = "pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be"}, + {file = "pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7"}, + {file = 
"pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065"}, + {file = 
"pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b"}, + {file = "pyyaml-6.0.3-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:b865addae83924361678b652338317d1bd7e79b1f4596f96b96c77a5a34b34da"}, + {file = "pyyaml-6.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c3355370a2c156cffb25e876646f149d5d68f5e0a3ce86a5084dd0b64a994917"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3c5677e12444c15717b902a5798264fa7909e41153cdf9ef7ad571b704a63dd9"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5ed875a24292240029e4483f9d4a4b8a1ae08843b9c54f43fcc11e404532a8a5"}, + {file = "pyyaml-6.0.3-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0150219816b6a1fa26fb4699fb7daa9caf09eb1999f3b70fb6e786805e80375a"}, + {file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fa160448684b4e94d80416c0fa4aac48967a969efe22931448d853ada8baf926"}, + {file = "pyyaml-6.0.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:27c0abcb4a5dac13684a37f76e701e054692a9b2d3064b70f5e4eb54810553d7"}, + {file = "pyyaml-6.0.3-cp39-cp39-win32.whl", hash = "sha256:1ebe39cb5fc479422b83de611d14e2c0d3bb2a18bbcb01f229ab3cfbd8fee7a0"}, + {file = "pyyaml-6.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:2e71d11abed7344e42a8849600193d15b6def118602c4c176f748e4583246007"}, + {file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"}, +] + [[package]] name = "requests" version = "2.32.5" description = "Python 
HTTP for Humans." +category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -429,6 +527,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "tomli" version = "2.3.0" description = "A lil' TOML parser" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -480,6 +579,7 @@ files = [ name = "typing-extensions" version = "4.15.0" description = "Backported and Experimental Type Hints for Python 3.9+" +category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -491,6 +591,7 @@ files = [ name = "urllib3" version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -507,4 +608,4 @@ zstd = ["backports-zstd (>=1.0.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "84c06974dfe822257ef324807672e51d71c0a6197e037fa56e92d8369c40d341" +content-hash = "10c279bd393cab27a94b4848c6f88f3a7a3d1af5062882c3e6fd2c2e15c945c8" diff --git a/pyproject.toml b/pyproject.toml index 6b4d1b1..444def3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.0.4" +version = "0.0.5" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" @@ -10,6 +10,7 @@ repository = "https://git.mig5.net/mig5/enroll" [tool.poetry.dependencies] python = "^3.10" +pyyaml = "^6.0.3" [tool.poetry.scripts] enroll = "enroll.cli:main" From f40b9d834d008becd2bfab3e3845a75dfe86e991 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Tue, 16 Dec 2025 20:15:21 +1100 Subject: [PATCH 024/115] black and pyflakes3 --- enroll/manifest.py | 91 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 23 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index b772bcd..e27cfd5 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -386,7 +386,6 @@ def 
_defaults_with_jinjaturtle(base_defaults: str, vars_text: str) -> str: ) - def _write_role_defaults(role_dir: str, mapping: Dict[str, Any]) -> None: """Overwrite role defaults/main.yml with the provided mapping.""" defaults_path = os.path.join(role_dir, "defaults", "main.yml") @@ -435,12 +434,13 @@ def _build_managed_files_var( return out -def _render_generic_files_tasks(var_prefix: str, *, include_restart_notify: bool) -> str: +def _render_generic_files_tasks( + var_prefix: str, *, include_restart_notify: bool +) -> str: """Render generic tasks to deploy _managed_files safely.""" # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - restart_notify = "Restart service" if include_restart_notify else "" return f"""# Generated by enroll (data-driven tasks) - name: Deploy systemd unit files (templates) @@ -498,6 +498,7 @@ def _render_generic_files_tasks(var_prefix: str, *, include_restart_notify: bool notify: "{{{{ item.notify | default([]) }}}}" """ + def manifest( bundle_dir: str, out_dir: str, @@ -666,7 +667,9 @@ def manifest( }, ) - with open(os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: f.write("---\ndependencies: []\n") # tasks (data-driven) @@ -723,28 +726,43 @@ def manifest( loop: "{{ users_ssh_files | default([]) }}" """ - with open(os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: f.write(users_tasks) - with open(os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" + ) as f: f.write("---\n") readme = ( - f"""# users + """# users Generated non-system user accounts and SSH public material. 
## Users """ - + ("\n".join([f"- {u.get('name')} (uid {u.get('uid')})" for u in users]) or "- (none)") + + ( + "\n".join([f"- {u.get('name')} (uid {u.get('uid')})" for u in users]) + or "- (none)" + ) + """\n ## Included SSH files """ - + ("\n".join([f"- {mf.get('path')} ({mf.get('reason')})" for mf in managed_files]) or "- (none)") + + ( + "\n".join( + [f"- {mf.get('path')} ({mf.get('reason')})" for mf in managed_files] + ) + or "- (none)" + ) + """\n ## Excluded """ - + ("\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) or "- (none)") + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + """\n ## Notes """ @@ -815,8 +833,12 @@ Generated non-system user accounts and SSH public material. else: _write_role_defaults(role_dir, vars_map) - tasks = """---\n""" + _render_generic_files_tasks(var_prefix, include_restart_notify=False) - with open(os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8") as f: + tasks = """---\n""" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: f.write(tasks.rstrip() + "\n") handlers = """--- @@ -824,10 +846,14 @@ Generated non-system user accounts and SSH public material. ansible.builtin.systemd: daemon_reload: true """ - with open(os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" + ) as f: f.write(handlers) - with open(os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: f.write("---\ndependencies: []\n") readme = ( @@ -841,7 +867,10 @@ Unowned /etc config files not attributed to packages or services. 
+ """\n ## Excluded """ - + ("\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) or "- (none)") + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + """\n ## Notes """ @@ -948,7 +977,9 @@ Unowned /etc config files not attributed to packages or services. - {var_prefix}_manage_unit | default(false) - ({var_prefix}_systemd_state | default('stopped')) == 'started' """ - with open(os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" + ) as f: f.write(handlers) task_parts: List[str] = [] @@ -966,7 +997,9 @@ Unowned /etc config files not attributed to packages or services. """ ) - task_parts.append(_render_generic_files_tasks(var_prefix, include_restart_notify=True)) + task_parts.append( + _render_generic_files_tasks(var_prefix, include_restart_notify=True) + ) task_parts.append( f""" @@ -998,10 +1031,14 @@ Unowned /etc config files not attributed to packages or services. ) tasks = "\n".join(task_parts).rstrip() + "\n" - with open(os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: f.write(tasks) - with open(os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: f.write("---\ndependencies: []\n") excluded = svc.get("excluded", []) @@ -1099,7 +1136,9 @@ Generated from `{unit}`. ansible.builtin.systemd: daemon_reload: true """ - with open(os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" + ) as f: f.write(handlers) task_parts: List[str] = [] @@ -1116,13 +1155,19 @@ Generated from `{unit}`. 
""" ) - task_parts.append(_render_generic_files_tasks(var_prefix, include_restart_notify=False)) + task_parts.append( + _render_generic_files_tasks(var_prefix, include_restart_notify=False) + ) tasks = "\n".join(task_parts).rstrip() + "\n" - with open(os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: f.write(tasks) - with open(os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8") as f: + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: f.write("---\ndependencies: []\n") excluded = pr.get("excluded", []) From 026416d158baef451240c0d021e4b47b0e188e56 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Tue, 16 Dec 2025 20:48:08 +1100 Subject: [PATCH 025/115] Fix tests --- enroll/harvest.py | 2 +- enroll/manifest.py | 58 +++++++++++++++---- tests.sh | 2 +- tests/test_cli.py | 56 ++++++++++++++++-- tests/test_jinjaturtle.py | 99 ++++++++++++++++++++++++++++++++ tests/test_manifest.py | 118 ++++++++++++++++++++++++++++++++++++-- 6 files changed, 313 insertions(+), 22 deletions(-) create mode 100644 tests/test_jinjaturtle.py diff --git a/enroll/harvest.py b/enroll/harvest.py index 78f7d1f..688a489 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -132,7 +132,7 @@ def _safe_name(s: str) -> str: def _role_id(raw: str) -> str: - # normalize separators first + # normalise separators first s = re.sub(r"[^A-Za-z0-9]+", "_", raw) # split CamelCase -> snake_case s = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s) diff --git a/enroll/manifest.py b/enroll/manifest.py index e27cfd5..7565160 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -48,8 +48,24 @@ def _yaml_dump_mapping(obj: Dict[str, Any], *, sort_keys: bool = True) -> str: for k, v in sorted(obj.items()) if sort_keys else obj.items(): lines.append(f"{k}: {v!r}") return "\n".join(lines).rstrip() + "\n" + + # ansible-lint/yamllint's indentation rules 
are stricter than YAML itself. + # In particular, they expect sequences nested under a mapping key to be + # indented (e.g. `foo:\n - a`), whereas PyYAML's default is often + # `foo:\n- a`. + class _IndentDumper(yaml.SafeDumper): # type: ignore + def increase_indent(self, flow: bool = False, indentless: bool = False): + return super().increase_indent(flow, False) + return ( - yaml.safe_dump(obj, default_flow_style=False, sort_keys=sort_keys).rstrip() + yaml.dump( + obj, + Dumper=_IndentDumper, + default_flow_style=False, + sort_keys=sort_keys, + indent=2, + allow_unicode=True, + ).rstrip() + "\n" ) @@ -124,7 +140,7 @@ def _extract_jinjaturtle_block(text: str) -> str: return text.strip() + "\n" -def _normalize_jinjaturtle_vars_text(vars_text: str) -> str: +def _normalise_jinjaturtle_vars_text(vars_text: str) -> str: """Deduplicate keys in a vars fragment by parsing as YAML and dumping it back.""" m = _yaml_load_mapping(vars_text) if not m: @@ -166,14 +182,14 @@ def _copy_artifacts( dst = os.path.join(dst_files_dir, rel) # If a file was successfully templatised by JinjaTurtle, do NOT - # also materialize the raw copy in the destination files dir. + # also materialise the raw copy in the destination files dir. # (This keeps the output minimal and avoids redundant "raw" files.) if exclude_rels and rel in exclude_rels: try: if os.path.isfile(dst): os.remove(dst) except Exception: - pass + pass # nosec continue if preserve_existing and os.path.exists(dst): @@ -342,7 +358,7 @@ def _jinjify_managed_files( except Exception: # If jinjaturtle cannot process a file for any reason, skip silently. # (Enroll's core promise is to be optimistic and non-interactive.) 
- continue + continue # nosec tmpl_rel = src_rel + ".j2" tmpl_dst = os.path.join(role_dir, "templates", tmpl_rel) @@ -372,7 +388,7 @@ def _hostvars_only_jinjaturtle(vars_text: str) -> str: def _defaults_with_jinjaturtle(base_defaults: str, vars_text: str) -> str: if not vars_text.strip(): return base_defaults.rstrip() + "\n" - vars_text = _normalize_jinjaturtle_vars_text(vars_text) + vars_text = _normalise_jinjaturtle_vars_text(vars_text) # Always regenerate the block (we regenerate whole defaults files anyway) return ( base_defaults.rstrip() @@ -450,7 +466,11 @@ def _render_generic_files_tasks( owner: "{{{{ item.owner }}}}" group: "{{{{ item.group }}}}" mode: "{{{{ item.mode }}}}" - loop: "{{{{ {var_prefix}_managed_files | default([]) | selectattr('is_systemd_unit','equalto', true) | selectattr('kind','equalto','template') | list }}}}" + loop: >- + {{{{ {var_prefix}_managed_files | default([]) + | selectattr('is_systemd_unit', 'equalto', true) + | selectattr('kind', 'equalto', 'template') + | list }}}} notify: "{{{{ item.notify | default([]) }}}}" - name: Deploy systemd unit files (copies) @@ -465,12 +485,20 @@ def _render_generic_files_tasks( owner: "{{{{ item.owner }}}}" group: "{{{{ item.group }}}}" mode: "{{{{ item.mode }}}}" - loop: "{{{{ {var_prefix}_managed_files | default([]) | selectattr('is_systemd_unit','equalto', true) | selectattr('kind','equalto','copy') | list }}}}" + loop: >- + {{{{ {var_prefix}_managed_files | default([]) + | selectattr('is_systemd_unit', 'equalto', true) + | selectattr('kind', 'equalto', 'copy') + | list }}}} notify: "{{{{ item.notify | default([]) }}}}" - name: Reload systemd to pick up unit changes ansible.builtin.meta: flush_handlers - when: "({var_prefix}_managed_files | default([]) | selectattr('is_systemd_unit','equalto', true) | list | length) > 0" + when: >- + ({var_prefix}_managed_files | default([]) + | selectattr('is_systemd_unit', 'equalto', true) + | list + | length) > 0 - name: Deploy other managed files (templates) 
ansible.builtin.template: @@ -479,7 +507,11 @@ def _render_generic_files_tasks( owner: "{{{{ item.owner }}}}" group: "{{{{ item.group }}}}" mode: "{{{{ item.mode }}}}" - loop: "{{{{ {var_prefix}_managed_files | default([]) | selectattr('is_systemd_unit','equalto', false) | selectattr('kind','equalto','template') | list }}}}" + loop: >- + {{{{ {var_prefix}_managed_files | default([]) + | selectattr('is_systemd_unit', 'equalto', false) + | selectattr('kind', 'equalto', 'template') + | list }}}} notify: "{{{{ item.notify | default([]) }}}}" - name: Deploy other managed files (copies) @@ -494,7 +526,11 @@ def _render_generic_files_tasks( owner: "{{{{ item.owner }}}}" group: "{{{{ item.group }}}}" mode: "{{{{ item.mode }}}}" - loop: "{{{{ {var_prefix}_managed_files | default([]) | selectattr('is_systemd_unit','equalto', false) | selectattr('kind','equalto','copy') | list }}}}" + loop: >- + {{{{ {var_prefix}_managed_files | default([]) + | selectattr('is_systemd_unit', 'equalto', false) + | selectattr('kind', 'equalto', 'copy') + | list }}}} notify: "{{{{ item.notify | default([]) }}}}" """ diff --git a/tests.sh b/tests.sh index f8d246c..ea7ad59 100755 --- a/tests.sh +++ b/tests.sh @@ -11,7 +11,7 @@ rm -rf "${BUNDLE_DIR}" "${ANSIBLE_DIR}" # Generate data poetry run \ - enroll enroll \ + enroll single-shot \ --harvest "${BUNDLE_DIR}" \ --out "${ANSIBLE_DIR}" diff --git a/tests/test_cli.py b/tests/test_cli.py index a93c509..9e3422c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -22,9 +22,12 @@ def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path): def test_cli_manifest_subcommand_calls_manifest(monkeypatch, tmp_path): called = {} - def fake_manifest(harvest_dir: str, out_dir: str): + def fake_manifest(harvest_dir: str, out_dir: str, **kwargs): called["harvest"] = harvest_dir called["out"] = out_dir + # Common manifest args should be passed through by the CLI. 
+ called["fqdn"] = kwargs.get("fqdn") + called["jinjaturtle"] = kwargs.get("jinjaturtle") monkeypatch.setattr(cli, "manifest", fake_manifest) monkeypatch.setattr( @@ -43,6 +46,8 @@ def test_cli_manifest_subcommand_calls_manifest(monkeypatch, tmp_path): cli.main() assert called["harvest"] == str(tmp_path / "bundle") assert called["out"] == str(tmp_path / "ansible") + assert called["fqdn"] is None + assert called["jinjaturtle"] == "auto" def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path): @@ -52,8 +57,16 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path) calls.append(("harvest", bundle_dir)) return str(tmp_path / "bundle" / "state.json") - def fake_manifest(bundle_dir: str, out_dir: str): - calls.append(("manifest", bundle_dir, out_dir)) + def fake_manifest(bundle_dir: str, out_dir: str, **kwargs): + calls.append( + ( + "manifest", + bundle_dir, + out_dir, + kwargs.get("fqdn"), + kwargs.get("jinjaturtle"), + ) + ) monkeypatch.setattr(cli, "harvest", fake_harvest) monkeypatch.setattr(cli, "manifest", fake_manifest) @@ -62,7 +75,7 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path) "argv", [ "enroll", - "enroll", + "single-shot", "--harvest", str(tmp_path / "bundle"), "--out", @@ -73,5 +86,38 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path) cli.main() assert calls == [ ("harvest", str(tmp_path / "bundle")), - ("manifest", str(tmp_path / "bundle"), str(tmp_path / "ansible")), + ("manifest", str(tmp_path / "bundle"), str(tmp_path / "ansible"), None, "auto"), ] + + +def test_cli_manifest_common_args(monkeypatch, tmp_path): + """Ensure --fqdn and jinjaturtle mode flags are forwarded correctly.""" + + called = {} + + def fake_manifest(harvest_dir: str, out_dir: str, **kwargs): + called["harvest"] = harvest_dir + called["out"] = out_dir + called["fqdn"] = kwargs.get("fqdn") + called["jinjaturtle"] = kwargs.get("jinjaturtle") + + 
monkeypatch.setattr(cli, "manifest", fake_manifest) + monkeypatch.setattr( + sys, + "argv", + [ + "enroll", + "manifest", + "--harvest", + str(tmp_path / "bundle"), + "--out", + str(tmp_path / "ansible"), + "--fqdn", + "example.test", + "--no-jinjaturtle", + ], + ) + + cli.main() + assert called["fqdn"] == "example.test" + assert called["jinjaturtle"] == "off" diff --git a/tests/test_jinjaturtle.py b/tests/test_jinjaturtle.py new file mode 100644 index 0000000..68bb04c --- /dev/null +++ b/tests/test_jinjaturtle.py @@ -0,0 +1,99 @@ +import json +from pathlib import Path + +import enroll.manifest as manifest_mod +from enroll.jinjaturtle import JinjifyResult + + +def test_manifest_uses_jinjaturtle_templates_and_does_not_copy_raw( + monkeypatch, tmp_path: Path +): + """If jinjaturtle can templatise a file, we should store a template in the role + and avoid keeping the raw file copy in the destination files area. + + This test stubs out jinjaturtle execution so it doesn't depend on the external tool. + """ + + bundle = tmp_path / "bundle" + out = tmp_path / "ansible" + + # A jinjaturtle-compatible config file.
+ (bundle / "artifacts" / "foo" / "etc").mkdir(parents=True, exist_ok=True) + (bundle / "artifacts" / "foo" / "etc" / "foo.ini").write_text( + "[main]\nkey = 1\n", encoding="utf-8" + ) + + state = { + "host": {"hostname": "test", "os": "debian"}, + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [ + { + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "active_state": "inactive", + "sub_state": "dead", + "unit_file_state": "disabled", + "condition_result": "no", + "managed_files": [ + { + "path": "/etc/foo.ini", + "src_rel": "etc/foo.ini", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": [], + "notes": [], + } + ], + "package_roles": [], + } + + bundle.mkdir(parents=True, exist_ok=True) + (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + + # Pretend jinjaturtle exists. + monkeypatch.setattr( + manifest_mod, "find_jinjaturtle_cmd", lambda: "/usr/bin/jinjaturtle" + ) + + # Stub jinjaturtle output. + def fake_run_jinjaturtle( + jt_exe: str, src_path: str, *, role_name: str, force_format=None + ): + assert role_name == "foo" + return JinjifyResult( + template_text="[main]\nkey = {{ foo_key }}\n", + vars_text="foo_key: 1\n", + ) + + monkeypatch.setattr(manifest_mod, "run_jinjaturtle", fake_run_jinjaturtle) + + manifest_mod.manifest(str(bundle), str(out), jinjaturtle="on") + + # Template should exist in the role. + assert (out / "roles" / "foo" / "templates" / "etc" / "foo.ini.j2").exists() + + # Raw file should NOT be copied into role files/ because it was templatised. + assert not (out / "roles" / "foo" / "files" / "etc" / "foo.ini").exists() + + # Defaults should include jinjaturtle vars. 
+ defaults = (out / "roles" / "foo" / "defaults" / "main.yml").read_text( + encoding="utf-8" + ) + assert "foo_key: 1" in defaults diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 09c66e1..98f418f 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -94,10 +94,16 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): manifest(str(bundle), str(out)) - # Service role: conditional start must be a clean Ansible expression + # Service role: systemd management should be gated on foo_manage_unit and a probe. tasks = (out / "roles" / "foo" / "tasks" / "main.yml").read_text(encoding="utf-8") - assert "when:\n - _unit_probe is succeeded\n - foo_start | bool\n" in tasks - # Ensure we didn't emit deprecated/broken '{{ }}' delimiters in when: + assert "- name: Probe whether systemd unit exists and is manageable" in tasks + assert "when: foo_manage_unit | default(false)" in tasks + assert ( + "when:\n - foo_manage_unit | default(false)\n - _unit_probe is succeeded\n" + in tasks + ) + + # Ensure we didn't emit deprecated/broken '{{ }}' delimiters in when: lines. 
for line in tasks.splitlines(): if line.lstrip().startswith("when:"): assert "{{" not in line and "}}" not in line @@ -105,7 +111,9 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): defaults = (out / "roles" / "foo" / "defaults" / "main.yml").read_text( encoding="utf-8" ) - assert "foo_start: false" in defaults + assert "foo_manage_unit: true" in defaults + assert "foo_systemd_enabled: true" in defaults + assert "foo_systemd_state: stopped" in defaults # Playbook should include users, etc_custom, packages, and services pb = (out / "playbook.yml").read_text(encoding="utf-8") @@ -113,3 +121,105 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): assert "- etc_custom" in pb assert "- curl" in pb assert "- foo" in pb + + +def test_manifest_site_mode_creates_host_inventory_and_raw_files(tmp_path: Path): + """In --fqdn mode, host-specific state goes into inventory/host_vars.""" + + fqdn = "host1.example.test" + bundle = tmp_path / "bundle" + out = tmp_path / "ansible" + + # Artifacts for a service-managed file. + (bundle / "artifacts" / "foo" / "etc").mkdir(parents=True, exist_ok=True) + (bundle / "artifacts" / "foo" / "etc" / "foo.conf").write_text( + "x", encoding="utf-8" + ) + + # Artifacts for etc_custom file so copy works. 
+ (bundle / "artifacts" / "etc_custom" / "etc" / "default").mkdir( + parents=True, exist_ok=True + ) + (bundle / "artifacts" / "etc_custom" / "etc" / "default" / "keyboard").write_text( + "kbd", encoding="utf-8" + ) + + state = { + "host": {"hostname": "test", "os": "debian"}, + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [ + { + "path": "/etc/default/keyboard", + "src_rel": "etc/default/keyboard", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "custom_unowned", + } + ], + "excluded": [], + "notes": [], + }, + "services": [ + { + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "active_state": "active", + "sub_state": "running", + "unit_file_state": "enabled", + "condition_result": "yes", + "managed_files": [ + { + "path": "/etc/foo.conf", + "src_rel": "etc/foo.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": [], + "notes": [], + } + ], + "package_roles": [], + } + + bundle.mkdir(parents=True, exist_ok=True) + (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + + manifest(str(bundle), str(out), fqdn=fqdn) + + # Host playbook exists. + assert (out / "playbooks" / f"{fqdn}.yml").exists() + + # Role defaults are safe/host-agnostic in site mode. + foo_defaults = (out / "roles" / "foo" / "defaults" / "main.yml").read_text( + encoding="utf-8" + ) + assert "foo_packages: []" in foo_defaults + assert "foo_managed_files: []" in foo_defaults + assert "foo_manage_unit: false" in foo_defaults + + # Host vars contain host-specific state. 
+ foo_hostvars = (out / "inventory" / "host_vars" / fqdn / "foo.yml").read_text( + encoding="utf-8" + ) + assert "foo_packages" in foo_hostvars + assert "foo_managed_files" in foo_hostvars + assert "foo_manage_unit: true" in foo_hostvars + assert "foo_systemd_state: started" in foo_hostvars + + # Non-templated raw config is stored per-host under .files. + assert ( + out / "inventory" / "host_vars" / fqdn / "foo" / ".files" / "etc" / "foo.conf" + ).exists() From 6a36a9d2d5d4a1a15ee5ea564b9da5b312dcf68c Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Wed, 17 Dec 2025 17:02:16 +1100 Subject: [PATCH 026/115] Remote mode and dangerous flag, other tweaks * Add remote mode for harvesting a remote machine via a local workstation (no need to install enroll remotely) Optionally use `--no-sudo` if you don't want the remote user to have passwordless sudo when conducting the harvest, albeit you'll end up with less useful data (same as if running `enroll harvest` on a machine without sudo) * Add `--dangerous` flag to capture even sensitive data (use at your own risk!) * Do a better job at capturing other config files in `/etc//` even if that package doesn't normally ship or manage those files. 
--- CHANGELOG.md | 10 ++ README.md | 95 +++++++++-- enroll/cache.py | 79 +++++++++ enroll/cli.py | 116 +++++++++++-- enroll/harvest.py | 70 +++++++- enroll/ignore.py | 22 ++- enroll/jinjaturtle.py | 22 --- enroll/manifest.py | 90 ++++------- enroll/remote.py | 209 ++++++++++++++++++++++++ poetry.lock | 357 ++++++++++++++++++++++++++++++++++++++--- pyproject.toml | 1 + tests/test_cli.py | 141 +++++++++++++++- tests/test_manifest.py | 26 +++ 13 files changed, 1083 insertions(+), 155 deletions(-) create mode 100644 enroll/cache.py create mode 100644 enroll/remote.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 76d737d..8c2c247 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +# 0.1.0 + + * Add remote mode for harvesting a remote machine via a local workstation (no need to install enroll remotely) + Optionally use `--no-sudo` if you don't want the remote user to have passwordless sudo when conducting the + harvest, albeit you'll end up with less useful data (same as if running `enroll harvest` on a machine without + sudo) + * Add `--dangerous` flag to capture even sensitive data (use at your own risk!) + * Do a better job at capturing other config files in `/etc//` even if that package doesn't normally + ship or manage those files. + # 0.0.5 * Use JinjaTurtle to generate dynamic template/inventory if it's on the PATH diff --git a/README.md b/README.md index b9375dd..c9e195f 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ It aims to be **optimistic and noninteractive**: - Detects packages that have been installed -- Detects Debian package ownership of `/etc` files using dpkg’s local database. +- Detects Debian package ownership of `/etc` files using dpkg's local database. - Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available). - Also captures **service-relevant custom/unowned files** under `/etc//...` (e.g. drop-in config includes). 
- Defensively excludes likely secrets (path denylist + content sniff + size caps). @@ -23,12 +23,12 @@ It aims to be **optimistic and noninteractive**: **enroll** has two distinct ways to generate Ansible: ## 1) Single-site mode (default: *no* `--fqdn`) -Use this when you’re enrolling **one server** (or you’re generating a “golden” role set you intend to reuse). +Use this when you're enrolling **one server** (or you're generating a "golden" role set you intend to reuse). **What you get** - Config, templates, and defaults are primarily **contained inside each role**. -- Raw config files (when not templated) live in the role’s `files/`. -- Template variables (when templated) live in the role’s `defaults/main.yml`. +- Raw config files (when not templated) live in the role's `files/`. +- Template variables (when templated) live in the role's `defaults/main.yml`. **Pros** - Roles are more **self-contained** and easier to understand. @@ -36,14 +36,14 @@ Use this when you’re enrolling **one server** (or you’re generating a “gol - Less inventory abstraction/duplication. **Cons** -- Less convenient for quickly enrolling multiple hosts with divergent configs (you’ll do more manual work to make roles flexible across hosts). +- Less convenient for quickly enrolling multiple hosts with divergent configs (you'll do more manual work to make roles flexible across hosts). ## 2) Multi-site mode (`--fqdn`) Use this when you want to enroll **several existing servers** quickly, especially if they differ. **What you get** - Roles are **shared** across hosts, but host-specific data lives in inventory. 
-- Host inventory drives what’s managed: +- Host inventory drives what's managed: - which files to deploy for that host - which packages are relevant for that host - which services should be enabled/started for that host @@ -51,17 +51,17 @@ Use this when you want to enroll **several existing servers** quickly, especiall **Pros** - Fastest way to retrofit **multiple servers** into config management. -- Avoids shared-role “host A breaks host B” problems by keeping host-specific state in inventory. +- Avoids shared-role "host A breaks host B" problems by keeping host-specific state in inventory. - Better fit when you already have a fleet and want to capture/reflect reality first. **Cons** -- More abstraction: roles become more “data-driven”. +- More abstraction: roles become more "data-driven". - Potential duplication: raw files may exist per-host in inventory (even if identical). - Harder to use the roles to **provision a brand-new server** without also building an inventory for that new host, because multi-site output assumes the server already exists and is being retrofitted. **Rule of thumb** -- If your goal is *“make this one server reproducible / provisionable”* → start with **single-site**. -- If your goal is *“get several already-running servers under management quickly”* → use **multi-site**. +- If your goal is *"make this one server reproducible / provisionable"* → start with **single-site**. +- If your goal is *"get several already-running servers under management quickly"* → use **multi-site**. --- @@ -75,6 +75,24 @@ It also detects if any config files have been *changed* from their packaged defa The harvest writes a state.json file explaining all the data it harvested and, if it chose not to harvest something, explanations as to why that is the case (see below: sensitive data). 
+### Remote harvesting (workstation → remote) + +If you'd prefer not to install **enroll** on the target host, you can run the harvest over SSH from your workstation and pull the harvest bundle back locally: + +```bash +enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest +``` + +- `--remote-port` defaults to `22` +- `--remote-user` defaults to your local `$USER` + +This uploads a self-contained `enroll` zipapp to a temporary directory on the remote host, runs `harvest` there, then downloads the resulting harvest bundle to the `--out` directory on your workstation. + +**Privilege note:** A "full" harvest typically needs root access. Remote harvesting assumes the remote user can run `sudo` **without a password prompt** (NOPASSWD) so the harvest can run non-interactively. If you don't want this, pass `--no-sudo` as well. + +**JinjaTurtle note:** If you want to take advantage of JinjaTurtle to turn configs into templates (see below note on JinjaTurtle integration), you'll still need to install JinjaTurtle on the remote host first. + + ## Sensitive data **enroll** doesn't make any assumptions about how you might handle sensitive data from your config files, in Ansible. Some people might use SOPS, others might use Vault, others might do something else entirely. @@ -85,6 +103,25 @@ This inevitably means that it will deliberately miss some important config files Nonetheless, in the Harvest 'state' file, there should be an explanation of 'excluded files'. You can parse or inspect this file to find what it chose to ignore, and then you know what you might want to augment the results with later, once you 'manifest' the harvest into Ansible configuration. +Nonetheless, in some cases it may be appropriate to truly grab as much as you can, including secrets. For that, read on for the `--dangerous` flag. 
+ +### Opting in to fetching sensitive data: `--dangerous` + +**WARNING:** `--dangerous` disables enroll's "likely a secret" safety checks. This can cause private keys, TLS key material, API tokens, database passwords, and other credentials to be copied into your harvest output **in plaintext**. + +Only use `--dangerous` if you explicitly want to scoop up sensitive files and you understand where the harvest output is stored, who can read it, and how it will be handled (backups, git commits, etc, as well as risk of using `--out` with a shared `/tmp` location where other users could see the data). We offer no liability if your sensitive data is compromised through the use of this tool! + +**Strong recommendation:** If you plan to keep harvested files long-term (especially in git), encrypt secrets at rest. A common approach is to use **SOPS** and then use the **community.sops** Ansible collection to load/decrypt encrypted content during deploy. + +Install the collection: + +```bash +ansible-galaxy collection install community.sops +``` + +Then you can use the collection's lookup/vars plugins or modules to decrypt or load SOPS-encrypted vars at runtime. + + ## Manifest The 'manifest' subcommand expects to be given a path to the 'harvest' obtained in the first step. It will then attempt to generate Ansible roles and playbooks (and potentially 'inventory') from that harvest. @@ -114,15 +151,17 @@ JinjaTurtle will be used automatically if it is detected on the `$PATH`. You can If you *do* have JinjaTurtle installed, but *don't* wish to make use of it, you can use `--no-jinjaturtle`, in which case all config files will be kept as 'raw' files. +**Remote mode**: if you are using the `--remote-xxx` flags for `harvest` or `single-shot` subcommands, and want to take advantage of the JinjaTurtle integration, you'll still need to install JinjaTurtle on the remote host *in advance*.
+ --- -# How multi-site avoids “shared role breaks a host” +# How multi-site avoids "shared role breaks a host" In multi-site mode, **roles are data-driven**. The role contains generic tasks like: -- “deploy all files listed for this host” -- “install packages listed for this host” -- “apply systemd enable/start state listed for this host” +- "deploy all files listed for this host" +- "install packages listed for this host" +- "apply systemd enable/start state listed for this host" The host inventory is what decides which files/packages/services apply to that host. This prevents the classic failure mode where host2 adds a config file to a shared role and host1 then fails trying to deploy a file it never had. @@ -130,7 +169,7 @@ Raw non-templated files are stored under: - `inventory/host_vars/<fqdn>/<role>/.files/...` -…and the host’s role variables describe which of those files should be deployed. +…and the host's role variables describe which of those files should be deployed. --- @@ -182,6 +221,24 @@ On the host (root recommended to harvest as much data as possible): ```bash enroll harvest --out /tmp/enroll-harvest ``` +### Remote harvest over SSH (no enroll install required on the remote host) + +```bash +enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest +``` + +### `--dangerous` (captures potentially sensitive files — read the warning above) + +```bash +enroll harvest --out /tmp/enroll-harvest --dangerous +``` + +Remote + dangerous: + +```bash +enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest --dangerous +``` + ## 2.
Generate Ansible manifests (roles/playbook) from that harvest @@ -208,6 +265,14 @@ Alternatively, do both steps in one shot: ```bash enroll single-shot --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "$(hostname -f)" ``` +Remote single-shot (run harvest over SSH, then manifest locally): + +```bash +enroll single-shot --remote-host myhost.example.com --remote-user myuser --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "myhost.example.com" +``` + +In multi-site mode (`--fqdn`), you can run single-shot repeatedly against multiple hosts while reusing the same `--out` directory so each host merges into the existing Ansible repo. + ## 3. Run Ansible diff --git a/enroll/cache.py b/enroll/cache.py new file mode 100644 index 0000000..1dc656b --- /dev/null +++ b/enroll/cache.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import os +import re +import tempfile +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Optional + + +def _safe_component(s: str) -> str: + s = s.strip() + if not s: + return "unknown" + s = re.sub(r"[^A-Za-z0-9_.-]+", "_", s) + s = re.sub(r"_+", "_", s) + return s[:64] + + +def enroll_cache_dir() -> Path: + """Return the base cache directory for enroll. + + We default to ~/.local/cache to match common Linux conventions in personal + homedirs, but honour XDG_CACHE_HOME if set. + """ + base = os.environ.get("XDG_CACHE_HOME") + if base: + root = Path(base).expanduser() + else: + root = Path.home() / ".local" / "cache" + return root / "enroll" + + +@dataclass(frozen=True) +class HarvestCache: + """A locally-persistent directory that holds a harvested bundle.""" + + dir: Path + + @property + def state_json(self) -> Path: + return self.dir / "state.json" + + +def _ensure_dir_secure(path: Path) -> None: + """Create a directory with restrictive permissions; refuse symlinks.""" + # Refuse a symlink at the leaf. 
+ if path.exists() and path.is_symlink(): + raise RuntimeError(f"Refusing to use symlink path: {path}") + path.mkdir(parents=True, exist_ok=True, mode=0o700) + try: + os.chmod(path, 0o700) + except OSError: + # Best-effort; on some FS types chmod may fail. + pass + + +def new_harvest_cache_dir(*, hint: Optional[str] = None) -> HarvestCache: + """Create a new, unpredictable harvest directory under the user's cache. + + This mitigates pre-guessing attacks (e.g. an attacker creating a directory + in advance in a shared temp location) by creating the bundle directory under + the user's home and using mkdtemp() randomness. + """ + base = enroll_cache_dir() / "harvest" + _ensure_dir_secure(base) + + ts = datetime.now().strftime("%Y%m%d-%H%M%S") + safe = _safe_component(hint or "harvest") + prefix = f"{ts}-{safe}-" + + # mkdtemp creates a new directory with a random suffix. + d = Path(tempfile.mkdtemp(prefix=prefix, dir=str(base))) + try: + os.chmod(d, 0o700) + except OSError: + pass + return HarvestCache(dir=d) diff --git a/enroll/cli.py b/enroll/cli.py index 0511b54..a4b1142 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -1,9 +1,14 @@ from __future__ import annotations import argparse +import os +from pathlib import Path +from typing import Optional +from .cache import new_harvest_cache_dir from .harvest import harvest from .manifest import manifest +from .remote import remote_harvest def _add_common_manifest_args(p: argparse.ArgumentParser) -> None: @@ -32,46 +37,129 @@ def _jt_mode(args: argparse.Namespace) -> str: return "auto" +def _add_remote_args(p: argparse.ArgumentParser) -> None: + p.add_argument( + "--remote-host", + help="SSH host to run harvesting on (if set, harvest runs remotely and is pulled locally).", + ) + p.add_argument( + "--remote-port", + type=int, + default=22, + help="SSH port for --remote-host (default: 22).", + ) + p.add_argument( + "--remote-user", + default=os.environ.get("USER") or None, + help="SSH username for --remote-host (default: 
local $USER).", + ) + + def main() -> None: ap = argparse.ArgumentParser(prog="enroll") sub = ap.add_subparsers(dest="cmd", required=True) h = sub.add_parser("harvest", help="Harvest service/package/config state") - h.add_argument("--out", required=True, help="Harvest output directory") + h.add_argument("--out", help="Harvest output directory") + h.add_argument( + "--dangerous", + action="store_true", + help="Collect files more aggressively (may include secrets). Disables secret-avoidance checks.", + ) + h.add_argument( + "--no-sudo", + action="store_true", + help="Don't use sudo on the remote host (when using --remote options). This may result in a limited harvest due to permission restrictions.", + ) + _add_remote_args(h) - r = sub.add_parser("manifest", help="Render Ansible roles from a harvest") - r.add_argument( + m = sub.add_parser("manifest", help="Render Ansible roles from a harvest") + m.add_argument( "--harvest", required=True, help="Path to the directory created by the harvest command", ) - r.add_argument( + m.add_argument( "--out", required=True, help="Output directory for generated roles/playbook Ansible manifest", ) - _add_common_manifest_args(r) + _add_common_manifest_args(m) - e = sub.add_parser( + s = sub.add_parser( "single-shot", help="Harvest state, then manifest Ansible code, in one shot" ) - e.add_argument( - "--harvest", required=True, help="Path to the directory to place the harvest in" + s.add_argument("--harvest", help="Path to the directory to place the harvest in") + s.add_argument( + "--dangerous", + action="store_true", + help="Collect files more aggressively (may include secrets). Disables secret-avoidance checks.", ) - e.add_argument( + s.add_argument( + "--no-sudo", + action="store_true", + help="Don't use sudo on the remote host (when using --remote options). 
This may result in a limited harvest due to permission restrictions.", + ) + s.add_argument( "--out", required=True, help="Output directory for generated roles/playbook Ansible manifest", ) - _add_common_manifest_args(e) + _add_common_manifest_args(s) + _add_remote_args(s) args = ap.parse_args() + remote_host: Optional[str] = getattr(args, "remote_host", None) + if args.cmd == "harvest": - path = harvest(args.out) - print(path) + if remote_host: + out_dir = ( + Path(args.out) + if args.out + else new_harvest_cache_dir(hint=remote_host).dir + ) + state = remote_harvest( + local_out_dir=out_dir, + remote_host=remote_host, + remote_port=int(args.remote_port), + remote_user=args.remote_user, + dangerous=bool(args.dangerous), + no_sudo=bool(args.no_sudo), + ) + print(str(state)) + else: + if not args.out: + raise SystemExit("error: --out is required unless --remote-host is set") + path = harvest(args.out, dangerous=bool(args.dangerous)) + print(path) elif args.cmd == "manifest": manifest(args.harvest, args.out, fqdn=args.fqdn, jinjaturtle=_jt_mode(args)) elif args.cmd == "single-shot": - harvest(args.harvest) - manifest(args.harvest, args.out, fqdn=args.fqdn, jinjaturtle=_jt_mode(args)) + if remote_host: + harvest_dir = ( + Path(args.harvest) + if args.harvest + else new_harvest_cache_dir(hint=remote_host).dir + ) + remote_harvest( + local_out_dir=harvest_dir, + remote_host=remote_host, + remote_port=int(args.remote_port), + remote_user=args.remote_user, + dangerous=bool(args.dangerous), + no_sudo=bool(args.no_sudo), + ) + manifest( + str(harvest_dir), args.out, fqdn=args.fqdn, jinjaturtle=_jt_mode(args) + ) + # For usability (when --harvest wasn't provided), print the harvest path. 
+ if not args.harvest: + print(str(harvest_dir / "state.json")) + else: + if not args.harvest: + raise SystemExit( + "error: --harvest is required unless --remote-host is set" + ) + harvest(args.harvest, dangerous=bool(args.dangerous)) + manifest(args.harvest, args.out, fqdn=args.fqdn, jinjaturtle=_jt_mode(args)) diff --git a/enroll/harvest.py b/enroll/harvest.py index 688a489..ef93903 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -199,7 +199,11 @@ def _maybe_add_specific_paths(hints: Set[str]) -> List[str]: def _scan_unowned_under_roots( - roots: List[str], owned_etc: Set[str], limit: int = MAX_UNOWNED_FILES_PER_ROLE + roots: List[str], + owned_etc: Set[str], + limit: int = MAX_UNOWNED_FILES_PER_ROLE, + *, + confish_only: bool = True, ) -> List[str]: found: List[str] = [] for root in roots: @@ -218,7 +222,7 @@ def _scan_unowned_under_roots( continue if not os.path.isfile(p) or os.path.islink(p): continue - if not _is_confish(p): + if confish_only and not _is_confish(p): continue found.append(p) return found @@ -233,8 +237,20 @@ def _topdirs_for_package(pkg: str, pkg_to_etc_paths: Dict[str, List[str]]) -> Se return topdirs -def harvest(bundle_dir: str, policy: Optional[IgnorePolicy] = None) -> str: - policy = policy or IgnorePolicy() +def harvest( + bundle_dir: str, + policy: Optional[IgnorePolicy] = None, + *, + dangerous: bool = False, +) -> str: + # If a policy is not supplied, build one. `--dangerous` relaxes secret + # detection and deny-glob skipping. + if policy is None: + policy = IgnorePolicy(dangerous=dangerous) + elif dangerous: + # If callers explicitly provided a policy but also requested + # dangerous behavior, honour the CLI intent. 
+ policy.dangerous = True os.makedirs(bundle_dir, exist_ok=True) if hasattr(os, "geteuid") and os.geteuid() != 0: @@ -338,10 +354,42 @@ def harvest(bundle_dir: str, policy: Optional[IgnorePolicy] = None) -> str: if current != baseline: candidates.setdefault(path, "modified_packaged_file") - roots: List[str] = [] + # Capture custom/unowned files living under /etc/ for this service. + # + # Historically we only captured "config-ish" files (by extension). That + # misses important runtime-generated artifacts like certificates and + # key material under service directories (e.g. /etc/openvpn/*.crt). + # + # To avoid exploding output for shared trees (e.g. /etc/systemd), keep + # the older "config-ish only" behavior for known shared topdirs. + any_roots: List[str] = [] + confish_roots: List[str] = [] for h in hints: - roots.extend([f"/etc/{h}", f"/etc/{h}.d"]) - for pth in _scan_unowned_under_roots(roots, owned_etc): + roots_for_h = [f"/etc/{h}", f"/etc/{h}.d"] + if h in SHARED_ETC_TOPDIRS: + confish_roots.extend(roots_for_h) + else: + any_roots.extend(roots_for_h) + + found: List[str] = [] + found.extend( + _scan_unowned_under_roots( + any_roots, + owned_etc, + limit=MAX_UNOWNED_FILES_PER_ROLE, + confish_only=False, + ) + ) + if len(found) < MAX_UNOWNED_FILES_PER_ROLE: + found.extend( + _scan_unowned_under_roots( + confish_roots, + owned_etc, + limit=MAX_UNOWNED_FILES_PER_ROLE - len(found), + confish_only=True, + ) + ) + for pth in found: candidates.setdefault(pth, "custom_unowned") if not pkgs and not candidates: @@ -449,8 +497,14 @@ def harvest(bundle_dir: str, policy: Optional[IgnorePolicy] = None) -> str: roots.extend([f"/etc/logrotate.d/{td}"]) roots.extend([f"/etc/sysctl.d/{td}.conf"]) + # Capture any custom/unowned files under /etc/ for this + # manually-installed package. This may include runtime-generated + # artifacts like certificates, key files, and helper scripts which are + # not owned by any .deb. 
for pth in _scan_unowned_under_roots( - [r for r in roots if os.path.isdir(r)], owned_etc + [r for r in roots if os.path.isdir(r)], + owned_etc, + confish_only=False, ): candidates.setdefault(pth, "custom_unowned") diff --git a/enroll/ignore.py b/enroll/ignore.py index 9a9ecf2..73d577d 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -38,9 +38,13 @@ BLOCK_END = b"*/" @dataclass class IgnorePolicy: - deny_globs: list[str] = None + deny_globs: Optional[list[str]] = None max_file_bytes: int = 256_000 sample_bytes: int = 64_000 + # If True, be much less conservative about collecting potentially + # sensitive files. This disables deny globs (e.g. /etc/shadow, + # /etc/ssl/private/*) and skips heuristic content scanning. + dangerous: bool = False def __post_init__(self) -> None: if self.deny_globs is None: @@ -69,9 +73,10 @@ class IgnorePolicy: yield raw def deny_reason(self, path: str) -> Optional[str]: - for g in self.deny_globs: - if fnmatch.fnmatch(path, g): - return "denied_path" + if not self.dangerous: + for g in self.deny_globs or []: + if fnmatch.fnmatch(path, g): + return "denied_path" try: st = os.stat(path, follow_symlinks=True) @@ -93,9 +98,10 @@ class IgnorePolicy: if b"\x00" in data: return "binary_like" - for line in self.iter_effective_lines(data): - for pat in SENSITIVE_CONTENT_PATTERNS: - if pat.search(line): - return "sensitive_content" + if not self.dangerous: + for line in self.iter_effective_lines(data): + for pat in SENSITIVE_CONTENT_PATTERNS: + if pat.search(line): + return "sensitive_content" return None diff --git a/enroll/jinjaturtle.py b/enroll/jinjaturtle.py index f894f04..03f4adf 100644 --- a/enroll/jinjaturtle.py +++ b/enroll/jinjaturtle.py @@ -81,25 +81,3 @@ def run_jinjaturtle( return JinjifyResult( template_text=template_text, vars_text=vars_text.rstrip() + "\n" ) - - -def replace_or_append_block( - base_text: str, - *, - begin: str, - end: str, - block_body: str, -) -> str: - """Replace a marked block if present; else append 
it.""" - pattern = re.compile( - re.escape(begin) + r".*?" + re.escape(end), - flags=re.DOTALL, - ) - new_block = f"{begin}\n{block_body.rstrip()}\n{end}" - if pattern.search(base_text): - return pattern.sub(new_block, base_text).rstrip() + "\n" - # ensure base ends with newline - bt = base_text.rstrip() + "\n" - if not bt.endswith("\n"): - bt += "\n" - return bt + "\n" + new_block + "\n" diff --git a/enroll/manifest.py b/enroll/manifest.py index 7565160..bc94c7b 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -3,6 +3,8 @@ from __future__ import annotations import json import os import shutil +import stat +import tempfile from pathlib import Path from typing import Any, Dict, List, Optional, Set, Tuple @@ -70,36 +72,6 @@ def _yaml_dump_mapping(obj: Dict[str, Any], *, sort_keys: bool = True) -> str: ) -def _merge_list_keep_order(existing: List[Any], new: List[Any]) -> List[Any]: - out = list(existing) - seen = set(existing) - for item in new: - if item not in seen: - out.append(item) - seen.add(item) - return out - - -def _merge_mappings_preserve( - existing: Dict[str, Any], incoming: Dict[str, Any] -) -> Dict[str, Any]: - """Merge incoming into existing: - - lists: union (preserve existing order) - - scalars/dicts: only set if missing (do not overwrite) - """ - merged = dict(existing) - for k, v in incoming.items(): - if k in merged: - if isinstance(merged[k], list) and isinstance(v, list): - merged[k] = _merge_list_keep_order(merged[k], v) - else: - # keep existing value (non-overwriting) - continue - else: - merged[k] = v - return merged - - def _merge_mappings_overwrite( existing: Dict[str, Any], incoming: Dict[str, Any] ) -> Dict[str, Any]: @@ -113,33 +85,6 @@ def _merge_mappings_overwrite( return merged -def _write_role_defaults_merge(role_dir: str, incoming: Dict[str, Any]) -> None: - """Write/merge role defaults without clobbering existing values. - Used in site mode to keep roles reusable across hosts. 
- """ - defaults_path = os.path.join(role_dir, "defaults", "main.yml") - existing: Dict[str, Any] = {} - if os.path.exists(defaults_path): - try: - existing_text = Path(defaults_path).read_text(encoding="utf-8") - existing = _yaml_load_mapping(existing_text) - except Exception: - existing = {} - merged = _merge_mappings_preserve(existing, incoming) - body = "---\n" + _yaml_dump_mapping(merged, sort_keys=True) - with open(defaults_path, "w", encoding="utf-8") as f: - f.write(body) - - -def _extract_jinjaturtle_block(text: str) -> str: - """Return YAML text inside JINJATURTLE_BEGIN/END markers, or the whole text if no markers.""" - if JINJATURTLE_BEGIN in text and JINJATURTLE_END in text: - start = text.split(JINJATURTLE_BEGIN, 1)[1] - inner = start.split(JINJATURTLE_END, 1)[0] - return inner.strip() + "\n" - return text.strip() + "\n" - - def _normalise_jinjaturtle_vars_text(vars_text: str) -> str: """Deduplicate keys in a vars fragment by parsing as YAML and dumping it back.""" m = _yaml_load_mapping(vars_text) @@ -158,6 +103,30 @@ def _yaml_list(items: List[str], indent: int = 2) -> str: return "\n".join(f"{pad}- {x}" for x in items) +def _copy2_replace(src: str, dst: str) -> None: + dst_dir = os.path.dirname(dst) + os.makedirs(dst_dir, exist_ok=True) + + # Copy to a temp file in the same directory, then atomically replace. + fd, tmp = tempfile.mkstemp(prefix=".enroll-tmp-", dir=dst_dir) + os.close(fd) + try: + shutil.copy2(src, tmp) + + # Ensure the working tree stays mergeable: make the file user-writable. 
import os
import shlex
import shutil
import tarfile
import tempfile
import zipapp
from pathlib import Path, PurePosixPath
from typing import List, Optional, Set, Tuple


def _safe_extract_tar(tar: tarfile.TarFile, dest: Path) -> None:
    """Safely extract a tar archive into *dest*.

    Every member is validated before anything is written, so a bad archive
    leaves *dest* untouched.

    Args:
        tar: An open archive; it is treated as untrusted input.
        dest: Directory to extract into.

    Raises:
        RuntimeError: if a member has an absolute path, contains a ``..``
            component, would resolve outside *dest*, or is a symlink, hard
            link or device node.
    """
    # Note: tar member names use POSIX separators regardless of platform.
    dest = dest.resolve()

    # Some tar implementations include a top-level '.' entry when created
    # with `tar -C <dir> .`. That's harmless, so drop it up front.
    members = [m for m in tar.getmembers() if m.name not in {".", "./"}]

    for m in members:
        name = m.name

        # Reject absolute paths and any '..' components up front.
        p = PurePosixPath(name)
        if p.is_absolute() or ".." in p.parts:
            raise RuntimeError(f"Unsafe tar member path: {name}")

        # Refuse to extract links or device nodes from an untrusted archive.
        # (A symlink can be used to redirect subsequent writes outside dest.)
        if m.issym() or m.islnk() or m.isdev():
            raise RuntimeError(f"Refusing to extract special tar member: {name}")

        # Compare path components rather than string prefixes.
        member_path = (dest / Path(*p.parts)).resolve()
        if member_path != dest and dest not in member_path.parents:
            raise RuntimeError(f"Unsafe tar member path: {name}")

    # Use the stdlib "data" extraction filter where the running Python
    # provides it: it is a second line of defence and avoids the
    # DeprecationWarning for filter-less extraction on Python 3.12+.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}

    # Extract members one-by-one after validation.
    for m in members:
        tar.extract(m, path=dest, **extract_kwargs)


def _build_enroll_pyz(tmpdir: Path) -> Path:
    """Build an enroll zipapp (``enroll.pyz``) on the local machine.

    The installed ``enroll`` package directory is copied (minus caches and
    ``.pyc`` files) into ``tmpdir/stage`` and archived with
    ``enroll.cli:main`` as the entry point.

    Args:
        tmpdir: Existing scratch directory for the staging tree and archive.

    Returns:
        Path of the created ``enroll.pyz`` inside *tmpdir*.
    """
    import enroll as pkg

    pkg_dir = Path(pkg.__file__).resolve().parent
    stage = tmpdir / "stage"
    (stage / "enroll").mkdir(parents=True, exist_ok=True)

    def _ignore(d: str, names: List[str]) -> Set[str]:
        # Skip bytecode and tool caches; they only bloat the upload.
        return {
            n
            for n in names
            if n in {"__pycache__", ".pytest_cache"} or n.endswith(".pyc")
        }

    shutil.copytree(pkg_dir, stage / "enroll", dirs_exist_ok=True, ignore=_ignore)

    pyz_path = tmpdir / "enroll.pyz"
    zipapp.create_archive(
        stage,
        target=pyz_path,
        main="enroll.cli:main",
        compressed=True,
    )
    return pyz_path


def _ssh_run(ssh, cmd: str) -> Tuple[int, str, str]:
    """Run *cmd* over a Paramiko SSHClient.

    Returns:
        ``(exit_status, stdout_text, stderr_text)``, decoded as UTF-8 with
        undecodable bytes replaced.
    """
    # NOTE(review): stdout is drained fully before stderr; presumably fine for
    # the short commands used here, but a command writing a very large amount
    # to stderr could stall -- confirm if noisier commands are added.
    _stdin, stdout, stderr = ssh.exec_command(cmd)
    out = stdout.read().decode("utf-8", errors="replace")
    err = stderr.read().decode("utf-8", errors="replace")
    rc = stdout.channel.recv_exit_status()
    return rc, out, err


def _ssh_run_checked(ssh, cmd: str, failure: str) -> str:
    """Run *cmd* via ``_ssh_run``; return stdout, or raise on non-zero exit."""
    rc, out, err = _ssh_run(ssh, cmd)
    if rc != 0:
        raise RuntimeError(
            f"{failure}\n"
            f"Command: {cmd}\n"
            f"Exit code: {rc}\n"
            f"Stderr: {err.strip()}"
        )
    return out


def remote_harvest(
    *,
    local_out_dir: Path,
    remote_host: str,
    remote_port: int = 22,
    remote_user: Optional[str] = None,
    remote_python: str = "python3",
    dangerous: bool = False,
    no_sudo: bool = False,
) -> Path:
    """Run enroll harvest on a remote host via SSH and pull the bundle locally.

    A zipapp of enroll is uploaded to a remote temp directory, executed there
    (under ``sudo`` unless *no_sudo*), and the resulting bundle is tarred,
    downloaded and safely extracted into *local_out_dir*. The remote temp
    directory is removed afterwards, whether or not the harvest succeeded.

    Args:
        local_out_dir: Local directory that receives the bundle (created if
            missing, chmod 0700 best-effort).
        remote_host: SSH host name.
        remote_port: SSH port.
        remote_user: SSH user name; ``None`` lets Paramiko choose.
        remote_python: Python interpreter command on the remote host.
        dangerous: Pass ``--dangerous`` to the remote harvest.
        no_sudo: Run the remote harvest without ``sudo``.

    Returns:
        The local path to ``state.json`` inside *local_out_dir*.

    Raises:
        RuntimeError: if Paramiko is missing or any remote step fails.
    """

    try:
        import paramiko  # type: ignore
    except Exception as e:
        raise RuntimeError(
            "Remote harvesting requires the 'paramiko' package. "
            "Install it with: pip install paramiko"
        ) from e

    local_out_dir = Path(local_out_dir)
    local_out_dir.mkdir(parents=True, exist_ok=True)
    try:
        os.chmod(local_out_dir, 0o700)
    except OSError:
        # Best-effort; the harvest is still usable if chmod is unsupported.
        pass

    q = shlex.quote
    sudo = "" if no_sudo else "sudo "

    # Build a zipapp locally and upload it to the remote.
    with tempfile.TemporaryDirectory(prefix="enroll-remote-") as td:
        td_path = Path(td)
        pyz = _build_enroll_pyz(td_path)
        local_tgz = td_path / "bundle.tgz"

        ssh = paramiko.SSHClient()
        ssh.load_system_host_keys()
        # Default: refuse unknown host keys.
        # Users should add the key to known_hosts.
        ssh.set_missing_host_key_policy(paramiko.RejectPolicy())

        sftp = None
        rtmp = ""  # set only once a sane remote temp dir is known
        try:
            ssh.connect(
                hostname=remote_host,
                port=int(remote_port),
                username=remote_user,
                allow_agent=True,
                look_for_keys=True,
            )
            sftp = ssh.open_sftp()

            rc, out, err = _ssh_run(ssh, "mktemp -d")
            if rc != 0:
                raise RuntimeError(f"Remote mktemp failed: {err.strip()}")
            candidate = out.strip()
            # The path is later handed to `rm -rf`, so insist on a
            # single-line absolute path that is not the filesystem root.
            if (
                not candidate.startswith("/")
                or not candidate.strip("/")
                or "\n" in candidate
            ):
                raise RuntimeError(
                    f"Remote mktemp returned an unexpected path: {candidate!r}"
                )
            rtmp = candidate
            rapp = f"{rtmp}/enroll.pyz"
            rbundle = f"{rtmp}/bundle"
            rtgz = f"{rtmp}/bundle.tgz"

            sftp.put(str(pyz), rapp)

            # Run remote harvest. Every interpolated value is shell-quoted;
            # remote_python is split first so "python3 -X opt" style values
            # keep working.
            python_argv = " ".join(q(a) for a in shlex.split(remote_python))
            cmd = f"{sudo}{python_argv} {q(rapp)} harvest --out {q(rbundle)}"
            if dangerous:
                cmd += " --dangerous"
            _ssh_run_checked(ssh, cmd, "Remote harvest failed.")

            if not no_sudo:
                # Ensure the login user can read the files before we tar
                # them. remote_user may be None, so ask the remote side who
                # we are instead of running `chown None`.
                owner = remote_user
                if not owner:
                    owner = _ssh_run_checked(
                        ssh, "id -un", "Could not determine remote user."
                    ).strip()
                _ssh_run_checked(
                    ssh,
                    f"sudo chown -R {q(owner)} {q(rbundle)}",
                    "chown of harvest failed.",
                )

            # Tar the bundle for efficient download.
            _ssh_run_checked(
                ssh,
                f"tar -czf {q(rtgz)} -C {q(rbundle)} .",
                "Remote tar failed.",
            )

            sftp.get(rtgz, str(local_tgz))

            # Extract into the destination.
            with tarfile.open(local_tgz, mode="r:gz") as tf:
                _safe_extract_tar(tf, local_out_dir)
        finally:
            # Always clean up the remote tmpdir: after a failed run it may
            # hold root-owned, possibly sensitive harvest data. Cleanup is
            # best-effort and must not mask the primary exception.
            if rtmp:
                try:
                    _ssh_run(ssh, f"{sudo}rm -rf -- {q(rtmp)}")
                except Exception:
                    pass
            if sftp is not None:
                try:
                    sftp.close()
                except Exception:
                    pass
            ssh.close()

    return local_out_dir / "state.json"
+ +[[package]] +name = "bcrypt" +version = "5.0.0" +description = "Modern password hashing for your software and your servers" +optional = false +python-versions = ">=3.8" +files = [ + {file = "bcrypt-5.0.0-cp313-cp313t-macosx_10_12_universal2.whl", hash = "sha256:f3c08197f3039bec79cee59a606d62b96b16669cff3949f21e74796b6e3cd2be"}, + {file = "bcrypt-5.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:200af71bc25f22006f4069060c88ed36f8aa4ff7f53e67ff04d2ab3f1e79a5b2"}, + {file = "bcrypt-5.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:baade0a5657654c2984468efb7d6c110db87ea63ef5a4b54732e7e337253e44f"}, + {file = "bcrypt-5.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c58b56cdfb03202b3bcc9fd8daee8e8e9b6d7e3163aa97c631dfcfcc24d36c86"}, + {file = "bcrypt-5.0.0-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4bfd2a34de661f34d0bda43c3e4e79df586e4716ef401fe31ea39d69d581ef23"}, + {file = "bcrypt-5.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ed2e1365e31fc73f1825fa830f1c8f8917ca1b3ca6185773b349c20fd606cec2"}, + {file = "bcrypt-5.0.0-cp313-cp313t-manylinux_2_34_aarch64.whl", hash = "sha256:83e787d7a84dbbfba6f250dd7a5efd689e935f03dd83b0f919d39349e1f23f83"}, + {file = "bcrypt-5.0.0-cp313-cp313t-manylinux_2_34_x86_64.whl", hash = "sha256:137c5156524328a24b9fac1cb5db0ba618bc97d11970b39184c1d87dc4bf1746"}, + {file = "bcrypt-5.0.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:38cac74101777a6a7d3b3e3cfefa57089b5ada650dce2baf0cbdd9d65db22a9e"}, + {file = "bcrypt-5.0.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:d8d65b564ec849643d9f7ea05c6d9f0cd7ca23bdd4ac0c2dbef1104ab504543d"}, + {file = "bcrypt-5.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:741449132f64b3524e95cd30e5cd3343006ce146088f074f31ab26b94e6c75ba"}, + {file = "bcrypt-5.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:212139484ab3207b1f0c00633d3be92fef3c5f0af17cad155679d03ff2ee1e41"}, + {file = "bcrypt-5.0.0-cp313-cp313t-win32.whl", hash = "sha256:9d52ed507c2488eddd6a95bccee4e808d3234fa78dd370e24bac65a21212b861"}, + {file = "bcrypt-5.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f6984a24db30548fd39a44360532898c33528b74aedf81c26cf29c51ee47057e"}, + {file = "bcrypt-5.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:9fffdb387abe6aa775af36ef16f55e318dcda4194ddbf82007a6f21da29de8f5"}, + {file = "bcrypt-5.0.0-cp314-cp314t-macosx_10_12_universal2.whl", hash = "sha256:4870a52610537037adb382444fefd3706d96d663ac44cbb2f37e3919dca3d7ef"}, + {file = "bcrypt-5.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48f753100931605686f74e27a7b49238122aa761a9aefe9373265b8b7aa43ea4"}, + {file = "bcrypt-5.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f70aadb7a809305226daedf75d90379c397b094755a710d7014b8b117df1ebbf"}, + {file = "bcrypt-5.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:744d3c6b164caa658adcb72cb8cc9ad9b4b75c7db507ab4bc2480474a51989da"}, + {file = "bcrypt-5.0.0-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a28bc05039bdf3289d757f49d616ab3efe8cf40d8e8001ccdd621cd4f98f4fc9"}, + {file = "bcrypt-5.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7f277a4b3390ab4bebe597800a90da0edae882c6196d3038a73adf446c4f969f"}, + {file = "bcrypt-5.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:79cfa161eda8d2ddf29acad370356b47f02387153b11d46042e93a0a95127493"}, + {file = "bcrypt-5.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a5393eae5722bcef046a990b84dff02b954904c36a194f6cfc817d7dca6c6f0b"}, + {file = "bcrypt-5.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7f4c94dec1b5ab5d522750cb059bb9409ea8872d4494fd152b53cca99f1ddd8c"}, + {file = "bcrypt-5.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:0cae4cb350934dfd74c020525eeae0a5f79257e8a201c0c176f4b84fdbf2a4b4"}, + {file = "bcrypt-5.0.0-cp314-cp314t-win32.whl", hash = "sha256:b17366316c654e1ad0306a6858e189fc835eca39f7eb2cafd6aaca8ce0c40a2e"}, + {file = "bcrypt-5.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:92864f54fb48b4c718fc92a32825d0e42265a627f956bc0361fe869f1adc3e7d"}, + {file = "bcrypt-5.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dd19cf5184a90c873009244586396a6a884d591a5323f0e8a5922560718d4993"}, + {file = "bcrypt-5.0.0-cp38-abi3-macosx_10_12_universal2.whl", hash = "sha256:fc746432b951e92b58317af8e0ca746efe93e66555f1b40888865ef5bf56446b"}, + {file = "bcrypt-5.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c2388ca94ffee269b6038d48747f4ce8df0ffbea43f31abfa18ac72f0218effb"}, + {file = "bcrypt-5.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:560ddb6ec730386e7b3b26b8b4c88197aaed924430e7b74666a586ac997249ef"}, + {file = "bcrypt-5.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d79e5c65dcc9af213594d6f7f1fa2c98ad3fc10431e7aa53c176b441943efbdd"}, + {file = "bcrypt-5.0.0-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2b732e7d388fa22d48920baa267ba5d97cca38070b69c0e2d37087b381c681fd"}, + {file = "bcrypt-5.0.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0c8e093ea2532601a6f686edbc2c6b2ec24131ff5c52f7610dd64fa4553b5464"}, + {file = "bcrypt-5.0.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5b1589f4839a0899c146e8892efe320c0fa096568abd9b95593efac50a87cb75"}, + {file = "bcrypt-5.0.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:89042e61b5e808b67daf24a434d89bab164d4de1746b37a8d173b6b14f3db9ff"}, + {file = "bcrypt-5.0.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:e3cf5b2560c7b5a142286f69bde914494b6d8f901aaa71e453078388a50881c4"}, + {file = "bcrypt-5.0.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f632fd56fc4e61564f78b46a2269153122db34988e78b6be8b32d28507b7eaeb"}, 
+ {file = "bcrypt-5.0.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:801cad5ccb6b87d1b430f183269b94c24f248dddbbc5c1f78b6ed231743e001c"}, + {file = "bcrypt-5.0.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3cf67a804fc66fc217e6914a5635000259fbbbb12e78a99488e4d5ba445a71eb"}, + {file = "bcrypt-5.0.0-cp38-abi3-win32.whl", hash = "sha256:3abeb543874b2c0524ff40c57a4e14e5d3a66ff33fb423529c88f180fd756538"}, + {file = "bcrypt-5.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:35a77ec55b541e5e583eb3436ffbbf53b0ffa1fa16ca6782279daf95d146dcd9"}, + {file = "bcrypt-5.0.0-cp38-abi3-win_arm64.whl", hash = "sha256:cde08734f12c6a4e28dc6755cd11d3bdfea608d93d958fffbe95a7026ebe4980"}, + {file = "bcrypt-5.0.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:0c418ca99fd47e9c59a301744d63328f17798b5947b0f791e9af3c1c499c2d0a"}, + {file = "bcrypt-5.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddb4e1500f6efdd402218ffe34d040a1196c072e07929b9820f363a1fd1f4191"}, + {file = "bcrypt-5.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7aeef54b60ceddb6f30ee3db090351ecf0d40ec6e2abf41430997407a46d2254"}, + {file = "bcrypt-5.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f0ce778135f60799d89c9693b9b398819d15f1921ba15fe719acb3178215a7db"}, + {file = "bcrypt-5.0.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a71f70ee269671460b37a449f5ff26982a6f2ba493b3eabdd687b4bf35f875ac"}, + {file = "bcrypt-5.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f8429e1c410b4073944f03bd778a9e066e7fad723564a52ff91841d278dfc822"}, + {file = "bcrypt-5.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:edfcdcedd0d0f05850c52ba3127b1fce70b9f89e0fe5ff16517df7e81fa3cbb8"}, + {file = "bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:611f0a17aa4a25a69362dcc299fda5c8a3d4f160e2abb3831041feb77393a14a"}, + {file = "bcrypt-5.0.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = 
"sha256:db99dca3b1fdc3db87d7c57eac0c82281242d1eabf19dcb8a6b10eb29a2e72d1"}, + {file = "bcrypt-5.0.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:5feebf85a9cefda32966d8171f5db7e3ba964b77fdfe31919622256f80f9cf42"}, + {file = "bcrypt-5.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3ca8a166b1140436e058298a34d88032ab62f15aae1c598580333dc21d27ef10"}, + {file = "bcrypt-5.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:61afc381250c3182d9078551e3ac3a41da14154fbff647ddf52a769f588c4172"}, + {file = "bcrypt-5.0.0-cp39-abi3-win32.whl", hash = "sha256:64d7ce196203e468c457c37ec22390f1a61c85c6f0b8160fd752940ccfb3a683"}, + {file = "bcrypt-5.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:64ee8434b0da054d830fa8e89e1c8bf30061d539044a39524ff7dec90481e5c2"}, + {file = "bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927"}, + {file = "bcrypt-5.0.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:7edda91d5ab52b15636d9c30da87d2cc84f426c72b9dba7a9b4fe142ba11f534"}, + {file = "bcrypt-5.0.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:046ad6db88edb3c5ece4369af997938fb1c19d6a699b9c1b27b0db432faae4c4"}, + {file = "bcrypt-5.0.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:dcd58e2b3a908b5ecc9b9df2f0085592506ac2d5110786018ee5e160f28e0911"}, + {file = "bcrypt-5.0.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:6b8f520b61e8781efee73cba14e3e8c9556ccfb375623f4f97429544734545b4"}, + {file = "bcrypt-5.0.0.tar.gz", hash = "sha256:f748f7c2d6fd375cc93d3fba7ef4a9e3a092421b8dbf34d8d4dc06be9492dfdd"}, +] + +[package.extras] +tests = ["pytest (>=3.2.1,!=3.3.0)"] +typecheck = ["mypy"] [[package]] name = "certifi" version = "2025.11.12" description = "Python package for providing Mozilla's CA Bundle." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -12,11 +87,106 @@ files = [ {file = "certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316"}, ] +[[package]] +name = "cffi" +version = "2.0.0" +description = "Foreign Function Interface for Python calling C code." +optional = false +python-versions = ">=3.9" +files = [ + {file = "cffi-2.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44"}, + {file = "cffi-2.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f73b96c41e3b2adedc34a7356e64c8eb96e03a3782b535e043a986276ce12a49"}, + {file = "cffi-2.0.0-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:53f77cbe57044e88bbd5ed26ac1d0514d2acf0591dd6bb02a3ae37f76811b80c"}, + {file = "cffi-2.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3e837e369566884707ddaf85fc1744b47575005c0a229de3327f8f9a20f4efeb"}, + {file = "cffi-2.0.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:5eda85d6d1879e692d546a078b44251cdd08dd1cfb98dfb77b670c97cee49ea0"}, + {file = "cffi-2.0.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9332088d75dc3241c702d852d4671613136d90fa6881da7d770a483fd05248b4"}, + {file = "cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453"}, + {file = "cffi-2.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cf364028c016c03078a23b503f02058f1814320a56ad535686f90565636a9495"}, + {file = "cffi-2.0.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e11e82b744887154b182fd3e7e8512418446501191994dbf9c9fc1f32cc8efd5"}, + {file = "cffi-2.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8ea985900c5c95ce9db1745f7933eeef5d314f0565b27625d9a10ec9881e1bfb"}, + {file = 
"cffi-2.0.0-cp310-cp310-win32.whl", hash = "sha256:1f72fb8906754ac8a2cc3f9f5aaa298070652a0ffae577e0ea9bd480dc3c931a"}, + {file = "cffi-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:b18a3ed7d5b3bd8d9ef7a8cb226502c6bf8308df1525e1cc676c3680e7176739"}, + {file = "cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe"}, + {file = "cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664"}, + {file = "cffi-2.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26"}, + {file = "cffi-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9"}, + {file = "cffi-2.0.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414"}, + {file = "cffi-2.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743"}, + {file = "cffi-2.0.0-cp311-cp311-win32.whl", hash = "sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5"}, + {file = 
"cffi-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5"}, + {file = "cffi-2.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d"}, + {file = "cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d"}, + {file = "cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037"}, + {file = "cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba"}, + {file = "cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94"}, + {file = "cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187"}, + {file = "cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18"}, + {file = "cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5"}, + {file = 
"cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6"}, + {file = "cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb"}, + {file = "cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3"}, + {file = "cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26"}, + {file = "cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c"}, + {file = "cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b"}, + {file = "cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27"}, + {file = "cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75"}, + {file = "cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91"}, + {file = 
"cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5"}, + {file = "cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef"}, + {file = "cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775"}, + {file = "cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205"}, + {file = "cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1"}, + {file = "cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f"}, + {file = "cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25"}, + {file = "cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad"}, + {file = "cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9"}, + {file = "cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", 
hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc"}, + {file = "cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592"}, + {file = "cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512"}, + {file = "cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4"}, + {file = "cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e"}, + {file = "cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6"}, + {file = "cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9"}, + {file = "cffi-2.0.0-cp39-cp39-macosx_10_13_x86_64.whl", hash = "sha256:fe562eb1a64e67dd297ccc4f5addea2501664954f2692b69a76449ec7913ecbf"}, + {file = "cffi-2.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:de8dad4425a6ca6e4e5e297b27b5c824ecc7581910bf9aee86cb6835e6812aa7"}, + {file = "cffi-2.0.0-cp39-cp39-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:4647afc2f90d1ddd33441e5b0e85b16b12ddec4fca55f0d9671fef036ecca27c"}, + {file = "cffi-2.0.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3f4d46d8b35698056ec29bca21546e1551a205058ae1a181d871e278b0b28165"}, + {file = "cffi-2.0.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", 
hash = "sha256:e6e73b9e02893c764e7e8d5bb5ce277f1a009cd5243f8228f75f842bf937c534"}, + {file = "cffi-2.0.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:cb527a79772e5ef98fb1d700678fe031e353e765d1ca2d409c92263c6d43e09f"}, + {file = "cffi-2.0.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:61d028e90346df14fedc3d1e5441df818d095f3b87d286825dfcbd6459b7ef63"}, + {file = "cffi-2.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0f6084a0ea23d05d20c3edcda20c3d006f9b6f3fefeac38f59262e10cef47ee2"}, + {file = "cffi-2.0.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1cd13c99ce269b3ed80b417dcd591415d3372bcac067009b6e0f59c7d4015e65"}, + {file = "cffi-2.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:89472c9762729b5ae1ad974b777416bfda4ac5642423fa93bd57a09204712322"}, + {file = "cffi-2.0.0-cp39-cp39-win32.whl", hash = "sha256:2081580ebb843f759b9f617314a24ed5738c51d2aee65d31e02f6f7a2b97707a"}, + {file = "cffi-2.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:b882b3df248017dba09d6b16defe9b5c407fe32fc7c65a9c69798e6175601be9"}, + {file = "cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529"}, +] + +[package.dependencies] +pycparser = {version = "*", markers = "implementation_name != \"PyPy\""} + [[package]] name = "charset-normalizer" version = "3.4.4" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -139,7 +309,6 @@ files = [ name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
-category = "dev" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -151,7 +320,6 @@ files = [ name = "coverage" version = "7.13.0" description = "Code coverage measurement for Python" -category = "dev" optional = false python-versions = ">=3.10" files = [ @@ -255,11 +423,87 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] +[[package]] +name = "cryptography" +version = "46.0.3" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +optional = false +python-versions = "!=3.9.0,!=3.9.1,>=3.8" +files = [ + {file = "cryptography-46.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:109d4ddfadf17e8e7779c39f9b18111a09efb969a301a31e987416a0191ed93a"}, + {file = "cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc"}, + {file = "cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d"}, + {file = "cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb"}, + {file = "cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849"}, + {file = "cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8"}, + {file = "cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec"}, + {file = "cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91"}, + 
{file = "cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e"}, + {file = "cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926"}, + {file = "cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71"}, + {file = "cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac"}, + {file = "cryptography-46.0.3-cp311-abi3-win32.whl", hash = "sha256:f260d0d41e9b4da1ed1e0f1ce571f97fe370b152ab18778e9e8f67d6af432018"}, + {file = "cryptography-46.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:a9a3008438615669153eb86b26b61e09993921ebdd75385ddd748702c5adfddb"}, + {file = "cryptography-46.0.3-cp311-abi3-win_arm64.whl", hash = "sha256:5d7f93296ee28f68447397bf5198428c9aeeab45705a55d53a6343455dcb2c3c"}, + {file = "cryptography-46.0.3-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:00a5e7e87938e5ff9ff5447ab086a5706a957137e6e433841e9d24f38a065217"}, + {file = "cryptography-46.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c8daeb2d2174beb4575b77482320303f3d39b8e81153da4f0fb08eb5fe86a6c5"}, + {file = "cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715"}, + {file = "cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54"}, + {file = "cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459"}, + {file = "cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = 
"sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422"}, + {file = "cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7"}, + {file = "cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044"}, + {file = "cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665"}, + {file = "cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3"}, + {file = "cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20"}, + {file = "cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de"}, + {file = "cryptography-46.0.3-cp314-cp314t-win32.whl", hash = "sha256:8a6e050cb6164d3f830453754094c086ff2d0b2f3a897a1d9820f6139a1f0914"}, + {file = "cryptography-46.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:760f83faa07f8b64e9c33fc963d790a2edb24efb479e3520c14a45741cd9b2db"}, + {file = "cryptography-46.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:516ea134e703e9fe26bcd1277a4b59ad30586ea90c365a87781d7887a646fe21"}, + {file = "cryptography-46.0.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:cb3d760a6117f621261d662bccc8ef5bc32ca673e037c83fbe565324f5c46936"}, + {file = "cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683"}, + {file = "cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d"}, + {file = 
"cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0"}, + {file = "cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc"}, + {file = "cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3"}, + {file = "cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971"}, + {file = "cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac"}, + {file = "cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04"}, + {file = "cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506"}, + {file = "cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963"}, + {file = "cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4"}, + {file = "cryptography-46.0.3-cp38-abi3-win32.whl", hash = "sha256:6276eb85ef938dc035d59b87c8a7dc559a232f954962520137529d77b18ff1df"}, + {file = "cryptography-46.0.3-cp38-abi3-win_amd64.whl", hash = "sha256:416260257577718c05135c55958b674000baef9a1c7d9e8f306ec60d71db850f"}, + {file = "cryptography-46.0.3-cp38-abi3-win_arm64.whl", hash = "sha256:d89c3468de4cdc4f08a57e214384d0471911a3830fcdaf7a8cc587e42a866372"}, + {file = "cryptography-46.0.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a23582810fedb8c0bc47524558fb6c56aac3fc252cb306072fd2815da2a47c32"}, + {file = 
"cryptography-46.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e7aec276d68421f9574040c26e2a7c3771060bc0cff408bae1dcb19d3ab1e63c"}, + {file = "cryptography-46.0.3-pp311-pypy311_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7ce938a99998ed3c8aa7e7272dca1a610401ede816d36d0693907d863b10d9ea"}, + {file = "cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:191bb60a7be5e6f54e30ba16fdfae78ad3a342a0599eb4193ba88e3f3d6e185b"}, + {file = "cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c70cc23f12726be8f8bc72e41d5065d77e4515efae3690326764ea1b07845cfb"}, + {file = "cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:9394673a9f4de09e28b5356e7fff97d778f8abad85c9d5ac4a4b7e25a0de7717"}, + {file = "cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:94cd0549accc38d1494e1f8de71eca837d0509d0d44bf11d158524b0e12cebf9"}, + {file = "cryptography-46.0.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6b5063083824e5509fdba180721d55909ffacccc8adbec85268b48439423d78c"}, + {file = "cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1"}, +] + +[package.dependencies] +cffi = {version = ">=2.0.0", markers = "python_full_version >= \"3.9\" and platform_python_implementation != \"PyPy\""} +typing-extensions = {version = ">=4.13.2", markers = "python_full_version < \"3.11\""} + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-inline-tabs", "sphinx-rtd-theme (>=3.0.0)"] +docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"] +nox = ["nox[uv] (>=2024.4.15)"] +pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.14)", "ruff (>=0.11.11)"] +sdist = ["build (>=1.0.0)"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["certifi (>=2024)", "cryptography-vectors (==46.0.3)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist 
(>=3.5.0)"] +test-randomorder = ["pytest-randomly"] + [[package]] name = "desktop-entry-lib" version = "5.0" description = "A library for working with .desktop files" -category = "dev" optional = false python-versions = ">=3.10" files = [ @@ -274,7 +518,6 @@ xdg-desktop-portal = ["jeepney"] name = "exceptiongroup" version = "1.3.1" description = "Backport of PEP 654 (exception groups)" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -292,7 +535,6 @@ test = ["pytest (>=6)"] name = "idna" version = "3.11" description = "Internationalized Domain Names in Applications (IDNA)" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -307,7 +549,6 @@ all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2 name = "iniconfig" version = "2.3.0" description = "brain-dead simple config-ini parsing" -category = "dev" optional = false python-versions = ">=3.10" files = [ @@ -315,11 +556,21 @@ files = [ {file = "iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730"}, ] +[[package]] +name = "invoke" +version = "2.2.1" +description = "Pythonic task execution" +optional = false +python-versions = ">=3.6" +files = [ + {file = "invoke-2.2.1-py3-none-any.whl", hash = "sha256:2413bc441b376e5cd3f55bb5d364f973ad8bdd7bf87e53c79de3c11bf3feecc8"}, + {file = "invoke-2.2.1.tar.gz", hash = "sha256:515bf49b4a48932b79b024590348da22f39c4942dff991ad1fb8b8baea1be707"}, +] + [[package]] name = "packaging" version = "25.0" description = "Core utilities for Python packages" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -327,11 +578,30 @@ files = [ {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, ] +[[package]] +name = "paramiko" +version = "4.0.0" +description = "SSH2 protocol library" +optional = false +python-versions = ">=3.9" +files = [ + {file = "paramiko-4.0.0-py3-none-any.whl", hash 
= "sha256:0e20e00ac666503bf0b4eda3b6d833465a2b7aff2e2b3d79a8bba5ef144ee3b9"}, + {file = "paramiko-4.0.0.tar.gz", hash = "sha256:6a25f07b380cc9c9a88d2b920ad37167ac4667f8d9886ccebd8f90f654b5d69f"}, +] + +[package.dependencies] +bcrypt = ">=3.2" +cryptography = ">=3.3" +invoke = ">=2.0" +pynacl = ">=1.5" + +[package.extras] +gssapi = ["gssapi (>=1.4.1)", "pyasn1 (>=0.1.7)", "pywin32 (>=2.1.8)"] + [[package]] name = "pluggy" version = "1.6.0" description = "plugin and hook calling mechanisms for python" -category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -343,11 +613,21 @@ files = [ dev = ["pre-commit", "tox"] testing = ["coverage", "pytest", "pytest-benchmark"] +[[package]] +name = "pycparser" +version = "2.23" +description = "C parser in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934"}, + {file = "pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2"}, +] + [[package]] name = "pygments" version = "2.19.2" description = "Pygments is a syntax highlighting package written in Python." 
-category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -358,11 +638,53 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] +[[package]] +name = "pynacl" +version = "1.6.1" +description = "Python binding to the Networking and Cryptography (NaCl) library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pynacl-1.6.1-cp314-cp314t-macosx_10_10_universal2.whl", hash = "sha256:7d7c09749450c385301a3c20dca967a525152ae4608c0a096fe8464bfc3df93d"}, + {file = "pynacl-1.6.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc734c1696ffd49b40f7c1779c89ba908157c57345cf626be2e0719488a076d3"}, + {file = "pynacl-1.6.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3cd787ec1f5c155dc8ecf39b1333cfef41415dc96d392f1ce288b4fe970df489"}, + {file = "pynacl-1.6.1-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b35d93ab2df03ecb3aa506be0d3c73609a51449ae0855c2e89c7ed44abde40b"}, + {file = "pynacl-1.6.1-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dece79aecbb8f4640a1adbb81e4aa3bfb0e98e99834884a80eb3f33c7c30e708"}, + {file = "pynacl-1.6.1-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:c2228054f04bf32d558fb89bb99f163a8197d5a9bf4efa13069a7fa8d4b93fc3"}, + {file = "pynacl-1.6.1-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:2b12f1b97346f177affcdfdc78875ff42637cb40dcf79484a97dae3448083a78"}, + {file = "pynacl-1.6.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e735c3a1bdfde3834503baf1a6d74d4a143920281cb724ba29fb84c9f49b9c48"}, + {file = "pynacl-1.6.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3384a454adf5d716a9fadcb5eb2e3e72cd49302d1374a60edc531c9957a9b014"}, + {file = "pynacl-1.6.1-cp314-cp314t-win32.whl", hash = "sha256:d8615ee34d01c8e0ab3f302dcdd7b32e2bcf698ba5f4809e7cc407c8cdea7717"}, + {file = "pynacl-1.6.1-cp314-cp314t-win_amd64.whl", hash = 
"sha256:5f5b35c1a266f8a9ad22525049280a600b19edd1f785bccd01ae838437dcf935"}, + {file = "pynacl-1.6.1-cp314-cp314t-win_arm64.whl", hash = "sha256:d984c91fe3494793b2a1fb1e91429539c6c28e9ec8209d26d25041ec599ccf63"}, + {file = "pynacl-1.6.1-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:a6f9fd6d6639b1e81115c7f8ff16b8dedba1e8098d2756275d63d208b0e32021"}, + {file = "pynacl-1.6.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e49a3f3d0da9f79c1bec2aa013261ab9fa651c7da045d376bd306cf7c1792993"}, + {file = "pynacl-1.6.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7713f8977b5d25f54a811ec9efa2738ac592e846dd6e8a4d3f7578346a841078"}, + {file = "pynacl-1.6.1-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a3becafc1ee2e5ea7f9abc642f56b82dcf5be69b961e782a96ea52b55d8a9fc"}, + {file = "pynacl-1.6.1-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4ce50d19f1566c391fedc8dc2f2f5be265ae214112ebe55315e41d1f36a7f0a9"}, + {file = "pynacl-1.6.1-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:543f869140f67d42b9b8d47f922552d7a967e6c116aad028c9bfc5f3f3b3a7b7"}, + {file = "pynacl-1.6.1-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a2bb472458c7ca959aeeff8401b8efef329b0fc44a89d3775cffe8fad3398ad8"}, + {file = "pynacl-1.6.1-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3206fa98737fdc66d59b8782cecc3d37d30aeec4593d1c8c145825a345bba0f0"}, + {file = "pynacl-1.6.1-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:53543b4f3d8acb344f75fd4d49f75e6572fce139f4bfb4815a9282296ff9f4c0"}, + {file = "pynacl-1.6.1-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:319de653ef84c4f04e045eb250e6101d23132372b0a61a7acf91bac0fda8e58c"}, + {file = "pynacl-1.6.1-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:262a8de6bba4aee8a66f5edf62c214b06647461c9b6b641f8cd0cb1e3b3196fe"}, + {file = "pynacl-1.6.1-cp38-abi3-win32.whl", hash = 
"sha256:9fd1a4eb03caf8a2fe27b515a998d26923adb9ddb68db78e35ca2875a3830dde"}, + {file = "pynacl-1.6.1-cp38-abi3-win_amd64.whl", hash = "sha256:a569a4069a7855f963940040f35e87d8bc084cb2d6347428d5ad20550a0a1a21"}, + {file = "pynacl-1.6.1-cp38-abi3-win_arm64.whl", hash = "sha256:5953e8b8cfadb10889a6e7bd0f53041a745d1b3d30111386a1bb37af171e6daf"}, + {file = "pynacl-1.6.1.tar.gz", hash = "sha256:8d361dac0309f2b6ad33b349a56cd163c98430d409fa503b10b70b3ad66eaa1d"}, +] + +[package.dependencies] +cffi = {version = ">=2.0.0", markers = "platform_python_implementation != \"PyPy\" and python_version >= \"3.9\""} + +[package.extras] +docs = ["sphinx (<7)", "sphinx_rtd_theme"] +tests = ["hypothesis (>=3.27.0)", "pytest (>=7.4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] + [[package]] name = "pyproject-appimage" version = "4.2" description = "Generate AppImages from your Python projects" -category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -379,7 +701,6 @@ tomli = {version = "*", markers = "python_version < \"3.11\""} name = "pytest" version = "8.4.2" description = "pytest: simple powerful testing with Python" -category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -403,7 +724,6 @@ dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests name = "pytest-cov" version = "5.0.0" description = "Pytest plugin for measuring coverage." -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -422,7 +742,6 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] name = "pyyaml" version = "6.0.3" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -505,7 +824,6 @@ files = [ name = "requests" version = "2.32.5" description = "Python HTTP for Humans." 
-category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -527,7 +845,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "tomli" version = "2.3.0" description = "A lil' TOML parser" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -579,7 +896,6 @@ files = [ name = "typing-extensions" version = "4.15.0" description = "Backported and Experimental Type Hints for Python 3.9+" -category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -591,7 +907,6 @@ files = [ name = "urllib3" version = "2.6.2" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -608,4 +923,4 @@ zstd = ["backports-zstd (>=1.0.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "10c279bd393cab27a94b4848c6f88f3a7a3d1af5062882c3e6fd2c2e15c945c8" +content-hash = "c3466a6595a9822763431a6dff0c7f835407a2591b92d5995592f8e6802c774a" diff --git a/pyproject.toml b/pyproject.toml index 444def3..9b16d36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ repository = "https://git.mig5.net/mig5/enroll" [tool.poetry.dependencies] python = "^3.10" pyyaml = "^6.0.3" +paramiko = "^4.0.0" [tool.poetry.scripts] enroll = "enroll.cli:main" diff --git a/tests/test_cli.py b/tests/test_cli.py index 9e3422c..ca3bfa6 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,8 +6,9 @@ import enroll.cli as cli def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path): called = {} - def fake_harvest(out: str): + def fake_harvest(out: str, dangerous: bool = False): called["out"] = out + called["dangerous"] = dangerous return str(tmp_path / "state.json") monkeypatch.setattr(cli, "harvest", fake_harvest) @@ -15,6 +16,7 @@ def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path): cli.main() assert called["out"] == str(tmp_path) + assert called["dangerous"] is False captured = 
capsys.readouterr() assert str(tmp_path / "state.json") in captured.out @@ -53,8 +55,8 @@ def test_cli_manifest_subcommand_calls_manifest(monkeypatch, tmp_path): def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path): calls = [] - def fake_harvest(bundle_dir: str): - calls.append(("harvest", bundle_dir)) + def fake_harvest(bundle_dir: str, dangerous: bool = False): + calls.append(("harvest", bundle_dir, dangerous)) return str(tmp_path / "bundle" / "state.json") def fake_manifest(bundle_dir: str, out_dir: str, **kwargs): @@ -85,11 +87,142 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path) cli.main() assert calls == [ - ("harvest", str(tmp_path / "bundle")), + ("harvest", str(tmp_path / "bundle"), False), ("manifest", str(tmp_path / "bundle"), str(tmp_path / "ansible"), None, "auto"), ] +def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path): + called = {} + + def fake_harvest(out: str, dangerous: bool = False): + called["out"] = out + called["dangerous"] = dangerous + return str(tmp_path / "state.json") + + monkeypatch.setattr(cli, "harvest", fake_harvest) + monkeypatch.setattr( + sys, "argv", ["enroll", "harvest", "--out", str(tmp_path), "--dangerous"] + ) + + cli.main() + assert called["dangerous"] is True + + +def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( + monkeypatch, capsys, tmp_path +): + from enroll.cache import HarvestCache + + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + + called = {} + + def fake_cache_dir(*, hint=None): + called["hint"] = hint + return HarvestCache(dir=cache_dir) + + def fake_remote_harvest( + *, + local_out_dir, + remote_host, + remote_port, + remote_user, + dangerous, + no_sudo, + ): + called.update( + { + "local_out_dir": local_out_dir, + "remote_host": remote_host, + "remote_port": remote_port, + "remote_user": remote_user, + "dangerous": dangerous, + "no_sudo": no_sudo, + } + ) + return cache_dir / "state.json" + + 
monkeypatch.setattr(cli, "new_harvest_cache_dir", fake_cache_dir) + monkeypatch.setattr(cli, "remote_harvest", fake_remote_harvest) + monkeypatch.setattr( + sys, + "argv", + [ + "enroll", + "harvest", + "--remote-host", + "example.test", + "--remote-user", + "alice", + ], + ) + + cli.main() + out = capsys.readouterr().out + assert str(cache_dir / "state.json") in out + assert called["hint"] == "example.test" + assert called["local_out_dir"] == cache_dir + assert called["remote_host"] == "example.test" + assert called["remote_port"] == 22 + assert called["remote_user"] == "alice" + assert called["dangerous"] is False + assert called["no_sudo"] is False + + +def test_cli_single_shot_remote_without_harvest_prints_state_path( + monkeypatch, capsys, tmp_path +): + from enroll.cache import HarvestCache + + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + ansible_dir = tmp_path / "ansible" + + calls = [] + + def fake_cache_dir(*, hint=None): + return HarvestCache(dir=cache_dir) + + def fake_remote_harvest(**kwargs): + calls.append(("remote_harvest", kwargs)) + return cache_dir / "state.json" + + def fake_manifest(harvest_dir: str, out_dir: str, **kwargs): + calls.append(("manifest", harvest_dir, out_dir, kwargs.get("fqdn"))) + + monkeypatch.setattr(cli, "new_harvest_cache_dir", fake_cache_dir) + monkeypatch.setattr(cli, "remote_harvest", fake_remote_harvest) + monkeypatch.setattr(cli, "manifest", fake_manifest) + monkeypatch.setattr( + sys, + "argv", + [ + "enroll", + "single-shot", + "--remote-host", + "example.test", + "--remote-user", + "alice", + "--out", + str(ansible_dir), + "--fqdn", + "example.test", + ], + ) + + cli.main() + out = capsys.readouterr().out + + # It should print the derived state.json path for usability when --harvest + # wasn't provided. + assert str(cache_dir / "state.json") in out + + # And it should manifest using the cache dir. 
+ assert ("manifest", str(cache_dir), str(ansible_dir), "example.test") in calls + + def test_cli_manifest_common_args(monkeypatch, tmp_path): """Ensure --fqdn and jinjaturtle mode flags are forwarded correctly.""" diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 98f418f..99040b0 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -223,3 +223,29 @@ def test_manifest_site_mode_creates_host_inventory_and_raw_files(tmp_path: Path) assert ( out / "inventory" / "host_vars" / fqdn / "foo" / ".files" / "etc" / "foo.conf" ).exists() + + +def test_copy2_replace_overwrites_readonly_destination(tmp_path: Path): + """Merging into an existing manifest should tolerate read-only files. + + Some harvested artifacts (e.g. private keys) may be mode 0400. If a previous + run copied them into the destination tree, a subsequent run must still be + able to update/replace them. + """ + + import os + import stat + + from enroll.manifest import _copy2_replace + + src = tmp_path / "src" + dst = tmp_path / "dst" + src.write_text("new", encoding="utf-8") + dst.write_text("old", encoding="utf-8") + os.chmod(dst, 0o400) + + _copy2_replace(str(src), str(dst)) + + assert dst.read_text(encoding="utf-8") == "new" + mode = stat.S_IMODE(dst.stat().st_mode) + assert mode & stat.S_IWUSR # destination should remain mergeable From 33b117680074798ae6a6aef60195734589fdf834 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Wed, 17 Dec 2025 18:51:40 +1100 Subject: [PATCH 027/115] Add --sops mode to encrypt harvest and manifest data at rest (especially useful if using --dangerous) --- CHANGELOG.md | 10 +- Dockerfile.debbuild | 1 + README.md | 66 ++++++++- debian/changelog | 16 +++ debian/control | 5 +- enroll/cli.py | 328 +++++++++++++++++++++++++++++++++++------- enroll/ignore.py | 4 + enroll/jinjaturtle.py | 1 - enroll/manifest.py | 259 ++++++++++++++++++++++++++++----- enroll/remote.py | 48 +++++-- enroll/sopsutil.py | 137 ++++++++++++++++++ pyproject.toml | 2 +- 12 files 
changed, 760 insertions(+), 117 deletions(-) create mode 100644 enroll/sopsutil.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c2c247..1505f1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,12 @@ harvest, albeit you'll end up with less useful data (same as if running `enroll harvest` on a machine without sudo) * Add `--dangerous` flag to capture even sensitive data (use at your own risk!) + * Add `--sops` flag which makes the harvest and the manifest 'out' data encrypted as a single SOPS data file. + This would make `--dangerous` a little bit safer, if your intention is just to store the Ansible manifest + in git or somewhere similar for disaster-recovery purposes (e.g. encrypted at rest for safe-keeping). * Do a better job at capturing other config files in `/etc/<package>/` even if that package doesn't normally ship or manage those files. + * Don't collect files ending in `.log` # 0.0.5 @@ -17,7 +21,7 @@ of the same role. Use 'single site' mode (no `--fqdn`) if you want more readable, self-contained roles (in which case, store each manifested output in its own repo per server) - * Generate an ansible.cfg if not present, to support host_vars plugin and other params, + * Generate an ansible.cfg if not present, to support `host_vars` plugin and other params, when using `--fqdn` mode * Be more permissive with files that we previously thought contained secrets (ignore commented lines) @@ -34,10 +38,10 @@ # 0.0.2 * Merge pkg_ and roles created based on file/service detection - * Avoid idempotency issue with users (password_lock) + * Avoid idempotency issue with users (`password_lock`) * Rename subcommands/args ('export' is now 'enroll', '--bundle' is now '--harvest') * Don't try and start systemd services that were Inactive at harvest time - * Capture miscellaneous files in /etc under their own etc_custom role, but not backup files + * Capture miscellaneous files in /etc under their own `etc_custom` role, but not backup files * Add tests * Various other bug fixes
diff --git a/Dockerfile.debbuild b/Dockerfile.debbuild index 9009b41..a466ee2 100644 --- a/Dockerfile.debbuild +++ b/Dockerfile.debbuild @@ -25,6 +25,7 @@ RUN set -eux; \ python3-all \ python3-poetry-core \ python3-yaml \ + python3-paramiko \ rsync \ ca-certificates \ ; \ diff --git a/README.md b/README.md index c9e195f..8d3e455 100644 --- a/README.md +++ b/README.md @@ -90,9 +90,6 @@ This uploads a self-contained `enroll` zipapp to a temporary directory on the re **Privilege note:** A "full" harvest typically needs root access. Remote harvesting assumes the remote user can run `sudo` **without a password prompt** (NOPASSWD) so the harvest can run non-interactively. If you don't want this, pass `--no-sudo` as well. -**JinjaTurtle note:** If you want to take advantage of JinjaTurtle to turn configs into templates (see below note on JinjaTurtle integration), you'll still need to install JinjaTurtle on the remote host first. - - ## Sensitive data **enroll** doesn't make any assumptions about how you might handle sensitive data from your config files, in Ansible. Some people might use SOPS, others might use Vault, others might do something else entirely. @@ -121,6 +118,49 @@ ansible-galaxy collection install community.sops Then you can use the collection's lookup/vars plugins or modules to decrypt or load SOPS-encrypted vars at runtime. +Note the section below **also** talks about SOPS, but this is in the context of simply encrypting the data generated by `enroll` at rest for safe-keeping, **not** for direct integration with Ansible. + + +### Encrypting harvest/manifests at rest with `--sops` + +If you want to use `--dangerous` (or you simply want to keep the harvested artifacts private when they're sitting on disk, in git, etc), you can pass `--sops` to `harvest`, `manifest`, or `single-shot`. + +To use `--sops`, you will need to have [sops](https://github.com/getsops/sops) installed on your `$PATH`. + +- `--sops` expects one or more **GPG key fingerprints**. 
If `sops` is not on the `$PATH`, **enroll** will error. +- `harvest --sops ...` writes a *single* encrypted file (`harvest.tar.gz.sops`) instead of a plaintext directory. +- `manifest --sops ...` (and `single-shot --sops ...`) will: + - decrypt the harvest bundle with `sops -d` (if the `--harvest` input is an encrypted file), then generate manifests as normal + - bundle the entire generated Ansible output into a *single* encrypted file (`manifest.tar.gz.sops`) + +⚠️ **Important:** `manifest --sops` (and `single-shot --sops`) produces **one encrypted file**. It is **not** an Ansible repo you can point `ansible-playbook` at directly. It is **not** the same as using SOPS inventory with the Ansible SOPS collection. + +To use the encrypted SOPS manifest, decrypt and extract it first, then run Ansible from inside the extracted `manifest/` directory: + +```bash +sops -d /path/to/manifest.tar.gz.sops | tar -xzvf - +cd manifest +ansible-playbook ... +``` + +Example: + +```bash +# Harvest (encrypted-at-rest) +enroll harvest --out /tmp/enroll-harvest --dangerous --sops <GPG_FINGERPRINT> + +# Manifest (encrypted-at-rest) +enroll manifest --harvest /tmp/enroll-harvest/harvest.tar.gz.sops --out /tmp/enroll-ansible --sops <GPG_FINGERPRINT> + +# Decrypt/extract manifest output for inspection / ansible runs +cd /tmp/enroll-ansible +sops -d manifest.tar.gz.sops | tar -xzvf - +cd manifest +``` + +(If you want to manually inspect an encrypted harvest bundle, extract it into its own directory, e.g. `mkdir -p harvest && sops -d harvest.tar.gz.sops | tar -xzvf - -C harvest`.) + + ## Manifest @@ -151,8 +191,6 @@ JinjaTurtle will be used automatically if it is detected on the `$PATH`. You can If you *do* have JinjaTurtle installed, but *don't* wish to make use of it, you can use `--no-jinjaturtle`, in which case all config files will be kept as 'raw' files.
-**Remote mode**: if you are using the `--remote-xxx` flags for `manifest` or `single-shot` subcommands, and want to take advantage of the JinjaTurtle integration, you'll still need to install JinjaTurtle on the remote host *in advance*. - --- # How multi-site avoids "shared role breaks a host" @@ -239,6 +277,24 @@ Remote + dangerous: enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest --dangerous ``` +### `--sops` (encrypt bundles at rest) + +`--sops` bundles and encrypts the output as a single SOPS-encrypted `.tar.gz.sops` file (GPG). This is particularly useful if you're using `--dangerous`. + +```bash +# Encrypted harvest bundle (writes /tmp/enroll-harvest/harvest.tar.gz.sops) +enroll harvest --out /tmp/enroll-harvest --dangerous --sops <GPG_FINGERPRINT> + +# Encrypted manifest bundle (writes /tmp/enroll-ansible/manifest.tar.gz.sops) +enroll manifest --harvest /tmp/enroll-harvest/harvest.tar.gz.sops --out /tmp/enroll-ansible --sops <GPG_FINGERPRINT> + +# Decrypt/extract the manifest bundle, then run Ansible from inside ./manifest/ +cd /tmp/enroll-ansible +sops -d manifest.tar.gz.sops | tar -xzvf - +cd manifest +ansible-playbook ./playbook.yml +``` + ## 2. Generate Ansible manifests (roles/playbook) from that harvest diff --git a/debian/changelog b/debian/changelog index b889be7..16f7a0d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,19 @@ +enroll (0.1.0) unstable; urgency=medium + + * Add remote mode for harvesting a remote machine via a local workstation (no need to install enroll remotely) + Optionally use `--no-sudo` if you don't want the remote user to have passwordless sudo when conducting the + harvest, albeit you'll end up with less useful data (same as if running `enroll harvest` on a machine without + sudo) + * Add `--dangerous` flag to capture even sensitive data (use at your own risk!) + * Add `--sops` flag which makes the harvest and the manifest 'out' data encrypted as a single SOPS data file.
+ This would make `--dangerous` a little bit safer, if your intention is just to store the Ansible manifest + in git or somewhere similar for disaster-recovery purposes (e.g. encrypted at rest for safe-keeping). + * Do a better job at capturing other config files in `/etc/<package>/` even if that package doesn't normally + ship or manage those files. + * Don't collect files ending in `.log` + + -- Miguel Jacq Tue, 17 Dec 2025 18:00:00 +1100 + enroll (0.0.5) unstable; urgency=medium * Use JinjaTurtle to generate dynamic template/inventory if it's on the PATH diff --git a/debian/control b/debian/control index 71b5661..7f323fd 100644 --- a/debian/control +++ b/debian/control @@ -9,12 +9,13 @@ Build-Depends: pybuild-plugin-pyproject, python3-all, python3-yaml, - python3-poetry-core + python3-poetry-core, + python3-paramiko Standards-Version: 4.6.2 Homepage: https://git.mig5.net/mig5/enroll Package: enroll Architecture: all -Depends: ${misc:Depends}, ${python3:Depends}, python3-yaml +Depends: ${misc:Depends}, ${python3:Depends}, python3-yaml, python3-paramiko Description: Harvest a host into Ansible roles A tool that inspects a system and emits Ansible roles/playbooks to reproduce it. diff --git a/enroll/cli.py b/enroll/cli.py index a4b1142..60e48a2 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -2,6 +2,8 @@ from __future__ import annotations import argparse import os +import tarfile +import tempfile from pathlib import Path from typing import Optional @@ -9,6 +11,56 @@ from .cache import new_harvest_cache_dir from .harvest import harvest from .manifest import manifest from .remote import remote_harvest +from .sopsutil import SopsError, encrypt_file_binary + + +def _resolve_sops_out_file(out: Optional[str], *, hint: str) -> Path: + """Resolve an output *file* path for --sops mode. + + If `out` looks like a directory (or points to an existing directory), we + place the encrypted harvest inside it as harvest.tar.gz.sops.
+ """ + if out: + p = Path(out).expanduser() + if p.exists() and p.is_dir(): + return p / "harvest.tar.gz.sops" + # Heuristic: treat paths with a suffix as files; otherwise directories. + if p.suffix: + return p + return p / "harvest.tar.gz.sops" + + # Default: use a secure cache directory. + d = new_harvest_cache_dir(hint=hint).dir + return d / "harvest.tar.gz.sops" + + +def _tar_dir_to(path_dir: Path, tar_path: Path) -> None: + tar_path.parent.mkdir(parents=True, exist_ok=True) + with tarfile.open(tar_path, mode="w:gz") as tf: + # Keep a stable on-disk layout when extracted: state.json + artifacts/ + tf.add(str(path_dir), arcname=".") + + +def _encrypt_harvest_dir_to_sops( + bundle_dir: Path, out_file: Path, fps: list[str] +) -> Path: + out_file = Path(out_file) + out_file.parent.mkdir(parents=True, exist_ok=True) + + # Create the tarball alongside the output file (keeps filesystem permissions/locality sane). + fd, tmp_tgz = tempfile.mkstemp( + prefix=".enroll-harvest-", suffix=".tar.gz", dir=str(out_file.parent) + ) + os.close(fd) + try: + _tar_dir_to(bundle_dir, Path(tmp_tgz)) + encrypt_file_binary(Path(tmp_tgz), out_file, pgp_fingerprints=fps, mode=0o600) + finally: + try: + os.unlink(tmp_tgz) + except FileNotFoundError: + pass + return out_file def _add_common_manifest_args(p: argparse.ArgumentParser) -> None: @@ -60,12 +112,27 @@ def main() -> None: sub = ap.add_subparsers(dest="cmd", required=True) h = sub.add_parser("harvest", help="Harvest service/package/config state") - h.add_argument("--out", help="Harvest output directory") + h.add_argument( + "--out", + help=( + "Harvest output directory. If --sops is set, this may be either a directory " + "(an encrypted file named harvest.tar.gz.sops will be created inside) or a file path." + ), + ) h.add_argument( "--dangerous", action="store_true", help="Collect files more aggressively (may include secrets). 
Disables secret-avoidance checks.", ) + h.add_argument( + "--sops", + nargs="+", + metavar="GPG_FINGERPRINT", + help=( + "Encrypt the harvest output as a SOPS-encrypted tarball using the given GPG fingerprint(s). " + "Requires `sops` on PATH." + ), + ) h.add_argument( "--no-sudo", action="store_true", @@ -77,24 +144,56 @@ def main() -> None: m.add_argument( "--harvest", required=True, - help="Path to the directory created by the harvest command", + help=( + "Path to the directory created by the harvest command, or (with --sops) " + "a SOPS-encrypted harvest tarball." + ), ) m.add_argument( "--out", required=True, - help="Output directory for generated roles/playbook Ansible manifest", + help=( + "Output location for the generated manifest. In plain mode this is a directory. " + "In --sops mode this may be either a directory (an encrypted file named manifest.tar.gz.sops will be created inside) " + "or a file path." + ), + ) + m.add_argument( + "--sops", + nargs="+", + metavar="GPG_FINGERPRINT", + help=( + "In --sops mode, decrypt the harvest using `sops -d` (if the harvest is an encrypted file) " + "and then bundle+encrypt the entire generated manifest output into a single SOPS-encrypted tarball " + "(binary) using the given GPG fingerprint(s). Requires `sops` on PATH." + ), ) _add_common_manifest_args(m) s = sub.add_parser( "single-shot", help="Harvest state, then manifest Ansible code, in one shot" ) - s.add_argument("--harvest", help="Path to the directory to place the harvest in") + s.add_argument( + "--harvest", + help=( + "Where to place the harvest. In plain mode this is a directory; in --sops mode this may be " + "a directory or a file path (an encrypted file is produced)." + ), + ) s.add_argument( "--dangerous", action="store_true", help="Collect files more aggressively (may include secrets). 
Disables secret-avoidance checks.", ) + s.add_argument( + "--sops", + nargs="+", + metavar="GPG_FINGERPRINT", + help=( + "Encrypt the harvest as a SOPS-encrypted tarball, and bundle+encrypt the manifest output in --out " + "(same behavior as `harvest --sops` and `manifest --sops`)." + ), + ) s.add_argument( "--no-sudo", action="store_true", @@ -103,7 +202,11 @@ def main() -> None: s.add_argument( "--out", required=True, - help="Output directory for generated roles/playbook Ansible manifest", + help=( + "Output location for the generated manifest. In plain mode this is a directory. " + "In --sops mode this may be either a directory (an encrypted file named manifest.tar.gz.sops will be created inside) " + "or a file path." + ), ) _add_common_manifest_args(s) _add_remote_args(s) @@ -112,54 +215,169 @@ def main() -> None: remote_host: Optional[str] = getattr(args, "remote_host", None) - if args.cmd == "harvest": - if remote_host: - out_dir = ( - Path(args.out) - if args.out - else new_harvest_cache_dir(hint=remote_host).dir + try: + if args.cmd == "harvest": + sops_fps = getattr(args, "sops", None) + if remote_host: + if sops_fps: + out_file = _resolve_sops_out_file(args.out, hint=remote_host) + with tempfile.TemporaryDirectory(prefix="enroll-harvest-") as td: + tmp_bundle = Path(td) / "bundle" + tmp_bundle.mkdir(parents=True, exist_ok=True) + try: + os.chmod(tmp_bundle, 0o700) + except OSError: + pass + remote_harvest( + local_out_dir=tmp_bundle, + remote_host=remote_host, + remote_port=int(args.remote_port), + remote_user=args.remote_user, + dangerous=bool(args.dangerous), + no_sudo=bool(args.no_sudo), + ) + _encrypt_harvest_dir_to_sops( + tmp_bundle, out_file, list(sops_fps) + ) + print(str(out_file)) + else: + out_dir = ( + Path(args.out) + if args.out + else new_harvest_cache_dir(hint=remote_host).dir + ) + state = remote_harvest( + local_out_dir=out_dir, + remote_host=remote_host, + remote_port=int(args.remote_port), + remote_user=args.remote_user, + 
dangerous=bool(args.dangerous), + no_sudo=bool(args.no_sudo), + ) + print(str(state)) + else: + if sops_fps: + out_file = _resolve_sops_out_file(args.out, hint="local") + with tempfile.TemporaryDirectory(prefix="enroll-harvest-") as td: + tmp_bundle = Path(td) / "bundle" + tmp_bundle.mkdir(parents=True, exist_ok=True) + try: + os.chmod(tmp_bundle, 0o700) + except OSError: + pass + harvest(str(tmp_bundle), dangerous=bool(args.dangerous)) + _encrypt_harvest_dir_to_sops( + tmp_bundle, out_file, list(sops_fps) + ) + print(str(out_file)) + else: + if not args.out: + raise SystemExit( + "error: --out is required unless --remote-host is set" + ) + path = harvest(args.out, dangerous=bool(args.dangerous)) + print(path) + elif args.cmd == "manifest": + out_enc = manifest( + args.harvest, + args.out, + fqdn=args.fqdn, + jinjaturtle=_jt_mode(args), + sops_fingerprints=getattr(args, "sops", None), ) - state = remote_harvest( - local_out_dir=out_dir, - remote_host=remote_host, - remote_port=int(args.remote_port), - remote_user=args.remote_user, - dangerous=bool(args.dangerous), - no_sudo=bool(args.no_sudo), - ) - print(str(state)) - else: - if not args.out: - raise SystemExit("error: --out is required unless --remote-host is set") - path = harvest(args.out, dangerous=bool(args.dangerous)) - print(path) - elif args.cmd == "manifest": - manifest(args.harvest, args.out, fqdn=args.fqdn, jinjaturtle=_jt_mode(args)) - elif args.cmd == "single-shot": - if remote_host: - harvest_dir = ( - Path(args.harvest) - if args.harvest - else new_harvest_cache_dir(hint=remote_host).dir - ) - remote_harvest( - local_out_dir=harvest_dir, - remote_host=remote_host, - remote_port=int(args.remote_port), - remote_user=args.remote_user, - dangerous=bool(args.dangerous), - no_sudo=bool(args.no_sudo), - ) - manifest( - str(harvest_dir), args.out, fqdn=args.fqdn, jinjaturtle=_jt_mode(args) - ) - # For usability (when --harvest wasn't provided), print the harvest path. 
- if not args.harvest: - print(str(harvest_dir / "state.json")) - else: - if not args.harvest: - raise SystemExit( - "error: --harvest is required unless --remote-host is set" - ) - harvest(args.harvest, dangerous=bool(args.dangerous)) - manifest(args.harvest, args.out, fqdn=args.fqdn, jinjaturtle=_jt_mode(args)) + if getattr(args, "sops", None) and out_enc: + print(str(out_enc)) + elif args.cmd == "single-shot": + sops_fps = getattr(args, "sops", None) + if remote_host: + if sops_fps: + out_file = _resolve_sops_out_file(args.harvest, hint=remote_host) + with tempfile.TemporaryDirectory(prefix="enroll-harvest-") as td: + tmp_bundle = Path(td) / "bundle" + tmp_bundle.mkdir(parents=True, exist_ok=True) + try: + os.chmod(tmp_bundle, 0o700) + except OSError: + pass + remote_harvest( + local_out_dir=tmp_bundle, + remote_host=remote_host, + remote_port=int(args.remote_port), + remote_user=args.remote_user, + dangerous=bool(args.dangerous), + no_sudo=bool(args.no_sudo), + ) + _encrypt_harvest_dir_to_sops( + tmp_bundle, out_file, list(sops_fps) + ) + + manifest( + str(out_file), + args.out, + fqdn=args.fqdn, + jinjaturtle=_jt_mode(args), + sops_fingerprints=list(sops_fps), + ) + if not args.harvest: + print(str(out_file)) + else: + harvest_dir = ( + Path(args.harvest) + if args.harvest + else new_harvest_cache_dir(hint=remote_host).dir + ) + remote_harvest( + local_out_dir=harvest_dir, + remote_host=remote_host, + remote_port=int(args.remote_port), + remote_user=args.remote_user, + dangerous=bool(args.dangerous), + no_sudo=bool(args.no_sudo), + ) + manifest( + str(harvest_dir), + args.out, + fqdn=args.fqdn, + jinjaturtle=_jt_mode(args), + ) + # For usability (when --harvest wasn't provided), print the harvest path. 
+ if not args.harvest: + print(str(harvest_dir / "state.json")) + else: + if sops_fps: + out_file = _resolve_sops_out_file(args.harvest, hint="local") + with tempfile.TemporaryDirectory(prefix="enroll-harvest-") as td: + tmp_bundle = Path(td) / "bundle" + tmp_bundle.mkdir(parents=True, exist_ok=True) + try: + os.chmod(tmp_bundle, 0o700) + except OSError: + pass + harvest(str(tmp_bundle), dangerous=bool(args.dangerous)) + _encrypt_harvest_dir_to_sops( + tmp_bundle, out_file, list(sops_fps) + ) + + manifest( + str(out_file), + args.out, + fqdn=args.fqdn, + jinjaturtle=_jt_mode(args), + sops_fingerprints=list(sops_fps), + ) + if not args.harvest: + print(str(out_file)) + else: + if not args.harvest: + raise SystemExit( + "error: --harvest is required unless --remote-host is set" + ) + harvest(args.harvest, dangerous=bool(args.dangerous)) + manifest( + args.harvest, + args.out, + fqdn=args.fqdn, + jinjaturtle=_jt_mode(args), + ) + except SopsError as e: + raise SystemExit(f"error: {e}") diff --git a/enroll/ignore.py b/enroll/ignore.py index 73d577d..d8ffce9 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -73,6 +73,10 @@ class IgnorePolicy: yield raw def deny_reason(self, path: str) -> Optional[str]: + # Always ignore plain *.log files (rarely useful as config, often noisy). 
+ if path.endswith(".log"): + return "log_file" + if not self.dangerous: for g in self.deny_globs or []: if fnmatch.fnmatch(path, g): diff --git a/enroll/jinjaturtle.py b/enroll/jinjaturtle.py index 03f4adf..67f0215 100644 --- a/enroll/jinjaturtle.py +++ b/enroll/jinjaturtle.py @@ -1,6 +1,5 @@ from __future__ import annotations -import re import shutil import subprocess # nosec import tempfile diff --git a/enroll/manifest.py b/enroll/manifest.py index bc94c7b..09666d4 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -4,6 +4,7 @@ import json import os import shutil import stat +import tarfile import tempfile from pathlib import Path from typing import Any, Dict, List, Optional, Set, Tuple @@ -14,9 +15,12 @@ from .jinjaturtle import ( run_jinjaturtle, ) - -JINJATURTLE_BEGIN = "# BEGIN JINJATURTLE (generated by enroll)" -JINJATURTLE_END = "# END JINJATURTLE" +from .remote import _safe_extract_tar +from .sopsutil import ( + decrypt_file_binary_to, + encrypt_file_binary, + require_sops_cmd, +) def _try_yaml(): @@ -85,24 +89,6 @@ def _merge_mappings_overwrite( return merged -def _normalise_jinjaturtle_vars_text(vars_text: str) -> str: - """Deduplicate keys in a vars fragment by parsing as YAML and dumping it back.""" - m = _yaml_load_mapping(vars_text) - if not m: - # if YAML isn't available or parsing failed, return raw text (best-effort) - return vars_text.rstrip() + ( - "\n" if vars_text and not vars_text.endswith("\n") else "" - ) - return _yaml_dump_mapping(m, sort_keys=True) - - -def _yaml_list(items: List[str], indent: int = 2) -> str: - pad = " " * indent - if not items: - return f"{pad}[]" - return "\n".join(f"{pad}- {x}" for x in items) - - def _copy2_replace(src: str, dst: str) -> None: dst_dir = os.path.dirname(dst) os.makedirs(dst_dir, exist_ok=True) @@ -349,23 +335,6 @@ def _jinjify_managed_files( return templated, "" -def _defaults_with_jinjaturtle(base_defaults: str, vars_text: str) -> str: - if not vars_text.strip(): - return 
base_defaults.rstrip() + "\n" - vars_text = _normalise_jinjaturtle_vars_text(vars_text) - # Always regenerate the block (we regenerate whole defaults files anyway) - return ( - base_defaults.rstrip() - + "\n\n" - + JINJATURTLE_BEGIN - + "\n" - + vars_text.rstrip() - + "\n" - + JINJATURTLE_END - + "\n" - ) - - def _write_role_defaults(role_dir: str, mapping: Dict[str, Any]) -> None: """Overwrite role defaults/main.yml with the provided mapping.""" defaults_path = os.path.join(role_dir, "defaults", "main.yml") @@ -499,7 +468,153 @@ def _render_generic_files_tasks( """ -def manifest( +def _prepare_bundle_dir( + bundle: str, + *, + sops_mode: bool, +) -> tuple[str, Optional[tempfile.TemporaryDirectory]]: + """Return (bundle_dir, tempdir). + + - In non-sops mode, `bundle` must be a directory. + - In sops mode, `bundle` may be a directory (already-decrypted) *or* + a SOPS-encrypted tarball. In the tarball case we decrypt+extract into + a secure temp directory. + """ + p = Path(bundle).expanduser() + + if p.is_dir(): + return str(p), None + + if not sops_mode: + raise RuntimeError(f"Harvest path is not a directory: {p}") + + if not p.exists(): + raise RuntimeError(f"Harvest path not found: {p}") + + # Ensure sops is available early for clear error messages. + require_sops_cmd() + + td = tempfile.TemporaryDirectory(prefix="enroll-harvest-") + td_path = Path(td.name) + try: + os.chmod(td_path, 0o700) + except OSError: + pass + + tar_path = td_path / "harvest.tar.gz" + out_dir = td_path / "bundle" + out_dir.mkdir(parents=True, exist_ok=True) + try: + os.chmod(out_dir, 0o700) + except OSError: + pass + + decrypt_file_binary_to(p, tar_path, mode=0o600) + + # Extract using the same safe extraction rules as remote harvesting. + with tarfile.open(tar_path, mode="r:gz") as tf: + _safe_extract_tar(tf, out_dir) + + return str(out_dir), td + + +def _resolve_sops_manifest_out_file(out: str) -> Path: + """Resolve an output *file* path for manifest --sops mode. 
+ + If `out` looks like a directory (or points to an existing directory), we + place the encrypted manifest bundle inside it as manifest.tar.gz.sops. + """ + p = Path(out).expanduser() + if p.exists() and p.is_dir(): + return p / "manifest.tar.gz.sops" + # Heuristic: treat paths with a suffix as files; otherwise directories. + if p.suffix: + return p + return p / "manifest.tar.gz.sops" + + +def _tar_dir_to_with_progress( + src_dir: Path, tar_path: Path, *, desc: str = "tarring" +) -> None: + """Create a tar.gz of src_dir at tar_path, with a simple per-entry progress display.""" + src_dir = Path(src_dir) + tar_path = Path(tar_path) + tar_path.parent.mkdir(parents=True, exist_ok=True) + + # Collect paths (dirs + files) + paths: list[Path] = [src_dir] + for root, dirs, files in os.walk(str(src_dir)): + root_p = Path(root) + for d in sorted(dirs): + paths.append(root_p / d) + for f in sorted(files): + paths.append(root_p / f) + + total = len(paths) + is_tty = hasattr(os, "isatty") and os.isatty(2) + + def _print_progress(i: int, p: Path) -> None: + if not is_tty: + return + pct = (i / total * 100.0) if total else 100.0 + rel = "." 
+ try: + rel = str(p.relative_to(src_dir)) + except Exception: + rel = str(p) + msg = f"{desc}: {i}/{total} ({pct:5.1f}%) {rel}" + try: + cols = shutil.get_terminal_size((80, 20)).columns + msg = msg[: cols - 1] + except Exception: + pass + os.write(2, ("\r" + msg).encode("utf-8", errors="replace")) + + with tarfile.open(tar_path, mode="w:gz") as tf: + prefix = Path("manifest") + + for i, p in enumerate(paths, start=1): + if p == src_dir: + arcname = str(prefix) + else: + rel = p.relative_to(src_dir) + arcname = str(prefix / rel) + tf.add(str(p), arcname=arcname, recursive=False) + _print_progress(i, p) + + if is_tty: + os.write(2, b"\n") + + +def _encrypt_manifest_out_dir_to_sops( + out_dir: Path, out_file: Path, fps: list[str] +) -> Path: + """Tar+encrypt the generated manifest output directory into a single .sops file.""" + require_sops_cmd() + out_file = Path(out_file) + out_file.parent.mkdir(parents=True, exist_ok=True) + + fd, tmp_tgz = tempfile.mkstemp( + prefix=".enroll-manifest-", + suffix=".tar.gz", + dir=str(out_file.parent), + ) + os.close(fd) + try: + _tar_dir_to_with_progress( + Path(out_dir), Path(tmp_tgz), desc="Bundling manifest" + ) + encrypt_file_binary(Path(tmp_tgz), out_file, pgp_fingerprints=fps, mode=0o600) + finally: + try: + os.unlink(tmp_tgz) + except FileNotFoundError: + pass + + return out_file + + +def _manifest_from_bundle_dir( bundle_dir: str, out_dir: str, *, @@ -1204,3 +1319,69 @@ Generated for package `{pkg}`. ) else: _write_playbook_all(os.path.join(out_dir, "playbook.yml"), all_roles) + + +def manifest( + bundle_dir: str, + out: str, + *, + fqdn: Optional[str] = None, + jinjaturtle: str = "auto", # auto|on|off + sops_fingerprints: Optional[List[str]] = None, +) -> Optional[str]: + """Render an Ansible manifest from a harvest. 
+ + Plain mode: + - `bundle_dir` must be a directory + - `out` is a directory written in-place + + SOPS mode (when `sops_fingerprints` is provided): + - `bundle_dir` may be either a directory (already decrypted) or a SOPS + encrypted tarball (binary) produced by `harvest --sops` + - the manifest output is bundled (tar.gz) and encrypted into a single + SOPS file (binary) at the resolved output path. + + Returns: + - In SOPS mode: the path to the encrypted manifest bundle (.sops) + - In plain mode: None + """ + sops_mode = bool(sops_fingerprints) + + # Decrypt/extract the harvest bundle if needed. + resolved_bundle_dir, td_bundle = _prepare_bundle_dir( + bundle_dir, sops_mode=sops_mode + ) + + td_out: Optional[tempfile.TemporaryDirectory] = None + try: + if not sops_mode: + _manifest_from_bundle_dir( + resolved_bundle_dir, out, fqdn=fqdn, jinjaturtle=jinjaturtle + ) + return None + + # SOPS mode: generate into a secure temp dir, then tar+encrypt into a single file. + out_file = _resolve_sops_manifest_out_file(out) + + td_out = tempfile.TemporaryDirectory(prefix="enroll-manifest-") + tmp_out = Path(td_out.name) / "out" + tmp_out.mkdir(parents=True, exist_ok=True) + try: + os.chmod(tmp_out, 0o700) + except OSError: + pass + + _manifest_from_bundle_dir( + resolved_bundle_dir, str(tmp_out), fqdn=fqdn, jinjaturtle=jinjaturtle + ) + + enc = _encrypt_manifest_out_dir_to_sops( + tmp_out, out_file, list(sops_fingerprints or []) + ) + return str(enc) + + finally: + if td_out is not None: + td_out.cleanup() + if td_bundle is not None: + td_bundle.cleanup() diff --git a/enroll/remote.py b/enroll/remote.py index 5dcf286..df8d876 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -138,15 +138,29 @@ def remote_harvest( look_for_keys=True, ) + # If no username was explicitly provided, SSH may have selected a default. + # We need a concrete username for the (sudo) chown step below. 
+ resolved_user = remote_user + if not resolved_user: + rc, out, err = _ssh_run(ssh, "id -un") + if rc == 0 and out.strip(): + resolved_user = out.strip() + sftp = ssh.open_sftp() + rtmp: Optional[str] = None try: rc, out, err = _ssh_run(ssh, "mktemp -d") if rc != 0: raise RuntimeError(f"Remote mktemp failed: {err.strip()}") rtmp = out.strip() + + # Be explicit: restrict the remote staging area to the current user. + rc, out, err = _ssh_run(ssh, f"chmod 700 {rtmp}") + if rc != 0: + raise RuntimeError(f"Remote chmod failed: {err.strip()}") + rapp = f"{rtmp}/enroll.pyz" rbundle = f"{rtmp}/bundle" - rtgz = f"{rtmp}/bundle.tgz" sftp.put(str(pyz), rapp) @@ -169,7 +183,12 @@ def remote_harvest( if not no_sudo: # Ensure user can read the files, before we tar it - cmd = f"sudo chown -R {remote_user} {rbundle}" + if not resolved_user: + raise RuntimeError( + "Unable to determine remote username for chown. " + "Pass --remote-user explicitly or use --no-sudo." + ) + cmd = f"sudo chown -R {resolved_user} {rbundle}" rc, out, err = _ssh_run(ssh, cmd) if rc != 0: raise RuntimeError( @@ -179,26 +198,33 @@ def remote_harvest( f"Stderr: {err.strip()}" ) - # Tar the bundle for efficient download. - cmd = f"tar -czf {rtgz} -C {rbundle} ." - rc, out, err = _ssh_run(ssh, cmd) + # Stream a tarball back to the local machine (avoid creating a tar file on the remote). + cmd = f"tar -cz -C {rbundle} ." + _stdin, stdout, stderr = ssh.exec_command(cmd) + with open(local_tgz, "wb") as f: + while True: + chunk = stdout.read(1024 * 128) + if not chunk: + break + f.write(chunk) + rc = stdout.channel.recv_exit_status() + err_text = stderr.read().decode("utf-8", errors="replace") if rc != 0: raise RuntimeError( - "Remote tar failed.\n" + "Remote tar stream failed.\n" f"Command: {cmd}\n" f"Exit code: {rc}\n" - f"Stderr: {err.strip()}" + f"Stderr: {err_text.strip()}" ) - sftp.get(rtgz, str(local_tgz)) - # Extract into the destination. 
with tarfile.open(local_tgz, mode="r:gz") as tf: _safe_extract_tar(tf, local_out_dir) - # Cleanup remote tmpdir. - _ssh_run(ssh, f"rm -rf {rtmp}") finally: + # Cleanup remote tmpdir even on failure. + if rtmp: + _ssh_run(ssh, f"rm -rf {rtmp}") try: sftp.close() ssh.close() diff --git a/enroll/sopsutil.py b/enroll/sopsutil.py new file mode 100644 index 0000000..d43d351 --- /dev/null +++ b/enroll/sopsutil.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +import os +import shutil +import subprocess +import tempfile +from pathlib import Path +from typing import Iterable, List, Optional + + +class SopsError(RuntimeError): + pass + + +def find_sops_cmd() -> Optional[str]: + """Return the `sops` executable path if present on PATH.""" + return shutil.which("sops") + + +def require_sops_cmd() -> str: + exe = find_sops_cmd() + if not exe: + raise SopsError( + "--sops was requested but `sops` was not found on PATH. " + "Install sops and ensure it is available as `sops`." + ) + return exe + + +def _pgp_arg(fingerprints: Iterable[str]) -> str: + fps = [f.strip() for f in fingerprints if f and f.strip()] + if not fps: + raise SopsError("No GPG fingerprints provided for --sops") + # sops accepts a comma-separated list for --pgp. + return ",".join(fps) + + +def encrypt_file_binary( + src_path: Path, + dst_path: Path, + *, + pgp_fingerprints: List[str], + mode: int = 0o600, +) -> None: + """Encrypt src_path with sops (binary) and write to dst_path atomically.""" + sops = require_sops_cmd() + src_path = Path(src_path) + dst_path = Path(dst_path) + dst_path.parent.mkdir(parents=True, exist_ok=True) + + res = subprocess.run( + [ + sops, + "--encrypt", + "--input-type", + "binary", + "--output-type", + "binary", + "--pgp", + _pgp_arg(pgp_fingerprints), + str(src_path), + ], + capture_output=True, + check=False, + ) + if res.returncode != 0: + raise SopsError( + "sops encryption failed:\n" + f" cmd: {sops} --encrypt ... 
{src_path}\n" + f" rc: {res.returncode}\n" + f" stderr: {res.stderr.decode('utf-8', errors='replace').strip()}" + ) + + # Write atomically in the destination directory. + fd, tmp = tempfile.mkstemp(prefix=".enroll-sops-", dir=str(dst_path.parent)) + try: + with os.fdopen(fd, "wb") as f: + f.write(res.stdout) + try: + os.chmod(tmp, mode) + except OSError: + pass + os.replace(tmp, dst_path) + finally: + try: + os.unlink(tmp) + except FileNotFoundError: + pass + + +def decrypt_file_binary_to( + src_path: Path, + dst_path: Path, + *, + mode: int = 0o600, +) -> None: + """Decrypt a sops-encrypted file (binary) into dst_path.""" + sops = require_sops_cmd() + src_path = Path(src_path) + dst_path = Path(dst_path) + dst_path.parent.mkdir(parents=True, exist_ok=True) + + res = subprocess.run( + [ + sops, + "--decrypt", + "--input-type", + "binary", + "--output-type", + "binary", + str(src_path), + ], + capture_output=True, + check=False, + ) + if res.returncode != 0: + raise SopsError( + "sops decryption failed:\n" + f" cmd: {sops} --decrypt ... 
{src_path}\n" + f" rc: {res.returncode}\n" + f" stderr: {res.stderr.decode('utf-8', errors='replace').strip()}" + ) + + fd, tmp = tempfile.mkstemp(prefix=".enroll-sops-", dir=str(dst_path.parent)) + try: + with os.fdopen(fd, "wb") as f: + f.write(res.stdout) + try: + os.chmod(tmp, mode) + except OSError: + pass + os.replace(tmp, dst_path) + finally: + try: + os.unlink(tmp) + except FileNotFoundError: + pass diff --git a/pyproject.toml b/pyproject.toml index 9b16d36..ac65b02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.0.5" +version = "0.1.0" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" From 9ebd8ff990b5a8341be64bbd2e73fcab87128f74 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Wed, 17 Dec 2025 19:03:31 +1100 Subject: [PATCH 028/115] remove --out from harvest examples with remote mode, in README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8d3e455..cd3bba4 100644 --- a/README.md +++ b/README.md @@ -259,10 +259,10 @@ On the host (root recommended to harvest as much data as possible): ```bash enroll harvest --out /tmp/enroll-harvest ``` -### Remote harvest over SSH (no enroll install required on the remote host) +### Remote harvest over SSH (no enroll install required on the remote host, no need for --out) ```bash -enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest +enroll harvest --remote-host myhost.example.com --remote-user myuser ``` ### `--dangerous` (captures potentially sensitive files — read the warning above) @@ -274,7 +274,7 @@ enroll harvest --out /tmp/enroll-harvest --dangerous Remote + dangerous: ```bash -enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest --dangerous +enroll harvest --remote-host myhost.example.com --remote-user myuser --dangerous 
``` ### `--sops` (encrypt bundles at rest) From 62ec8e8b1bb7178b8f40574b38b2b6d8c7555780 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Wed, 17 Dec 2025 19:05:07 +1100 Subject: [PATCH 029/115] Silence bandit paranoia on certain lines --- enroll/manifest.py | 2 +- enroll/remote.py | 2 +- enroll/sopsutil.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index 09666d4..afb8b88 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -567,7 +567,7 @@ def _tar_dir_to_with_progress( cols = shutil.get_terminal_size((80, 20)).columns msg = msg[: cols - 1] except Exception: - pass + pass # nosec os.write(2, ("\r" + msg).encode("utf-8", errors="replace")) with tarfile.open(tar_path, mode="w:gz") as tf: diff --git a/enroll/remote.py b/enroll/remote.py index df8d876..7ad8dc4 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -200,7 +200,7 @@ def remote_harvest( # Stream a tarball back to the local machine (avoid creating a tar file on the remote). cmd = f"tar -cz -C {rbundle} ." 
- _stdin, stdout, stderr = ssh.exec_command(cmd) + _stdin, stdout, stderr = ssh.exec_command(cmd) # nosec with open(local_tgz, "wb") as f: while True: chunk = stdout.read(1024 * 128) diff --git a/enroll/sopsutil.py b/enroll/sopsutil.py index d43d351..6c0c881 100644 --- a/enroll/sopsutil.py +++ b/enroll/sopsutil.py @@ -2,7 +2,7 @@ from __future__ import annotations import os import shutil -import subprocess +import subprocess # nosec import tempfile from pathlib import Path from typing import Iterable, List, Optional @@ -62,7 +62,7 @@ def encrypt_file_binary( ], capture_output=True, check=False, - ) + ) # nosec if res.returncode != 0: raise SopsError( "sops encryption failed:\n" @@ -112,7 +112,7 @@ def decrypt_file_binary_to( ], capture_output=True, check=False, - ) + ) # nosec if res.returncode != 0: raise SopsError( "sops decryption failed:\n" From a235028f3b28467687cc33a6996ec35efb2bf97f Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 13:34:37 +1100 Subject: [PATCH 030/115] black --- enroll/manifest.py | 2 +- enroll/remote.py | 2 +- enroll/sopsutil.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index afb8b88..e55418c 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -567,7 +567,7 @@ def _tar_dir_to_with_progress( cols = shutil.get_terminal_size((80, 20)).columns msg = msg[: cols - 1] except Exception: - pass # nosec + pass # nosec os.write(2, ("\r" + msg).encode("utf-8", errors="replace")) with tarfile.open(tar_path, mode="w:gz") as tf: diff --git a/enroll/remote.py b/enroll/remote.py index 7ad8dc4..469248d 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -200,7 +200,7 @@ def remote_harvest( # Stream a tarball back to the local machine (avoid creating a tar file on the remote). cmd = f"tar -cz -C {rbundle} ." 
- _stdin, stdout, stderr = ssh.exec_command(cmd) # nosec + _stdin, stdout, stderr = ssh.exec_command(cmd) # nosec with open(local_tgz, "wb") as f: while True: chunk = stdout.read(1024 * 128) diff --git a/enroll/sopsutil.py b/enroll/sopsutil.py index 6c0c881..de36d4f 100644 --- a/enroll/sopsutil.py +++ b/enroll/sopsutil.py @@ -2,7 +2,7 @@ from __future__ import annotations import os import shutil -import subprocess # nosec +import subprocess # nosec import tempfile from pathlib import Path from typing import Iterable, List, Optional @@ -62,7 +62,7 @@ def encrypt_file_binary( ], capture_output=True, check=False, - ) # nosec + ) # nosec if res.returncode != 0: raise SopsError( "sops encryption failed:\n" @@ -112,7 +112,7 @@ def decrypt_file_binary_to( ], capture_output=True, check=False, - ) # nosec + ) # nosec if res.returncode != 0: raise SopsError( "sops decryption failed:\n" From 591ecaa23569c78e9b473dce59ab592f4234500f Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 13:41:22 +1100 Subject: [PATCH 031/115] Add pre-commit config --- .pre-commit-config.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..2fd6c83 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +repos: + - repo: https://github.com/pycqa/flake8 + rev: 7.3.0 + hooks: + - id: flake8 + args: ["--select=F"] + types: [python] + + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 25.11.0 + hooks: + - id: black + language_version: python3 + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer From bfa2f4a7243daf9e585ffb36cba7623c5c746905 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 13:44:26 +1100 Subject: [PATCH 032/115] Add bandit to pre-commit --- .pre-commit-config.yaml | 5 +++++ 1 file changed, 5 insertions(+) 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2fd6c83..62c3791 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,3 +17,8 @@ repos: hooks: - id: trailing-whitespace - id: end-of-file-fixer + + - repo: https://github.com/PyCQA/bandit + rev: 1.9.2 + hooks: + - id: bandit From e94bd86c75531a82092db610ca1b6a7c88bbe707 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 13:45:59 +1100 Subject: [PATCH 033/115] Add files param to bandit pre-commit --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 62c3791..09c6889 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,3 +22,4 @@ repos: rev: 1.9.2 hooks: - id: bandit + files: ^enroll/ From 55e50ebf59f7e0a2a35ac44b62cb3f0229c75eed Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 13:50:00 +1100 Subject: [PATCH 034/115] Fix end of file/whitespace per pre-commit --- CHANGELOG.md | 2 +- README.md | 1 - enroll.svg | 1 - tests.sh | 4 ++-- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1505f1a..d8ca4b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,7 +39,7 @@ * Merge pkg_ and roles created based on file/service detection * Avoid idempotency issue with users (`password_lock`) - * Rename subcommands/args ('export' is now 'enroll', '--bundle' is now '--harvest') + * Rename subcommands/args ('export' is now 'enroll', '--bundle' is now '--harvest') * Don't try and start systemd services that were Inactive at harvest time * Capture miscellaneous files in /etc under their own `etc_custom` role, but not backup files * Add tests diff --git a/README.md b/README.md index cd3bba4..19b5377 100644 --- a/README.md +++ b/README.md @@ -357,4 +357,3 @@ My Forgejo doesn't yet support proper federation, and for that reason I've not o Instead, you can e-mail me (see the pyproject.toml for details) or contact me on the 
Fediverse: https://goto.mig5.net/@mig5 - diff --git a/enroll.svg b/enroll.svg index c986e1f..0ee1590 100644 --- a/enroll.svg +++ b/enroll.svg @@ -109,4 +109,3 @@ enroll - diff --git a/tests.sh b/tests.sh index ea7ad59..6becc39 100755 --- a/tests.sh +++ b/tests.sh @@ -15,10 +15,10 @@ poetry run \ --harvest "${BUNDLE_DIR}" \ --out "${ANSIBLE_DIR}" -builtin cd "${ANSIBLE_DIR}" +builtin cd "${ANSIBLE_DIR}" # Lint -ansible-lint "${ANSIBLE_DIR}" +ansible-lint "${ANSIBLE_DIR}" # Run ansible-playbook playbook.yml -i "localhost," -c local --check --diff From b5d2b99174a984ed941d5796a47f3d05e800bfb1 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 14:59:51 +1100 Subject: [PATCH 035/115] Add diff mode --- CHANGELOG.md | 5 + README.md | 388 +++++++++++------------- debian/changelog | 9 +- enroll/cli.py | 190 ++++++++++++ enroll/diff.py | 757 +++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 6 files changed, 1131 insertions(+), 220 deletions(-) create mode 100644 enroll/diff.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d8ca4b5..81eed41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.1 + + * Add `diff` subcommand which can compare two harvests and send email or webhook notifications in different + formats. + # 0.1.0 * Add remote mode for harvesting a remote machine via a local workstation (no need to install enroll remotely) diff --git a/README.md b/README.md index 19b5377..68c35e5 100644 --- a/README.md +++ b/README.md @@ -4,217 +4,175 @@ Enroll logo
-**enroll** inspects a Linux machine (currently Debian-only) and generates Ansible roles for things it finds running on the machine. +**enroll** inspects a Linux machine (currently Debian-only) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. It aims to be **optimistic and noninteractive**: -- Detects packages that have been installed -- Detects Debian package ownership of `/etc` files using dpkg's local database. +- Detects packages that have been installed. +- Detects Debian package ownership of `/etc` files using dpkg’s local database. - Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available). - Also captures **service-relevant custom/unowned files** under `/etc//...` (e.g. drop-in config includes). - Defensively excludes likely secrets (path denylist + content sniff + size caps). -- Captures non-system users that exist on the system, and their SSH public keys -- Captures miscellaneous `/etc` files that it can't attribute to a package, and installs it in an `etc_custom` role -- Avoids trying to start systemd services that were detected as being Inactive during harvest +- Captures non-system users and their SSH public keys. +- Captures miscellaneous `/etc` files it can’t attribute to a package and installs them in an `etc_custom` role. +- Avoids trying to start systemd services that were detected as inactive during harvest. --- -# Two modes: single-site vs multi-site (`--fqdn`) +## Mental model -**enroll** has two distinct ways to generate Ansible: +`enroll` works in two phases: -## 1) Single-site mode (default: *no* `--fqdn`) -Use this when you're enrolling **one server** (or you're generating a "golden" role set you intend to reuse). 
+1) **Harvest**: collect host facts + relevant files into a harvest bundle (`state.json` + harvested artifacts) +2) **Manifest**: turn that harvest into Ansible roles/playbooks (and optionally inventory) -**What you get** -- Config, templates, and defaults are primarily **contained inside each role**. -- Raw config files (when not templated) live in the role's `files/`. -- Template variables (when templated) live in the role's `defaults/main.yml`. +Additionally: -**Pros** -- Roles are more **self-contained** and easier to understand. -- Better starting point for **provisioning new servers**, because the role contains most of what it needs. -- Less inventory abstraction/duplication. +- **Diff**: compare two harvests and report what changed (packages/services/users/files) since the previous snapshot. -**Cons** -- Less convenient for quickly enrolling multiple hosts with divergent configs (you'll do more manual work to make roles flexible across hosts). +--- -## 2) Multi-site mode (`--fqdn`) -Use this when you want to enroll **several existing servers** quickly, especially if they differ. +## Output modes: single-site vs multi-site (`--fqdn`) -**What you get** -- Roles are **shared** across hosts, but host-specific data lives in inventory. -- Host inventory drives what's managed: - - which files to deploy for that host - - which packages are relevant for that host - - which services should be enabled/started for that host -- For non-templated config, raw files live in host-specific inventory under `.files/` (per role). +`enroll manifest` (and `enroll single-shot`) support two distinct output styles. -**Pros** -- Fastest way to retrofit **multiple servers** into config management. -- Avoids shared-role "host A breaks host B" problems by keeping host-specific state in inventory. -- Better fit when you already have a fleet and want to capture/reflect reality first. 
+### Single-site mode (default: *no* `--fqdn`) +Use when enrolling **one server** (or generating a “golden” role set you intend to reuse). -**Cons** -- More abstraction: roles become more "data-driven". -- Potential duplication: raw files may exist per-host in inventory (even if identical). -- Harder to use the roles to **provision a brand-new server** without also building an inventory for that new host, because multi-site output assumes the server already exists and is being retrofitted. +**Characteristics** +- Roles are more self-contained. +- Raw config files live in the role’s `files/`. +- Template variables live in the role’s `defaults/main.yml`. + +### Multi-site mode (`--fqdn`) +Use when enrolling **several existing servers** quickly, especially if they differ. + +**Characteristics** +- Roles are shared, host-specific state lives in inventory. +- Host inventory drives what gets managed (files/packages/services). +- Non-templated raw files live per-host under `inventory/host_vars///.files/...`. **Rule of thumb** -- If your goal is *"make this one server reproducible / provisionable"* → start with **single-site**. -- If your goal is *"get several already-running servers under management quickly"* → use **multi-site**. +- “Make this one server reproducible/provisionable” → start with **single-site** +- “Get multiple already-running servers under management quickly” → use **multi-site** --- -# Key concepts +## Subcommands -## Harvest +### `enroll harvest` +Harvest state about a host and write a harvest bundle. -**enroll** begins by 'harvesting' known state about your host. This includes detecting what running services exist, what packages have been installed 'manually' (that is, stuff that doesn't come out of the box with the OS), and anything 'custom' in `/etc` that it can't attribute to a specific package. 
+**What it captures (high level)** +- Detected services + service-relevant packages +- “Manual” packages +- Changed-from-default config (plus related custom/unowned files under service dirs) +- Non-system users + SSH public keys +- Misc `/etc` that can’t be attributed to a package (`etc_custom` role) -It also detects if any config files have been *changed* from their packaged defaults. If they have, it will attempt to 'harvest' them. If the config file is identical to how it comes with the package, then it doesn't bother harvesting it, because there's little value in config-managing it if it's identical to what you get by simply installing the package! +**Common flags** +- Remote harvesting: + - `--remote-host`, `--remote-user`, `--remote-port` + - `--no-sudo` (if you don’t want/need sudo) +- Sensitive-data behaviour: + - default: tries to avoid likely secrets + - `--dangerous`: disables secret-safety checks (see “Sensitive data” below) +- Encrypt bundles at rest: + - `--sops `: writes a single encrypted `harvest.tar.gz.sops` instead of a plaintext directory -The harvest writes a state.json file explaining all the data it harvested and, if it chose not to harvest something, explanations as to why that is the case (see below: sensitive data). +--- -### Remote harvesting (workstation → remote) +### `enroll manifest` +Generate Ansible output from an existing harvest bundle. -If you'd prefer not to install **enroll** on the target host, you can run the harvest over SSH from your workstation and pull the harvest bundle back locally: +**Inputs** +- `--harvest /path/to/harvest` (directory) + or `--harvest /path/to/harvest.tar.gz.sops` (if using `--sops`) -```bash -enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest -``` +**Output** +- In plaintext mode: an Ansible repo-like directory structure (roles/playbooks, and inventory in multi-site mode). 
+- In `--sops` mode: a single encrypted file `manifest.tar.gz.sops` containing the generated output. -- `--remote-port` defaults to `22` -- `--remote-user` defaults to your local `$USER` +**Common flags** +- `--fqdn `: enables **multi-site** output style -This uploads a self-contained `enroll` zipapp to a temporary directory on the remote host, runs `harvest` there, then downloads the resulting harvest bundle to the `--out` directory on your workstation. +--- -**Privilege note:** A "full" harvest typically needs root access. Remote harvesting assumes the remote user can run `sudo` **without a password prompt** (NOPASSWD) so the harvest can run non-interactively. If you don't want this, pass `--no-sudo` as well. +### `enroll single-shot` +Convenience wrapper that runs **harvest → manifest** in one command. + +Use this when you want “get me something workable ASAP”. + +Supports the same general flags as harvest/manifest, including `--fqdn`, remote harvest flags, and `--sops`. + +--- + +### `enroll diff` +Compare two harvest bundles and report what changed. + +**What it reports** +- Packages added/removed +- Services enabled added/removed, plus key state changes +- Users added/removed, plus field changes (uid/gid/home/shell/groups, etc.) 
+- Managed files added/removed/changed (metadata + content hash changes where available) + +**Inputs** +- `--old ` and `--new ` (directories or `state.json` paths) +- `--sops` when comparing SOPS-encrypted harvest bundles + +**Output formats** +- `--format json` (default for webhooks) +- `--format markdown` / `--format text` (human-oriented) + +**Notifications** +- Webhook: + - `--webhook ` + - `--webhook-format json|markdown|text` + - `--webhook-header 'Header-Name: value'` (repeatable) +- Email (optional): + - `--email-to ` (plus optional SMTP/sendmail-related flags, depending on your install) + +--- ## Sensitive data -**enroll** doesn't make any assumptions about how you might handle sensitive data from your config files, in Ansible. Some people might use SOPS, others might use Vault, others might do something else entirely. +By default, `enroll` does **not** assume how you handle secrets in Ansible. It will attempt to avoid harvesting likely sensitive data (private keys, passwords, tokens, etc.). This can mean it skips some config files you may ultimately want to manage. -For this reason, **enroll** will attempt to read config files, and if it detects data that looks like a sensitive SSH/SSL private key, or password, or API key, etc, then it won't harvest it for config management. +If you opt in to collecting everything: -This inevitably means that it will deliberately miss some important config files that you probably *want* to manage in Ansible. +### `--dangerous` +**WARNING:** disables “likely secret” safety checks. This can copy private keys, TLS key material, API tokens, database passwords, and other credentials into the harvest output **in plaintext**. -Nonetheless, in the Harvest 'state' file, there should be an explanation of 'excluded files'. You can parse or inspect this file to find what it chose to ignore, and then you know what you might want to augment the results with later, once you 'manifest' the harvest into Ansible configuration. 
+If you intend to keep harvests/manifests long-term (especially in git), strongly consider encrypting them at rest. -Nonetheless, in some cases it may be appropriate to truly grab as much as you can, including secrets. For that, read on for the `--dangerous` flag. +### Encrypt bundles at rest with `--sops` +`--sops` encrypts the harvest and/or manifest outputs into a single `.tar.gz.sops` file (GPG). This is for **storage-at-rest**, not for direct “Ansible SOPS inventory” workflows. -### Opting in to fetching sensitive data: `--dangerous` - -**WARNING:** `--dangerous` disables enroll's "likely a secret" safety checks. This can cause private keys, TLS key material, API tokens, database passwords, and other credentials to be copied into your harvest output **in plaintext**. - -Only use `--dangerous` if you explicitly want to scoop up sensitive files and you understand where the harvest output is stored, who can read it, and how it will be handled (backups, git commits, etc, as well as risk of using `--out` with a shared `/tmp` location where other users could see the data). We offer no liability if your sensitive data is compromised through the use of this tool! - -**Strong recommendation:** If you plan to keep harvested files long-term (especially in git), encrypt secrets at rest. A common approach is to use **SOPS** and then use the **community.sops** Ansible collection to load/decrypt encrypted content during deploy. - -Install the collection: - -```bash -ansible-galaxy collection install community.sops -``` - -Then you can use the collection's lookup/vars plugins or modules to decrypt or load SOPS-encrypted vars at runtime. - -Note the section below **also** talks about SOPS, but this is in the context of simply encrypting the data generated by `enroll` at rest for safe-keeping, **not** for direct integration with Ansible. 
- - -### Encrypting harvest/manifests at rest with `--sops` - -If you want to use `--dangerous` (or you simply want to keep the harvested artifacts private when they're sitting on disk, in git, etc), you can pass `--sops` to `harvest`, `manifest`, or `single-shot`. - -To use `--sops`, you will need to have [sops](https://github.com/getsops/sops) installed on your `$PATH`. - -- `--sops` expects one or more **GPG key fingerprints**. If `sops` is not on the `$PATH`, **enroll** will error. -- `harvest --sops ...` writes a *single* encrypted file (`harvest.tar.gz.sops`) instead of a plaintext directory. -- `manifest --sops ...` (and `single-shot --sops ...`) will: - - decrypt the harvest bundle with `sops -d` (if the `--harvest` input is an encrypted file), then generate manifests as normal - - bundle the entire generated Ansible output into a *single* encrypted file (`manifest.tar.gz.sops`) - -⚠️ **Important:** `manifest --sops` (and `single-shot --sops`) produces **one encrypted file**. It is **not** an Ansible repo you can point `ansible-playbook` at directly. It is **not** the same as using SOPS inventory with the Ansible SOPS collection. - -To use the encrypted SOPS manifest, decrypt and extract it first, then run Ansible from inside the extracted `manifest/` directory: - -```bash -sops -d /path/to/manifest.tar.gz.sops | tar -xzvf - -cd manifest -ansible-playbook ... -``` - -Example: - -```bash -# Harvest (encrypted-at-rest) -enroll harvest --out /tmp/enroll-harvest --dangerous --sops - -# Manifest (encrypted-at-rest) -enroll manifest --harvest /tmp/enroll-harvest/harvest.tar.gz.sops --out /tmp/enroll-ansible --sops - -# Decrypt/extract manifest output for inspection / ansible runs -cd /tmp/enroll-ansible -sops -d manifest.tar.gz.sops | tar -xzvf - -cd manifest -``` - -(If you want to manually inspect an encrypted harvest bundle, extract it into its own directory, e.g. `mkdir -p harvest && sops -d harvest.tar.gz.sops | tar -xzvf - -C harvest`.) 
- - - -## Manifest - -The 'manifest' subcommand expects to be given a path to the 'harvest' obtained in the first step. It will then attempt to generate Ansible roles and playbooks (and potentially 'inventory') from that harvest. - -Manifesting is the most complex step because a lot of people will have opinions on how Ansible roles and inventory should work. No solution is perfect for everyone. However, **enroll** tries to strike a reasonable balance. - -Remember, the purpose of this tool is to save **time** getting your systems into a decently-managed state. It's still up to you to wrangle it into a form that works for you on an ongoing basis. +⚠️ Important: `manifest --sops` produces one encrypted file. You must decrypt + extract it before running `ansible-playbook`. --- -# Single-shot mode for the impatient sysadmin +## JinjaTurtle integration (both modes) -**enroll** has a 'single-shot' subcommand which combines the two other phases (harvest and manifest) into one. Use it to generate both the harvest and then manifest ansible from that harvest all in one go. Perfect if you're in a hurry! +If [JinjaTurtle](https://git.mig5.net/mig5/jinjaturtle) is installed, `enroll` can generate Jinja2 templates for ini/json/xml/toml-style config. ---- - -# JinjaTurtle integration (both modes) - -If you also have my other tool [JinjaTurtle](https://git.mig5.net/mig5/jinjaturtle) installed, **enroll** will attempt to create Jinja2 templates for any ini/json/xml/toml style configuration that it finds. - -- Templates live in the **role** (`roles//templates/...`) +- Templates live in `roles//templates/...` - Variables live in: - - **single-site**: `roles//defaults/main.yml` - - **multi-site** (`--fqdn`): `inventory/host_vars//.yml` + - single-site: `roles//defaults/main.yml` + - multi-site: `inventory/host_vars//.yml` -JinjaTurtle will be used automatically if it is detected on the `$PATH`. 
You can also be explicit and pass `--jinjaturtle`, but this will throw an error if JinjaTurtle is not on the `$PATH`. - -If you *do* have JinjaTurtle installed, but *don't* wish to make use of it, you can use `--no-jinjaturtle`, in which case all config files will be kept as 'raw' files. +You can force it on with `--jinjaturtle` or disable with `--no-jinjaturtle`. --- -# How multi-site avoids "shared role breaks a host" +## How multi-site avoids “shared role breaks a host” -In multi-site mode, **roles are data-driven**. The role contains generic tasks like: - -- "deploy all files listed for this host" -- "install packages listed for this host" -- "apply systemd enable/start state listed for this host" - -The host inventory is what decides which files/packages/services apply to that host. This prevents the classic failure mode where host2 adds a config file to a shared role and host1 then fails trying to deploy a file it never had. - -Raw non-templated files are stored under: - -- `inventory/host_vars///.files/...` - -…and the host's role variables describe which of those files should be deployed. +In multi-site mode, roles are **data-driven**. The role tasks are generic (“deploy the files listed for this host”, “install the packages listed for this host”, “apply systemd enable/start state listed for this host”). Host inventory decides what applies per-host, avoiding the classic “host2 adds config, host1 breaks” failure mode. 
--- # Install ## Ubuntu/Debian apt repository - ```bash sudo mkdir -p /usr/share/keyrings curl -fsSL https://mig5.net/static/mig5.asc | sudo gpg --dearmor -o /usr/share/keyrings/mig5.gpg @@ -224,25 +182,19 @@ sudo apt install enroll ``` ## AppImage - -Download the AppImage file from the Releases page (verify with GPG if you wish, my fingerprint is [here](https://mig5.net/static/mig5.asc)), -then make it executable and run it: +Download it from my Releases page, then: ```bash chmod +x Enroll.AppImage ./Enroll.AppImage ``` -### Pip/PipX - +## Pip/PipX ```bash pip install enroll ``` -### Poetry - -Clone this repository with git, then: - +## Poetry (dev) ```bash poetry install poetry run enroll --help @@ -250,110 +202,110 @@ poetry run enroll --help --- -# Usage +## Found a bug / have a suggestion? -## 1. Harvest state/information about the host +My Forgejo doesn’t currently support federation, so I haven’t opened registration/login for issues. -On the host (root recommended to harvest as much data as possible): +Instead, email me (see `pyproject.toml`) or contact me on the Fediverse: +https://goto.mig5.net/@mig5 + +--- + +# Examples + +## Harvest + +### Local harvest ```bash enroll harvest --out /tmp/enroll-harvest ``` -### Remote harvest over SSH (no enroll install required on the remote host, no need for --out) +### Remote harvest over SSH ```bash -enroll harvest --remote-host myhost.example.com --remote-user myuser +enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest ``` -### `--dangerous` (captures potentially sensitive files — read the warning above) - +### `--dangerous` ```bash enroll harvest --out /tmp/enroll-harvest --dangerous ``` -Remote + dangerous: - +### Remote + dangerous: ```bash enroll harvest --remote-host myhost.example.com --remote-user myuser --dangerous ``` -### `--sops` (encrypt bundles at rest) - -`--sops` bundles and encrypts the output as a single SOPS-encrypted `.tar.gz.sops` file (GPG). 
This is particularly useful if you're using `--dangerous`. - +### `--sops` (encrypt at rest) ```bash # Encrypted harvest bundle (writes /tmp/enroll-harvest/harvest.tar.gz.sops) enroll harvest --out /tmp/enroll-harvest --dangerous --sops +``` -# Encrypted manifest bundle (writes /tmp/enroll-ansible/manifest.tar.gz.sops) +--- + +## Manifest + +### Single-site (default: no --fqdn) +```bash +enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible +``` + +### Multi-site (--fqdn) +```bash +enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "$(hostname -f)" +``` + +### Manifest with `--sops` +```bash +# Generate encrypted manifest bundle (writes /tmp/enroll-ansible/manifest.tar.gz.sops) enroll manifest --harvest /tmp/enroll-harvest/harvest.tar.gz.sops --out /tmp/enroll-ansible --sops # Decrypt/extract the manifest bundle, then run Ansible from inside ./manifest/ cd /tmp/enroll-ansible sops -d manifest.tar.gz.sops | tar -xzvf - cd manifest -ansible-playbook ./playbook.yml ``` - -## 2. 
Generate Ansible manifests (roles/playbook) from that harvest - -### Single-site (default: no --fqdn) - -Good for one server, or for producing roles you want to reuse to provision new machines: - -```bash -enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible -``` - -### Multi-site (--fqdn) - -Best when enrolling multiple already-running servers into one repo: - -```bash -enroll manifest --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "$(hostname -f)" -``` +--- ## Single-shot -Alternatively, do both steps in one shot: - ```bash enroll single-shot --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "$(hostname -f)" ``` -Remote single-shot (run harvest over SSH, then manifest locally): +Remote single-shot (run harvest over SSH, then manifest locally): ```bash -enroll single-shot --remote-host myhost.example.com --remote-user myuser --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "myhost.example.com" +enroll single-shot --remote-host myhost.example.com --remote-user myuser --harvest /tmp/enroll-harvest --out /tmp/enroll-ansible --fqdn "myhost.example.com" ``` -In multi-site mode (`--fqdn`), you can run single-shot repeatedly against multiple hosts while reusing the same `--out` directory so each host merges into the existing Ansible repo. +--- +## Diff -## 3. Run Ansible +### Compare two harvest directories +```bash +enroll diff --old /path/to/harvestA --new /path/to/harvestB --format json +``` + +### Diff + webhook notify +```bash +enroll diff --old /path/to/golden/harvest --new /path/to/new/harvest --webhook https://nr.mig5.net/forms/webhooks/xxxx --webhook-format json --webhook-header 'X-Enroll-Secret: xxxx' +``` + +`diff` mode also supports email sending and text or markdown format, as well as `--exit-code` mode to trigger a return code of 2 (useful for crons or CI) + +--- + +## Run Ansible ### Single-site - -You can run it however you prefer (local connection or your own inventory). 
Example: - ```bash ansible-playbook -i "localhost," -c local /tmp/enroll-ansible/playbook.yml ``` ### Multi-site (--fqdn) - -In multi-site mode, enroll generates an ansible.cfg, `host_vars` inventory, and a host-specific playbook: - ```bash ansible-playbook /tmp/enroll-ansible/playbooks/"$(hostname -f)".yml ``` - ---- - -# Found a bug, have a suggestion? - -My Forgejo doesn't yet support proper federation, and for that reason I've not opened up registration/login to use the issue queue. - -Instead, you can e-mail me (see the pyproject.toml for details) or contact me on the Fediverse: - -https://goto.mig5.net/@mig5 diff --git a/debian/changelog b/debian/changelog index 16f7a0d..0cc5861 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.1.1) unstable; urgency=medium + + * Add `diff` subcommand which can compare two harvests and send email or webhook notifications in different + formats. + + -- Miguel Jacq Thu, 18 Dec 2025 15:00:00 +1100 + enroll (0.1.0) unstable; urgency=medium * Add remote mode for harvesting a remote machine via a local workstation (no need to install enroll remotely) @@ -12,7 +19,7 @@ enroll (0.1.0) unstable; urgency=medium ship or manage those files. 
* Don't collect files ending in `.log` - -- Miguel Jacq Tue, 17 Dec 2025 18:00:00 +1100 + -- Miguel Jacq Wed, 17 Dec 2025 18:00:00 +1100 enroll (0.0.5) unstable; urgency=medium diff --git a/enroll/cli.py b/enroll/cli.py index 60e48a2..2d8ed5e 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -8,6 +8,7 @@ from pathlib import Path from typing import Optional from .cache import new_harvest_cache_dir +from .diff import compare_harvests, format_report, post_webhook, send_email from .harvest import harvest from .manifest import manifest from .remote import remote_harvest @@ -211,6 +212,90 @@ def main() -> None: _add_common_manifest_args(s) _add_remote_args(s) + d = sub.add_parser("diff", help="Compare two harvests and report differences") + d.add_argument( + "--old", + required=True, + help=( + "Old/baseline harvest (directory, a path to state.json, a tarball, or a SOPS-encrypted bundle)." + ), + ) + d.add_argument( + "--new", + required=True, + help=( + "New/current harvest (directory, a path to state.json, a tarball, or a SOPS-encrypted bundle)." 
+ ), + ) + d.add_argument( + "--sops", + action="store_true", + help="Allow SOPS-encrypted harvest bundle inputs (requires `sops` on PATH).", + ) + d.add_argument( + "--format", + choices=["text", "markdown", "json"], + default="text", + help="Report output format (default: text).", + ) + d.add_argument( + "--out", + help="Write the report to this file instead of stdout.", + ) + d.add_argument( + "--exit-code", + action="store_true", + help="Exit with status 2 if differences are detected.", + ) + d.add_argument( + "--notify-always", + action="store_true", + help="Send webhook/email even when there are no differences.", + ) + d.add_argument( + "--webhook", + help="POST the report to this URL (only when differences are detected, unless --notify-always).", + ) + d.add_argument( + "--webhook-format", + choices=["json", "text", "markdown"], + default="json", + help="Payload format for --webhook (default: json).", + ) + d.add_argument( + "--webhook-header", + action="append", + default=[], + metavar="K:V", + help="Extra HTTP header for --webhook (repeatable), e.g. 'Authorization: Bearer ...'.", + ) + d.add_argument( + "--email-to", + action="append", + default=[], + help="Email the report to this address (repeatable; only when differences are detected unless --notify-always).", + ) + d.add_argument( + "--email-from", + help="From address for --email-to (default: enroll@).", + ) + d.add_argument( + "--email-subject", + help="Subject for --email-to (default: 'enroll diff report').", + ) + d.add_argument( + "--smtp", + help="SMTP server host[:port] for --email-to. 
If omitted, uses local sendmail.", + ) + d.add_argument( + "--smtp-user", + help="SMTP username (optional).", + ) + d.add_argument( + "--smtp-password-env", + help="Environment variable containing SMTP password (optional).", + ) + args = ap.parse_args() remote_host: Optional[str] = getattr(args, "remote_host", None) @@ -287,6 +372,61 @@ def main() -> None: ) if getattr(args, "sops", None) and out_enc: print(str(out_enc)) + elif args.cmd == "diff": + report, has_changes = compare_harvests( + args.old, + args.new, + sops_mode=bool(getattr(args, "sops", False)), + ) + + txt = format_report(report, fmt=str(getattr(args, "format", "text"))) + out_path = getattr(args, "out", None) + if out_path: + p = Path(out_path).expanduser() + p.parent.mkdir(parents=True, exist_ok=True) + p.write_text(txt, encoding="utf-8") + else: + print(txt, end="" if txt.endswith("\n") else "\n") + + should_notify = has_changes or bool(getattr(args, "notify_always", False)) + + webhook = getattr(args, "webhook", None) + if webhook and should_notify: + wf = str(getattr(args, "webhook_format", "json")) + payload = format_report(report, fmt=wf) + body = payload.encode("utf-8") + headers = {} + if wf == "json": + headers["Content-Type"] = "application/json" + else: + headers["Content-Type"] = "text/plain; charset=utf-8" + for hv in getattr(args, "webhook_header", []) or []: + if ":" in hv: + k, v = hv.split(":", 1) + headers[k.strip()] = v.strip() + status, _resp = post_webhook(webhook, body, headers=headers) + if status and status >= 400: + raise SystemExit(f"error: webhook returned HTTP {status}") + + to_addrs = getattr(args, "email_to", []) or [] + if to_addrs and should_notify: + subject = getattr(args, "email_subject", None) or "enroll diff report" + smtp_pw = None + pw_env = getattr(args, "smtp_password_env", None) + if pw_env: + smtp_pw = os.environ.get(str(pw_env)) + send_email( + to_addrs=list(to_addrs), + subject=str(subject), + body=txt, + from_addr=getattr(args, "email_from", None), + 
smtp=getattr(args, "smtp", None), + smtp_user=getattr(args, "smtp_user", None), + smtp_password=smtp_pw, + ) + + if getattr(args, "exit_code", False) and has_changes: + raise SystemExit(2) elif args.cmd == "single-shot": sops_fps = getattr(args, "sops", None) if remote_host: @@ -379,5 +519,55 @@ def main() -> None: fqdn=args.fqdn, jinjaturtle=_jt_mode(args), ) + elif args.cmd == "diff": + report, has_changes = compare_harvests( + args.old, args.new, sops_mode=bool(getattr(args, "sops", False)) + ) + + rendered = format_report(report, fmt=str(args.format)) + if args.out: + Path(args.out).expanduser().write_text(rendered, encoding="utf-8") + else: + print(rendered, end="") + + do_notify = bool(has_changes or getattr(args, "notify_always", False)) + + if do_notify and getattr(args, "webhook", None): + wf = str(getattr(args, "webhook_format", "json")) + body = format_report(report, fmt=wf).encode("utf-8") + headers = {"User-Agent": "enroll"} + if wf == "json": + headers["Content-Type"] = "application/json" + else: + headers["Content-Type"] = "text/plain; charset=utf-8" + for hv in getattr(args, "webhook_header", []) or []: + if ":" not in hv: + raise SystemExit( + "error: --webhook-header must be in the form 'K:V'" + ) + k, v = hv.split(":", 1) + headers[k.strip()] = v.strip() + status, _ = post_webhook(str(args.webhook), body, headers=headers) + if status and status >= 400: + raise SystemExit(f"error: webhook returned HTTP {status}") + + if do_notify and (getattr(args, "email_to", []) or []): + subject = getattr(args, "email_subject", None) or "enroll diff report" + smtp_password = None + pw_env = getattr(args, "smtp_password_env", None) + if pw_env: + smtp_password = os.environ.get(str(pw_env)) + send_email( + to_addrs=list(getattr(args, "email_to", []) or []), + subject=str(subject), + body=rendered, + from_addr=getattr(args, "email_from", None), + smtp=getattr(args, "smtp", None), + smtp_user=getattr(args, "smtp_user", None), + smtp_password=smtp_password, + ) + + 
if getattr(args, "exit_code", False) and has_changes: + raise SystemExit(2) except SopsError as e: raise SystemExit(f"error: {e}") diff --git a/enroll/diff.py b/enroll/diff.py new file mode 100644 index 0000000..9b396fc --- /dev/null +++ b/enroll/diff.py @@ -0,0 +1,757 @@ +from __future__ import annotations + +import hashlib +import json +import os +import shutil +import subprocess # nosec +import tarfile +import tempfile +import urllib.request +from contextlib import ExitStack +from dataclasses import dataclass +from datetime import datetime, timezone +from email.message import EmailMessage +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Tuple + +from .remote import _safe_extract_tar +from .sopsutil import decrypt_file_binary_to, require_sops_cmd + + +def _utc_now_iso() -> str: + return datetime.now(tz=timezone.utc).isoformat() + + +def _sha256(path: Path) -> str: + h = hashlib.sha256() + with open(path, "rb") as f: + while True: + chunk = f.read(1024 * 1024) + if not chunk: + break + h.update(chunk) + return h.hexdigest() + + +@dataclass +class BundleRef: + """A prepared harvest bundle. + + `dir` is a directory containing state.json + artifacts/. + `tempdir` is set when the bundle needed extraction into a temp directory. + """ + + dir: Path + tempdir: Optional[tempfile.TemporaryDirectory] = None + + @property + def state_path(self) -> Path: + return self.dir / "state.json" + + +def _bundle_from_input(path: str, *, sops_mode: bool) -> BundleRef: + """Resolve a user-supplied path to a harvest bundle directory. + + Accepts: + - a bundle directory + - a path to state.json inside a bundle directory + - (sops mode or .sops) a SOPS-encrypted tar.gz bundle + - a plain tar.gz/tgz bundle + """ + + p = Path(path).expanduser() + + # Accept the state.json path directly (harvest often prints this). 
+ if p.is_file() and p.name == "state.json": + p = p.parent + + if p.is_dir(): + return BundleRef(dir=p) + + if not p.exists(): + raise RuntimeError(f"Harvest path not found: {p}") + + # Auto-enable sops mode if it looks like an encrypted bundle. + is_sops = p.name.endswith(".sops") + if sops_mode or is_sops: + require_sops_cmd() + td = tempfile.TemporaryDirectory(prefix="enroll-harvest-") + td_path = Path(td.name) + try: + os.chmod(td_path, 0o700) + except OSError: + pass + + tar_path = td_path / "harvest.tar.gz" + out_dir = td_path / "bundle" + out_dir.mkdir(parents=True, exist_ok=True) + try: + os.chmod(out_dir, 0o700) + except OSError: + pass + + decrypt_file_binary_to(p, tar_path, mode=0o600) + with tarfile.open(tar_path, mode="r:gz") as tf: + _safe_extract_tar(tf, out_dir) + + return BundleRef(dir=out_dir, tempdir=td) + + # Plain tarballs (useful for operators who rsync/zip harvests around). + if p.suffixes[-2:] == [".tar", ".gz"] or p.suffix == ".tgz": + td = tempfile.TemporaryDirectory(prefix="enroll-harvest-") + td_path = Path(td.name) + try: + os.chmod(td_path, 0o700) + except OSError: + pass + out_dir = td_path / "bundle" + out_dir.mkdir(parents=True, exist_ok=True) + try: + os.chmod(out_dir, 0o700) + except OSError: + pass + with tarfile.open(p, mode="r:gz") as tf: + _safe_extract_tar(tf, out_dir) + return BundleRef(dir=out_dir, tempdir=td) + + raise RuntimeError( + f"Harvest path is not a directory, state.json, encrypted bundle, or tarball: {p}" + ) + + +def _load_state(bundle_dir: Path) -> Dict[str, Any]: + sp = bundle_dir / "state.json" + with open(sp, "r", encoding="utf-8") as f: + return json.load(f) + + +def _all_packages(state: Dict[str, Any]) -> List[str]: + pkgs = set(state.get("manual_packages", []) or []) + pkgs |= set(state.get("manual_packages_skipped", []) or []) + for s in state.get("services", []) or []: + for p in s.get("packages", []) or []: + pkgs.add(p) + return sorted(pkgs) + + +def _service_units(state: Dict[str, Any]) -> Dict[str, 
Dict[str, Any]]: + out: Dict[str, Dict[str, Any]] = {} + for s in state.get("services", []) or []: + unit = s.get("unit") + if unit: + out[str(unit)] = s + return out + + +def _users_by_name(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: + users = (state.get("users") or {}).get("users") or [] + out: Dict[str, Dict[str, Any]] = {} + for u in users: + name = u.get("name") + if name: + out[str(name)] = u + return out + + +@dataclass(frozen=True) +class FileRec: + path: str + role: str + src_rel: str + owner: Optional[str] + group: Optional[str] + mode: Optional[str] + reason: Optional[str] + + +def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, Any]]]: + # Services + for s in state.get("services", []) or []: + role = s.get("role_name") or "unknown" + for mf in s.get("managed_files", []) or []: + yield str(role), mf + + # Package roles + for p in state.get("package_roles", []) or []: + role = p.get("role_name") or "unknown" + for mf in p.get("managed_files", []) or []: + yield str(role), mf + + # Users + u = state.get("users") or {} + u_role = u.get("role_name") or "users" + for mf in u.get("managed_files", []) or []: + yield str(u_role), mf + + # etc_custom + ec = state.get("etc_custom") or {} + ec_role = ec.get("role_name") or "etc_custom" + for mf in ec.get("managed_files", []) or []: + yield str(ec_role), mf + + +def _file_index(bundle_dir: Path, state: Dict[str, Any]) -> Dict[str, FileRec]: + """Return mapping of absolute path -> FileRec. + + If duplicates occur, the first one wins (should be rare by design). 
+ """ + + out: Dict[str, FileRec] = {} + for role, mf in _iter_managed_files(state): + p = mf.get("path") + src_rel = mf.get("src_rel") + if not p or not src_rel: + continue + p = str(p) + if p in out: + continue + out[p] = FileRec( + path=p, + role=str(role), + src_rel=str(src_rel), + owner=mf.get("owner"), + group=mf.get("group"), + mode=mf.get("mode"), + reason=mf.get("reason"), + ) + return out + + +def _artifact_path(bundle_dir: Path, rec: FileRec) -> Path: + return bundle_dir / "artifacts" / rec.role / rec.src_rel + + +def compare_harvests( + old_path: str, + new_path: str, + *, + sops_mode: bool = False, +) -> Tuple[Dict[str, Any], bool]: + """Compare two harvests. + + Returns (report, has_changes). + """ + with ExitStack() as stack: + old_b = _bundle_from_input(old_path, sops_mode=sops_mode) + new_b = _bundle_from_input(new_path, sops_mode=sops_mode) + if old_b.tempdir: + stack.callback(old_b.tempdir.cleanup) + if new_b.tempdir: + stack.callback(new_b.tempdir.cleanup) + + old_state = _load_state(old_b.dir) + new_state = _load_state(new_b.dir) + + old_pkgs = set(_all_packages(old_state)) + new_pkgs = set(_all_packages(new_state)) + + pkgs_added = sorted(new_pkgs - old_pkgs) + pkgs_removed = sorted(old_pkgs - new_pkgs) + + old_units = _service_units(old_state) + new_units = _service_units(new_state) + units_added = sorted(set(new_units) - set(old_units)) + units_removed = sorted(set(old_units) - set(new_units)) + + units_changed: List[Dict[str, Any]] = [] + for unit in sorted(set(old_units) & set(new_units)): + a = old_units[unit] + b = new_units[unit] + ch: Dict[str, Any] = {} + for k in [ + "active_state", + "sub_state", + "unit_file_state", + "condition_result", + ]: + if a.get(k) != b.get(k): + ch[k] = {"old": a.get(k), "new": b.get(k)} + a_pk = set(a.get("packages", []) or []) + b_pk = set(b.get("packages", []) or []) + if a_pk != b_pk: + ch["packages"] = { + "added": sorted(b_pk - a_pk), + "removed": sorted(a_pk - b_pk), + } + if ch: + 
units_changed.append({"unit": unit, "changes": ch}) + + old_users = _users_by_name(old_state) + new_users = _users_by_name(new_state) + users_added = sorted(set(new_users) - set(old_users)) + users_removed = sorted(set(old_users) - set(new_users)) + + users_changed: List[Dict[str, Any]] = [] + for name in sorted(set(old_users) & set(new_users)): + a = old_users[name] + b = new_users[name] + ch: Dict[str, Any] = {} + for k in [ + "uid", + "gid", + "gecos", + "home", + "shell", + "primary_group", + ]: + if a.get(k) != b.get(k): + ch[k] = {"old": a.get(k), "new": b.get(k)} + a_sg = set(a.get("supplementary_groups", []) or []) + b_sg = set(b.get("supplementary_groups", []) or []) + if a_sg != b_sg: + ch["supplementary_groups"] = { + "added": sorted(b_sg - a_sg), + "removed": sorted(a_sg - b_sg), + } + if ch: + users_changed.append({"name": name, "changes": ch}) + + old_files = _file_index(old_b.dir, old_state) + new_files = _file_index(new_b.dir, new_state) + old_paths_set = set(old_files) + new_paths_set = set(new_files) + + files_added = sorted(new_paths_set - old_paths_set) + files_removed = sorted(old_paths_set - new_paths_set) + + # Hash cache to avoid reading the same file more than once. + hash_cache: Dict[str, str] = {} + + def _hash_for(bundle_dir: Path, rec: FileRec) -> Optional[str]: + ap = _artifact_path(bundle_dir, rec) + if not ap.exists() or not ap.is_file(): + return None + key = str(ap) + if key in hash_cache: + return hash_cache[key] + hash_cache[key] = _sha256(ap) + return hash_cache[key] + + files_changed: List[Dict[str, Any]] = [] + for p in sorted(old_paths_set & new_paths_set): + a = old_files[p] + b = new_files[p] + ch: Dict[str, Any] = {} + + # Role movement is itself interesting (e.g., file ownership attribution changed). 
+ if a.role != b.role: + ch["role"] = {"old": a.role, "new": b.role} + for k in ["owner", "group", "mode", "reason"]: + av = getattr(a, k) + bv = getattr(b, k) + if av != bv: + ch[k] = {"old": av, "new": bv} + + ha = _hash_for(old_b.dir, a) + hb = _hash_for(new_b.dir, b) + if ha is None or hb is None: + if ha != hb: + ch["content"] = { + "old": "missing" if ha is None else "present", + "new": "missing" if hb is None else "present", + } + else: + if ha != hb: + ch["content"] = {"old_sha256": ha, "new_sha256": hb} + + if ch: + files_changed.append({"path": p, "changes": ch}) + + has_changes = any( + [ + pkgs_added, + pkgs_removed, + units_added, + units_removed, + units_changed, + users_added, + users_removed, + users_changed, + files_added, + files_removed, + files_changed, + ] + ) + + def _mtime_iso(p: Path) -> Optional[str]: + try: + ts = p.stat().st_mtime + except OSError: + return None + return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat() + + report: Dict[str, Any] = { + "generated_at": _utc_now_iso(), + "old": { + "input": old_path, + "bundle_dir": str(old_b.dir), + "state_mtime": _mtime_iso(old_b.state_path), + "host": (old_state.get("host") or {}).get("hostname"), + }, + "new": { + "input": new_path, + "bundle_dir": str(new_b.dir), + "state_mtime": _mtime_iso(new_b.state_path), + "host": (new_state.get("host") or {}).get("hostname"), + }, + "packages": {"added": pkgs_added, "removed": pkgs_removed}, + "services": { + "enabled_added": units_added, + "enabled_removed": units_removed, + "changed": units_changed, + }, + "users": { + "added": users_added, + "removed": users_removed, + "changed": users_changed, + }, + "files": { + "added": [ + { + "path": p, + "role": new_files[p].role, + "reason": new_files[p].reason, + } + for p in files_added + ], + "removed": [ + { + "path": p, + "role": old_files[p].role, + "reason": old_files[p].reason, + } + for p in files_removed + ], + "changed": files_changed, + }, + } + + return report, has_changes + + +def 
format_report(report: Dict[str, Any], *, fmt: str = "text") -> str: + fmt = (fmt or "text").lower() + if fmt == "json": + return json.dumps(report, indent=2, sort_keys=True) + if fmt == "markdown": + return _report_markdown(report) + return _report_text(report) + + +def _report_text(report: Dict[str, Any]) -> str: + lines: List[str] = [] + old = report.get("old", {}) + new = report.get("new", {}) + lines.append( + f"enroll diff report (generated {report.get('generated_at')})\n" + f"old: {old.get('input')} (host={old.get('host')}, state_mtime={old.get('state_mtime')})\n" + f"new: {new.get('input')} (host={new.get('host')}, state_mtime={new.get('state_mtime')})" + ) + + pk = report.get("packages", {}) + lines.append("\nPackages") + lines.append(f" added: {len(pk.get('added', []) or [])}") + lines.append(f" removed: {len(pk.get('removed', []) or [])}") + for p in pk.get("added", []) or []: + lines.append(f" + {p}") + for p in pk.get("removed", []) or []: + lines.append(f" - {p}") + + sv = report.get("services", {}) + lines.append("\nServices (enabled systemd units)") + for u in sv.get("enabled_added", []) or []: + lines.append(f" + {u}") + for u in sv.get("enabled_removed", []) or []: + lines.append(f" - {u}") + for ch in sv.get("changed", []) or []: + unit = ch.get("unit") + lines.append(f" * {unit} changed") + for k, v in (ch.get("changes") or {}).items(): + if k == "packages": + a = (v or {}).get("added", []) + r = (v or {}).get("removed", []) + if a: + lines.append(f" packages +: {', '.join(a)}") + if r: + lines.append(f" packages -: {', '.join(r)}") + else: + lines.append(f" {k}: {v.get('old')} -> {v.get('new')}") + + us = report.get("users", {}) + lines.append("\nUsers") + for u in us.get("added", []) or []: + lines.append(f" + {u}") + for u in us.get("removed", []) or []: + lines.append(f" - {u}") + for ch in us.get("changed", []) or []: + name = ch.get("name") + lines.append(f" * {name} changed") + for k, v in (ch.get("changes") or {}).items(): + if k == 
"supplementary_groups": + a = (v or {}).get("added", []) + r = (v or {}).get("removed", []) + if a: + lines.append(f" groups +: {', '.join(a)}") + if r: + lines.append(f" groups -: {', '.join(r)}") + else: + lines.append(f" {k}: {v.get('old')} -> {v.get('new')}") + + fl = report.get("files", {}) + lines.append("\nFiles") + for e in fl.get("added", []) or []: + lines.append( + f" + {e.get('path')} (role={e.get('role')}, reason={e.get('reason')})" + ) + for e in fl.get("removed", []) or []: + lines.append( + f" - {e.get('path')} (role={e.get('role')}, reason={e.get('reason')})" + ) + for ch in fl.get("changed", []) or []: + p = ch.get("path") + lines.append(f" * {p} changed") + for k, v in (ch.get("changes") or {}).items(): + if k == "content": + if "old_sha256" in (v or {}): + lines.append(" content: sha256 changed") + else: + lines.append(f" content: {v.get('old')} -> {v.get('new')}") + else: + lines.append(f" {k}: {v.get('old')} -> {v.get('new')}") + + if not any( + [ + (pk.get("added") or []), + (pk.get("removed") or []), + (sv.get("enabled_added") or []), + (sv.get("enabled_removed") or []), + (sv.get("changed") or []), + (us.get("added") or []), + (us.get("removed") or []), + (us.get("changed") or []), + (fl.get("added") or []), + (fl.get("removed") or []), + (fl.get("changed") or []), + ] + ): + lines.append("\nNo differences detected.") + + return "\n".join(lines) + "\n" + + +def _report_markdown(report: Dict[str, Any]) -> str: + old = report.get("old", {}) + new = report.get("new", {}) + out: List[str] = [] + out.append("# enroll diff report\n") + out.append(f"Generated: `{report.get('generated_at')}`\n") + out.append( + f"- **Old**: `{old.get('input')}` (host={old.get('host')}, state_mtime={old.get('state_mtime')})\n" + f"- **New**: `{new.get('input')}` (host={new.get('host')}, state_mtime={new.get('state_mtime')})\n" + ) + + pk = report.get("packages", {}) + out.append("## Packages\n") + out.append(f"- Added: {len(pk.get('added', []) or [])}\n") + for p in 
pk.get("added", []) or []: + out.append(f" - `+ {p}`\n") + out.append(f"- Removed: {len(pk.get('removed', []) or [])}\n") + for p in pk.get("removed", []) or []: + out.append(f" - `- {p}`\n") + + sv = report.get("services", {}) + out.append("## Services (enabled systemd units)\n") + if sv.get("enabled_added"): + out.append("- Enabled added\n") + for u in sv.get("enabled_added", []) or []: + out.append(f" - `+ {u}`\n") + if sv.get("enabled_removed"): + out.append("- Enabled removed\n") + for u in sv.get("enabled_removed", []) or []: + out.append(f" - `- {u}`\n") + if sv.get("changed"): + out.append("- Changed\n") + for ch in sv.get("changed", []) or []: + unit = ch.get("unit") + out.append(f" - `{unit}`\n") + for k, v in (ch.get("changes") or {}).items(): + if k == "packages": + a = (v or {}).get("added", []) + r = (v or {}).get("removed", []) + if a: + out.append( + f" - packages added: {', '.join('`'+x+'`' for x in a)}\n" + ) + if r: + out.append( + f" - packages removed: {', '.join('`'+x+'`' for x in r)}\n" + ) + else: + out.append(f" - {k}: `{v.get('old')}` → `{v.get('new')}`\n") + + us = report.get("users", {}) + out.append("## Users\n") + if us.get("added"): + out.append("- Added\n") + for u in us.get("added", []) or []: + out.append(f" - `+ {u}`\n") + if us.get("removed"): + out.append("- Removed\n") + for u in us.get("removed", []) or []: + out.append(f" - `- {u}`\n") + if us.get("changed"): + out.append("- Changed\n") + for ch in us.get("changed", []) or []: + name = ch.get("name") + out.append(f" - `{name}`\n") + for k, v in (ch.get("changes") or {}).items(): + if k == "supplementary_groups": + a = (v or {}).get("added", []) + r = (v or {}).get("removed", []) + if a: + out.append( + f" - groups added: {', '.join('`'+x+'`' for x in a)}\n" + ) + if r: + out.append( + f" - groups removed: {', '.join('`'+x+'`' for x in r)}\n" + ) + else: + out.append(f" - {k}: `{v.get('old')}` → `{v.get('new')}`\n") + + fl = report.get("files", {}) + out.append("## Files\n") + 
if fl.get("added"): + out.append("- Added\n") + for e in fl.get("added", []) or []: + out.append( + f" - `+ {e.get('path')}` (role={e.get('role')}, reason={e.get('reason')})\n" + ) + if fl.get("removed"): + out.append("- Removed\n") + for e in fl.get("removed", []) or []: + out.append( + f" - `- {e.get('path')}` (role={e.get('role')}, reason={e.get('reason')})\n" + ) + if fl.get("changed"): + out.append("- Changed\n") + for ch in fl.get("changed", []) or []: + p = ch.get("path") + out.append(f" - `{p}`\n") + for k, v in (ch.get("changes") or {}).items(): + if k == "content": + if "old_sha256" in (v or {}): + out.append(" - content: sha256 changed\n") + else: + out.append( + f" - content: `{v.get('old')}` → `{v.get('new')}`\n" + ) + else: + out.append(f" - {k}: `{v.get('old')}` → `{v.get('new')}`\n") + + if not any( + [ + (pk.get("added") or []), + (pk.get("removed") or []), + (sv.get("enabled_added") or []), + (sv.get("enabled_removed") or []), + (sv.get("changed") or []), + (us.get("added") or []), + (us.get("removed") or []), + (us.get("changed") or []), + (fl.get("added") or []), + (fl.get("removed") or []), + (fl.get("changed") or []), + ] + ): + out.append("\n_No differences detected._\n") + + return "".join(out) + + +def post_webhook( + url: str, + body: bytes, + *, + headers: Optional[Dict[str, str]] = None, + timeout_s: int = 10, +) -> Tuple[int, str]: + req = urllib.request.Request(url=url, data=body, method="POST") + for k, v in (headers or {}).items(): + req.add_header(k, v) + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: # nosec + status = int(getattr(resp, "status", 0) or 0) + text = resp.read().decode("utf-8", errors="replace") + return status, text + except Exception as e: + raise RuntimeError(f"webhook POST failed: {e}") from e + + +def send_email( + *, + to_addrs: List[str], + subject: str, + body: str, + from_addr: Optional[str] = None, + smtp: Optional[str] = None, + smtp_user: Optional[str] = None, + smtp_password: 
Optional[str] = None, +) -> None: + if not to_addrs: + raise RuntimeError("email: no recipients") + + msg = EmailMessage() + msg["To"] = ", ".join(to_addrs) + if from_addr: + msg["From"] = from_addr + else: + host = os.uname().nodename + msg["From"] = f"enroll@{host}" + msg["Subject"] = subject + msg.set_content(body) + + # Preferred: use local sendmail if smtp wasn't specified. + if not smtp: + sendmail = shutil.which("sendmail") + if not sendmail: + raise RuntimeError( + "email: no --smtp provided and sendmail not found on PATH" + ) + p = subprocess.run( + [sendmail, "-t", "-i"], + input=msg.as_bytes(), + capture_output=True, + check=False, + ) # nosec + if p.returncode != 0: + raise RuntimeError( + "email: sendmail failed:\n" + f" rc: {p.returncode}\n" + f" stderr: {p.stderr.decode('utf-8', errors='replace').strip()}" + ) + return + + import smtplib + + host = smtp + port = 25 + if ":" in smtp: + host, port_s = smtp.rsplit(":", 1) + try: + port = int(port_s) + except ValueError: + raise RuntimeError(f"email: invalid smtp port in {smtp!r}") + + with smtplib.SMTP(host, port, timeout=10) as s: + s.ehlo() + try: + s.starttls() + s.ehlo() + except Exception: + # STARTTLS is optional; ignore if unsupported. + pass # nosec + if smtp_user: + s.login(smtp_user, smtp_password or "") + s.send_message(msg) diff --git a/pyproject.toml b/pyproject.toml index ac65b02..5231ad9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.0" +version = "0.1.1" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" From 4660a0703e2471d35a3b6153d295b9195a7c4c93 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 17:11:04 +1100 Subject: [PATCH 036/115] Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or symlinks) and store in `usr_local_custom` role, similar to `etc_custom`. 
--- CHANGELOG.md | 5 ++ debian/changelog | 7 ++ enroll/diff.py | 6 ++ enroll/harvest.py | 106 ++++++++++++++++++++++++++ enroll/ignore.py | 5 ++ enroll/manifest.py | 102 +++++++++++++++++++++++++ pyproject.toml | 2 +- tests/test_diff_usr_local_custom.py | 111 ++++++++++++++++++++++++++++ tests/test_harvest.py | 41 +++++++++- tests/test_manifest.py | 73 ++++++++++++++++++ tests/test_misc_coverage.py | 96 ++++++++++++++++++++++++ 11 files changed, 551 insertions(+), 3 deletions(-) create mode 100644 tests/test_diff_usr_local_custom.py create mode 100644 tests/test_misc_coverage.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 81eed41..0e80a13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.2 + + * Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or + symlinks) and store in `usr_local_custom` role, similar to `etc_custom`. + # 0.1.1 * Add `diff` subcommand which can compare two harvests and send email or webhook notifications in different diff --git a/debian/changelog b/debian/changelog index 0cc5861..0b16cfa 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.1.2) unstable; urgency=medium + + * Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or + symlinks) and store in `usr_local_custom` role, similar to `etc_custom`. 
+ + -- Miguel Jacq Thu, 18 Dec 2025 17:07:00 +1100 + enroll (0.1.1) unstable; urgency=medium * Add `diff` subcommand which can compare two harvests and send email or webhook notifications in different diff --git a/enroll/diff.py b/enroll/diff.py index 9b396fc..e2861c9 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -190,6 +190,12 @@ def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, for mf in ec.get("managed_files", []) or []: yield str(ec_role), mf + # usr_local_custom + ul = state.get("usr_local_custom") or {} + ul_role = ul.get("role_name") or "usr_local_custom" + for mf in ul.get("managed_files", []) or []: + yield str(ul_role), mf + def _file_index(bundle_dir: Path, state: Dict[str, Any]) -> Dict[str, FileRec]: """Return mapping of absolute path -> FileRec. diff --git a/enroll/harvest.py b/enroll/harvest.py index ef93903..659bebc 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -78,6 +78,14 @@ class EtcCustomSnapshot: notes: List[str] +@dataclass +class UsrLocalCustomSnapshot: + role_name: str + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + ALLOWED_UNOWNED_EXTS = { ".conf", ".cfg", @@ -701,6 +709,103 @@ def harvest( notes=etc_notes, ) + # ------------------------- + # usr_local_custom role (/usr/local/etc + /usr/local/bin scripts) + # ------------------------- + ul_notes: List[str] = [] + ul_excluded: List[ExcludedFile] = [] + ul_managed: List[ManagedFile] = [] + ul_role_name = "usr_local_custom" + + # Extend the already-captured set with etc_custom. 
+ already_all: Set[str] = set(already) + for mf in etc_managed: + already_all.add(mf.path) + + def _scan_usr_local_tree( + root: str, *, require_executable: bool, cap: int, reason: str + ) -> None: + scanned = 0 + if not os.path.isdir(root): + return + for dirpath, _, filenames in os.walk(root): + for fn in filenames: + path = os.path.join(dirpath, fn) + if path in already_all: + continue + if not os.path.isfile(path) or os.path.islink(path): + continue + if require_executable: + try: + owner, group, mode = stat_triplet(path) + except OSError: + ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + try: + if (int(mode, 8) & 0o111) == 0: + continue + except ValueError: + # If mode parsing fails, be conservative and skip. + continue + else: + try: + owner, group, mode = stat_triplet(path) + except OSError: + ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + deny = policy.deny_reason(path) + if deny: + ul_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, ul_role_name, path, src_rel) + except OSError: + ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + ul_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason, + ) + ) + + already_all.add(path) + scanned += 1 + if scanned >= cap: + ul_notes.append(f"Reached file cap ({cap}) while scanning {root}.") + return + + # /usr/local/etc: capture all non-binary regular files (filtered by IgnorePolicy) + _scan_usr_local_tree( + "/usr/local/etc", + require_executable=False, + cap=2000, + reason="usr_local_etc_custom", + ) + + # /usr/local/bin: capture executable scripts only (skip non-executable text) + _scan_usr_local_tree( + "/usr/local/bin", + require_executable=True, + cap=2000, + reason="usr_local_bin_script", + ) + + usr_local_custom_snapshot = UsrLocalCustomSnapshot( + role_name=ul_role_name, 
+ managed_files=ul_managed, + excluded=ul_excluded, + notes=ul_notes, + ) + state = { "host": {"hostname": os.uname().nodename, "os": "debian"}, "users": asdict(users_snapshot), @@ -709,6 +814,7 @@ def harvest( "manual_packages_skipped": manual_pkgs_skipped, "package_roles": [asdict(p) for p in pkg_snaps], "etc_custom": asdict(etc_custom_snapshot), + "usr_local_custom": asdict(usr_local_custom_snapshot), } state_path = os.path.join(bundle_dir, "state.json") diff --git a/enroll/ignore.py b/enroll/ignore.py index d8ffce9..93ba423 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -23,6 +23,11 @@ DEFAULT_DENY_GLOBS = [ "/etc/gshadow", "/etc/*shadow", "/etc/letsencrypt/*", + "/usr/local/etc/ssl/private/*", + "/usr/local/etc/ssh/ssh_host_*", + "/usr/local/etc/*shadow", + "/usr/local/etc/*gshadow", + "/usr/local/etc/letsencrypt/*", ] SENSITIVE_CONTENT_PATTERNS = [ diff --git a/enroll/manifest.py b/enroll/manifest.py index e55418c..6909c5c 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -629,6 +629,7 @@ def _manifest_from_bundle_dir( package_roles: List[Dict[str, Any]] = state.get("package_roles", []) users_snapshot: Dict[str, Any] = state.get("users", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) + usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) site_mode = fqdn is not None and fqdn != "" @@ -661,6 +662,7 @@ def _manifest_from_bundle_dir( manifested_users_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] + manifested_usr_local_custom_roles: List[str] = [] manifested_service_roles: List[str] = [] manifested_pkg_roles: List[str] = [] @@ -999,6 +1001,105 @@ Unowned /etc config files not attributed to packages or services. 
# ------------------------- + # ------------------------- + + # ------------------------- + # usr_local_custom role (/usr/local/etc + /usr/local/bin scripts) + # ------------------------- + if usr_local_custom_snapshot and usr_local_custom_snapshot.get("managed_files"): + role = usr_local_custom_snapshot.get("role_name", "usr_local_custom") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = usr_local_custom_snapshot.get("managed_files", []) + excluded = usr_local_custom_snapshot.get("excluded", []) + notes = usr_local_custom_snapshot.get("notes", []) + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + # Copy only the non-templated artifacts (templates live in the role). + if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = "---\n" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + # No handlers needed for this role, but keep a valid YAML document. 
+ with open( + os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\n") + + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\ndependencies: []\n") + + readme = ( + """# usr_local_custom\n\n""" + "Unowned /usr/local files (scripts in /usr/local/bin and config under /usr/local/etc).\n\n" + "## Managed files\n" + + ("\n".join([f"- {mf.get('path')}" for mf in managed_files]) or "- (none)") + + "\n\n## Excluded\n" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + "\n\n## Notes\n" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + "\n" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_usr_local_custom_roles.append(role) + + # ------------------------- + # ------------------------- # Service roles # ------------------------- @@ -1310,6 +1411,7 @@ Generated for package `{pkg}`. 
manifested_pkg_roles + manifested_service_roles + manifested_etc_custom_roles + + manifested_usr_local_custom_roles + manifested_users_roles ) diff --git a/pyproject.toml b/pyproject.toml index 5231ad9..b5a07ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.1" +version = "0.1.2" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/tests/test_diff_usr_local_custom.py b/tests/test_diff_usr_local_custom.py new file mode 100644 index 0000000..88d594f --- /dev/null +++ b/tests/test_diff_usr_local_custom.py @@ -0,0 +1,111 @@ +import json +from pathlib import Path + +from enroll.diff import compare_harvests + + +def _write_bundle(root: Path, state: dict, artifacts: dict[str, bytes]) -> None: + root.mkdir(parents=True, exist_ok=True) + (root / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + for rel, data in artifacts.items(): + p = root / rel + p.parent.mkdir(parents=True, exist_ok=True) + p.write_bytes(data) + + +def test_diff_includes_usr_local_custom_files(tmp_path: Path): + old = tmp_path / "old" + new = tmp_path / "new" + + old_state = { + "host": {"hostname": "h1", "os": "debian"}, + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "package_roles": [], + "manual_packages": ["curl"], + "manual_packages_skipped": [], + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + } + ], + "excluded": [], + "notes": [], + }, + } + new_state = { + **old_state, + "manual_packages": ["curl", "htop"], + 
"usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + }, + { + "path": "/usr/local/bin/myscript", + "src_rel": "usr/local/bin/myscript", + "owner": "root", + "group": "root", + "mode": "0755", + "reason": "usr_local_bin_script", + }, + ], + "excluded": [], + "notes": [], + }, + } + + _write_bundle( + old, + old_state, + { + "artifacts/usr_local_custom/usr/local/etc/myapp.conf": b"myapp=1\n", + }, + ) + _write_bundle( + new, + new_state, + { + "artifacts/usr_local_custom/usr/local/etc/myapp.conf": b"myapp=2\n", + "artifacts/usr_local_custom/usr/local/bin/myscript": b"#!/bin/sh\necho hi\n", + }, + ) + + report, has_changes = compare_harvests(str(old), str(new)) + assert has_changes is True + + # Packages: htop was added. + assert report["packages"]["added"] == ["htop"] + + # Files: /usr/local/etc/myapp.conf should be detected as changed (content sha differs). + changed_paths = {c["path"] for c in report["files"]["changed"]} + assert "/usr/local/etc/myapp.conf" in changed_paths + + # Files: new script was added. + added_paths = {a["path"] for a in report["files"]["added"]} + assert "/usr/local/bin/myscript" in added_paths diff --git a/tests/test_harvest.py b/tests/test_harvest.py index 8e19fb4..a832c81 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -23,30 +23,51 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( real_islink = os.path.islink # Fake filesystem: two /etc files exist, only one is dpkg-owned. + # Also include some /usr/local files to populate usr_local_custom. 
files = { "/etc/openvpn/server.conf": b"server", "/etc/default/keyboard": b"kbd", + "/usr/local/etc/myapp.conf": b"myapp=1\n", + "/usr/local/bin/myscript": b"#!/bin/sh\necho hi\n", + # non-executable text under /usr/local/bin should be skipped + "/usr/local/bin/readme.txt": b"hello\n", + } + dirs = { + "/etc", + "/etc/openvpn", + "/etc/default", + "/usr", + "/usr/local", + "/usr/local/etc", + "/usr/local/bin", } - dirs = {"/etc", "/etc/openvpn", "/etc/default"} def fake_isfile(p: str) -> bool: if p.startswith("/etc/") or p == "/etc": return p in files + if p.startswith("/usr/local/"): + return p in files return real_isfile(p) def fake_isdir(p: str) -> bool: if p.startswith("/etc"): return p in dirs + if p.startswith("/usr/local") or p in ("/usr", "/usr/local"): + return p in dirs return real_isdir(p) def fake_islink(p: str) -> bool: if p.startswith("/etc"): return False + if p.startswith("/usr/local"): + return False return real_islink(p) def fake_exists(p: str) -> bool: if p.startswith("/etc"): return p in files or p in dirs + if p.startswith("/usr/local") or p in ("/usr", "/usr/local"): + return p in files or p in dirs return real_exists(p) def fake_walk(root: str): @@ -57,6 +78,10 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( yield ("/etc/openvpn", [], ["server.conf"]) elif root == "/etc/default": yield ("/etc/default", [], ["keyboard"]) + elif root == "/usr/local/etc": + yield ("/usr/local/etc", [], ["myapp.conf"]) + elif root == "/usr/local/bin": + yield ("/usr/local/bin", [], ["myscript", "readme.txt"]) else: yield (root, [], []) @@ -109,7 +134,13 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( monkeypatch.setattr(h, "list_manual_packages", lambda: ["openvpn", "curl"]) monkeypatch.setattr(h, "collect_non_system_users", lambda: []) - monkeypatch.setattr(h, "stat_triplet", lambda p: ("root", "root", "0644")) + def fake_stat_triplet(p: str): + if p == "/usr/local/bin/myscript": + return ("root", "root", "0755") + # 
/usr/local/bin/readme.txt remains non-executable + return ("root", "root", "0644") + + monkeypatch.setattr(h, "stat_triplet", fake_stat_triplet) # Avoid needing source files on disk by implementing our own bundle copier def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): @@ -139,3 +170,9 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( assert any( mf["path"] == "/etc/default/keyboard" for mf in etc_custom["managed_files"] ) + + # /usr/local content is attributed to usr_local_custom + ul = st["usr_local_custom"] + assert any(mf["path"] == "/usr/local/etc/myapp.conf" for mf in ul["managed_files"]) + assert any(mf["path"] == "/usr/local/bin/myscript" for mf in ul["managed_files"]) + assert all(mf["path"] != "/usr/local/bin/readme.txt" for mf in ul["managed_files"]) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 99040b0..92c3dfc 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -47,6 +47,29 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): "excluded": [], "notes": [], }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + }, + { + "path": "/usr/local/bin/myscript", + "src_rel": "usr/local/bin/myscript", + "owner": "root", + "group": "root", + "mode": "0755", + "reason": "usr_local_bin_script", + }, + ], + "excluded": [], + "notes": [], + }, "services": [ { "unit": "foo.service", @@ -92,6 +115,26 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): "kbd", encoding="utf-8" ) + # Create artifacts for usr_local_custom files so copy works + (bundle / "artifacts" / "usr_local_custom" / "usr" / "local" / "etc").mkdir( + parents=True, exist_ok=True + ) + ( + bundle + / "artifacts" + / "usr_local_custom" + / "usr" + / "local" + / "etc" + / 
"myapp.conf" + ).write_text("myapp=1\n", encoding="utf-8") + (bundle / "artifacts" / "usr_local_custom" / "usr" / "local" / "bin").mkdir( + parents=True, exist_ok=True + ) + ( + bundle / "artifacts" / "usr_local_custom" / "usr" / "local" / "bin" / "myscript" + ).write_text("#!/bin/sh\necho hi\n", encoding="utf-8") + manifest(str(bundle), str(out)) # Service role: systemd management should be gated on foo_manage_unit and a probe. @@ -119,6 +162,7 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): pb = (out / "playbook.yml").read_text(encoding="utf-8") assert "- users" in pb assert "- etc_custom" in pb + assert "- usr_local_custom" in pb assert "- curl" in pb assert "- foo" in pb @@ -168,6 +212,21 @@ def test_manifest_site_mode_creates_host_inventory_and_raw_files(tmp_path: Path) "excluded": [], "notes": [], }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + } + ], + "excluded": [], + "notes": [], + }, "services": [ { "unit": "foo.service", @@ -197,6 +256,20 @@ def test_manifest_site_mode_creates_host_inventory_and_raw_files(tmp_path: Path) bundle.mkdir(parents=True, exist_ok=True) (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + # Artifacts for usr_local_custom file so copy works. + (bundle / "artifacts" / "usr_local_custom" / "usr" / "local" / "etc").mkdir( + parents=True, exist_ok=True + ) + ( + bundle + / "artifacts" + / "usr_local_custom" + / "usr" + / "local" + / "etc" + / "myapp.conf" + ).write_text("myapp=1\n", encoding="utf-8") + manifest(str(bundle), str(out), fqdn=fqdn) # Host playbook exists. 
diff --git a/tests/test_misc_coverage.py b/tests/test_misc_coverage.py new file mode 100644 index 0000000..b4250fc --- /dev/null +++ b/tests/test_misc_coverage.py @@ -0,0 +1,96 @@ +import stat +from pathlib import Path + +import pytest + +from enroll.cache import _safe_component, new_harvest_cache_dir +from enroll.ignore import IgnorePolicy +from enroll.sopsutil import ( + SopsError, + _pgp_arg, + decrypt_file_binary_to, + encrypt_file_binary, +) + + +def test_safe_component_sanitizes_and_bounds_length(): + assert _safe_component(" ") == "unknown" + assert _safe_component("a/b c") == "a_b_c" + assert _safe_component("x" * 200) == "x" * 64 + + +def test_new_harvest_cache_dir_uses_xdg_cache_home(tmp_path: Path, monkeypatch): + monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "xdg")) + hc = new_harvest_cache_dir(hint="my host/01") + assert hc.dir.exists() + assert "my_host_01" in hc.dir.name + assert str(hc.dir).startswith(str(tmp_path / "xdg")) + # best-effort: ensure directory is not world-readable on typical FS + try: + mode = stat.S_IMODE(hc.dir.stat().st_mode) + assert mode & 0o077 == 0 + except OSError: + pass + + +def test_ignore_policy_denies_binary_and_sensitive_content(tmp_path: Path): + p_bin = tmp_path / "binfile" + p_bin.write_bytes(b"abc\x00def") + assert IgnorePolicy().deny_reason(str(p_bin)) == "binary_like" + + p_secret = tmp_path / "secret.conf" + p_secret.write_text("password=foo\n", encoding="utf-8") + assert IgnorePolicy().deny_reason(str(p_secret)) == "sensitive_content" + + # dangerous mode disables heuristic scanning (but still checks file-ness/size) + assert IgnorePolicy(dangerous=True).deny_reason(str(p_secret)) is None + + +def test_ignore_policy_denies_usr_local_shadow_by_glob(): + # This should short-circuit before stat() (path doesn't need to exist). 
+ assert IgnorePolicy().deny_reason("/usr/local/etc/shadow") == "denied_path" + + +def test_sops_pgp_arg_and_encrypt_decrypt_roundtrip(tmp_path: Path, monkeypatch): + assert _pgp_arg([" ABC ", "DEF"]) == "ABC,DEF" + with pytest.raises(SopsError): + _pgp_arg([]) + + # Stub out sops and subprocess. + import enroll.sopsutil as s + + monkeypatch.setattr(s, "require_sops_cmd", lambda: "sops") + + class R: + def __init__(self, rc: int, out: bytes, err: bytes = b""): + self.returncode = rc + self.stdout = out + self.stderr = err + + calls = [] + + def fake_run(cmd, capture_output, check): + calls.append(cmd) + # Return a deterministic payload so we can assert file writes. + if "--encrypt" in cmd: + return R(0, b"ENCRYPTED") + if "--decrypt" in cmd: + return R(0, b"PLAINTEXT") + return R(1, b"", b"bad") + + monkeypatch.setattr(s.subprocess, "run", fake_run) + + src = tmp_path / "src.bin" + src.write_bytes(b"x") + enc = tmp_path / "out.sops" + dec = tmp_path / "out.bin" + + encrypt_file_binary(src, enc, pgp_fingerprints=["ABC"], mode=0o600) + assert enc.read_bytes() == b"ENCRYPTED" + + decrypt_file_binary_to(enc, dec, mode=0o644) + assert dec.read_bytes() == b"PLAINTEXT" + + # Sanity: we invoked encrypt and decrypt. + assert any("--encrypt" in c for c in calls) + assert any("--decrypt" in c for c in calls) From 25add369dc44db59b95d7286546b26dcbee9a8c7 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 18 Dec 2025 17:24:45 +1100 Subject: [PATCH 037/115] README.md update --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 68c35e5..6645437 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ It aims to be **optimistic and noninteractive**: - Defensively excludes likely secrets (path denylist + content sniff + size caps). - Captures non-system users and their SSH public keys. - Captures miscellaneous `/etc` files it can’t attribute to a package and installs them in an `etc_custom` role. 
+- Ditto for /usr/local/bin (for non-binary files) and /usr/local/etc - Avoids trying to start systemd services that were detected as inactive during harvest. --- From 240e79706f18d0092fa54698c2e16b7c2ddd127b Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 20 Dec 2025 17:47:00 +1100 Subject: [PATCH 038/115] Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` arguments. --- CHANGELOG.md | 5 + README.md | 26 ++++ enroll/cli.py | 78 +++++++++++- enroll/diff.py | 6 + enroll/harvest.py | 110 ++++++++++++++++ enroll/manifest.py | 115 +++++++++++++++++ enroll/pathfilter.py | 293 +++++++++++++++++++++++++++++++++++++++++++ enroll/remote.py | 21 +++- tests/test_cli.py | 45 ++++++- 9 files changed, 687 insertions(+), 12 deletions(-) create mode 100644 enroll/pathfilter.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e80a13..2d8d6e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.3 + + * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` + arguments. + # 0.1.2 * Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or diff --git a/README.md b/README.md index 6645437..84a6965 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,7 @@ Harvest state about a host and write a harvest bundle. - Changed-from-default config (plus related custom/unowned files under service dirs) - Non-system users + SSH public keys - Misc `/etc` that can’t be attributed to a package (`etc_custom` role) +- Optional user-specified extra files/dirs via `--include-path` (emitted as an `extra_paths` role at manifest time) **Common flags** - Remote harvesting: @@ -79,6 +80,14 @@ Harvest state about a host and write a harvest bundle. 
- `--dangerous`: disables secret-safety checks (see “Sensitive data” below) - Encrypt bundles at rest: - `--sops `: writes a single encrypted `harvest.tar.gz.sops` instead of a plaintext directory +- Path selection (include/exclude): + - `--include-path ` (repeatable): add extra files/dirs to harvest (even from locations normally ignored, like `/home`). Still subject to secret-safety checks unless `--dangerous`. + - `--exclude-path ` (repeatable): skip files/dirs even if they would normally be harvested. + - Pattern syntax: + - plain path: matches that file; directories match the directory + everything under it + - glob (default): supports `*` and `**` (prefix with `glob:` to force) + - regex: prefix with `re:` or `regex:` + - Precedence: excludes win over includes. --- @@ -227,6 +236,23 @@ enroll harvest --out /tmp/enroll-harvest enroll harvest --remote-host myhost.example.com --remote-user myuser --out /tmp/enroll-harvest ``` +### Include paths (`--include-path`) +```bash +# Add a few dotfiles from /home (still secret-safe unless --dangerous) +enroll harvest --out /tmp/enroll-harvest --include-path '/home/*/.bashrc' --include-path '/home/*/.profile' +``` + +### Exclude paths (`--exclude-path`) +```bash +# Skip specific /usr/local/bin entries (or patterns) +enroll harvest --out /tmp/enroll-harvest --exclude-path '/usr/local/bin/docker-*' --exclude-path '/usr/local/bin/some-tool' +``` + +### Regex include +```bash +enroll harvest --out /tmp/enroll-harvest --include-path 're:^/home/[^/]+/\.config/myapp/.*$' +``` + ### `--dangerous` ```bash enroll harvest --out /tmp/enroll-harvest --dangerous diff --git a/enroll/cli.py b/enroll/cli.py index 2d8ed5e..f6efe11 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -125,6 +125,27 @@ def main() -> None: action="store_true", help="Collect files more aggressively (may include secrets). 
Disables secret-avoidance checks.", ) + h.add_argument( + "--include-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Include extra file paths to harvest (repeatable). Supports globs (including '**') and regex via 're:'. " + "Included files are still filtered by IgnorePolicy unless --dangerous is used." + ), + ) + h.add_argument( + "--exclude-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Exclude file paths from harvesting (repeatable). Supports globs (including '**') and regex via 're:'. " + "Excludes apply to all harvesting, including defaults." + ), + ) + h.add_argument( "--sops", nargs="+", @@ -186,6 +207,27 @@ def main() -> None: action="store_true", help="Collect files more aggressively (may include secrets). Disables secret-avoidance checks.", ) + s.add_argument( + "--include-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Include extra file paths to harvest (repeatable). Supports globs (including '**') and regex via 're:'. " + "Included files are still filtered by IgnorePolicy unless --dangerous is used." + ), + ) + s.add_argument( + "--exclude-path", + action="append", + default=[], + metavar="PATTERN", + help=( + "Exclude file paths from harvesting (repeatable). Supports globs (including '**') and regex via 're:'. " + "Excludes apply to all harvesting, including defaults." 
+ ), + ) + s.add_argument( "--sops", nargs="+", @@ -320,6 +362,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) @@ -338,6 +382,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) print(str(state)) else: @@ -350,7 +396,12 @@ def main() -> None: os.chmod(tmp_bundle, 0o700) except OSError: pass - harvest(str(tmp_bundle), dangerous=bool(args.dangerous)) + harvest( + str(tmp_bundle), + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) ) @@ -360,7 +411,12 @@ def main() -> None: raise SystemExit( "error: --out is required unless --remote-host is set" ) - path = harvest(args.out, dangerous=bool(args.dangerous)) + path = harvest( + args.out, + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) print(path) elif args.cmd == "manifest": out_enc = manifest( @@ -446,6 +502,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) @@ -473,6 +531,8 @@ def main() -> None: remote_user=args.remote_user, dangerous=bool(args.dangerous), no_sudo=bool(args.no_sudo), + include_paths=list(getattr(args, 
"include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), ) manifest( str(harvest_dir), @@ -493,7 +553,12 @@ def main() -> None: os.chmod(tmp_bundle, 0o700) except OSError: pass - harvest(str(tmp_bundle), dangerous=bool(args.dangerous)) + harvest( + str(tmp_bundle), + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) _encrypt_harvest_dir_to_sops( tmp_bundle, out_file, list(sops_fps) ) @@ -512,7 +577,12 @@ def main() -> None: raise SystemExit( "error: --harvest is required unless --remote-host is set" ) - harvest(args.harvest, dangerous=bool(args.dangerous)) + harvest( + args.harvest, + dangerous=bool(args.dangerous), + include_paths=list(getattr(args, "include_path", []) or []), + exclude_paths=list(getattr(args, "exclude_path", []) or []), + ) manifest( args.harvest, args.out, diff --git a/enroll/diff.py b/enroll/diff.py index e2861c9..a2b7d91 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -196,6 +196,12 @@ def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, for mf in ul.get("managed_files", []) or []: yield str(ul_role), mf + # extra_paths + xp = state.get("extra_paths") or {} + xp_role = xp.get("role_name") or "extra_paths" + for mf in xp.get("managed_files", []) or []: + yield str(xp_role), mf + def _file_index(bundle_dir: Path, state: Dict[str, Any]) -> Dict[str, FileRec]: """Return mapping of absolute path -> FileRec. 
diff --git a/enroll/harvest.py b/enroll/harvest.py index 659bebc..48242d6 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -19,6 +19,7 @@ from .debian import ( stat_triplet, ) from .ignore import IgnorePolicy +from .pathfilter import PathFilter, expand_includes from .accounts import collect_non_system_users @@ -86,6 +87,16 @@ class UsrLocalCustomSnapshot: notes: List[str] +@dataclass +class ExtraPathsSnapshot: + role_name: str + include_patterns: List[str] + exclude_patterns: List[str] + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + ALLOWED_UNOWNED_EXTS = { ".conf", ".cfg", @@ -250,6 +261,8 @@ def harvest( policy: Optional[IgnorePolicy] = None, *, dangerous: bool = False, + include_paths: Optional[List[str]] = None, + exclude_paths: Optional[List[str]] = None, ) -> str: # If a policy is not supplied, build one. `--dangerous` relaxes secret # detection and deny-glob skipping. @@ -261,6 +274,10 @@ def harvest( policy.dangerous = True os.makedirs(bundle_dir, exist_ok=True) + # User-provided includes/excludes. Excludes apply to all harvesting; + # includes are harvested into an extra role. 
+ path_filter = PathFilter(include=include_paths or (), exclude=exclude_paths or ()) + if hasattr(os, "geteuid") and os.geteuid() != 0: print( "Warning: not running as root; harvest may miss files or metadata.", @@ -406,6 +423,9 @@ def harvest( ) for path, reason in sorted(candidates.items()): + if path_filter.is_excluded(path): + excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue deny = policy.deny_reason(path) if deny: excluded.append(ExcludedFile(path=path, reason=deny)) @@ -522,6 +542,9 @@ def harvest( candidates.setdefault(r, "custom_specific_path") for path, reason in sorted(candidates.items()): + if path_filter.is_excluded(path): + excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue deny = policy.deny_reason(path) if deny: excluded.append(ExcludedFile(path=path, reason=deny)) @@ -593,6 +616,9 @@ def harvest( # Copy only safe SSH public material: authorized_keys + *.pub for sf in u.ssh_files: + if path_filter.is_excluded(sf): + users_excluded.append(ExcludedFile(path=sf, reason="user_excluded")) + continue deny = policy.deny_reason(sf) if deny: users_excluded.append(ExcludedFile(path=sf, reason=deny)) @@ -665,6 +691,10 @@ def harvest( if not _is_confish(path): continue + if path_filter.is_excluded(path): + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + deny = policy.deny_reason(path) if deny: etc_excluded.append(ExcludedFile(path=path, reason=deny)) @@ -754,6 +784,10 @@ def harvest( ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) continue + if path_filter.is_excluded(path): + ul_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + deny = policy.deny_reason(path) if deny: ul_excluded.append(ExcludedFile(path=path, reason=deny)) @@ -806,6 +840,81 @@ def harvest( notes=ul_notes, ) + # ------------------------- + # extra_paths role (user-requested includes) + # ------------------------- + extra_notes: List[str] = [] + extra_excluded: 
List[ExcludedFile] = [] + extra_managed: List[ManagedFile] = [] + extra_role_name = "extra_paths" + + include_specs = list(include_paths or []) + exclude_specs = list(exclude_paths or []) + + if include_specs: + extra_notes.append("User include patterns:") + extra_notes.extend([f"- {p}" for p in include_specs]) + if exclude_specs: + extra_notes.append("User exclude patterns:") + extra_notes.extend([f"- {p}" for p in exclude_specs]) + + included_files: List[str] = [] + if include_specs: + files, inc_notes = expand_includes( + path_filter.iter_include_patterns(), + exclude=path_filter, + max_files=4000, + ) + included_files = files + extra_notes.extend(inc_notes) + + for path in included_files: + if path in already_all: + continue + + if path_filter.is_excluded(path): + extra_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + + deny = policy.deny_reason(path) + if deny: + extra_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + try: + owner, group, mode = stat_triplet(path) + except OSError: + extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, extra_role_name, path, src_rel) + except OSError: + extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + extra_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason="user_include", + ) + ) + already_all.add(path) + + extra_paths_snapshot = ExtraPathsSnapshot( + role_name=extra_role_name, + include_patterns=include_specs, + exclude_patterns=exclude_specs, + managed_files=extra_managed, + excluded=extra_excluded, + notes=extra_notes, + ) + state = { "host": {"hostname": os.uname().nodename, "os": "debian"}, "users": asdict(users_snapshot), @@ -815,6 +924,7 @@ def harvest( "package_roles": [asdict(p) for p in pkg_snaps], "etc_custom": asdict(etc_custom_snapshot), "usr_local_custom": 
asdict(usr_local_custom_snapshot), + "extra_paths": asdict(extra_paths_snapshot), } state_path = os.path.join(bundle_dir, "state.json") diff --git a/enroll/manifest.py b/enroll/manifest.py index 6909c5c..2f28eab 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -630,6 +630,7 @@ def _manifest_from_bundle_dir( users_snapshot: Dict[str, Any] = state.get("users", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) + extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) site_mode = fqdn is not None and fqdn != "" @@ -663,6 +664,7 @@ def _manifest_from_bundle_dir( manifested_users_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] manifested_usr_local_custom_roles: List[str] = [] + manifested_extra_paths_roles: List[str] = [] manifested_service_roles: List[str] = [] manifested_pkg_roles: List[str] = [] @@ -1098,6 +1100,118 @@ Unowned /etc config files not attributed to packages or services. 
manifested_usr_local_custom_roles.append(role) + # ------------------------- + # extra_paths role (user-requested includes) + # ------------------------- + if extra_paths_snapshot and extra_paths_snapshot.get("managed_files"): + role = extra_paths_snapshot.get("role_name", "extra_paths") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = extra_paths_snapshot.get("managed_files", []) + excluded = extra_paths_snapshot.get("excluded", []) + notes = extra_paths_snapshot.get("notes", []) + include_pats = extra_paths_snapshot.get("include_patterns", []) or [] + exclude_pats = extra_paths_snapshot.get("exclude_patterns", []) or [] + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = "---\n" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + with open( + os.path.join(role_dir, "handlers", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\n") + + with 
open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\ndependencies: []\n") + + readme = ( + f"""# {role} + +User-requested extra file harvesting. + +## Include patterns +""" + + ("\n".join([f"- {p}" for p in include_pats]) or "- (none)") + + """\n +## Exclude patterns +""" + + ("\n".join([f"- {p}" for p in exclude_pats]) or "- (none)") + + """\n +## Managed files +""" + + ("\n".join([f"- {mf.get('path')}" for mf in managed_files]) or "- (none)") + + """\n +## Excluded +""" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + """\n +## Notes +""" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + """\n""" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_extra_paths_roles.append(role) + + manifested_usr_local_custom_roles.append(role) + # ------------------------- # ------------------------- @@ -1412,6 +1526,7 @@ Generated for package `{pkg}`. + manifested_service_roles + manifested_etc_custom_roles + manifested_usr_local_custom_roles + + manifested_extra_paths_roles + manifested_users_roles ) diff --git a/enroll/pathfilter.py b/enroll/pathfilter.py new file mode 100644 index 0000000..9df4afa --- /dev/null +++ b/enroll/pathfilter.py @@ -0,0 +1,293 @@ +from __future__ import annotations + +import glob +import os +import re +from dataclasses import dataclass +from pathlib import PurePosixPath +from typing import List, Optional, Sequence, Set, Tuple + + +_REGEX_PREFIXES = ("re:", "regex:") + + +def _has_glob_chars(s: str) -> bool: + return any(ch in s for ch in "*?[") + + +def _norm_abs(p: str) -> str: + """Normalise a path-ish string to an absolute POSIX path. + + We treat inputs that don't start with '/' as being relative to '/'. + """ + + p = p.strip() + if not p: + return "/" + if not p.startswith("/"): + p = "/" + p + # `normpath` keeps a leading '/' for absolute paths. 
+ return os.path.normpath(p) + + +def _posix_match(path: str, pattern: str) -> bool: + """Path matching with glob semantics. + + Uses PurePosixPath.match which: + - treats '/' as a segment separator + - supports '**' for recursive matching + + Both `path` and `pattern` are treated as absolute paths. + """ + + # PurePosixPath.match is anchored and works best on relative strings. + p = path.lstrip("/") + pat = pattern.lstrip("/") + try: + return PurePosixPath(p).match(pat) + except Exception: + # If the pattern is somehow invalid, fail closed. + return False + + +def _regex_literal_prefix(regex: str) -> str: + """Best-effort literal prefix extraction for a regex. + + This lets us pick a starting directory to walk when expanding regex-based + include patterns. + """ + + s = regex + if s.startswith("^"): + s = s[1:] + out: List[str] = [] + escaped = False + meta = set(".^$*+?{}[]\\|()") + for ch in s: + if escaped: + out.append(ch) + escaped = False + continue + if ch == "\\": + escaped = True + continue + if ch in meta: + break + out.append(ch) + return "".join(out) + + +@dataclass(frozen=True) +class CompiledPathPattern: + raw: str + kind: str # 'prefix' | 'glob' | 'regex' + value: str + regex: Optional[re.Pattern[str]] = None + + def matches(self, path: str) -> bool: + p = _norm_abs(path) + + if self.kind == "regex": + if not self.regex: + return False + # Search (not match) so users can write unanchored patterns. + return self.regex.search(p) is not None + + if self.kind == "glob": + return _posix_match(p, self.value) + + # prefix + pref = self.value.rstrip("/") + return p == pref or p.startswith(pref + "/") + + +def compile_path_pattern(raw: str) -> CompiledPathPattern: + s = raw.strip() + for pre in _REGEX_PREFIXES: + if s.startswith(pre): + rex = s[len(pre) :].strip() + try: + return CompiledPathPattern( + raw=raw, kind="regex", value=rex, regex=re.compile(rex) + ) + except re.error: + # Treat invalid regexes as non-matching. 
+ return CompiledPathPattern(raw=raw, kind="regex", value=rex, regex=None) + + # If the user explicitly says glob:, honour it. + if s.startswith("glob:"): + pat = s[len("glob:") :].strip() + return CompiledPathPattern(raw=raw, kind="glob", value=_norm_abs(pat)) + + # Heuristic: if it contains glob metacharacters, treat as a glob. + if _has_glob_chars(s) or "**" in s: + return CompiledPathPattern(raw=raw, kind="glob", value=_norm_abs(s)) + + # Otherwise treat as an exact path-or-prefix (dir subtree). + return CompiledPathPattern(raw=raw, kind="prefix", value=_norm_abs(s)) + + +@dataclass +class PathFilter: + """User-provided path filters. + + Semantics: + - exclude patterns always win + - include patterns are used only to expand *additional* files to harvest + (they do not restrict the default harvest set) + + Patterns: + - By default: glob-like (supports '**') + - Regex: prefix with 're:' or 'regex:' + - Force glob: prefix with 'glob:' + - A plain path without wildcards matches that path and everything under it + (directory-prefix behavior). + + Examples: + --exclude-path /usr/local/bin/docker-* + --include-path /home/*/.bashrc + --include-path 're:^/home/[^/]+/.config/myapp/.*$' + """ + + include: Sequence[str] = () + exclude: Sequence[str] = () + + def __post_init__(self) -> None: + self._include = [ + compile_path_pattern(p) for p in self.include if str(p).strip() + ] + self._exclude = [ + compile_path_pattern(p) for p in self.exclude if str(p).strip() + ] + + def is_excluded(self, path: str) -> bool: + for pat in self._exclude: + if pat.matches(path): + return True + return False + + def iter_include_patterns(self) -> List[CompiledPathPattern]: + return list(self._include) + + +def expand_includes( + patterns: Sequence[CompiledPathPattern], + *, + exclude: Optional[PathFilter] = None, + max_files: int = 4000, +) -> Tuple[List[str], List[str]]: + """Expand include patterns into concrete file paths. + + Returns (paths, notes). 
The returned paths are absolute paths. + + This function is intentionally conservative: + - symlinks are ignored (both dirs and files) + - the number of collected files is capped + + Regex patterns are expanded by walking a best-effort inferred root. + """ + + out: List[str] = [] + notes: List[str] = [] + seen: Set[str] = set() + + def _maybe_add_file(p: str) -> None: + if len(out) >= max_files: + return + p = _norm_abs(p) + if exclude and exclude.is_excluded(p): + return + if p in seen: + return + if not os.path.isfile(p) or os.path.islink(p): + return + seen.add(p) + out.append(p) + + def _walk_dir(root: str, match: Optional[CompiledPathPattern] = None) -> None: + root = _norm_abs(root) + if not os.path.isdir(root) or os.path.islink(root): + return + for dirpath, dirnames, filenames in os.walk(root, followlinks=False): + # Prune excluded directories early. + if exclude: + dirnames[:] = [ + d + for d in dirnames + if not exclude.is_excluded(os.path.join(dirpath, d)) + and not os.path.islink(os.path.join(dirpath, d)) + ] + for fn in filenames: + if len(out) >= max_files: + return + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + if exclude and exclude.is_excluded(p): + continue + if match is not None and not match.matches(p): + continue + if p in seen: + continue + seen.add(p) + out.append(_norm_abs(p)) + + for pat in patterns: + if len(out) >= max_files: + notes.append( + f"Include cap reached ({max_files}); some includes were not expanded." + ) + break + + matched_any = False + + if pat.kind == "prefix": + p = pat.value + if os.path.isfile(p) and not os.path.islink(p): + _maybe_add_file(p) + matched_any = True + elif os.path.isdir(p) and not os.path.islink(p): + before = len(out) + _walk_dir(p) + matched_any = len(out) > before + else: + # Still allow prefix patterns that don't exist now (e.g. remote different) + # by matching nothing rather than erroring. 
+ matched_any = False + + elif pat.kind == "glob": + # Use glob for expansion; also walk directories that match. + gpat = pat.value + hits = glob.glob(gpat, recursive=True) + for h in hits: + if len(out) >= max_files: + break + h = _norm_abs(h) + if exclude and exclude.is_excluded(h): + continue + if os.path.isdir(h) and not os.path.islink(h): + before = len(out) + _walk_dir(h) + if len(out) > before: + matched_any = True + elif os.path.isfile(h) and not os.path.islink(h): + _maybe_add_file(h) + matched_any = True + + else: # regex + rex = pat.value + prefix = _regex_literal_prefix(rex) + # Determine a walk root. If we can infer an absolute prefix, use its + # directory; otherwise fall back to '/'. + if prefix.startswith("/"): + root = os.path.dirname(prefix) or "/" + else: + root = "/" + before = len(out) + _walk_dir(root, match=pat) + matched_any = len(out) > before + + if not matched_any: + notes.append(f"Include pattern matched no files: {pat.raw!r}") + + return out, notes diff --git a/enroll/remote.py b/enroll/remote.py index 469248d..9618512 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import shlex import shutil import tarfile import tempfile @@ -97,6 +98,8 @@ def remote_harvest( remote_python: str = "python3", dangerous: bool = False, no_sudo: bool = False, + include_paths: Optional[list[str]] = None, + exclude_paths: Optional[list[str]] = None, ) -> Path: """Run enroll harvest on a remote host via SSH and pull the bundle locally. @@ -165,13 +168,25 @@ def remote_harvest( sftp.put(str(pyz), rapp) # Run remote harvest. 
- _cmd = f"{remote_python} {rapp} harvest --out {rbundle}" + argv: list[str] = [ + remote_python, + rapp, + "harvest", + "--out", + rbundle, + ] + if dangerous: + argv.append("--dangerous") + for p in include_paths or []: + argv.extend(["--include-path", str(p)]) + for p in exclude_paths or []: + argv.extend(["--exclude-path", str(p)]) + + _cmd = " ".join(shlex.quote(a) for a in argv) if not no_sudo: cmd = f"sudo {_cmd}" else: cmd = _cmd - if dangerous: - cmd += " --dangerous" rc, out, err = _ssh_run(ssh, cmd) if rc != 0: raise RuntimeError( diff --git a/tests/test_cli.py b/tests/test_cli.py index ca3bfa6..4477b24 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -6,9 +6,17 @@ import enroll.cli as cli def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path): called = {} - def fake_harvest(out: str, dangerous: bool = False): + def fake_harvest( + out: str, + dangerous: bool = False, + include_paths=None, + exclude_paths=None, + **_kwargs, + ): called["out"] = out called["dangerous"] = dangerous + called["include_paths"] = include_paths or [] + called["exclude_paths"] = exclude_paths or [] return str(tmp_path / "state.json") monkeypatch.setattr(cli, "harvest", fake_harvest) @@ -17,6 +25,8 @@ def test_cli_harvest_subcommand_calls_harvest(monkeypatch, capsys, tmp_path): cli.main() assert called["out"] == str(tmp_path) assert called["dangerous"] is False + assert called["include_paths"] == [] + assert called["exclude_paths"] == [] captured = capsys.readouterr() assert str(tmp_path / "state.json") in captured.out @@ -55,8 +65,16 @@ def test_cli_manifest_subcommand_calls_manifest(monkeypatch, tmp_path): def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path): calls = [] - def fake_harvest(bundle_dir: str, dangerous: bool = False): - calls.append(("harvest", bundle_dir, dangerous)) + def fake_harvest( + bundle_dir: str, + dangerous: bool = False, + include_paths=None, + exclude_paths=None, + **_kwargs, + ): + 
calls.append( + ("harvest", bundle_dir, dangerous, include_paths or [], exclude_paths or []) + ) return str(tmp_path / "bundle" / "state.json") def fake_manifest(bundle_dir: str, out_dir: str, **kwargs): @@ -87,7 +105,7 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path) cli.main() assert calls == [ - ("harvest", str(tmp_path / "bundle"), False), + ("harvest", str(tmp_path / "bundle"), False, [], []), ("manifest", str(tmp_path / "bundle"), str(tmp_path / "ansible"), None, "auto"), ] @@ -95,9 +113,17 @@ def test_cli_enroll_subcommand_runs_harvest_then_manifest(monkeypatch, tmp_path) def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path): called = {} - def fake_harvest(out: str, dangerous: bool = False): + def fake_harvest( + out: str, + dangerous: bool = False, + include_paths=None, + exclude_paths=None, + **_kwargs, + ): called["out"] = out called["dangerous"] = dangerous + called["include_paths"] = include_paths or [] + called["exclude_paths"] = exclude_paths or [] return str(tmp_path / "state.json") monkeypatch.setattr(cli, "harvest", fake_harvest) @@ -107,6 +133,8 @@ def test_cli_harvest_dangerous_flag_is_forwarded(monkeypatch, tmp_path): cli.main() assert called["dangerous"] is True + assert called["include_paths"] == [] + assert called["exclude_paths"] == [] def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( @@ -131,6 +159,9 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( remote_user, dangerous, no_sudo, + include_paths=None, + exclude_paths=None, + **_kwargs, ): called.update( { @@ -140,6 +171,8 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( "remote_user": remote_user, "dangerous": dangerous, "no_sudo": no_sudo, + "include_paths": include_paths or [], + "exclude_paths": exclude_paths or [], } ) return cache_dir / "state.json" @@ -169,6 +202,8 @@ def test_cli_harvest_remote_calls_remote_harvest_and_uses_cache_dir( assert called["remote_user"] == 
"alice" assert called["dangerous"] is False assert called["no_sudo"] is False + assert called["include_paths"] == [] + assert called["exclude_paths"] == [] def test_cli_single_shot_remote_without_harvest_prints_state_path( From 9641637d4d27df0c1d524a20c63adae90ff424fa Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 20 Dec 2025 18:24:46 +1100 Subject: [PATCH 039/115] Add support for an enroll.ini config file to store arguments per subcommand, to avoid having to remember them all for repetitive executions. --- CHANGELOG.md | 2 + README.md | 56 ++++++++++ debian/changelog | 9 ++ enroll/cli.py | 264 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 330 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d8d6e4..90478e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` arguments. + * Add support for an enroll.ini config file to store arguments per subcommand, to avoid having to remember + them all for repetitive executions. # 0.1.2 diff --git a/README.md b/README.md index 84a6965..a5d2157 100644 --- a/README.md +++ b/README.md @@ -336,3 +336,59 @@ ansible-playbook -i "localhost," -c local /tmp/enroll-ansible/playbook.yml ```bash ansible-playbook /tmp/enroll-ansible/playbooks/"$(hostname -f)".yml ``` + +## Configuration file + +As can be seen above, there are a lot of powerful 'permutations' available to all four subcommands. + +Sometimes, it can be easier to store them in a config file so you don't have to remember them! + +Enroll supports reading an ini-style file of all the arguments for each subcommand. + +### Location of the config file + +The path the config file can be specified with `-c` or `--config` on the command-line. Otherwise, +Enroll will look for `./enroll.ini`, `./.enroll.ini` (in the current working directory), +``~/.config/enroll/enroll.ini` (or `$XDG_CONFIG_HOME/enroll/enroll.ini`). 
+ +You may also pass `--no-config` if you deliberately want to ignore the config file even if it existed. + +### Precedence + +Highest wins: + + * Explicit CLI flags + * INI config ([cmd], [enroll]) + * argparse defaults + +### Example config file + +Here is an example. + +Whenever an argument on the command-line has a 'hyphen' in it, just be sure to change it to an underscore in the ini file. + +```ini +[enroll] +# (future global flags may live here) + +[harvest] +dangerous = false +include_path = + /home/*/.bashrc + /home/*/.profile +exclude_path = /usr/local/bin/docker-*, /usr/local/bin/some-tool +# remote_host = yourserver.example.com +# remote_user = you +# remote_port = 2222 + +[manifest] +# you can set defaults here too, e.g. +no_jinjaturtle = true +sops = 00AE817C24A10C2540461A9C1D7CDE0234DB458D + +[single-shot] +# if you use single-shot, put its defaults here. +# It does not inherit those of the subsections above, so you +# may wish to repeat them here. +include_path = re:^/home/[^/]+/\.config/myapp/.*$ +``` diff --git a/debian/changelog b/debian/changelog index 0b16cfa..f6ba2f7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,12 @@ +enroll (0.1.3) unstable; urgency=medium + + * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` + arguments. + * Add support for an enroll.ini config file to store arguments per subcommand, to avoid having to remember + them all for repetitive executions. 
+ + -- Miguel Jacq Sat, 20 Dec 2025 18:24:00 +1100 + enroll (0.1.2) unstable; urgency=medium * Include files from `/usr/local/bin` and `/usr/local/etc` in harvest (assuming they aren't binaries or diff --git a/enroll/cli.py b/enroll/cli.py index f6efe11..e5f729d 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -1,7 +1,9 @@ from __future__ import annotations import argparse +import configparser import os +import sys import tarfile import tempfile from pathlib import Path @@ -15,6 +17,232 @@ from .remote import remote_harvest from .sopsutil import SopsError, encrypt_file_binary +def _discover_config_path(argv: list[str]) -> Optional[Path]: + """Return the config path to use, if any. + + Precedence: + 1) --no-config disables loading. + 2) --config PATH (or -c PATH) + 3) $ENROLL_CONFIG + 4) ./enroll.ini, ./.enroll.ini + 5) $XDG_CONFIG_HOME/enroll/enroll.ini (or ~/.config/enroll/enroll.ini) + + The config file is optional; if no file is found, returns None. + """ + + # Quick scan for explicit flags without needing to build the full parser. 
+ if "--no-config" in argv: + return None + + def _value_after(flag: str) -> Optional[str]: + try: + i = argv.index(flag) + except ValueError: + return None + if i + 1 >= len(argv): + return None + return argv[i + 1] + + p = _value_after("--config") or _value_after("-c") + if p: + return Path(p).expanduser() + + envp = os.environ.get("ENROLL_CONFIG") + if envp: + return Path(envp).expanduser() + + cwd = Path.cwd() + for name in ("enroll.ini", ".enroll.ini"): + cp = cwd / name + if cp.exists() and cp.is_file(): + return cp + + xdg = os.environ.get("XDG_CONFIG_HOME") + if xdg: + base = Path(xdg).expanduser() + else: + base = Path.home() / ".config" + cp = base / "enroll" / "enroll.ini" + if cp.exists() and cp.is_file(): + return cp + + return None + + +def _parse_bool(s: str) -> Optional[bool]: + v = str(s).strip().lower() + if v in {"1", "true", "yes", "y", "on"}: + return True + if v in {"0", "false", "no", "n", "off"}: + return False + return None + + +def _action_lookup(p: argparse.ArgumentParser) -> dict[str, argparse.Action]: + """Map config keys -> argparse actions for a parser. + + Accepts both dest names and long option names without leading dashes, + normalized with '-' -> '_'. + """ + + m: dict[str, argparse.Action] = {} + for a in p._actions: # noqa: SLF001 (argparse internal) + if not getattr(a, "dest", None): + continue + dest = str(a.dest).strip().lower() + if dest: + m[dest] = a + for opt in getattr(a, "option_strings", []) or []: + k = opt.lstrip("-").strip().lower() + if k: + m[k.replace("-", "_")] = a + m[k] = a + return m + + +def _choose_flag(a: argparse.Action) -> Optional[str]: + # Prefer a long flag if available (e.g. --dangerous over -d) + for s in getattr(a, "option_strings", []) or []: + if s.startswith("--"): + return s + for s in getattr(a, "option_strings", []) or []: + return s + return None + + +def _split_list_value(v: str) -> list[str]: + # Support comma-separated and/or multi-line lists. 
+ raw = str(v) + if "\n" in raw: + parts = [p.strip() for p in raw.splitlines()] + return [p for p in parts if p] + if "," in raw: + parts = [p.strip() for p in raw.split(",")] + return [p for p in parts if p] + raw = raw.strip() + return [raw] if raw else [] + + +def _section_to_argv( + p: argparse.ArgumentParser, cfg: configparser.ConfigParser, section: str +) -> list[str]: + """Translate an INI section into argv tokens for this parser.""" + if not cfg.has_section(section): + return [] + + lookup = _action_lookup(p) + out: list[str] = [] + + for k, v in cfg.items(section): + key = str(k).strip().lower().replace("-", "_") + # Avoid recursion / confusing self-configuration. + if key in {"config", "no_config"}: + continue + + a = lookup.get(key) + if not a: + # Unknown keys are ignored (but we try to be helpful). + print( + f"warning: config [{section}] contains unknown option '{k}' (ignored)", + file=sys.stderr, + ) + continue + + flag = _choose_flag(a) + if not flag: + continue + + # Boolean flags + if isinstance(a, argparse._StoreTrueAction): # noqa: SLF001 + b = _parse_bool(v) + if b is True: + out.append(flag) + continue + if isinstance(a, argparse._StoreFalseAction): # noqa: SLF001 + b = _parse_bool(v) + if b is False: + out.append(flag) + continue + + # Repeated options + if isinstance(a, argparse._AppendAction): # noqa: SLF001 + for item in _split_list_value(v): + out.extend([flag, item]) + continue + + # Count flags (rare, but easy to support) + if isinstance(a, argparse._CountAction): # noqa: SLF001 + b = _parse_bool(v) + if b is True: + out.append(flag) + else: + try: + n = int(str(v).strip()) + except ValueError: + n = 0 + out.extend([flag] * max(0, n)) + continue + + # Standard scalar options + sval = str(v).strip() + if sval: + out.extend([flag, sval]) + + return out + + +def _inject_config_argv( + argv: list[str], + *, + cfg_path: Optional[Path], + root_parser: argparse.ArgumentParser, + subparsers: dict[str, argparse.ArgumentParser], +) -> list[str]: 
+ """Return argv with config-derived tokens inserted. + + We insert: + - [enroll] options before the subcommand + - [] options immediately after the subcommand token + + CLI flags always win because they come later in argv. + """ + + if not cfg_path: + return argv + cfg_path = Path(cfg_path).expanduser() + if not (cfg_path.exists() and cfg_path.is_file()): + return argv + + cfg = configparser.ConfigParser() + try: + cfg.read(cfg_path, encoding="utf-8") + except (OSError, configparser.Error) as e: + raise SystemExit(f"error: failed to read config file {cfg_path}: {e}") + + global_tokens = _section_to_argv(root_parser, cfg, "enroll") + + # Find the subcommand token position. + cmd_pos: Optional[int] = None + cmd_name: Optional[str] = None + for i, tok in enumerate(argv): + if tok in subparsers: + cmd_pos = i + cmd_name = tok + break + if cmd_pos is None or cmd_name is None: + # No subcommand found (argparse will handle the error); only apply global. + return global_tokens + argv + + cmd_tokens = _section_to_argv(subparsers[cmd_name], cfg, cmd_name) + # Also accept section names with '_' in place of '-' (e.g. [single_shot]) + if "-" in cmd_name: + alt = cmd_name.replace("-", "_") + if alt != cmd_name: + cmd_tokens += _section_to_argv(subparsers[cmd_name], cfg, alt) + + return global_tokens + argv[: cmd_pos + 1] + cmd_tokens + argv[cmd_pos + 1 :] + + def _resolve_sops_out_file(out: Optional[str], *, hint: str) -> Path: """Resolve an output *file* path for --sops mode. @@ -95,6 +323,22 @@ def _add_remote_args(p: argparse.ArgumentParser) -> None: "--remote-host", help="SSH host to run harvesting on (if set, harvest runs remotely and is pulled locally).", ) + + +def _add_config_args(p: argparse.ArgumentParser) -> None: + p.add_argument( + "-c", + "--config", + help=( + "Path to an INI config file for default options. If omitted, enroll will look for " + "./enroll.ini, ./.enroll.ini, or ~/.config/enroll/enroll.ini (or $XDG_CONFIG_HOME/enroll/enroll.ini)." 
+ ), + ) + p.add_argument( + "--no-config", + action="store_true", + help="Do not load any INI config file (even if one would be auto-discovered).", + ) p.add_argument( "--remote-port", type=int, @@ -110,9 +354,11 @@ def _add_remote_args(p: argparse.ArgumentParser) -> None: def main() -> None: ap = argparse.ArgumentParser(prog="enroll") + _add_config_args(ap) sub = ap.add_subparsers(dest="cmd", required=True) h = sub.add_parser("harvest", help="Harvest service/package/config state") + _add_config_args(h) h.add_argument( "--out", help=( @@ -163,6 +409,7 @@ def main() -> None: _add_remote_args(h) m = sub.add_parser("manifest", help="Render Ansible roles from a harvest") + _add_config_args(m) m.add_argument( "--harvest", required=True, @@ -195,6 +442,7 @@ def main() -> None: s = sub.add_parser( "single-shot", help="Harvest state, then manifest Ansible code, in one shot" ) + _add_config_args(s) s.add_argument( "--harvest", help=( @@ -255,6 +503,7 @@ def main() -> None: _add_remote_args(s) d = sub.add_parser("diff", help="Compare two harvests and report differences") + _add_config_args(d) d.add_argument( "--old", required=True, @@ -338,7 +587,20 @@ def main() -> None: help="Environment variable containing SMTP password (optional).", ) - args = ap.parse_args() + argv = sys.argv[1:] + cfg_path = _discover_config_path(argv) + argv = _inject_config_argv( + argv, + cfg_path=cfg_path, + root_parser=ap, + subparsers={ + "harvest": h, + "manifest": m, + "single-shot": s, + "diff": d, + }, + ) + args = ap.parse_args(argv) remote_host: Optional[str] = getattr(args, "remote_host", None) From cf819f755a8ce200a2c6079f70a4ef14cc9efe06 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 20 Dec 2025 18:26:04 +1100 Subject: [PATCH 040/115] 0.1.3 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b5a07ab..541eded 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version 
= "0.1.2" +version = "0.1.3" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" From 59239eb2d27d799628e1d1e890325ed2947e6b91 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 20 Dec 2025 18:38:05 +1100 Subject: [PATCH 041/115] Fix formatting in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a5d2157..00f9d98 100644 --- a/README.md +++ b/README.md @@ -349,7 +349,7 @@ Enroll supports reading an ini-style file of all the arguments for each subcomma The path the config file can be specified with `-c` or `--config` on the command-line. Otherwise, Enroll will look for `./enroll.ini`, `./.enroll.ini` (in the current working directory), -``~/.config/enroll/enroll.ini` (or `$XDG_CONFIG_HOME/enroll/enroll.ini`). +`~/.config/enroll/enroll.ini` (or `$XDG_CONFIG_HOME/enroll/enroll.ini`). You may also pass `--no-config` if you deliberately want to ignore the config file even if it existed. From 51196a0a2b1615b7c463aebf59d861080a2b0ff5 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 22 Dec 2025 17:28:10 +1100 Subject: [PATCH 042/115] Fix trivy exit code --- .forgejo/workflows/trivy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.forgejo/workflows/trivy.yml b/.forgejo/workflows/trivy.yml index fad2f6f..d5585f4 100644 --- a/.forgejo/workflows/trivy.yml +++ b/.forgejo/workflows/trivy.yml @@ -23,7 +23,7 @@ jobs: - name: Run trivy run: | - trivy fs --no-progress --ignore-unfixed --format table --disable-telemetry . + trivy fs --no-progress --ignore-unfixed --format table --disable-telemetry --skip-version-check --exit-code 1 . 
# Notify if any previous step in this job failed - name: Notify on failure From 8c478249d9b2f112ffbdc66e25274160bb7b37e9 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Tue, 23 Dec 2025 17:22:50 +1100 Subject: [PATCH 043/115] Add build-deb action workflow --- .forgejo/workflows/build-deb.yml | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 .forgejo/workflows/build-deb.yml diff --git a/.forgejo/workflows/build-deb.yml b/.forgejo/workflows/build-deb.yml new file mode 100644 index 0000000..28276df --- /dev/null +++ b/.forgejo/workflows/build-deb.yml @@ -0,0 +1,65 @@ +name: CI + +on: + push: + +jobs: + test: + runs-on: docker + + steps: + - name: Install system dependencies + run: | + apt-get update + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + build-essential \ + devscripts \ + debhelper \ + dh-python \ + pybuild-plugin-pyproject \ + python3-all \ + python3-poetry-core \ + python3-yaml \ + python3-paramiko \ + rsync \ + ca-certificates + + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Build deb + run: | + mkdir /out + + rsync -a --delete \ + --exclude '.git' \ + --exclude '.venv' \ + --exclude 'dist' \ + --exclude 'build' \ + --exclude '__pycache__' \ + --exclude '.pytest_cache' \ + --exclude '.mypy_cache' \ + ./ /out/ + + cd /out/ + export DEBEMAIL="mig@mig5.net" + export DEBFULLNAME="Miguel Jacq" + + dch --distribution "trixie" --local "~trixie" "CI build for trixie" + dpkg-buildpackage -us -uc -b + + # Notify if any previous step in this job failed + - name: Notify on failure + if: ${{ failure() }} + env: + WEBHOOK_URL: ${{ secrets.NODERED_WEBHOOK_URL }} + REPOSITORY: ${{ forgejo.repository }} + RUN_NUMBER: ${{ forgejo.run_number }} + SERVER_URL: ${{ forgejo.server_url }} + run: | + curl -X POST \ + -H "Content-Type: application/json" \ + -d 
"{\"repository\":\"$REPOSITORY\",\"run_number\":\"$RUN_NUMBER\",\"status\":\"failure\",\"url\":\"$SERVER_URL/$REPOSITORY/actions/runs/$RUN_NUMBER\"}" \ + "$WEBHOOK_URL" From 4d2250f974195c3f5dd300aacb2d43e7aa6f2d65 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 16:56:30 +1100 Subject: [PATCH 044/115] Add fedora rpm building --- Dockerfile.rpmbuild | 102 ++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +- poetry.lock | 2 +- pyproject.toml | 4 +- release.sh | 31 ++++++++++++++ rpm/enroll.spec | 47 ++++++++++++++++++++ 6 files changed, 185 insertions(+), 5 deletions(-) create mode 100644 Dockerfile.rpmbuild create mode 100644 rpm/enroll.spec diff --git a/Dockerfile.rpmbuild b/Dockerfile.rpmbuild new file mode 100644 index 0000000..c928cea --- /dev/null +++ b/Dockerfile.rpmbuild @@ -0,0 +1,102 @@ +# syntax=docker/dockerfile:1 +FROM fedora:42 + +RUN set -eux; \ + dnf -y update; \ + dnf -y install \ + rpm-build \ + rpmdevtools \ + redhat-rpm-config \ + gcc \ + make \ + findutils \ + tar \ + gzip \ + rsync \ + python3 \ + python3-devel \ + python3-setuptools \ + python3-wheel \ + pyproject-rpm-macros \ + python3-rpm-macros \ + python3-yaml \ + python3-paramiko \ + openssl-devel \ + python3-poetry-core ; \ + dnf -y clean all + +# Build runner script (copies repo, tars, runs rpmbuild) +RUN set -eux; cat > /usr/local/bin/build-rpm <<'EOF' +#!/usr/bin/env bash +set -euo pipefail + +SRC="${SRC:-/src}" +WORKROOT="${WORKROOT:-/work}" +OUT="${OUT:-/out}" +DEPS_DIR="${DEPS_DIR:-/deps}" + +# Install jinjaturtle from local rpm +# Filter out .src.rpm and debug* subpackages if present. 
+if [ -d "${DEPS_DIR}" ] && compgen -G "${DEPS_DIR}/*.rpm" > /dev/null; then + mapfile -t rpms < <(ls -1 "${DEPS_DIR}"/*.rpm | grep -vE '(\.src\.rpm$|-(debuginfo|debugsource)-)') + if [ "${#rpms[@]}" -gt 0 ]; then + echo "Installing dependency RPMs from ${DEPS_DIR}:" + printf ' - %s\n' "${rpms[@]}" + dnf -y install "${rpms[@]}" + dnf -y clean all + else + echo "NOTE: Only src/debug RPMs found in ${DEPS_DIR}; nothing installed." >&2 + fi +else + echo "NOTE: No RPMs found in ${DEPS_DIR}. If the build fails with missing python3dist(jinjaturtle)," >&2 + echo " mount your jinjaturtle RPM directory as -v :/deps" >&2 +fi + +mkdir -p "${WORKROOT}" "${OUT}" +WORK="${WORKROOT}/src" +rm -rf "${WORK}" +mkdir -p "${WORK}" + +rsync -a --delete \ + --exclude '.git' \ + --exclude '.venv' \ + --exclude 'dist' \ + --exclude 'build' \ + --exclude '__pycache__' \ + --exclude '.pytest_cache' \ + --exclude '.mypy_cache' \ + "${SRC}/" "${WORK}/" + +cd "${WORK}" + +# Determine version from pyproject.toml unless provided +if [ -n "${VERSION:-}" ]; then + ver="${VERSION}" +else + ver="$(grep -m1 '^version = ' pyproject.toml | sed -E 's/version = "([^"]+)".*/\1/')" +fi + +TOPDIR="${WORKROOT}/rpmbuild" +mkdir -p "${TOPDIR}"/{BUILD,BUILDROOT,RPMS,SOURCES,SPECS,SRPMS} + +tarball="${TOPDIR}/SOURCES/enroll-${ver}.tar.gz" +tar -czf "${tarball}" --transform "s#^#enroll/#" . 
+ +spec_src="rpm/enroll.spec" + +cp -v "${spec_src}" "${TOPDIR}/SPECS/enroll.spec" + +rpmbuild -ba "${TOPDIR}/SPECS/enroll.spec" \ + --define "_topdir ${TOPDIR}" \ + --define "upstream_version ${ver}" + +shopt -s nullglob +cp -v "${TOPDIR}"/RPMS/*/*.rpm "${OUT}/" || true +cp -v "${TOPDIR}"/SRPMS/*.src.rpm "${OUT}/" || true +echo "Artifacts copied to ${OUT}" +EOF + +RUN chmod +x /usr/local/bin/build-rpm + +WORKDIR /work +ENTRYPOINT ["/usr/local/bin/build-rpm"] diff --git a/README.md b/README.md index 00f9d98..5a0db91 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ **enroll** inspects a Linux machine (currently Debian-only) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. -It aims to be **optimistic and noninteractive**: - Detects packages that have been installed. - Detects Debian package ownership of `/etc` files using dpkg’s local database. - Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available). @@ -26,9 +25,10 @@ It aims to be **optimistic and noninteractive**: 1) **Harvest**: collect host facts + relevant files into a harvest bundle (`state.json` + harvested artifacts) 2) **Manifest**: turn that harvest into Ansible roles/playbooks (and optionally inventory) -Additionally: +Additionally, some other functionalities exist: - **Diff**: compare two harvests and report what changed (packages/services/users/files) since the previous snapshot. +- **Single-shot mode**: run both harvest and manifest at once. 
--- diff --git a/poetry.lock b/poetry.lock index 1f2948d..0a90711 100644 --- a/poetry.lock +++ b/poetry.lock @@ -923,4 +923,4 @@ zstd = ["backports-zstd (>=1.0.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "c3466a6595a9822763431a6dff0c7f835407a2591b92d5995592f8e6802c774a" +content-hash = "20623104a1a5f4c6d4aaa759f25b2591d5de345d1464e727eb4140a6ef9a5b6e" diff --git a/pyproject.toml b/pyproject.toml index 541eded..3079404 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,8 +10,8 @@ repository = "https://git.mig5.net/mig5/enroll" [tool.poetry.dependencies] python = "^3.10" -pyyaml = "^6.0.3" -paramiko = "^4.0.0" +pyyaml = "^6" +paramiko = ">=3.5" [tool.poetry.scripts] enroll = "enroll.cli:main" diff --git a/release.sh b/release.sh index fe99a52..fdbe771 100755 --- a/release.sh +++ b/release.sh @@ -42,3 +42,34 @@ for dist in ${DISTS[@]}; do debfile=$(ls -1 dist/${release}/*.deb) reprepro -b /home/user/git/repo includedeb "${release}" "${debfile}" done + +# RPM +sudo apt-get -y install createrepo-c rpm +docker build -f Dockerfile.rpmbuild -t enroll:f42 --progress=plain . +docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll:f42 +sudo chown -R "${USER}" "$PWD/dist" + +REPO_ROOT="${HOME}/git/repo_rpm" +RPM_REPO="${REPO_ROOT}/rpm/x86_64" +BUILD_OUTPUT="${HOME}/git/enroll/dist" +REMOTE="letessier.mig5.net:/opt/repo_rpm" +KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" + +echo "==> Updating RPM repo..." +mkdir -p "$RPM_REPO" + +for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do + rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" +done + +cp "${BUILD_OUTPUT}/rpm/"*.rpm "$RPM_REPO/" + +createrepo_c "$RPM_REPO" + +echo "==> Signing repomd.xml..." +qubes-gpg-client --local-user "$KEYID" --detach-sign --armor "$RPM_REPO/repodata/repomd.xml" > "$RPM_REPO/repodata/repomd.xml.asc" + +echo "==> Syncing repo to server..." +rsync -aHPvz --exclude=.git --delete "$REPO_ROOT/" "$REMOTE/" + +echo "Done!" 
diff --git a/rpm/enroll.spec b/rpm/enroll.spec new file mode 100644 index 0000000..403d6da --- /dev/null +++ b/rpm/enroll.spec @@ -0,0 +1,47 @@ +%global upstream_version 0.1.3 + +Name: enroll +Version: %{upstream_version} +Release: 1%{?dist}.enroll1 +Summary: Enroll a server's running state retrospectively into Ansible. + +License: GPL-3.0-or-later +URL: https://git.mig5.net/mig5/enroll +Source0: %{name}-%{version}.tar.gz + +BuildArch: noarch + +BuildRequires: pyproject-rpm-macros +BuildRequires: python3-devel +BuildRequires: python3-poetry-core + +Requires: python3-yaml +Requires: python3-paramiko + +# Make sure private repo dependency is pulled in by package name as well. +Recommends: jinjaturtle + +%description +Enroll a server's running state retrospectively into Ansible. + +%prep +%autosetup -n enroll + +%generate_buildrequires +%pyproject_buildrequires + +%build +%pyproject_wheel + +%install +%pyproject_install +%pyproject_save_files enroll + +%files -f %{pyproject_files} +%license LICENSE +%doc README.md CHANGELOG.md +%{_bindir}/enroll + +%changelog +* Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} +- Initial RPM packaging for Fedora 42 From 054a6192d170dcd1bf418263376287f711ff6dd6 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 19:02:22 +1100 Subject: [PATCH 045/115] Capture more singletons in /etc and avoid apt duplication --- CHANGELOG.md | 5 + debian/changelog | 7 + enroll/harvest.py | 367 +++++++++++++++++++++++++++++++++++++++++++--- enroll/ignore.py | 22 +++ enroll/systemd.py | 97 ++++++++++++ rpm/enroll.spec | 5 +- 6 files changed, 481 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 90478e5..a51be14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.4 + + * Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers + * Avoid duplicate apt data in package-specific roles. 
+ # 0.1.3 * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` diff --git a/debian/changelog b/debian/changelog index f6ba2f7..17b8985 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.1.4) unstable; urgency=medium + + * Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers + * Avoid duplicate apt data in package-specific roles. + + -- Miguel Jacq Sat, 27 Dec 2025 19:00:00 +1100 + enroll (0.1.3) unstable; urgency=medium * Allow the user to add extra paths to harvest, or paths to ignore, using `--exclude-path` and `--include-path` diff --git a/enroll/harvest.py b/enroll/harvest.py index 48242d6..0543355 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -8,7 +8,13 @@ import shutil from dataclasses import dataclass, asdict from typing import Dict, List, Optional, Set -from .systemd import list_enabled_services, get_unit_info, UnitQueryError +from .systemd import ( + list_enabled_services, + list_enabled_timers, + get_unit_info, + get_timer_info, + UnitQueryError, +) from .debian import ( build_dpkg_etc_index, dpkg_owner, @@ -98,24 +104,24 @@ class ExtraPathsSnapshot: ALLOWED_UNOWNED_EXTS = { + ".cnf", ".conf", ".cfg", ".ini", - ".cnf", - ".yaml", - ".yml", ".json", - ".toml", + ".link", + ".mount", + ".netdev", + ".network", + ".path", ".rules", ".service", ".socket", - ".timer", ".target", - ".path", - ".mount", - ".network", - ".netdev", - ".link", + ".timer", + ".toml", + ".yaml", + ".yml", "", # allow extensionless (common in /etc/default and /etc/init.d) } @@ -123,23 +129,24 @@ MAX_UNOWNED_FILES_PER_ROLE = 400 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package. 
SHARED_ETC_TOPDIRS = { - "default", "apparmor.d", - "network", - "init.d", - "systemd", - "pam.d", - "ssh", - "ssl", - "sudoers.d", + "apt", "cron.d", "cron.daily", "cron.weekly", "cron.monthly", "cron.hourly", + "default", + "init.d", "logrotate.d", - "sysctl.d", "modprobe.d", + "network", + "pam.d", + "ssh", + "ssl", + "sudoers.d", + "sysctl.d", + "systemd", } @@ -256,6 +263,181 @@ def _topdirs_for_package(pkg: str, pkg_to_etc_paths: Dict[str, List[str]]) -> Se return topdirs +# ------------------------- +# System capture helpers +# ------------------------- + +_APT_SOURCE_GLOBS = [ + "/etc/apt/sources.list", + "/etc/apt/sources.list.d/*.list", + "/etc/apt/sources.list.d/*.sources", +] + +_APT_MISC_GLOBS = [ + "/etc/apt/apt.conf", + "/etc/apt/apt.conf.d/*", + "/etc/apt/preferences", + "/etc/apt/preferences.d/*", + "/etc/apt/auth.conf", + "/etc/apt/auth.conf.d/*", + "/etc/apt/trusted.gpg", + "/etc/apt/trusted.gpg.d/*", + "/etc/apt/keyrings/*", +] + +_SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ + # mounts + ("/etc/fstab", "system_mounts"), + ("/etc/crypttab", "system_mounts"), + # logrotate + ("/etc/logrotate.conf", "system_logrotate"), + ("/etc/logrotate.d/*", "system_logrotate"), + # sysctl / modules + ("/etc/sysctl.conf", "system_sysctl"), + ("/etc/sysctl.d/*", "system_sysctl"), + ("/etc/modprobe.d/*", "system_modprobe"), + ("/etc/modules", "system_modprobe"), + ("/etc/modules-load.d/*", "system_modprobe"), + # cron + ("/etc/crontab", "system_cron"), + ("/etc/cron.d/*", "system_cron"), + ("/etc/anacrontab", "system_cron"), + ("/etc/anacron/*", "system_cron"), + ("/var/spool/cron/crontabs/*", "system_cron"), + ("/var/spool/crontabs/*", "system_cron"), + # network + ("/etc/netplan/*", "system_network"), + ("/etc/systemd/network/*", "system_network"), + ("/etc/network/interfaces", "system_network"), + ("/etc/network/interfaces.d/*", "system_network"), + ("/etc/resolvconf.conf", "system_network"), + ("/etc/resolvconf/resolv.conf.d/*", "system_network"), + # 
firewall + ("/etc/nftables.conf", "system_firewall"), + ("/etc/nftables.d/*", "system_firewall"), + ("/etc/iptables/rules.v4", "system_firewall"), + ("/etc/iptables/rules.v6", "system_firewall"), + ("/etc/ufw/*", "system_firewall"), + ("/etc/default/ufw", "system_firewall"), + # other + ("/etc/rc.local", "system_rc"), +] + + +def _iter_matching_files(spec: str, *, cap: int = 2000) -> List[str]: + """Expand a glob spec and also walk directories to collect files.""" + out: List[str] = [] + for p in glob.glob(spec): + if len(out) >= cap: + break + if os.path.islink(p): + continue + if os.path.isfile(p): + out.append(p) + continue + if os.path.isdir(p): + for dirpath, _, filenames in os.walk(p): + for fn in filenames: + if len(out) >= cap: + break + fp = os.path.join(dirpath, fn) + if os.path.islink(fp) or not os.path.isfile(fp): + continue + out.append(fp) + if len(out) >= cap: + break + return out + + +def _parse_apt_signed_by(source_files: List[str]) -> Set[str]: + """Return absolute keyring paths referenced via signed-by / Signed-By.""" + out: Set[str] = set() + + # deb line: deb [signed-by=/usr/share/keyrings/foo.gpg] ... 
+ re_signed_by = re.compile(r"signed-by\s*=\s*([^\]\s]+)", re.IGNORECASE) + # deb822: Signed-By: /usr/share/keyrings/foo.gpg + re_signed_by_hdr = re.compile(r"^\s*Signed-By\s*:\s*(.+)$", re.IGNORECASE) + + for sf in source_files: + try: + with open(sf, "r", encoding="utf-8", errors="replace") as f: + for raw in f: + line = raw.strip() + if not line or line.startswith("#"): + continue + + m = re_signed_by_hdr.match(line) + if m: + val = m.group(1).strip() + if val.startswith("|"): + continue + toks = re.split(r"[\s,]+", val) + for t in toks: + if t.startswith("/"): + out.add(t) + continue + + # Try bracketed options first (common for .list files) + if "[" in line and "]" in line: + bracket = line.split("[", 1)[1].split("]", 1)[0] + for mm in re_signed_by.finditer(bracket): + val = mm.group(1).strip().strip("\"'") + for t in re.split(r"[\s,]+", val): + if t.startswith("/"): + out.add(t) + continue + + # Fallback: signed-by= in whole line + for mm in re_signed_by.finditer(line): + val = mm.group(1).strip().strip("\"'") + for t in re.split(r"[\s,]+", val): + if t.startswith("/"): + out.add(t) + except OSError: + continue + + return out + + +def _iter_system_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for essential system config/state.""" + out: List[tuple[str, str]] = [] + + # APT: capture sources and related config + apt_sources: List[str] = [] + for g in _APT_SOURCE_GLOBS: + apt_sources.extend(_iter_matching_files(g)) + for p in sorted(set(apt_sources)): + out.append((p, "system_apt_sources")) + + # APT: misc config files/dirs + for g in _APT_MISC_GLOBS: + for p in _iter_matching_files(g): + out.append((p, "system_apt_config")) + + # APT: referenced keyrings (may live outside /etc) + signed_by = _parse_apt_signed_by(sorted(set(apt_sources))) + for p in sorted(signed_by): + if os.path.islink(p) or not os.path.isfile(p): + continue + out.append((p, "system_apt_keyring")) + + # Other system config/state globs + for spec, reason in 
_SYSTEM_CAPTURE_GLOBS: + for p in _iter_matching_files(spec): + out.append((p, reason)) + + # De-dup while preserving first reason + seen: Set[str] = set() + uniq: List[tuple[str, str]] = [] + for p, r in out: + if p in seen: + continue + seen.add(p) + uniq.append((p, r)) + return uniq + + def harvest( bundle_dir: str, policy: Optional[IgnorePolicy] = None, @@ -467,6 +649,107 @@ def harvest( ) ) + # ------------------------- + # Enabled systemd timers + # + # Timers are typically related to a service/package, so we try to attribute + # timer unit overrides to their associated role rather than creating a + # standalone timer role. If we can't attribute a timer, it will fall back + # to etc_custom (if it's a custom /etc unit). + # ------------------------- + timer_extra_by_pkg: Dict[str, List[str]] = {} + try: + enabled_timers = list_enabled_timers() + except Exception: + enabled_timers = [] + + service_snap_by_unit: Dict[str, ServiceSnapshot] = { + s.unit: s for s in service_snaps + } + + for t in enabled_timers: + try: + ti = get_timer_info(t) + except Exception: # nosec + continue + + timer_paths: List[str] = [] + for pth in [ti.fragment_path, *ti.dropin_paths, *ti.env_files]: + if not pth: + continue + if not pth.startswith("/etc/"): + # Prefer capturing only custom/overridden units. 
+ continue + if os.path.islink(pth) or not os.path.isfile(pth): + continue + timer_paths.append(pth) + + if not timer_paths: + continue + + # Primary attribution: timer -> trigger service role + snap = None + if ti.trigger_unit: + snap = service_snap_by_unit.get(ti.trigger_unit) + + if snap is not None: + for path in timer_paths: + if path_filter.is_excluded(path): + snap.excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + continue + deny = policy.deny_reason(path) + if deny: + snap.excluded.append(ExcludedFile(path=path, reason=deny)) + continue + try: + owner, group, mode = stat_triplet(path) + except OSError: + snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, snap.role_name, path, src_rel) + except OSError: + snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + snap.managed_files.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason="related_timer", + ) + ) + continue + + # Secondary attribution: associate timer overrides with a package role + # (useful when a timer triggers a service that isn't enabled). 
+ pkgs: Set[str] = set() + if ti.fragment_path: + p = dpkg_owner(ti.fragment_path) + if p: + pkgs.add(p) + if ti.trigger_unit and ti.trigger_unit.endswith(".service"): + try: + ui = get_unit_info(ti.trigger_unit) + if ui.fragment_path: + p = dpkg_owner(ui.fragment_path) + if p: + pkgs.add(p) + for exe in ui.exec_paths: + p = dpkg_owner(exe) + if p: + pkgs.add(p) + except Exception: # nosec + pass + + for pkg in pkgs: + timer_extra_by_pkg.setdefault(pkg, []).extend(timer_paths) + # ------------------------- # Manually installed package roles # ------------------------- @@ -490,6 +773,9 @@ def harvest( managed: List[ManagedFile] = [] candidates: Dict[str, str] = {} + for tpath in timer_extra_by_pkg.get(pkg, []): + candidates.setdefault(tpath, "related_timer") + conff = conffiles_by_pkg.get(pkg, {}) md5sums = read_pkg_md5sums(pkg) @@ -677,7 +963,46 @@ def harvest( for mf in users_managed: already.add(mf.path) - # Walk /etc for unowned config-ish files + # Capture essential system config/state (even if package-owned). 
+ for path, reason in _iter_system_capture_paths(): + if path in already: + continue + + if path_filter.is_excluded(path): + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + + deny = policy.deny_reason(path) + if deny: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + try: + owner, group, mode = stat_triplet(path) + except OSError: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + except OSError: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + etc_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason, + ) + ) + already.add(path) + + # Walk /etc for remaining unowned config-ish files scanned = 0 for dirpath, _, filenames in os.walk("/etc"): for fn in filenames: diff --git a/enroll/ignore.py b/enroll/ignore.py index 93ba423..ab2cb96 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -30,6 +30,21 @@ DEFAULT_DENY_GLOBS = [ "/usr/local/etc/letsencrypt/*", ] + +# Allow a small set of binary config artifacts that are commonly required to +# reproduce system configuration (notably APT keyrings). These are still subject +# to size and readability limits, but are exempt from the "binary_like" denial. 
+DEFAULT_ALLOW_BINARY_GLOBS = [ + "/etc/apt/trusted.gpg", + "/etc/apt/trusted.gpg.d/*.gpg", + "/etc/apt/keyrings/*.gpg", + "/etc/apt/keyrings/*.pgp", + "/etc/apt/keyrings/*.asc", + "/usr/share/keyrings/*.gpg", + "/usr/share/keyrings/*.pgp", + "/usr/share/keyrings/*.asc", +] + SENSITIVE_CONTENT_PATTERNS = [ re.compile(rb"-----BEGIN (RSA |EC |OPENSSH |)PRIVATE KEY-----"), re.compile(rb"(?i)\bpassword\s*="), @@ -44,6 +59,7 @@ BLOCK_END = b"*/" @dataclass class IgnorePolicy: deny_globs: Optional[list[str]] = None + allow_binary_globs: Optional[list[str]] = None max_file_bytes: int = 256_000 sample_bytes: int = 64_000 # If True, be much less conservative about collecting potentially @@ -54,6 +70,8 @@ class IgnorePolicy: def __post_init__(self) -> None: if self.deny_globs is None: self.deny_globs = list(DEFAULT_DENY_GLOBS) + if self.allow_binary_globs is None: + self.allow_binary_globs = list(DEFAULT_ALLOW_BINARY_GLOBS) def iter_effective_lines(self, content: bytes): in_block = False @@ -105,6 +123,10 @@ class IgnorePolicy: return "unreadable" if b"\x00" in data: + for g in self.allow_binary_globs or []: + if fnmatch.fnmatch(path, g): + # Binary is acceptable for explicitly-allowed paths. 
+ return None return "binary_like" if not self.dangerous: diff --git a/enroll/systemd.py b/enroll/systemd.py index ae8ce8d..7081001 100644 --- a/enroll/systemd.py +++ b/enroll/systemd.py @@ -33,6 +33,19 @@ def _run(cmd: list[str]) -> str: return p.stdout +@dataclass +class TimerInfo: + name: str + fragment_path: Optional[str] + dropin_paths: List[str] + env_files: List[str] + trigger_unit: Optional[str] + active_state: Optional[str] + sub_state: Optional[str] + unit_file_state: Optional[str] + condition_result: Optional[str] + + def list_enabled_services() -> List[str]: out = _run( [ @@ -58,6 +71,31 @@ def list_enabled_services() -> List[str]: return sorted(set(units)) +def list_enabled_timers() -> List[str]: + out = _run( + [ + "systemctl", + "list-unit-files", + "--type=timer", + "--state=enabled", + "--no-legend", + ] + ) + units: List[str] = [] + for line in out.splitlines(): + parts = line.split() + if not parts: + continue + unit = parts[0].strip() + if not unit.endswith(".timer"): + continue + # Skip template units like "foo@.timer" + if unit.endswith("@.timer"): + continue + units.append(unit) + return sorted(set(units)) + + def get_unit_info(unit: str) -> UnitInfo: p = subprocess.run( [ @@ -117,3 +155,62 @@ def get_unit_info(unit: str) -> UnitInfo: unit_file_state=kv.get("UnitFileState") or None, condition_result=kv.get("ConditionResult") or None, ) + + +def get_timer_info(unit: str) -> TimerInfo: + p = subprocess.run( + [ + "systemctl", + "show", + unit, + "-p", + "FragmentPath", + "-p", + "DropInPaths", + "-p", + "EnvironmentFiles", + "-p", + "Unit", + "-p", + "ActiveState", + "-p", + "SubState", + "-p", + "UnitFileState", + "-p", + "ConditionResult", + ], + text=True, + capture_output=True, + ) # nosec + if p.returncode != 0: + raise RuntimeError(f"systemctl show failed for {unit}: {p.stderr}") + + kv: dict[str, str] = {} + for line in (p.stdout or "").splitlines(): + if "=" in line: + k, v = line.split("=", 1) + kv[k] = v.strip() + + fragment = 
kv.get("FragmentPath") or None + dropins = [pp for pp in (kv.get("DropInPaths", "") or "").split() if pp] + + env_files: List[str] = [] + for token in (kv.get("EnvironmentFiles", "") or "").split(): + token = token.lstrip("-") + if token: + env_files.append(token) + + trigger = kv.get("Unit") or None + + return TimerInfo( + name=unit, + fragment_path=fragment, + dropin_paths=dropins, + env_files=env_files, + trigger_unit=trigger, + active_state=kv.get("ActiveState") or None, + sub_state=kv.get("SubState") or None, + unit_file_state=kv.get("UnitFileState") or None, + condition_result=kv.get("ConditionResult") or None, + ) diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 403d6da..707dc10 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.3 +%global upstream_version 0.1.4 Name: enroll Version: %{upstream_version} @@ -44,4 +44,7 @@ Enroll a server's running state retrospectively into Ansible. %changelog * Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} +- Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers +- Avoid duplicate apt data in package-specific roles. 
+* Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} - Initial RPM packaging for Fedora 42 From 40aad9e798c4631c571608dbeeb1a2319440cdc0 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 19:04:00 +1100 Subject: [PATCH 046/115] 0.1.4 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3079404..f1f2420 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.3" +version = "0.1.4" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" From cae6246177581a0cc79e6aa3704298a164a154e3 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 27 Dec 2025 19:14:01 +1100 Subject: [PATCH 047/115] Add Fedora install steps to README --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index 5a0db91..c6b8123 100644 --- a/README.md +++ b/README.md @@ -191,6 +191,25 @@ sudo apt update sudo apt install enroll ``` +### Fedora 42 + +```bash +sudo rpm --import https://mig5.net/static/mig5.asc + +sudo tee /etc/yum.repos.d/mig5.repo > /dev/null << 'EOF' +[mig5] +name=mig5 Repository +baseurl=https://rpm.mig5.net/rpm/$basearch +enabled=1 +gpgcheck=1 +repo_gpgcheck=1 +gpgkey=https://mig5.net/static/mig5.asc +EOF + +sudo dnf upgrade --refresh +sudo dnf install enroll +``` + ## AppImage Download it from my Releases page, then: From 303c1b0dd8b47fed40bb275845155a4c9daf4b38 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 09:30:21 +1100 Subject: [PATCH 048/115] Consolidate logrotate and cron files into their main service/package roles if they exist. 
Standardise on MAX_FILES_CAP in one place --- enroll/harvest.py | 233 ++++++++++++++++++++++++++++++++++++------- enroll/manifest.py | 30 ++---- enroll/pathfilter.py | 2 +- 3 files changed, 208 insertions(+), 57 deletions(-) diff --git a/enroll/harvest.py b/enroll/harvest.py index 0543355..d4cfacd 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -125,7 +125,9 @@ ALLOWED_UNOWNED_EXTS = { "", # allow extensionless (common in /etc/default and /etc/init.d) } -MAX_UNOWNED_FILES_PER_ROLE = 400 +MAX_FILES_CAP = 4000 + +MAX_UNOWNED_FILES_PER_ROLE = 500 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package. SHARED_ETC_TOPDIRS = { @@ -324,7 +326,7 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ] -def _iter_matching_files(spec: str, *, cap: int = 2000) -> List[str]: +def _iter_matching_files(spec: str, *, cap: int = MAX_FILES_CAP) -> List[str]: """Expand a glob spec and also walk directories to collect files.""" out: List[str] = [] for p in glob.glob(spec): @@ -963,43 +965,141 @@ def harvest( for mf in users_managed: already.add(mf.path) + # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. + svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} + pkg_by_role: Dict[str, PackageSnapshot] = {p.role_name: p for p in pkg_snaps} + + def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]: + """If `path` is a shared snippet, return (role_name, reason) to attach to.""" + base = os.path.basename(path) + + # Try full filename and stem (before first dot). + candidates: List[str] = [base] + if "." 
in base: + candidates.append(base.split(".", 1)[0]) + + seen: Set[str] = set() + uniq: List[str] = [] + for c in candidates: + if c and c not in seen: + seen.add(c) + uniq.append(c) + + if path.startswith("/etc/logrotate.d/"): + for c in uniq: + rn = _safe_name(c) + if rn in svc_by_role or rn in pkg_by_role: + return (rn, "logrotate_snippet") + return None + + if path.startswith("/etc/cron.d/"): + for c in uniq: + rn = _safe_name(c) + if rn in svc_by_role or rn in pkg_by_role: + return (rn, "cron_snippet") + return None + + return None + # Capture essential system config/state (even if package-owned). for path, reason in _iter_system_capture_paths(): if path in already: continue + target = _target_role_for_shared_snippet(path) + if path_filter.is_excluded(path): - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + already.add(path) continue deny = policy.deny_reason(path) if deny: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + already.add(path) continue try: owner, group, mode = stat_triplet(path) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, 
reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue src_rel = path.lstrip("/") + role_for_copy = etc_role_name + reason_for_role = reason + if target: + role_for_copy, reason_for_role = target + try: - _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue - etc_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + mf = ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason_for_role, ) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].managed_files.append(mf) + elif rn in pkg_by_role: + pkg_by_role[rn].managed_files.append(mf) + else: + etc_managed.append(mf) + already.add(path) # Walk /etc for remaining unowned config-ish files @@ -1016,45 +1116,106 @@ def harvest( if not _is_confish(path): continue + target = _target_role_for_shared_snippet(path) + if path_filter.is_excluded(path): - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="user_excluded") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + already.add(path) continue deny = policy.deny_reason(path) if 
deny: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason=deny) + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason=deny)) + already.add(path) continue try: owner, group, mode = stat_triplet(path) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue src_rel = path.lstrip("/") + role_for_copy = etc_role_name + reason_for_role = "custom_unowned" + if target: + role_for_copy, reason_for_role = target + try: - _copy_into_bundle(bundle_dir, etc_role_name, path, src_rel) + _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) except OSError: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + elif rn in pkg_by_role: + pkg_by_role[rn].excluded.append( + ExcludedFile(path=path, reason="unreadable") + ) + else: + etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) + already.add(path) continue - etc_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason="custom_unowned", - ) + mf = ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason_for_role, ) + if target: + rn, _ = target + if rn in svc_by_role: + svc_by_role[rn].managed_files.append(mf) + elif rn in pkg_by_role: + 
pkg_by_role[rn].managed_files.append(mf) + else: + etc_managed.append(mf) scanned += 1 - if scanned >= 2000: + if scanned >= MAX_FILES_CAP: etc_notes.append( - "Reached file cap (2000) while scanning /etc for unowned files." + f"Reached file cap ({MAX_FILES_CAP}) while scanning /etc for unowned files." ) break - if scanned >= 2000: + if scanned >= MAX_FILES_CAP: break etc_custom_snapshot = EtcCustomSnapshot( @@ -1146,7 +1307,7 @@ def harvest( _scan_usr_local_tree( "/usr/local/etc", require_executable=False, - cap=2000, + cap=MAX_FILES_CAP, reason="usr_local_etc_custom", ) @@ -1154,7 +1315,7 @@ def harvest( _scan_usr_local_tree( "/usr/local/bin", require_executable=True, - cap=2000, + cap=MAX_FILES_CAP, reason="usr_local_bin_script", ) @@ -1188,7 +1349,7 @@ def harvest( files, inc_notes = expand_includes( path_filter.iter_include_patterns(), exclude=path_filter, - max_files=4000, + max_files=MAX_FILES_CAP, ) included_files = files extra_notes.extend(inc_notes) diff --git a/enroll/manifest.py b/enroll/manifest.py index 2f28eab..d5ebff7 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -138,7 +138,6 @@ def _copy_artifacts( # If a file was successfully templatised by JinjaTurtle, do NOT # also materialise the raw copy in the destination files dir. - # (This keeps the output minimal and avoids redundant "raw" files.) 
if exclude_rels and rel in exclude_rels: try: if os.path.isfile(dst): @@ -165,7 +164,7 @@ def _write_role_scaffold(role_dir: str) -> None: def _write_playbook_all(path: str, roles: List[str]) -> None: pb_lines = [ "---", - "- name: Apply all roles on host", + "- name: Apply all roles on all hosts", " hosts: all", " become: true", " roles:", @@ -179,7 +178,7 @@ def _write_playbook_all(path: str, roles: List[str]) -> None: def _write_playbook_host(path: str, fqdn: str, roles: List[str]) -> None: pb_lines = [ "---", - f"- name: Apply enroll roles on {fqdn}", + f"- name: Apply all roles on {fqdn}", f" hosts: {fqdn}", " become: true", " roles:", @@ -390,9 +389,9 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - return f"""# Generated by enroll (data-driven tasks) + return f"""# Generated by enroll -- name: Deploy systemd unit files (templates) +- name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -406,7 +405,7 @@ def _render_generic_files_tasks( | list }}}} notify: "{{{{ item.notify | default([]) }}}}" -- name: Deploy systemd unit files (copies) +- name: Deploy any systemd unit files (raw files) vars: _enroll_ff: files: @@ -433,7 +432,7 @@ def _render_generic_files_tasks( | list | length) > 0 -- name: Deploy other managed files (templates) +- name: Deploy any other managed files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -447,7 +446,7 @@ def _render_generic_files_tasks( | list }}}} notify: "{{{{ item.notify | default([]) }}}}" -- name: Deploy other managed files (copies) +- name: Deploy any other managed files (raw files) vars: _enroll_ff: files: @@ -668,11 +667,6 @@ def _manifest_from_bundle_dir( manifested_service_roles: List[str] = [] manifested_pkg_roles: List[str] = [] - # In site_mode, raw harvested 
files are stored under host-specific inventory - # to avoid cross-host clobber while still sharing a role definition. - - # ------------------------- - # ------------------------- # Users role (non-system users) # ------------------------- @@ -793,7 +787,7 @@ def _manifest_from_bundle_dir( # tasks (data-driven) users_tasks = """--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Ensure groups exist ansible.builtin.group: @@ -893,8 +887,6 @@ Generated non-system user accounts and SSH public material. manifested_users_roles.append(role) - # ------------------------- - # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -1212,8 +1204,6 @@ User-requested extra file harvesting. manifested_usr_local_custom_roles.append(role) - # ------------------------- - # ------------------------- # Service roles # ------------------------- @@ -1315,7 +1305,7 @@ User-requested extra file harvesting. task_parts: List[str] = [] task_parts.append( f"""--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Install packages for {role} ansible.builtin.apt: @@ -1474,7 +1464,7 @@ Generated from `{unit}`. task_parts: List[str] = [] task_parts.append( f"""--- -# Generated by enroll (data-driven tasks) +# Generated by enroll - name: Install packages for {role} ansible.builtin.apt: diff --git a/enroll/pathfilter.py b/enroll/pathfilter.py index 9df4afa..6541ca9 100644 --- a/enroll/pathfilter.py +++ b/enroll/pathfilter.py @@ -174,7 +174,7 @@ def expand_includes( patterns: Sequence[CompiledPathPattern], *, exclude: Optional[PathFilter] = None, - max_files: int = 4000, + max_files: int, ) -> Tuple[List[str], List[str]]: """Expand include patterns into concrete file paths. 
From 8c6b51be3eb2ea949861937eddcffed74a439873 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 09:39:14 +1100 Subject: [PATCH 049/115] Manage apt stuff in its own role, not in etc_custom --- enroll/diff.py | 6 ++ enroll/harvest.py | 120 ++++++++++++++++++++++++++++++---- enroll/manifest.py | 157 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 270 insertions(+), 13 deletions(-) diff --git a/enroll/diff.py b/enroll/diff.py index a2b7d91..0110d17 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -184,6 +184,12 @@ def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, for mf in u.get("managed_files", []) or []: yield str(u_role), mf + # apt_config + ac = state.get("apt_config") or {} + ac_role = ac.get("role_name") or "apt_config" + for mf in ac.get("managed_files", []) or []: + yield str(ac_role), mf + # etc_custom ec = state.get("etc_custom") or {} ec_role = ec.get("role_name") or "etc_custom" diff --git a/enroll/harvest.py b/enroll/harvest.py index d4cfacd..c1a1986 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -77,6 +77,14 @@ class UsersSnapshot: notes: List[str] +@dataclass +class AptConfigSnapshot: + role_name: str + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + @dataclass class EtcCustomSnapshot: role_name: str @@ -126,7 +134,6 @@ ALLOWED_UNOWNED_EXTS = { } MAX_FILES_CAP = 4000 - MAX_UNOWNED_FILES_PER_ROLE = 500 # Directories that are shared across many packages; never attribute unowned files in these trees to a single package. @@ -401,30 +408,61 @@ def _parse_apt_signed_by(source_files: List[str]) -> Set[str]: return out -def _iter_system_capture_paths() -> List[tuple[str, str]]: - """Return (path, reason) pairs for essential system config/state.""" - out: List[tuple[str, str]] = [] +def _iter_apt_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for APT configuration. 
- # APT: capture sources and related config + This captures the full /etc/apt tree (subject to IgnorePolicy at copy time), + plus any keyrings referenced via signed-by/Signed-By which may live outside + /etc (e.g. /usr/share/keyrings). + """ + reasons: Dict[str, str] = {} + + # Capture all regular files under /etc/apt (no symlinks). + if os.path.isdir("/etc/apt"): + for dirpath, _, filenames in os.walk("/etc/apt"): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, "apt_config") + + # Identify source files explicitly for nicer reasons and keyring discovery. apt_sources: List[str] = [] for g in _APT_SOURCE_GLOBS: apt_sources.extend(_iter_matching_files(g)) for p in sorted(set(apt_sources)): - out.append((p, "system_apt_sources")) + reasons[p] = "apt_source" - # APT: misc config files/dirs - for g in _APT_MISC_GLOBS: + # Keyrings in standard locations. + for g in ( + "/etc/apt/trusted.gpg", + "/etc/apt/trusted.gpg.d/*", + "/etc/apt/keyrings/*", + ): for p in _iter_matching_files(g): - out.append((p, "system_apt_config")) + reasons[p] = "apt_keyring" - # APT: referenced keyrings (may live outside /etc) + # Keyrings referenced by sources (may live outside /etc/apt). signed_by = _parse_apt_signed_by(sorted(set(apt_sources))) for p in sorted(signed_by): if os.path.islink(p) or not os.path.isfile(p): continue - out.append((p, "system_apt_keyring")) + if p.startswith("/etc/apt/"): + reasons[p] = "apt_keyring" + else: + reasons[p] = "apt_signed_by_keyring" + + # De-dup with stable ordering. 
+ uniq: List[tuple[str, str]] = [] + for p in sorted(reasons.keys()): + uniq.append((p, reasons[p])) + return uniq + + +def _iter_system_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for essential system config/state (non-APT).""" + out: List[tuple[str, str]] = [] - # Other system config/state globs for spec, reason in _SYSTEM_CAPTURE_GLOBS: for p in _iter_matching_files(spec): out.append((p, reason)) @@ -544,6 +582,8 @@ def harvest( for path in pkg_to_etc_paths.get(pkg, []): if not os.path.isfile(path) or os.path.islink(path): continue + if path.startswith("/etc/apt/"): + continue if path in conff: # Only capture conffiles when they differ from the package default. try: @@ -784,6 +824,8 @@ def harvest( for path in pkg_to_etc_paths.get(pkg, []): if not os.path.isfile(path) or os.path.islink(path): continue + if path.startswith("/etc/apt/"): + continue if path in conff: try: current = file_md5(path) @@ -946,6 +988,55 @@ def harvest( notes=users_notes, ) + # ------------------------- + # apt_config role (APT configuration and keyrings) + # ------------------------- + apt_notes: List[str] = [] + apt_excluded: List[ExcludedFile] = [] + apt_managed: List[ManagedFile] = [] + apt_role_name = "apt_config" + + for path, reason in _iter_apt_capture_paths(): + if path_filter.is_excluded(path): + apt_excluded.append(ExcludedFile(path=path, reason="user_excluded")) + continue + + deny = policy.deny_reason(path) + if deny: + apt_excluded.append(ExcludedFile(path=path, reason=deny)) + continue + + try: + owner, group, mode = stat_triplet(path) + except OSError: + apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + src_rel = path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, apt_role_name, path, src_rel) + except OSError: + apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + + apt_managed.append( + ManagedFile( + path=path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + 
reason=reason, + ) + ) + + apt_config_snapshot = AptConfigSnapshot( + role_name=apt_role_name, + managed_files=apt_managed, + excluded=apt_excluded, + notes=apt_notes, + ) + # ------------------------- # etc_custom role (unowned /etc files not already attributed elsewhere) # ------------------------- @@ -964,6 +1055,8 @@ def harvest( already.add(mf.path) for mf in users_managed: already.add(mf.path) + for mf in apt_managed: + already.add(mf.path) # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} @@ -1107,6 +1200,8 @@ def harvest( for dirpath, _, filenames in os.walk("/etc"): for fn in filenames: path = os.path.join(dirpath, fn) + if path.startswith("/etc/apt/"): + continue if path in already: continue if path in owned_etc: @@ -1408,6 +1503,7 @@ def harvest( "manual_packages": manual_pkgs, "manual_packages_skipped": manual_pkgs_skipped, "package_roles": [asdict(p) for p in pkg_snaps], + "apt_config": asdict(apt_config_snapshot), "etc_custom": asdict(etc_custom_snapshot), "usr_local_custom": asdict(usr_local_custom_snapshot), "extra_paths": asdict(extra_paths_snapshot), diff --git a/enroll/manifest.py b/enroll/manifest.py index d5ebff7..dbc2353 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -2,6 +2,7 @@ from __future__ import annotations import json import os +import re import shutil import stat import tarfile @@ -627,6 +628,7 @@ def _manifest_from_bundle_dir( services: List[Dict[str, Any]] = state.get("services", []) package_roles: List[Dict[str, Any]] = state.get("package_roles", []) users_snapshot: Dict[str, Any] = state.get("users", {}) + apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) @@ -661,6 +663,7 @@ def 
_manifest_from_bundle_dir( _ensure_ansible_cfg(os.path.join(out_dir, "ansible.cfg")) manifested_users_roles: List[str] = [] + manifested_apt_config_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] manifested_usr_local_custom_roles: List[str] = [] manifested_extra_paths_roles: List[str] = [] @@ -887,6 +890,157 @@ Generated non-system user accounts and SSH public material. manifested_users_roles.append(role) + # ------------------------- + # apt_config role (APT sources, pinning, and keyrings) + # ------------------------- + if apt_config_snapshot and apt_config_snapshot.get("managed_files"): + role = apt_config_snapshot.get("role_name", "apt_config") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = apt_config_snapshot.get("managed_files", []) + excluded = apt_config_snapshot.get("excluded", []) + notes = apt_config_snapshot.get("notes", []) + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + # Copy only the non-templated artifacts (templates live in the role). 
+ if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = """---\n""" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\ndependencies: []\n") + + # README: summarise repos and keyrings + source_paths: List[str] = [] + keyring_paths: List[str] = [] + repo_hosts: Set[str] = set() + + url_re = re.compile(r"(?:https?|ftp)://([^/\s]+)", re.IGNORECASE) + + for mf in managed_files: + p = str(mf.get("path") or "") + src_rel = str(mf.get("src_rel") or "") + if not p or not src_rel: + continue + + if p == "/etc/apt/sources.list" or p.startswith("/etc/apt/sources.list.d/"): + source_paths.append(p) + art_path = os.path.join(bundle_dir, "artifacts", role, src_rel) + try: + with open(art_path, "r", encoding="utf-8", errors="replace") as sf: + for line in sf: + line = line.strip() + if not line or line.startswith("#"): + continue + for m in url_re.finditer(line): + repo_hosts.add(m.group(1)) + except OSError: + pass # nosec + + if ( + p.startswith("/etc/apt/trusted.gpg") + or p.startswith("/etc/apt/keyrings/") + or 
p.startswith("/usr/share/keyrings/") + ): + keyring_paths.append(p) + + source_paths = sorted(set(source_paths)) + keyring_paths = sorted(set(keyring_paths)) + repos = sorted(repo_hosts) + + readme = ( + """# apt_config + +APT configuration harvested from the system (sources, pinning, and keyrings). + +## Repository hosts +""" + + ("\n".join([f"- {h}" for h in repos]) or "- (none)") + + """\n +## Source files +""" + + ("\n".join([f"- {p}" for p in source_paths]) or "- (none)") + + """\n +## Keyrings +""" + + ("\n".join([f"- {p}" for p in keyring_paths]) or "- (none)") + + """\n +## Managed files +""" + + ( + "\n".join( + [f"- {mf.get('path')} ({mf.get('reason')})" for mf in managed_files] + ) + or "- (none)" + ) + + """\n +## Excluded +""" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + """\n +## Notes +""" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + """\n""" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_apt_config_roles.append(role) + # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -1512,7 +1666,8 @@ Generated for package `{pkg}`. 
manifested_pkg_roles.append(role) all_roles = ( - manifested_pkg_roles + manifested_apt_config_roles + + manifested_pkg_roles + manifested_service_roles + manifested_etc_custom_roles + manifested_usr_local_custom_roles From 3fc5aec5fc53090ebfd0e315d9bfdd1442320e98 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 09:56:52 +1100 Subject: [PATCH 050/115] 0.1.5 --- CHANGELOG.md | 6 ++++++ debian/changelog | 8 ++++++++ pyproject.toml | 2 +- rpm/enroll.spec | 6 +++++- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a51be14..79e45cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# 0.1.5 + + * Consolidate logrotate and cron files into their main service/package roles if they exist. + * Standardise on MAX_FILES_CAP in one place + * Manage apt stuff in its own role, not in etc_custom + # 0.1.4 * Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers diff --git a/debian/changelog b/debian/changelog index 17b8985..5f3be58 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +enroll (0.1.5) unstable; urgency=medium + + * Consolidate logrotate and cron files into their main service/package roles if they exist. + * Standardise on MAX_FILES_CAP in one place + * Manage apt stuff in its own role, not in etc_custom + + -- Miguel Jacq Sun, 28 Dec 2025 10:00:00 +1100 + enroll (0.1.4) unstable; urgency=medium * Attempt to capture more stuff from /etc that might not be attributable to a specific package. 
This includes common singletons and systemd timers diff --git a/pyproject.toml b/pyproject.toml index f1f2420..3aa01d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.4" +version = "0.1.5" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 707dc10..ed0a3c9 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.4 +%global upstream_version 0.1.5 Name: enroll Version: %{upstream_version} @@ -43,6 +43,10 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} +- Consolidate logrotate and cron files into their main service/package roles if they exist. +- Standardise on MAX_FILES_CAP in one place +- Manage apt stuff in its own role, not in etc_custom * Sat Dec 27 2025 Miguel Jacq - %{version}-%{release} - Attempt to capture more stuff from /etc that might not be attributable to a specific package. This includes common singletons and systemd timers - Avoid duplicate apt data in package-specific roles. 
From 921801caa632c894ac4228efb390061b64fd668b Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 15:32:40 +1100 Subject: [PATCH 051/115] 0.1.6 --- CHANGELOG.md | 5 + debian/changelog | 7 + enroll/cli.py | 2 +- enroll/debian.py | 4 +- enroll/harvest.py | 718 ++++++++++++++++---------------------- enroll/pathfilter.py | 2 +- pyproject.toml | 2 +- rpm/enroll.spec | 5 +- tests/test___main__.py | 18 + tests/test_accounts.py | 143 ++++++++ tests/test_debian.py | 154 ++++++++ tests/test_diff_bundle.py | 89 +++++ tests/test_pathfilter.py | 80 +++++ tests/test_remote.py | 175 ++++++++++ tests/test_systemd.py | 121 +++++++ 15 files changed, 1102 insertions(+), 423 deletions(-) create mode 100644 tests/test___main__.py create mode 100644 tests/test_accounts.py create mode 100644 tests/test_debian.py create mode 100644 tests/test_diff_bundle.py create mode 100644 tests/test_pathfilter.py create mode 100644 tests/test_remote.py create mode 100644 tests/test_systemd.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 79e45cd..2a4c39d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.1.6 + + * DRY up some code logic + * More test coverage + # 0.1.5 * Consolidate logrotate and cron files into their main service/package roles if they exist. diff --git a/debian/changelog b/debian/changelog index 5f3be58..a15c38a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.1.6) unstable; urgency=medium + + * DRY up some code logic + * More test coverage + + -- Miguel Jacq Sun, 28 Dec 2025 15:30:00 +1100 + enroll (0.1.5) unstable; urgency=medium * Consolidate logrotate and cron files into their main service/package roles if they exist. 
diff --git a/enroll/cli.py b/enroll/cli.py index e5f729d..ae9aba0 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -482,7 +482,7 @@ def main() -> None: metavar="GPG_FINGERPRINT", help=( "Encrypt the harvest as a SOPS-encrypted tarball, and bundle+encrypt the manifest output in --out " - "(same behavior as `harvest --sops` and `manifest --sops`)." + "(same behaviour as `harvest --sops` and `manifest --sops`)." ), ) s.add_argument( diff --git a/enroll/debian.py b/enroll/debian.py index 58569e5..0ddc1f3 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -154,7 +154,9 @@ def parse_status_conffiles( if ":" in line: k, v = line.split(":", 1) key = k - cur[key] = v.lstrip() + # Preserve leading spaces in continuation lines, but strip + # the trailing newline from the initial key line value. + cur[key] = v.lstrip().rstrip("\n") if cur: flush() diff --git a/enroll/harvest.py b/enroll/harvest.py index c1a1986..56e5aed 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -112,9 +112,9 @@ class ExtraPathsSnapshot: ALLOWED_UNOWNED_EXTS = { + ".cfg", ".cnf", ".conf", - ".cfg", ".ini", ".json", ".link", @@ -136,7 +136,9 @@ ALLOWED_UNOWNED_EXTS = { MAX_FILES_CAP = 4000 MAX_UNOWNED_FILES_PER_ROLE = 500 -# Directories that are shared across many packages; never attribute unowned files in these trees to a single package. +# Directories that are shared across many packages. +# Never attribute all unowned files in these trees +# to one single package. SHARED_ETC_TOPDIRS = { "apparmor.d", "apt", @@ -195,6 +197,82 @@ def _copy_into_bundle( shutil.copy2(abs_path, dst) +def _capture_file( + *, + bundle_dir: str, + role_name: str, + abs_path: str, + reason: str, + policy: IgnorePolicy, + path_filter: PathFilter, + managed_out: List[ManagedFile], + excluded_out: List[ExcludedFile], + seen_role: Optional[Set[str]] = None, + seen_global: Optional[Set[str]] = None, + metadata: Optional[tuple[str, str, str]] = None, +) -> bool: + """Try to capture a single file into the bundle. 
+ + Returns True if the file was copied (managed), False otherwise. + + * seen_role: de-dupe within a role (prevents duplicate tasks/records) + * seen_global: de-dupe across roles/stages (prevents multiple roles copying same path) + * metadata: optional (owner, group, mode) tuple to avoid re-statting + """ + + if seen_global is not None and abs_path in seen_global: + return False + if seen_role is not None and abs_path in seen_role: + return False + + def _mark_seen() -> None: + if seen_role is not None: + seen_role.add(abs_path) + if seen_global is not None: + seen_global.add(abs_path) + + if path_filter.is_excluded(abs_path): + excluded_out.append(ExcludedFile(path=abs_path, reason="user_excluded")) + _mark_seen() + return False + + deny = policy.deny_reason(abs_path) + if deny: + excluded_out.append(ExcludedFile(path=abs_path, reason=deny)) + _mark_seen() + return False + + try: + owner, group, mode = ( + metadata if metadata is not None else stat_triplet(abs_path) + ) + except OSError: + excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) + _mark_seen() + return False + + src_rel = abs_path.lstrip("/") + try: + _copy_into_bundle(bundle_dir, role_name, abs_path, src_rel) + except OSError: + excluded_out.append(ExcludedFile(path=abs_path, reason="unreadable")) + _mark_seen() + return False + + managed_out.append( + ManagedFile( + path=abs_path, + src_rel=src_rel, + owner=owner, + group=group, + mode=mode, + reason=reason, + ) + ) + _mark_seen() + return True + + def _is_confish(path: str) -> bool: base = os.path.basename(path) _, ext = os.path.splitext(base) @@ -227,7 +305,6 @@ def _maybe_add_specific_paths(hints: Set[str]) -> List[str]: f"/etc/default/{h}", f"/etc/init.d/{h}", f"/etc/sysctl.d/{h}.conf", - f"/etc/logrotate.d/{h}", ] ) return paths @@ -492,7 +569,7 @@ def harvest( policy = IgnorePolicy(dangerous=dangerous) elif dangerous: # If callers explicitly provided a policy but also requested - # dangerous behavior, honour the CLI intent. 
+ # dangerous behaviour, honour the CLI intent. policy.dangerous = True os.makedirs(bundle_dir, exist_ok=True) @@ -513,12 +590,21 @@ def harvest( # Service roles # ------------------------- service_snaps: List[ServiceSnapshot] = [] + # Track alias strings (service names, package names, stems) that should map + # back to the service role for shared snippet attribution (cron.d/logrotate.d). + service_role_aliases: Dict[str, Set[str]] = {} + # De-dupe per-role captures (avoids duplicate tasks in manifest generation). + seen_by_role: Dict[str, Set[str]] = {} for unit in list_enabled_services(): role = _role_name_from_unit(unit) try: ui = get_unit_info(unit) except UnitQueryError as e: + # Even when we can't query the unit, keep a minimal alias mapping so + # shared snippets can still be attributed to this role by name. + service_role_aliases.setdefault(role, _hint_names(unit, set()) | {role}) + seen_by_role.setdefault(role, set()) service_snaps.append( ServiceSnapshot( unit=unit, @@ -567,6 +653,10 @@ def harvest( hints = _hint_names(unit, pkgs) _add_pkgs_from_etc_topdirs(hints, topdir_to_pkgs, pkgs) + # Keep a stable set of aliases for this service role. Include current + # packages as well, so that package-named snippets (e.g. cron.d or + # logrotate.d entries) can still be attributed back to this service. + service_role_aliases[role] = set(hints) | set(pkgs) | {role} for sp in _maybe_add_specific_paths(hints): if not os.path.exists(sp): @@ -610,7 +700,7 @@ def harvest( # key material under service directories (e.g. /etc/openvpn/*.crt). # # To avoid exploding output for shared trees (e.g. /etc/systemd), keep - # the older "config-ish only" behavior for known shared topdirs. + # the older "config-ish only" behaviour for known shared topdirs. any_roots: List[str] = [] confish_roots: List[str] = [] for h in hints: @@ -646,34 +736,20 @@ def harvest( "No packages or /etc candidates detected (unexpected for enabled service)." ) + # De-dupe within this role while capturing. 
This also avoids emitting + # duplicate Ansible tasks for the same destination path. + role_seen = seen_by_role.setdefault(role, set()) for path, reason in sorted(candidates.items()): - if path_filter.is_excluded(path): - excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - deny = policy.deny_reason(path) - if deny: - excluded.append(ExcludedFile(path=path, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(path) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, role, path, src_rel) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=role, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=managed, + excluded_out=excluded, + seen_role=role_seen, ) service_snaps.append( @@ -735,36 +811,18 @@ def harvest( snap = service_snap_by_unit.get(ti.trigger_unit) if snap is not None: + role_seen = seen_by_role.setdefault(snap.role_name, set()) for path in timer_paths: - if path_filter.is_excluded(path): - snap.excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - continue - deny = policy.deny_reason(path) - if deny: - snap.excluded.append(ExcludedFile(path=path, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(path) - except OSError: - snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, snap.role_name, path, src_rel) - except OSError: - snap.excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - snap.managed_files.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - 
reason="related_timer", - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=snap.role_name, + abs_path=path, + reason="related_timer", + policy=policy, + path_filter=path_filter, + managed_out=snap.managed_files, + excluded_out=snap.excluded, + seen_role=role_seen, ) continue @@ -852,7 +910,6 @@ def harvest( roots.extend([f"/etc/{td}", f"/etc/{td}.d"]) roots.extend([f"/etc/default/{td}"]) roots.extend([f"/etc/init.d/{td}"]) - roots.extend([f"/etc/logrotate.d/{td}"]) roots.extend([f"/etc/sysctl.d/{td}.conf"]) # Capture any custom/unowned files under /etc/ for this @@ -871,34 +928,18 @@ def harvest( if r not in owned_etc and _is_confish(r): candidates.setdefault(r, "custom_specific_path") + role_seen = seen_by_role.setdefault(role, set()) for path, reason in sorted(candidates.items()): - if path_filter.is_excluded(path): - excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - deny = policy.deny_reason(path) - if deny: - excluded.append(ExcludedFile(path=path, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(path) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, role, path, src_rel) - except OSError: - excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=role, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=managed, + excluded_out=excluded, + seen_role=role_seen, ) if not pkg_to_etc_paths.get(pkg, []) and not managed: @@ -929,6 +970,7 @@ def harvest( users_notes.append(f"Failed to enumerate users: {e!r}") users_role_name = "users" + users_role_seen = seen_by_role.setdefault(users_role_name, set()) for u in user_records: users_list.append( @@ -946,38 +988,21 @@ def 
harvest( # Copy only safe SSH public material: authorized_keys + *.pub for sf in u.ssh_files: - if path_filter.is_excluded(sf): - users_excluded.append(ExcludedFile(path=sf, reason="user_excluded")) - continue - deny = policy.deny_reason(sf) - if deny: - users_excluded.append(ExcludedFile(path=sf, reason=deny)) - continue - try: - owner, group, mode = stat_triplet(sf) - except OSError: - users_excluded.append(ExcludedFile(path=sf, reason="unreadable")) - continue - src_rel = sf.lstrip("/") - try: - _copy_into_bundle(bundle_dir, users_role_name, sf, src_rel) - except OSError: - users_excluded.append(ExcludedFile(path=sf, reason="unreadable")) - continue reason = ( "authorized_keys" if sf.endswith("/authorized_keys") else "ssh_public_key" ) - users_managed.append( - ManagedFile( - path=sf, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=users_role_name, + abs_path=sf, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=users_managed, + excluded_out=users_excluded, + seen_role=users_role_seen, ) users_snapshot = UsersSnapshot( @@ -995,39 +1020,19 @@ def harvest( apt_excluded: List[ExcludedFile] = [] apt_managed: List[ManagedFile] = [] apt_role_name = "apt_config" + apt_role_seen = seen_by_role.setdefault(apt_role_name, set()) for path, reason in _iter_apt_capture_paths(): - if path_filter.is_excluded(path): - apt_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - - deny = policy.deny_reason(path) - if deny: - apt_excluded.append(ExcludedFile(path=path, reason=deny)) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, apt_role_name, path, src_rel) - except OSError: - apt_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - 
apt_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) + _capture_file( + bundle_dir=bundle_dir, + role_name=apt_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=apt_managed, + excluded_out=apt_excluded, + seen_role=apt_role_seen, ) apt_config_snapshot = AptConfigSnapshot( @@ -1062,11 +1067,58 @@ def harvest( svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} pkg_by_role: Dict[str, PackageSnapshot] = {p.role_name: p for p in pkg_snaps} - def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]: - """If `path` is a shared snippet, return (role_name, reason) to attach to.""" - base = os.path.basename(path) + # Package name -> role_name for manually-installed package roles. + pkg_name_to_role: Dict[str, str] = {p.package: p.role_name for p in pkg_snaps} - # Try full filename and stem (before first dot). + # Package name -> list of service role names that reference it. + pkg_to_service_roles: Dict[str, List[str]] = {} + for s in service_snaps: + for pkg in s.packages: + pkg_to_service_roles.setdefault(pkg, []).append(s.role_name) + + # Alias -> role mapping used as a fallback when dpkg ownership is missing. + # Prefer service roles over package roles when both would match. 
+ alias_ranked: Dict[str, tuple[int, str]] = {} + + def _add_alias(alias: str, role_name: str, *, priority: int) -> None: + key = _safe_name(alias) + if not key: + return + cur = alias_ranked.get(key) + if ( + cur is None + or priority < cur[0] + or (priority == cur[0] and role_name < cur[1]) + ): + alias_ranked[key] = (priority, role_name) + + for role_name, aliases in service_role_aliases.items(): + for a in aliases: + _add_alias(a, role_name, priority=0) + + for p in pkg_snaps: + _add_alias(p.package, p.role_name, priority=1) + + def _target_role_for_shared_snippet(path: str) -> Optional[tuple[str, str]]: + """If `path` is a shared snippet, return (role_name, reason) to attach to. + + This is used primarily for /etc/logrotate.d/* and /etc/cron.d/* where + files are "owned" by many packages but people tend to reason about them + per service. + + Resolution order: + 1) dpkg owner -> service role (if any service references the package) + 2) dpkg owner -> package role (manual package role exists) + 3) basename/stem alias match -> preferred role + """ + if path.startswith("/etc/logrotate.d/"): + tag = "logrotate_snippet" + elif path.startswith("/etc/cron.d/"): + tag = "cron_snippet" + else: + return None + + base = os.path.basename(path) candidates: List[str] = [base] if "." in base: candidates.append(base.split(".", 1)[0]) @@ -1078,122 +1130,62 @@ def harvest( seen.add(c) uniq.append(c) - if path.startswith("/etc/logrotate.d/"): - for c in uniq: - rn = _safe_name(c) - if rn in svc_by_role or rn in pkg_by_role: - return (rn, "logrotate_snippet") - return None + pkg = dpkg_owner(path) + if pkg: + svc_roles = pkg_to_service_roles.get(pkg) + if svc_roles: + # Deterministic tie-break: lowest role name. 
+ return (sorted(set(svc_roles))[0], tag) + pkg_role = pkg_name_to_role.get(pkg) + if pkg_role: + return (pkg_role, tag) - if path.startswith("/etc/cron.d/"): - for c in uniq: - rn = _safe_name(c) - if rn in svc_by_role or rn in pkg_by_role: - return (rn, "cron_snippet") - return None + for c in uniq: + key = _safe_name(c) + hit = alias_ranked.get(key) + if hit is not None: + return (hit[1], tag) return None + def _lists_for_role(role_name: str) -> tuple[List[ManagedFile], List[ExcludedFile]]: + if role_name in svc_by_role: + snap = svc_by_role[role_name] + return (snap.managed_files, snap.excluded) + if role_name in pkg_by_role: + snap = pkg_by_role[role_name] + return (snap.managed_files, snap.excluded) + # Fallback (shouldn't normally happen): attribute to etc_custom. + return (etc_managed, etc_excluded) + # Capture essential system config/state (even if package-owned). + etc_role_seen = seen_by_role.setdefault(etc_role_name, set()) for path, reason in _iter_system_capture_paths(): if path in already: continue target = _target_role_for_shared_snippet(path) - - if path_filter.is_excluded(path): - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - already.add(path) - continue - - deny = policy.deny_reason(path) - if deny: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) - already.add(path) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - 
svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - src_rel = path.lstrip("/") - role_for_copy = etc_role_name - reason_for_role = reason - if target: + if target is not None: role_for_copy, reason_for_role = target - - try: - _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - mf = ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason_for_role, - ) - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].managed_files.append(mf) - elif rn in pkg_by_role: - pkg_by_role[rn].managed_files.append(mf) + managed_out, excluded_out = _lists_for_role(role_for_copy) + role_seen = seen_by_role.setdefault(role_for_copy, set()) else: - etc_managed.append(mf) + role_for_copy, reason_for_role = (etc_role_name, reason) + managed_out, excluded_out = (etc_managed, etc_excluded) + role_seen = etc_role_seen - already.add(path) + _capture_file( + bundle_dir=bundle_dir, + role_name=role_for_copy, + abs_path=path, + reason=reason_for_role, + policy=policy, + path_filter=path_filter, + managed_out=managed_out, + excluded_out=excluded_out, + seen_role=role_seen, + seen_global=already, + ) # Walk /etc for remaining unowned config-ish files scanned = 0 @@ -1212,99 +1204,28 @@ def harvest( continue target = _target_role_for_shared_snippet(path) - - if path_filter.is_excluded(path): - if target: - 
rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="user_excluded") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - already.add(path) - continue - - deny = policy.deny_reason(path) - if deny: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason=deny) - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason=deny)) - already.add(path) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - src_rel = path.lstrip("/") - role_for_copy = etc_role_name - reason_for_role = "custom_unowned" - if target: + if target is not None: role_for_copy, reason_for_role = target - - try: - _copy_into_bundle(bundle_dir, role_for_copy, path, src_rel) - except OSError: - if target: - rn, _ = target - if rn in svc_by_role: - svc_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - elif rn in pkg_by_role: - pkg_by_role[rn].excluded.append( - ExcludedFile(path=path, reason="unreadable") - ) - else: - etc_excluded.append(ExcludedFile(path=path, reason="unreadable")) - already.add(path) - continue - - mf = ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason_for_role, - ) - if target: - rn, _ = target - if rn in svc_by_role: - 
svc_by_role[rn].managed_files.append(mf) - elif rn in pkg_by_role: - pkg_by_role[rn].managed_files.append(mf) + managed_out, excluded_out = _lists_for_role(role_for_copy) + role_seen = seen_by_role.setdefault(role_for_copy, set()) else: - etc_managed.append(mf) - scanned += 1 + role_for_copy, reason_for_role = (etc_role_name, "custom_unowned") + managed_out, excluded_out = (etc_managed, etc_excluded) + role_seen = etc_role_seen + + if _capture_file( + bundle_dir=bundle_dir, + role_name=role_for_copy, + abs_path=path, + reason=reason_for_role, + policy=policy, + path_filter=path_filter, + managed_out=managed_out, + excluded_out=excluded_out, + seen_role=role_seen, + seen_global=already, + ): + scanned += 1 if scanned >= MAX_FILES_CAP: etc_notes.append( f"Reached file cap ({MAX_FILES_CAP}) while scanning /etc for unowned files." @@ -1339,6 +1260,7 @@ def harvest( scanned = 0 if not os.path.isdir(root): return + role_seen = seen_by_role.setdefault(ul_role_name, set()) for dirpath, _, filenames in os.walk(root): for fn in filenames: path = os.path.join(dirpath, fn) @@ -1346,54 +1268,34 @@ def harvest( continue if not os.path.isfile(path) or os.path.islink(path): continue + try: + owner, group, mode = stat_triplet(path) + except OSError: + ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) + continue + if require_executable: - try: - owner, group, mode = stat_triplet(path) - except OSError: - ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue try: if (int(mode, 8) & 0o111) == 0: continue except ValueError: # If mode parsing fails, be conservative and skip. 
continue - else: - try: - owner, group, mode = stat_triplet(path) - except OSError: - ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - if path_filter.is_excluded(path): - ul_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - - deny = policy.deny_reason(path) - if deny: - ul_excluded.append(ExcludedFile(path=path, reason=deny)) - continue - - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, ul_role_name, path, src_rel) - except OSError: - ul_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - ul_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason=reason, - ) - ) - - already_all.add(path) - scanned += 1 + if _capture_file( + bundle_dir=bundle_dir, + role_name=ul_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=ul_managed, + excluded_out=ul_excluded, + seen_role=role_seen, + metadata=(owner, group, mode), + ): + already_all.add(path) + scanned += 1 if scanned >= cap: ul_notes.append(f"Reached file cap ({cap}) while scanning {root}.") return @@ -1428,6 +1330,7 @@ def harvest( extra_excluded: List[ExcludedFile] = [] extra_managed: List[ManagedFile] = [] extra_role_name = "extra_paths" + extra_role_seen = seen_by_role.setdefault(extra_role_name, set()) include_specs = list(include_paths or []) exclude_specs = list(exclude_paths or []) @@ -1453,39 +1356,18 @@ def harvest( if path in already_all: continue - if path_filter.is_excluded(path): - extra_excluded.append(ExcludedFile(path=path, reason="user_excluded")) - continue - - deny = policy.deny_reason(path) - if deny: - extra_excluded.append(ExcludedFile(path=path, reason=deny)) - continue - - try: - owner, group, mode = stat_triplet(path) - except OSError: - extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - src_rel = path.lstrip("/") - try: - _copy_into_bundle(bundle_dir, 
extra_role_name, path, src_rel) - except OSError: - extra_excluded.append(ExcludedFile(path=path, reason="unreadable")) - continue - - extra_managed.append( - ManagedFile( - path=path, - src_rel=src_rel, - owner=owner, - group=group, - mode=mode, - reason="user_include", - ) - ) - already_all.add(path) + if _capture_file( + bundle_dir=bundle_dir, + role_name=extra_role_name, + abs_path=path, + reason="user_include", + policy=policy, + path_filter=path_filter, + managed_out=extra_managed, + excluded_out=extra_excluded, + seen_role=extra_role_seen, + ): + already_all.add(path) extra_paths_snapshot = ExtraPathsSnapshot( role_name=extra_role_name, diff --git a/enroll/pathfilter.py b/enroll/pathfilter.py index 6541ca9..680d390 100644 --- a/enroll/pathfilter.py +++ b/enroll/pathfilter.py @@ -141,7 +141,7 @@ class PathFilter: - Regex: prefix with 're:' or 'regex:' - Force glob: prefix with 'glob:' - A plain path without wildcards matches that path and everything under it - (directory-prefix behavior). + (directory-prefix behaviour). Examples: --exclude-path /usr/local/bin/docker-* diff --git a/pyproject.toml b/pyproject.toml index 3aa01d0..c7356bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.5" +version = "0.1.6" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index ed0a3c9..637dee1 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.5 +%global upstream_version 0.1.6 Name: enroll Version: %{upstream_version} @@ -44,6 +44,9 @@ Enroll a server's running state retrospectively into Ansible. 
%changelog * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} +- DRY up some code logic +- More test coverage +* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} - Consolidate logrotate and cron files into their main service/package roles if they exist. - Standardise on MAX_FILES_CAP in one place - Manage apt stuff in its own role, not in etc_custom diff --git a/tests/test___main__.py b/tests/test___main__.py new file mode 100644 index 0000000..2e83ac1 --- /dev/null +++ b/tests/test___main__.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +import runpy + + +def test_module_main_invokes_cli_main(monkeypatch): + import enroll.cli + + called = {"ok": False} + + def fake_main() -> None: + called["ok"] = True + + monkeypatch.setattr(enroll.cli, "main", fake_main) + + # Execute enroll.__main__ as if `python -m enroll`. + runpy.run_module("enroll.__main__", run_name="__main__") + assert called["ok"] is True diff --git a/tests/test_accounts.py b/tests/test_accounts.py new file mode 100644 index 0000000..d5cc267 --- /dev/null +++ b/tests/test_accounts.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import os +from pathlib import Path + + +def test_parse_login_defs_parses_known_keys(tmp_path: Path): + from enroll.accounts import parse_login_defs + + p = tmp_path / "login.defs" + p.write_text( + """ + # comment + UID_MIN 1000 + UID_MAX 60000 + SYS_UID_MIN 100 + SYS_UID_MAX 999 + UID_MIN not_an_int + OTHER 123 + """, + encoding="utf-8", + ) + + vals = parse_login_defs(str(p)) + assert vals["UID_MIN"] == 1000 + assert vals["UID_MAX"] == 60000 + assert vals["SYS_UID_MIN"] == 100 + assert vals["SYS_UID_MAX"] == 999 + assert "OTHER" not in vals + + +def test_parse_passwd_and_group_and_ssh_files(tmp_path: Path): + from enroll.accounts import find_user_ssh_files, parse_group, parse_passwd + + passwd = tmp_path / "passwd" + passwd.write_text( + "\n".join( + [ + "root:x:0:0:root:/root:/bin/bash", + "# comment", + 
"alice:x:1000:1000:Alice:/home/alice:/bin/bash", + "bob:x:1001:1000:Bob:/home/bob:/usr/sbin/nologin", + "badline", + "cathy:x:notint:1000:Cathy:/home/cathy:/bin/bash", + "", + ] + ), + encoding="utf-8", + ) + + group = tmp_path / "group" + group.write_text( + "\n".join( + [ + "root:x:0:", + "users:x:1000:alice,bob", + "admins:x:1002:alice", + "badgroup:x:notint:alice", + "", + ] + ), + encoding="utf-8", + ) + + rows = parse_passwd(str(passwd)) + assert ("alice", 1000, 1000, "Alice", "/home/alice", "/bin/bash") in rows + assert all(r[0] != "cathy" for r in rows) # skipped invalid UID + + gid_to_name, name_to_gid, members = parse_group(str(group)) + assert gid_to_name[1000] == "users" + assert name_to_gid["admins"] == 1002 + assert "alice" in members["admins"] + + # ssh discovery: only authorized_keys, no symlinks + home = tmp_path / "home" / "alice" + sshdir = home / ".ssh" + sshdir.mkdir(parents=True) + ak = sshdir / "authorized_keys" + ak.write_text("ssh-ed25519 AAA...", encoding="utf-8") + # a symlink should be ignored + (sshdir / "authorized_keys2").write_text("x", encoding="utf-8") + os.symlink(str(sshdir / "authorized_keys2"), str(sshdir / "authorized_keys_link")) + assert find_user_ssh_files(str(home)) == [str(ak)] + + +def test_collect_non_system_users(monkeypatch, tmp_path: Path): + import enroll.accounts as a + + orig_parse_login_defs = a.parse_login_defs + orig_parse_passwd = a.parse_passwd + orig_parse_group = a.parse_group + + # Provide controlled passwd/group/login.defs inputs via monkeypatch. 
+ passwd = tmp_path / "passwd" + passwd.write_text( + "\n".join( + [ + "root:x:0:0:root:/root:/bin/bash", + "nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin", + "alice:x:1000:1000:Alice:/home/alice:/bin/bash", + "sysuser:x:200:200:Sys:/home/sys:/bin/bash", + "bob:x:1001:1000:Bob:/home/bob:/bin/false", + "", + ] + ), + encoding="utf-8", + ) + group = tmp_path / "group" + group.write_text( + "\n".join( + [ + "users:x:1000:alice,bob", + "admins:x:1002:alice", + "", + ] + ), + encoding="utf-8", + ) + + defs = tmp_path / "login.defs" + defs.write_text("UID_MIN 1000\n", encoding="utf-8") + + monkeypatch.setattr( + a, "parse_login_defs", lambda path=str(defs): orig_parse_login_defs(path) + ) + monkeypatch.setattr( + a, "parse_passwd", lambda path=str(passwd): orig_parse_passwd(path) + ) + monkeypatch.setattr( + a, "parse_group", lambda path=str(group): orig_parse_group(path) + ) + + # Use a stable fake ssh discovery. + monkeypatch.setattr( + a, "find_user_ssh_files", lambda home: [f"{home}/.ssh/authorized_keys"] + ) + + users = a.collect_non_system_users() + assert [u.name for u in users] == ["alice"] + u = users[0] + assert u.primary_group == "users" + assert u.supplementary_groups == ["admins"] + assert u.ssh_files == ["/home/alice/.ssh/authorized_keys"] diff --git a/tests/test_debian.py b/tests/test_debian.py new file mode 100644 index 0000000..333afc1 --- /dev/null +++ b/tests/test_debian.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +import hashlib +from pathlib import Path + + +def test_dpkg_owner_parses_output(monkeypatch): + import enroll.debian as d + + class P: + def __init__(self, rc: int, out: str): + self.returncode = rc + self.stdout = out + self.stderr = "" + + def fake_run(cmd, text, capture_output): + assert cmd[:2] == ["dpkg", "-S"] + return P( + 0, + """ + diversion by foo from: /etc/something + nginx-common:amd64: /etc/nginx/nginx.conf + nginx-common, nginx: /etc/nginx/sites-enabled/default + """, + ) + + 
monkeypatch.setattr(d.subprocess, "run", fake_run) + assert d.dpkg_owner("/etc/nginx/nginx.conf") == "nginx-common" + + def fake_run_none(cmd, text, capture_output): + return P(1, "") + + monkeypatch.setattr(d.subprocess, "run", fake_run_none) + assert d.dpkg_owner("/missing") is None + + +def test_list_manual_packages_parses_and_sorts(monkeypatch): + import enroll.debian as d + + class P: + def __init__(self, rc: int, out: str): + self.returncode = rc + self.stdout = out + self.stderr = "" + + def fake_run(cmd, text, capture_output): + assert cmd == ["apt-mark", "showmanual"] + return P(0, "\n# comment\nnginx\nvim\nnginx\n") + + monkeypatch.setattr(d.subprocess, "run", fake_run) + assert d.list_manual_packages() == ["nginx", "vim"] + + +def test_build_dpkg_etc_index(tmp_path: Path): + import enroll.debian as d + + info = tmp_path / "info" + info.mkdir() + (info / "nginx.list").write_text( + "/etc/nginx/nginx.conf\n/etc/nginx/sites-enabled/default\n/usr/bin/nginx\n", + encoding="utf-8", + ) + (info / "vim:amd64.list").write_text( + "/etc/vim/vimrc\n/usr/bin/vim\n", + encoding="utf-8", + ) + + owned, owner_map, topdir_to_pkgs, pkg_to_etc = d.build_dpkg_etc_index(str(info)) + assert "/etc/nginx/nginx.conf" in owned + assert owner_map["/etc/nginx/nginx.conf"] == "nginx" + assert "nginx" in topdir_to_pkgs + assert topdir_to_pkgs["nginx"] == {"nginx"} + assert pkg_to_etc["vim"] == ["/etc/vim/vimrc"] + + +def test_parse_status_conffiles_handles_continuations(tmp_path: Path): + import enroll.debian as d + + status = tmp_path / "status" + status.write_text( + "\n".join( + [ + "Package: nginx", + "Version: 1", + "Conffiles:", + " /etc/nginx/nginx.conf abcdef", + " /etc/nginx/mime.types 123456", + "", + "Package: other", + "Version: 2", + "", + ] + ), + encoding="utf-8", + ) + m = d.parse_status_conffiles(str(status)) + assert m["nginx"]["/etc/nginx/nginx.conf"] == "abcdef" + assert m["nginx"]["/etc/nginx/mime.types"] == "123456" + assert "other" not in m + + +def 
test_read_pkg_md5sums_and_file_md5(tmp_path: Path, monkeypatch): + import enroll.debian as d + + # Patch /var/lib/dpkg/info/.md5sums lookup to a tmp file. + md5_file = tmp_path / "pkg.md5sums" + md5_file.write_text("0123456789abcdef etc/foo.conf\n", encoding="utf-8") + + def fake_exists(path: str) -> bool: + return path.endswith("/var/lib/dpkg/info/p1.md5sums") + + real_open = open + + def fake_open(path: str, *args, **kwargs): + if path.endswith("/var/lib/dpkg/info/p1.md5sums"): + return real_open(md5_file, *args, **kwargs) + return real_open(path, *args, **kwargs) + + monkeypatch.setattr(d.os.path, "exists", fake_exists) + monkeypatch.setattr("builtins.open", fake_open) + + m = d.read_pkg_md5sums("p1") + assert m == {"etc/foo.conf": "0123456789abcdef"} + + content = b"hello world\n" + p = tmp_path / "x" + p.write_bytes(content) + assert d.file_md5(str(p)) == hashlib.md5(content).hexdigest() + + +def test_stat_triplet_fallbacks(tmp_path: Path, monkeypatch): + import enroll.debian as d + import sys + + p = tmp_path / "f" + p.write_text("x", encoding="utf-8") + + class FakePwdMod: + @staticmethod + def getpwuid(_): # pragma: no cover + raise KeyError + + class FakeGrpMod: + @staticmethod + def getgrgid(_): # pragma: no cover + raise KeyError + + # stat_triplet imports pwd/grp inside the function, so patch sys.modules. 
+ monkeypatch.setitem(sys.modules, "pwd", FakePwdMod) + monkeypatch.setitem(sys.modules, "grp", FakeGrpMod) + owner, group, mode = d.stat_triplet(str(p)) + assert owner.isdigit() + assert group.isdigit() + assert mode.isdigit() and len(mode) == 4 diff --git a/tests/test_diff_bundle.py b/tests/test_diff_bundle.py new file mode 100644 index 0000000..66ef094 --- /dev/null +++ b/tests/test_diff_bundle.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +import os +import tarfile +from pathlib import Path + +import pytest + + +def _make_bundle_dir(tmp_path: Path) -> Path: + b = tmp_path / "bundle" + (b / "artifacts").mkdir(parents=True) + (b / "state.json").write_text("{}\n", encoding="utf-8") + return b + + +def _tar_gz_of_dir(src: Path, out: Path) -> None: + with tarfile.open(out, mode="w:gz") as tf: + # tar -C src . semantics + for p in src.rglob("*"): + rel = p.relative_to(src) + tf.add(p, arcname=str(rel)) + + +def test_bundle_from_directory_and_statejson_path(tmp_path: Path): + import enroll.diff as d + + b = _make_bundle_dir(tmp_path) + + br1 = d._bundle_from_input(str(b), sops_mode=False) + assert br1.dir == b + assert br1.state_path.exists() + + br2 = d._bundle_from_input(str(b / "state.json"), sops_mode=False) + assert br2.dir == b + + +def test_bundle_from_tarball_extracts(tmp_path: Path): + import enroll.diff as d + + b = _make_bundle_dir(tmp_path) + tgz = tmp_path / "bundle.tgz" + _tar_gz_of_dir(b, tgz) + + br = d._bundle_from_input(str(tgz), sops_mode=False) + try: + assert br.dir.is_dir() + assert (br.dir / "state.json").exists() + finally: + if br.tempdir: + br.tempdir.cleanup() + + +def test_bundle_from_sops_like_file(monkeypatch, tmp_path: Path): + import enroll.diff as d + + b = _make_bundle_dir(tmp_path) + tgz = tmp_path / "bundle.tar.gz" + _tar_gz_of_dir(b, tgz) + + # Pretend the tarball is an encrypted bundle by giving it a .sops name. 
+ sops_path = tmp_path / "bundle.tar.gz.sops" + sops_path.write_bytes(tgz.read_bytes()) + + # Stub out sops machinery: "decrypt" just copies through. + monkeypatch.setattr(d, "require_sops_cmd", lambda: "sops") + + def fake_decrypt(src: Path, dest: Path, mode: int): + dest.write_bytes(Path(src).read_bytes()) + try: + os.chmod(dest, mode) + except OSError: + pass + + monkeypatch.setattr(d, "decrypt_file_binary_to", fake_decrypt) + + br = d._bundle_from_input(str(sops_path), sops_mode=False) + try: + assert (br.dir / "state.json").exists() + finally: + if br.tempdir: + br.tempdir.cleanup() + + +def test_bundle_from_input_missing_path(tmp_path: Path): + import enroll.diff as d + + with pytest.raises(RuntimeError, match="not found"): + d._bundle_from_input(str(tmp_path / "nope"), sops_mode=False) diff --git a/tests/test_pathfilter.py b/tests/test_pathfilter.py new file mode 100644 index 0000000..406b7e7 --- /dev/null +++ b/tests/test_pathfilter.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +import os +from pathlib import Path + + +def test_compile_and_match_prefix_glob_and_regex(tmp_path: Path): + from enroll.pathfilter import PathFilter, compile_path_pattern + + # prefix semantics: matches the exact path and subtree + p = compile_path_pattern("/etc/nginx") + assert p.kind == "prefix" + assert p.matches("/etc/nginx") + assert p.matches("/etc/nginx/nginx.conf") + assert not p.matches("/etc/nginx2/nginx.conf") + + # glob semantics + g = compile_path_pattern("/etc/**/*.conf") + assert g.kind == "glob" + assert g.matches("/etc/nginx/nginx.conf") + assert not g.matches("/var/etc/nginx.conf") + + # explicit glob + g2 = compile_path_pattern("glob:/home/*/.bashrc") + assert g2.kind == "glob" + assert g2.matches("/home/alice/.bashrc") + + # regex semantics (search, not match) + r = compile_path_pattern(r"re:/home/[^/]+/\.ssh/authorized_keys$") + assert r.kind == "regex" + assert r.matches("/home/alice/.ssh/authorized_keys") + assert not 
r.matches("/home/alice/.ssh/authorized_keys2") + + # invalid regex: never matches + bad = compile_path_pattern("re:[") + assert bad.kind == "regex" + assert not bad.matches("/etc/passwd") + + # exclude wins + pf = PathFilter(exclude=["/etc/nginx"], include=["/etc/nginx/nginx.conf"]) + assert pf.is_excluded("/etc/nginx/nginx.conf") + + +def test_expand_includes_respects_exclude_symlinks_and_caps(tmp_path: Path): + from enroll.pathfilter import PathFilter, compile_path_pattern, expand_includes + + root = tmp_path / "root" + (root / "a").mkdir(parents=True) + (root / "a" / "one.txt").write_text("1", encoding="utf-8") + (root / "a" / "two.txt").write_text("2", encoding="utf-8") + (root / "b").mkdir() + (root / "b" / "secret.txt").write_text("s", encoding="utf-8") + + # symlink file should be ignored + os.symlink(str(root / "a" / "one.txt"), str(root / "a" / "link.txt")) + + exclude = PathFilter(exclude=[str(root / "b")]) + + pats = [ + compile_path_pattern(str(root / "a")), + compile_path_pattern("glob:" + str(root / "**" / "*.txt")), + ] + + paths, notes = expand_includes(pats, exclude=exclude, max_files=2) + # cap should limit to 2 files + assert len(paths) == 2 + assert any("cap" in n.lower() for n in notes) + # excluded dir should not contribute + assert all("/b/" not in p for p in paths) + # symlink ignored + assert all(not p.endswith("link.txt") for p in paths) + + +def test_expand_includes_notes_on_no_matches(tmp_path: Path): + from enroll.pathfilter import compile_path_pattern, expand_includes + + pats = [compile_path_pattern(str(tmp_path / "does_not_exist"))] + paths, notes = expand_includes(pats, max_files=10) + assert paths == [] + assert any("matched no files" in n.lower() for n in notes) diff --git a/tests/test_remote.py b/tests/test_remote.py new file mode 100644 index 0000000..576c0b1 --- /dev/null +++ b/tests/test_remote.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import io +import tarfile +from pathlib import Path + +import pytest + + 
+def _make_tgz_bytes(files: dict[str, bytes]) -> bytes: + bio = io.BytesIO() + with tarfile.open(fileobj=bio, mode="w:gz") as tf: + for name, content in files.items(): + ti = tarfile.TarInfo(name=name) + ti.size = len(content) + tf.addfile(ti, io.BytesIO(content)) + return bio.getvalue() + + +def test_safe_extract_tar_rejects_path_traversal(tmp_path: Path): + from enroll.remote import _safe_extract_tar + + # Build an unsafe tar with ../ traversal + bio = io.BytesIO() + with tarfile.open(fileobj=bio, mode="w:gz") as tf: + ti = tarfile.TarInfo(name="../evil") + ti.size = 1 + tf.addfile(ti, io.BytesIO(b"x")) + + bio.seek(0) + with tarfile.open(fileobj=bio, mode="r:gz") as tf: + with pytest.raises(RuntimeError, match="Unsafe tar member path"): + _safe_extract_tar(tf, tmp_path) + + +def test_safe_extract_tar_rejects_symlinks(tmp_path: Path): + from enroll.remote import _safe_extract_tar + + bio = io.BytesIO() + with tarfile.open(fileobj=bio, mode="w:gz") as tf: + ti = tarfile.TarInfo(name="link") + ti.type = tarfile.SYMTYPE + ti.linkname = "/etc/passwd" + tf.addfile(ti) + + bio.seek(0) + with tarfile.open(fileobj=bio, mode="r:gz") as tf: + with pytest.raises(RuntimeError, match="Refusing to extract"): + _safe_extract_tar(tf, tmp_path) + + +def test_remote_harvest_happy_path(tmp_path: Path, monkeypatch): + import sys + + import enroll.remote as r + + # Avoid building a real zipapp; just create a file. + def fake_build(_td: Path) -> Path: + p = _td / "enroll.pyz" + p.write_bytes(b"PYZ") + return p + + monkeypatch.setattr(r, "_build_enroll_pyz", fake_build) + + # Prepare a tiny harvest bundle tar stream from the "remote". 
+ tgz = _make_tgz_bytes({"state.json": b'{"ok": true}\n'}) + + calls: list[str] = [] + + class _Chan: + def __init__(self, rc: int = 0): + self._rc = rc + + def recv_exit_status(self) -> int: + return self._rc + + class _Stdout: + def __init__(self, payload: bytes = b"", rc: int = 0): + self._bio = io.BytesIO(payload) + self.channel = _Chan(rc) + + def read(self, n: int = -1) -> bytes: + return self._bio.read(n) + + class _Stderr: + def __init__(self, payload: bytes = b""): + self._bio = io.BytesIO(payload) + + def read(self, n: int = -1) -> bytes: + return self._bio.read(n) + + class _SFTP: + def __init__(self): + self.put_calls: list[tuple[str, str]] = [] + + def put(self, local: str, remote: str) -> None: + self.put_calls.append((local, remote)) + + def close(self) -> None: + return + + class FakeSSH: + def __init__(self): + self._sftp = _SFTP() + + def load_system_host_keys(self): + return + + def set_missing_host_key_policy(self, _policy): + return + + def connect(self, **kwargs): + # Accept any connect parameters. + return + + def open_sftp(self): + return self._sftp + + def exec_command(self, cmd: str): + calls.append(cmd) + # The tar stream uses exec_command directly. 
+ if cmd.startswith("tar -cz -C"): + return (None, _Stdout(tgz, rc=0), _Stderr(b"")) + + # _ssh_run path: id -un, mktemp -d, chmod, sudo harvest, sudo chown, rm -rf + if cmd == "id -un": + return (None, _Stdout(b"alice\n"), _Stderr()) + if cmd == "mktemp -d": + return (None, _Stdout(b"/tmp/enroll-remote-123\n"), _Stderr()) + if cmd.startswith("chmod 700"): + return (None, _Stdout(b""), _Stderr()) + if " harvest " in cmd: + return (None, _Stdout(b""), _Stderr()) + if cmd.startswith("sudo chown -R"): + return (None, _Stdout(b""), _Stderr()) + if cmd.startswith("rm -rf"): + return (None, _Stdout(b""), _Stderr()) + + return (None, _Stdout(b""), _Stderr(b"unknown")) + + def close(self): + return + + import types + + class RejectPolicy: + pass + + FakeParamiko = types.SimpleNamespace(SSHClient=FakeSSH, RejectPolicy=RejectPolicy) + + # Provide a fake paramiko module. + monkeypatch.setitem(sys.modules, "paramiko", FakeParamiko) + + out_dir = tmp_path / "out" + state_path = r.remote_harvest( + local_out_dir=out_dir, + remote_host="example.com", + remote_port=2222, + remote_user=None, + include_paths=["/etc/nginx/nginx.conf"], + exclude_paths=["/etc/shadow"], + dangerous=True, + no_sudo=False, + ) + + assert state_path == out_dir / "state.json" + assert state_path.exists() + assert b"ok" in state_path.read_bytes() + + # Ensure we attempted remote harvest with sudo and passed include/exclude and dangerous. 
+ joined = "\n".join(calls) + assert "sudo" in joined + assert "--dangerous" in joined + assert "--include-path" in joined + assert "--exclude-path" in joined diff --git a/tests/test_systemd.py b/tests/test_systemd.py new file mode 100644 index 0000000..f351159 --- /dev/null +++ b/tests/test_systemd.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +import pytest + + +def test_list_enabled_services_and_timers_filters_templates(monkeypatch): + import enroll.systemd as s + + def fake_run(cmd: list[str]) -> str: + if "--type=service" in cmd: + return "\n".join( + [ + "nginx.service enabled", + "getty@.service enabled", # template + "foo@bar.service enabled", # instance units are included + "ssh.service enabled", + ] + ) + if "--type=timer" in cmd: + return "\n".join( + [ + "apt-daily.timer enabled", + "foo@.timer enabled", # template + ] + ) + raise AssertionError("unexpected") + + monkeypatch.setattr(s, "_run", fake_run) + assert s.list_enabled_services() == [ + "foo@bar.service", + "nginx.service", + "ssh.service", + ] + assert s.list_enabled_timers() == ["apt-daily.timer"] + + +def test_get_unit_info_parses_fields(monkeypatch): + import enroll.systemd as s + + class P: + def __init__(self, rc: int, out: str, err: str = ""): + self.returncode = rc + self.stdout = out + self.stderr = err + + def fake_run(cmd, check, text, capture_output): + assert cmd[0:2] == ["systemctl", "show"] + return P( + 0, + "\n".join( + [ + "FragmentPath=/lib/systemd/system/nginx.service", + "DropInPaths=/etc/systemd/system/nginx.service.d/override.conf /etc/systemd/system/nginx.service.d/extra.conf", + "EnvironmentFiles=-/etc/default/nginx /etc/nginx/env", + "ExecStart={ path=/usr/sbin/nginx ; argv[]=/usr/sbin/nginx -g daemon off; }", + "ActiveState=active", + "SubState=running", + "UnitFileState=enabled", + "ConditionResult=yes", + ] + ), + ) + + monkeypatch.setattr(s.subprocess, "run", fake_run) + ui = s.get_unit_info("nginx.service") + assert ui.fragment_path == 
"/lib/systemd/system/nginx.service" + assert "/etc/default/nginx" in ui.env_files + assert "/etc/nginx/env" in ui.env_files + assert "/usr/sbin/nginx" in ui.exec_paths + assert ui.active_state == "active" + + +def test_get_unit_info_raises_unit_query_error(monkeypatch): + import enroll.systemd as s + + class P: + def __init__(self, rc: int, out: str, err: str): + self.returncode = rc + self.stdout = out + self.stderr = err + + def fake_run(cmd, check, text, capture_output): + return P(1, "", "no such unit") + + monkeypatch.setattr(s.subprocess, "run", fake_run) + with pytest.raises(s.UnitQueryError) as ei: + s.get_unit_info("missing.service") + assert "missing.service" in str(ei.value) + assert ei.value.unit == "missing.service" + + +def test_get_timer_info_parses_fields(monkeypatch): + import enroll.systemd as s + + class P: + def __init__(self, rc: int, out: str, err: str = ""): + self.returncode = rc + self.stdout = out + self.stderr = err + + def fake_run(cmd, text, capture_output): + return P( + 0, + "\n".join( + [ + "FragmentPath=/lib/systemd/system/apt-daily.timer", + "DropInPaths=", + "EnvironmentFiles=-/etc/default/apt", + "Unit=apt-daily.service", + "ActiveState=active", + "SubState=waiting", + "UnitFileState=enabled", + "ConditionResult=yes", + ] + ), + ) + + monkeypatch.setattr(s.subprocess, "run", fake_run) + ti = s.get_timer_info("apt-daily.timer") + assert ti.trigger_unit == "apt-daily.service" + assert "/etc/default/apt" in ti.env_files From 8c19473e18b388b95ac3a5f77942cd081c17e889 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sun, 28 Dec 2025 18:37:14 +1100 Subject: [PATCH 052/115] Fix an attribution bug for certain files ending up in the wrong package/role. 
--- CHANGELOG.md | 4 ++ debian/changelog | 6 +++ enroll/harvest.py | 44 +++++++++++++++-- pyproject.toml | 2 +- rpm/enroll.spec | 4 +- tests/test_harvest.py | 107 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 160 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a4c39d..f2cb109 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.1.7 + + * Fix an attribution bug for certain files ending up in the wrong package/role. + # 0.1.6 * DRY up some code logic diff --git a/debian/changelog b/debian/changelog index a15c38a..eabdefc 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +enroll (0.1.7) unstable; urgency=medium + + * Fix an attribution bug for certain files ending up in the wrong package/role. + + -- Miguel Jacq Sun, 28 Dec 2025 18:30:00 +1100 + enroll (0.1.6) unstable; urgency=medium * DRY up some code logic diff --git a/enroll/harvest.py b/enroll/harvest.py index 56e5aed..d678b89 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -292,9 +292,26 @@ def _hint_names(unit: str, pkgs: Set[str]) -> Set[str]: def _add_pkgs_from_etc_topdirs( hints: Set[str], topdir_to_pkgs: Dict[str, Set[str]], pkgs: Set[str] ) -> None: + """Expand a service's package set using dpkg-owned /etc top-level dirs. + + This is a heuristic: many Debian packages split a service across multiple + packages (e.g. nginx + nginx-common) while sharing a single /etc/ + tree. + + We intentionally *avoid* using shared trees (e.g. /etc/cron.d, /etc/ssl, + /etc/apparmor.d) to expand package sets, because many unrelated packages + legitimately install files there. + + We also consider the common ".d" variant (e.g. hint "apparmor" -> + topdir "apparmor.d") so we can explicitly skip known shared trees. 
+ """ + for h in hints: - for p in topdir_to_pkgs.get(h, set()): - pkgs.add(p) + for top in (h, f"{h}.d"): + if top in SHARED_ETC_TOPDIRS: + continue + for p in topdir_to_pkgs.get(top, set()): + pkgs.add(p) def _maybe_add_specific_paths(hints: Set[str]) -> List[str]: @@ -1132,10 +1149,27 @@ def harvest( pkg = dpkg_owner(path) if pkg: - svc_roles = pkg_to_service_roles.get(pkg) + svc_roles = sorted(set(pkg_to_service_roles.get(pkg, []))) if svc_roles: - # Deterministic tie-break: lowest role name. - return (sorted(set(svc_roles))[0], tag) + # If multiple service roles reference the same package, prefer + # the role that most closely matches the snippet name (basename + # or stem). This avoids surprising attributions such as an + # AppArmor loader role "claiming" a cron/logrotate snippet + # that is clearly named after another package/service. + if len(svc_roles) > 1: + # Direct role-name matches first. + for c in [pkg, *uniq]: + rn = _safe_name(c) + if rn in svc_roles: + return (rn, tag) + # Next, use the alias map if it points at one of the roles. + for c in [pkg, *uniq]: + hit = alias_ranked.get(_safe_name(c)) + if hit is not None and hit[1] in svc_roles: + return (hit[1], tag) + + # Deterministic fallback: lowest role name. 
+ return (svc_roles[0], tag) pkg_role = pkg_name_to_role.get(pkg) if pkg_role: return (pkg_role, tag) diff --git a/pyproject.toml b/pyproject.toml index c7356bc..ca875e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.6" +version = "0.1.7" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 637dee1..f63a12c 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.6 +%global upstream_version 0.1.7 Name: enroll Version: %{upstream_version} @@ -44,6 +44,8 @@ Enroll a server's running state retrospectively into Ansible. %changelog * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} +- Fix an attribution bug for certain files ending up in the wrong package/role. +* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} - DRY up some code logic - More test coverage * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} diff --git a/tests/test_harvest.py b/tests/test_harvest.py index a832c81..fa796f0 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -176,3 +176,110 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( assert any(mf["path"] == "/usr/local/etc/myapp.conf" for mf in ul["managed_files"]) assert any(mf["path"] == "/usr/local/bin/myscript" for mf in ul["managed_files"]) assert all(mf["path"] != "/usr/local/bin/readme.txt" for mf in ul["managed_files"]) + + +def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( + monkeypatch, tmp_path: Path +): + """Regression test for shared snippet routing. + + When multiple service roles reference the same owning package, we prefer the + role whose name matches the snippet/package (e.g. ntpsec) rather than a + lexicographic tie-break that could incorrectly pick another role. 
+ """ + + bundle = tmp_path / "bundle" + + files = {"/etc/cron.d/ntpsec": b"# cron\n"} + dirs = {"/etc", "/etc/cron.d"} + + monkeypatch.setattr(h.os.path, "isfile", lambda p: p in files) + monkeypatch.setattr(h.os.path, "islink", lambda p: False) + monkeypatch.setattr(h.os.path, "isdir", lambda p: p in dirs) + monkeypatch.setattr(h.os.path, "exists", lambda p: p in files or p in dirs) + monkeypatch.setattr(h.os, "walk", lambda root: [("/etc/cron.d", [], ["ntpsec"])]) + + # Only include the cron snippet in the system capture set. + monkeypatch.setattr( + h, "_iter_system_capture_paths", lambda: [("/etc/cron.d/ntpsec", "system_cron")] + ) + + monkeypatch.setattr( + h, "list_enabled_services", lambda: ["apparmor.service", "ntpsec.service"] + ) + + def fake_unit_info(unit: str) -> UnitInfo: + if unit == "apparmor.service": + return UnitInfo( + name=unit, + fragment_path="/lib/systemd/system/apparmor.service", + dropin_paths=[], + env_files=[], + exec_paths=["/usr/sbin/apparmor"], + active_state="active", + sub_state="running", + unit_file_state="enabled", + condition_result=None, + ) + return UnitInfo( + name=unit, + fragment_path="/lib/systemd/system/ntpsec.service", + dropin_paths=[], + env_files=[], + exec_paths=["/usr/sbin/ntpd"], + active_state="active", + sub_state="running", + unit_file_state="enabled", + condition_result=None, + ) + + monkeypatch.setattr(h, "get_unit_info", fake_unit_info) + + # Dpkg /etc index: no owned /etc paths needed for this test. + monkeypatch.setattr( + h, + "build_dpkg_etc_index", + lambda: (set(), {}, {}, {}), + ) + monkeypatch.setattr(h, "parse_status_conffiles", lambda: {}) + monkeypatch.setattr(h, "read_pkg_md5sums", lambda pkg: {}) + monkeypatch.setattr(h, "file_md5", lambda path: "x") + monkeypatch.setattr(h, "list_manual_packages", lambda: []) + monkeypatch.setattr(h, "collect_non_system_users", lambda: []) + + # Make apparmor *also* claim the ntpsec package (simulates overly-broad + # package inference). 
The snippet routing should still prefer role 'ntpsec'. + def fake_dpkg_owner(p: str): + if p == "/etc/cron.d/ntpsec": + return "ntpsec" + if "apparmor" in p: + return "ntpsec" # intentionally misleading + if "ntpsec" in p or "ntpd" in p: + return "ntpsec" + return None + + monkeypatch.setattr(h, "dpkg_owner", fake_dpkg_owner) + monkeypatch.setattr(h, "stat_triplet", lambda p: ("root", "root", "0644")) + + def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): + dst = Path(bundle_dir) / "artifacts" / role_name / src_rel + dst.parent.mkdir(parents=True, exist_ok=True) + dst.write_bytes(files[abs_path]) + + monkeypatch.setattr(h, "_copy_into_bundle", fake_copy) + + class AllowAll: + def deny_reason(self, path: str): + return None + + state_path = h.harvest(str(bundle), policy=AllowAll()) + st = json.loads(Path(state_path).read_text(encoding="utf-8")) + + # Cron snippet should end up attached to the ntpsec role, not apparmor. + svc_ntpsec = next(s for s in st["services"] if s["role_name"] == "ntpsec") + assert any(mf["path"] == "/etc/cron.d/ntpsec" for mf in svc_ntpsec["managed_files"]) + + svc_apparmor = next(s for s in st["services"] if s["role_name"] == "apparmor") + assert all( + mf["path"] != "/etc/cron.d/ntpsec" for mf in svc_apparmor["managed_files"] + ) From ad2abed6127989e62a639874f861acbfaf2e9915 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 14:29:11 +1100 Subject: [PATCH 053/115] Add version CLI arg --- CHANGELOG.md | 4 ++++ enroll/cli.py | 61 ++++++++++++++++++++++++++++------------------- enroll/version.py | 32 +++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 25 deletions(-) create mode 100644 enroll/version.py diff --git a/CHANGELOG.md b/CHANGELOG.md index f2cb109..e07f57b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.2.0 + + * Add version CLI arg + # 0.1.7 * Fix an attribution bug for certain files ending up in the wrong package/role. 
diff --git a/enroll/cli.py b/enroll/cli.py index ae9aba0..bb4d3f1 100644 --- a/enroll/cli.py +++ b/enroll/cli.py @@ -15,6 +15,7 @@ from .harvest import harvest from .manifest import manifest from .remote import remote_harvest from .sopsutil import SopsError, encrypt_file_binary +from .version import get_enroll_version def _discover_config_path(argv: list[str]) -> Optional[Path]: @@ -318,13 +319,6 @@ def _jt_mode(args: argparse.Namespace) -> str: return "auto" -def _add_remote_args(p: argparse.ArgumentParser) -> None: - p.add_argument( - "--remote-host", - help="SSH host to run harvesting on (if set, harvest runs remotely and is pulled locally).", - ) - - def _add_config_args(p: argparse.ArgumentParser) -> None: p.add_argument( "-c", @@ -339,6 +333,13 @@ def _add_config_args(p: argparse.ArgumentParser) -> None: action="store_true", help="Do not load any INI config file (even if one would be auto-discovered).", ) + + +def _add_remote_args(p: argparse.ArgumentParser) -> None: + p.add_argument( + "--remote-host", + help="SSH host to run harvesting on (if set, harvest runs remotely and is pulled locally).", + ) p.add_argument( "--remote-port", type=int, @@ -354,11 +355,18 @@ def _add_config_args(p: argparse.ArgumentParser) -> None: def main() -> None: ap = argparse.ArgumentParser(prog="enroll") + ap.add_argument( + "-v", + "--version", + action="version", + version=f"{get_enroll_version()}", + ) _add_config_args(ap) sub = ap.add_subparsers(dest="cmd", required=True) h = sub.add_parser("harvest", help="Harvest service/package/config state") _add_config_args(h) + _add_remote_args(h) h.add_argument( "--out", help=( @@ -406,7 +414,6 @@ def main() -> None: action="store_true", help="Don't use sudo on the remote host (when using --remote options). 
This may result in a limited harvest due to permission restrictions.", ) - _add_remote_args(h) m = sub.add_parser("manifest", help="Render Ansible roles from a harvest") _add_config_args(m) @@ -443,6 +450,7 @@ def main() -> None: "single-shot", help="Harvest state, then manifest Ansible code, in one shot" ) _add_config_args(s) + _add_remote_args(s) s.add_argument( "--harvest", help=( @@ -500,7 +508,6 @@ def main() -> None: ), ) _add_common_manifest_args(s) - _add_remote_args(s) d = sub.add_parser("diff", help="Compare two harvests and report differences") _add_config_args(d) @@ -602,14 +609,12 @@ def main() -> None: ) args = ap.parse_args(argv) - remote_host: Optional[str] = getattr(args, "remote_host", None) - try: if args.cmd == "harvest": sops_fps = getattr(args, "sops", None) - if remote_host: + if args.remote_host: if sops_fps: - out_file = _resolve_sops_out_file(args.out, hint=remote_host) + out_file = _resolve_sops_out_file(args.out, hint=args.remote_host) with tempfile.TemporaryDirectory(prefix="enroll-harvest-") as td: tmp_bundle = Path(td) / "bundle" tmp_bundle.mkdir(parents=True, exist_ok=True) @@ -619,7 +624,7 @@ def main() -> None: pass remote_harvest( local_out_dir=tmp_bundle, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), @@ -635,11 +640,11 @@ def main() -> None: out_dir = ( Path(args.out) if args.out - else new_harvest_cache_dir(hint=remote_host).dir + else new_harvest_cache_dir(hint=args.remote_host).dir ) state = remote_harvest( local_out_dir=out_dir, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), @@ -669,12 +674,16 @@ def main() -> None: ) print(str(out_file)) else: - if not args.out: - raise SystemExit( - "error: --out is required unless --remote-host is set" + if args.out: + out_dir = args.out + else: + out_dir = ( + 
Path(args.out) + if args.out + else new_harvest_cache_dir(hint=args.remote_host).dir ) path = harvest( - args.out, + out_dir, dangerous=bool(args.dangerous), include_paths=list(getattr(args, "include_path", []) or []), exclude_paths=list(getattr(args, "exclude_path", []) or []), @@ -747,9 +756,11 @@ def main() -> None: raise SystemExit(2) elif args.cmd == "single-shot": sops_fps = getattr(args, "sops", None) - if remote_host: + if args.remote_host: if sops_fps: - out_file = _resolve_sops_out_file(args.harvest, hint=remote_host) + out_file = _resolve_sops_out_file( + args.harvest, hint=args.remote_host + ) with tempfile.TemporaryDirectory(prefix="enroll-harvest-") as td: tmp_bundle = Path(td) / "bundle" tmp_bundle.mkdir(parents=True, exist_ok=True) @@ -759,7 +770,7 @@ def main() -> None: pass remote_harvest( local_out_dir=tmp_bundle, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), @@ -784,11 +795,11 @@ def main() -> None: harvest_dir = ( Path(args.harvest) if args.harvest - else new_harvest_cache_dir(hint=remote_host).dir + else new_harvest_cache_dir(hint=args.remote_host).dir ) remote_harvest( local_out_dir=harvest_dir, - remote_host=remote_host, + remote_host=args.remote_host, remote_port=int(args.remote_port), remote_user=args.remote_user, dangerous=bool(args.dangerous), diff --git a/enroll/version.py b/enroll/version.py new file mode 100644 index 0000000..bbe78b6 --- /dev/null +++ b/enroll/version.py @@ -0,0 +1,32 @@ +from __future__ import annotations + + +def get_enroll_version() -> str: + """ + Best-effort version lookup that works when installed via: + - poetry/pip/wheel + - deb/rpm system packages + Falls back to "0+unknown" when running from an unpacked source tree. 
+ """ + try: + from importlib.metadata import ( + packages_distributions, + version, + ) + except Exception: + # Very old Python or unusual environment + return "unknown" + + # Map import package -> dist(s) + dist_names = [] + try: + dist_names = (packages_distributions() or {}).get("enroll", []) or [] + except Exception: + dist_names = [] + + # Try mapped dists first, then a reasonable default + for dist in [*dist_names, "enroll"]: + try: + return version(dist) + except Exception: + return "unknown" From 984b0fa81b5b224951816c4dc46a74734b950d07 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 14:59:34 +1100 Subject: [PATCH 054/115] Add ability to enroll RH-style systems (DNF5/DNF/RPM) --- CHANGELOG.md | 1 + README.md | 18 +-- enroll/debian.py | 26 ---- enroll/fsutil.py | 40 ++++++ enroll/harvest.py | 272 ++++++++++++++++++++++++++--------------- enroll/ignore.py | 1 + enroll/manifest.py | 229 ++++++++++++++++++++++++++++++---- enroll/platform.py | 261 +++++++++++++++++++++++++++++++++++++++ enroll/rpm.py | 266 ++++++++++++++++++++++++++++++++++++++++ tests/test_debian.py | 56 --------- tests/test_fsutil.py | 25 ++++ tests/test_harvest.py | 142 +++++++++++++++------ tests/test_manifest.py | 93 ++++++++++++++ tests/test_platform.py | 93 ++++++++++++++ tests/test_rpm.py | 131 ++++++++++++++++++++ 15 files changed, 1400 insertions(+), 254 deletions(-) create mode 100644 enroll/fsutil.py create mode 100644 enroll/platform.py create mode 100644 enroll/rpm.py create mode 100644 tests/test_fsutil.py create mode 100644 tests/test_platform.py create mode 100644 tests/test_rpm.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e07f57b..f92e0b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # 0.2.0 * Add version CLI arg + * Add ability to enroll RH-style systems (DNF5/DNF/RPM) # 0.1.7 diff --git a/README.md b/README.md index c6b8123..d075951 100644 --- a/README.md +++ b/README.md @@ -4,15 +4,15 @@ Enroll logo -**enroll** inspects a Linux machine 
(currently Debian-only) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. +**enroll** inspects a Linux machine (Debian-like or RedHat-like) and generates Ansible roles/playbooks (and optionally inventory) for what it finds. - Detects packages that have been installed. -- Detects Debian package ownership of `/etc` files using dpkg’s local database. -- Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available). +- Detects package ownership of `/etc` files where possible +- Captures config that has **changed from packaged defaults** where possible (e.g dpkg conffile hashes + package md5sums when available). - Also captures **service-relevant custom/unowned files** under `/etc//...` (e.g. drop-in config includes). - Defensively excludes likely secrets (path denylist + content sniff + size caps). - Captures non-system users and their SSH public keys. -- Captures miscellaneous `/etc` files it can’t attribute to a package and installs them in an `etc_custom` role. +- Captures miscellaneous `/etc` files it can't attribute to a package and installs them in an `etc_custom` role. - Ditto for /usr/local/bin (for non-binary files) and /usr/local/etc - Avoids trying to start systemd services that were detected as inactive during harvest. @@ -41,8 +41,8 @@ Use when enrolling **one server** (or generating a “golden” role set you int **Characteristics** - Roles are more self-contained. -- Raw config files live in the role’s `files/`. -- Template variables live in the role’s `defaults/main.yml`. +- Raw config files live in the role's `files/`. +- Template variables live in the role's `defaults/main.yml`. ### Multi-site mode (`--fqdn`) Use when enrolling **several existing servers** quickly, especially if they differ. @@ -68,13 +68,13 @@ Harvest state about a host and write a harvest bundle. 
- “Manual” packages - Changed-from-default config (plus related custom/unowned files under service dirs) - Non-system users + SSH public keys -- Misc `/etc` that can’t be attributed to a package (`etc_custom` role) +- Misc `/etc` that can't be attributed to a package (`etc_custom` role) - Optional user-specified extra files/dirs via `--include-path` (emitted as an `extra_paths` role at manifest time) **Common flags** - Remote harvesting: - `--remote-host`, `--remote-user`, `--remote-port` - - `--no-sudo` (if you don’t want/need sudo) + - `--no-sudo` (if you don't want/need sudo) - Sensitive-data behaviour: - default: tries to avoid likely secrets - `--dangerous`: disables secret-safety checks (see “Sensitive data” below) @@ -233,7 +233,7 @@ poetry run enroll --help ## Found a bug / have a suggestion? -My Forgejo doesn’t currently support federation, so I haven’t opened registration/login for issues. +My Forgejo doesn't currently support federation, so I haven't opened registration/login for issues. 
Instead, email me (see `pyproject.toml`) or contact me on the Fediverse: diff --git a/enroll/debian.py b/enroll/debian.py index 0ddc1f3..7e1ee2d 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -1,7 +1,6 @@ from __future__ import annotations import glob -import hashlib import os import subprocess # nosec from typing import Dict, List, Optional, Set, Tuple @@ -180,28 +179,3 @@ def read_pkg_md5sums(pkg: str) -> Dict[str, str]: md5, rel = line.split(None, 1) m[rel.strip()] = md5.strip() return m - - -def file_md5(path: str) -> str: - h = hashlib.md5() # nosec - with open(path, "rb") as f: - for chunk in iter(lambda: f.read(1024 * 1024), b""): - h.update(chunk) - return h.hexdigest() - - -def stat_triplet(path: str) -> Tuple[str, str, str]: - st = os.stat(path, follow_symlinks=True) - mode = oct(st.st_mode & 0o777)[2:].zfill(4) - - import pwd, grp - - try: - owner = pwd.getpwuid(st.st_uid).pw_name - except KeyError: - owner = str(st.st_uid) - try: - group = grp.getgrgid(st.st_gid).gr_name - except KeyError: - group = str(st.st_gid) - return owner, group, mode diff --git a/enroll/fsutil.py b/enroll/fsutil.py new file mode 100644 index 0000000..3d18df6 --- /dev/null +++ b/enroll/fsutil.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import hashlib +import os +from typing import Tuple + + +def file_md5(path: str) -> str: + """Return hex MD5 of a file. + + Used for Debian dpkg baseline comparisons. + """ + h = hashlib.md5() # nosec + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(1024 * 1024), b""): + h.update(chunk) + return h.hexdigest() + + +def stat_triplet(path: str) -> Tuple[str, str, str]: + """Return (owner, group, mode) for a path. + + owner/group are usernames/group names when resolvable, otherwise numeric ids. + mode is a zero-padded octal string (e.g. "0644"). 
+ """ + st = os.stat(path, follow_symlinks=True) + mode = oct(st.st_mode & 0o777)[2:].zfill(4) + + import grp + import pwd + + try: + owner = pwd.getpwuid(st.st_uid).pw_name + except KeyError: + owner = str(st.st_uid) + try: + group = grp.getgrgid(st.st_gid).gr_name + except KeyError: + group = str(st.st_gid) + return owner, group, mode diff --git a/enroll/harvest.py b/enroll/harvest.py index d678b89..bb706b1 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -15,18 +15,12 @@ from .systemd import ( get_timer_info, UnitQueryError, ) -from .debian import ( - build_dpkg_etc_index, - dpkg_owner, - file_md5, - list_manual_packages, - parse_status_conffiles, - read_pkg_md5sums, - stat_triplet, -) +from .fsutil import stat_triplet +from .platform import detect_platform, get_backend from .ignore import IgnorePolicy from .pathfilter import PathFilter, expand_includes from .accounts import collect_non_system_users +from .version import get_enroll_version @dataclass @@ -85,6 +79,14 @@ class AptConfigSnapshot: notes: List[str] +@dataclass +class DnfConfigSnapshot: + role_name: str + managed_files: List[ManagedFile] + excluded: List[ExcludedFile] + notes: List[str] + + @dataclass class EtcCustomSnapshot: role_name: str @@ -158,6 +160,13 @@ SHARED_ETC_TOPDIRS = { "sudoers.d", "sysctl.d", "systemd", + # RPM-family shared trees + "dnf", + "yum", + "yum.repos.d", + "sysconfig", + "pki", + "firewalld", } @@ -314,17 +323,23 @@ def _add_pkgs_from_etc_topdirs( pkgs.add(p) -def _maybe_add_specific_paths(hints: Set[str]) -> List[str]: - paths: List[str] = [] - for h in hints: - paths.extend( - [ - f"/etc/default/{h}", - f"/etc/init.d/{h}", - f"/etc/sysctl.d/{h}.conf", - ] - ) - return paths +def _maybe_add_specific_paths(hints: Set[str], backend) -> List[str]: + # Delegate to backend-specific conventions (e.g. /etc/default on Debian, + # /etc/sysconfig on Fedora/RHEL). Always include sysctl.d. 
+ try: + return backend.specific_paths_for_hints(hints) + except Exception: + # Best-effort fallback (Debian-ish). + paths: List[str] = [] + for h in hints: + paths.extend( + [ + f"/etc/default/{h}", + f"/etc/init.d/{h}", + f"/etc/sysctl.d/{h}.conf", + ] + ) + return paths def _scan_unowned_under_roots( @@ -408,6 +423,7 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ("/etc/anacron/*", "system_cron"), ("/var/spool/cron/crontabs/*", "system_cron"), ("/var/spool/crontabs/*", "system_cron"), + ("/var/spool/cron/*", "system_cron"), # network ("/etc/netplan/*", "system_network"), ("/etc/systemd/network/*", "system_network"), @@ -415,6 +431,9 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ("/etc/network/interfaces.d/*", "system_network"), ("/etc/resolvconf.conf", "system_network"), ("/etc/resolvconf/resolv.conf.d/*", "system_network"), + ("/etc/NetworkManager/system-connections/*", "system_network"), + ("/etc/sysconfig/network*", "system_network"), + ("/etc/sysconfig/network-scripts/*", "system_network"), # firewall ("/etc/nftables.conf", "system_firewall"), ("/etc/nftables.d/*", "system_firewall"), @@ -422,6 +441,10 @@ _SYSTEM_CAPTURE_GLOBS: List[tuple[str, str]] = [ ("/etc/iptables/rules.v6", "system_firewall"), ("/etc/ufw/*", "system_firewall"), ("/etc/default/ufw", "system_firewall"), + ("/etc/firewalld/*", "system_firewall"), + ("/etc/firewalld/zones/*", "system_firewall"), + # SELinux + ("/etc/selinux/config", "system_security"), # other ("/etc/rc.local", "system_rc"), ] @@ -553,6 +576,51 @@ def _iter_apt_capture_paths() -> List[tuple[str, str]]: return uniq +def _iter_dnf_capture_paths() -> List[tuple[str, str]]: + """Return (path, reason) pairs for DNF/YUM configuration on RPM systems. 
+ + Captures: + - /etc/dnf/* (dnf.conf, vars, plugins, modules, automatic) + - /etc/yum.conf (legacy) + - /etc/yum.repos.d/*.repo + - /etc/pki/rpm-gpg/* (GPG key files) + """ + reasons: Dict[str, str] = {} + + for root, tag in ( + ("/etc/dnf", "dnf_config"), + ("/etc/yum", "yum_config"), + ): + if os.path.isdir(root): + for dirpath, _, filenames in os.walk(root): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, tag) + + # Legacy yum.conf. + if os.path.isfile("/etc/yum.conf") and not os.path.islink("/etc/yum.conf"): + reasons.setdefault("/etc/yum.conf", "yum_conf") + + # Repositories. + if os.path.isdir("/etc/yum.repos.d"): + for p in _iter_matching_files("/etc/yum.repos.d/*.repo"): + reasons[p] = "yum_repo" + + # RPM GPG keys. + if os.path.isdir("/etc/pki/rpm-gpg"): + for dirpath, _, filenames in os.walk("/etc/pki/rpm-gpg"): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + reasons.setdefault(p, "rpm_gpg_key") + + # Stable ordering. 
+ return [(p, reasons[p]) for p in sorted(reasons.keys())] + + def _iter_system_capture_paths() -> List[tuple[str, str]]: """Return (path, reason) pairs for essential system config/state (non-APT).""" out: List[tuple[str, str]] = [] @@ -600,8 +668,12 @@ def harvest( flush=True, ) - owned_etc, etc_owner_map, topdir_to_pkgs, pkg_to_etc_paths = build_dpkg_etc_index() - conffiles_by_pkg = parse_status_conffiles() + platform = detect_platform() + backend = get_backend(platform) + + owned_etc, etc_owner_map, topdir_to_pkgs, pkg_to_etc_paths = ( + backend.build_etc_index() + ) # ------------------------- # Service roles @@ -645,12 +717,12 @@ def harvest( candidates: Dict[str, str] = {} if ui.fragment_path: - p = dpkg_owner(ui.fragment_path) + p = backend.owner_of_path(ui.fragment_path) if p: pkgs.add(p) for exe in ui.exec_paths: - p = dpkg_owner(exe) + p = backend.owner_of_path(exe) if p: pkgs.add(p) @@ -675,7 +747,7 @@ def harvest( # logrotate.d entries) can still be attributed back to this service. service_role_aliases[role] = set(hints) | set(pkgs) | {role} - for sp in _maybe_add_specific_paths(hints): + for sp in _maybe_add_specific_paths(hints, backend): if not os.path.exists(sp): continue if sp in etc_owner_map: @@ -684,31 +756,13 @@ def harvest( candidates.setdefault(sp, "custom_specific_path") for pkg in sorted(pkgs): - conff = conffiles_by_pkg.get(pkg, {}) - md5sums = read_pkg_md5sums(pkg) - for path in pkg_to_etc_paths.get(pkg, []): + etc_paths = pkg_to_etc_paths.get(pkg, []) + for path, reason in backend.modified_paths(pkg, etc_paths).items(): if not os.path.isfile(path) or os.path.islink(path): continue - if path.startswith("/etc/apt/"): + if backend.is_pkg_config_path(path): continue - if path in conff: - # Only capture conffiles when they differ from the package default. 
- try: - current = file_md5(path) - except OSError: - continue - if current != conff[path]: - candidates.setdefault(path, "modified_conffile") - continue - rel = path.lstrip("/") - baseline = md5sums.get(rel) - if baseline: - try: - current = file_md5(path) - except OSError: - continue - if current != baseline: - candidates.setdefault(path, "modified_packaged_file") + candidates.setdefault(path, reason) # Capture custom/unowned files living under /etc/ for this service. # @@ -847,18 +901,18 @@ def harvest( # (useful when a timer triggers a service that isn't enabled). pkgs: Set[str] = set() if ti.fragment_path: - p = dpkg_owner(ti.fragment_path) + p = backend.owner_of_path(ti.fragment_path) if p: pkgs.add(p) if ti.trigger_unit and ti.trigger_unit.endswith(".service"): try: ui = get_unit_info(ti.trigger_unit) if ui.fragment_path: - p = dpkg_owner(ui.fragment_path) + p = backend.owner_of_path(ui.fragment_path) if p: pkgs.add(p) for exe in ui.exec_paths: - p = dpkg_owner(exe) + p = backend.owner_of_path(exe) if p: pkgs.add(p) except Exception: # nosec @@ -870,7 +924,7 @@ def harvest( # ------------------------- # Manually installed package roles # ------------------------- - manual_pkgs = list_manual_packages() + manual_pkgs = backend.list_manual_packages() # Avoid duplicate roles: if a manual package is already managed by any service role, skip its pkg_ role. 
covered_by_services: Set[str] = set() for s in service_snaps: @@ -893,41 +947,26 @@ def harvest( for tpath in timer_extra_by_pkg.get(pkg, []): candidates.setdefault(tpath, "related_timer") - conff = conffiles_by_pkg.get(pkg, {}) - md5sums = read_pkg_md5sums(pkg) - - for path in pkg_to_etc_paths.get(pkg, []): + etc_paths = pkg_to_etc_paths.get(pkg, []) + for path, reason in backend.modified_paths(pkg, etc_paths).items(): if not os.path.isfile(path) or os.path.islink(path): continue - if path.startswith("/etc/apt/"): + if backend.is_pkg_config_path(path): continue - if path in conff: - try: - current = file_md5(path) - except OSError: - continue - if current != conff[path]: - candidates.setdefault(path, "modified_conffile") - continue - rel = path.lstrip("/") - baseline = md5sums.get(rel) - if baseline: - try: - current = file_md5(path) - except OSError: - continue - if current != baseline: - candidates.setdefault(path, "modified_packaged_file") + candidates.setdefault(path, reason) topdirs = _topdirs_for_package(pkg, pkg_to_etc_paths) roots: List[str] = [] + # Collect candidate directories plus backend-specific common files. for td in sorted(topdirs): if td in SHARED_ETC_TOPDIRS: continue + if backend.is_pkg_config_path(f"/etc/{td}/") or backend.is_pkg_config_path( + f"/etc/{td}" + ): + continue roots.extend([f"/etc/{td}", f"/etc/{td}.d"]) - roots.extend([f"/etc/default/{td}"]) - roots.extend([f"/etc/init.d/{td}"]) - roots.extend([f"/etc/sysctl.d/{td}.conf"]) + roots.extend(_maybe_add_specific_paths(set(topdirs), backend)) # Capture any custom/unowned files under /etc/ for this # manually-installed package. 
This may include runtime-generated @@ -1031,26 +1070,48 @@ def harvest( ) # ------------------------- - # apt_config role (APT configuration and keyrings) + # Package manager config role + # - Debian: apt_config + # - Fedora/RHEL-like: dnf_config # ------------------------- apt_notes: List[str] = [] apt_excluded: List[ExcludedFile] = [] apt_managed: List[ManagedFile] = [] - apt_role_name = "apt_config" - apt_role_seen = seen_by_role.setdefault(apt_role_name, set()) + dnf_notes: List[str] = [] + dnf_excluded: List[ExcludedFile] = [] + dnf_managed: List[ManagedFile] = [] - for path, reason in _iter_apt_capture_paths(): - _capture_file( - bundle_dir=bundle_dir, - role_name=apt_role_name, - abs_path=path, - reason=reason, - policy=policy, - path_filter=path_filter, - managed_out=apt_managed, - excluded_out=apt_excluded, - seen_role=apt_role_seen, - ) + apt_role_name = "apt_config" + dnf_role_name = "dnf_config" + + if backend.name == "dpkg": + apt_role_seen = seen_by_role.setdefault(apt_role_name, set()) + for path, reason in _iter_apt_capture_paths(): + _capture_file( + bundle_dir=bundle_dir, + role_name=apt_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=apt_managed, + excluded_out=apt_excluded, + seen_role=apt_role_seen, + ) + elif backend.name == "rpm": + dnf_role_seen = seen_by_role.setdefault(dnf_role_name, set()) + for path, reason in _iter_dnf_capture_paths(): + _capture_file( + bundle_dir=bundle_dir, + role_name=dnf_role_name, + abs_path=path, + reason=reason, + policy=policy, + path_filter=path_filter, + managed_out=dnf_managed, + excluded_out=dnf_excluded, + seen_role=dnf_role_seen, + ) apt_config_snapshot = AptConfigSnapshot( role_name=apt_role_name, @@ -1058,6 +1119,12 @@ def harvest( excluded=apt_excluded, notes=apt_notes, ) + dnf_config_snapshot = DnfConfigSnapshot( + role_name=dnf_role_name, + managed_files=dnf_managed, + excluded=dnf_excluded, + notes=dnf_notes, + ) # ------------------------- # 
etc_custom role (unowned /etc files not already attributed elsewhere) @@ -1079,6 +1146,8 @@ def harvest( already.add(mf.path) for mf in apt_managed: already.add(mf.path) + for mf in dnf_managed: + already.add(mf.path) # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} @@ -1093,7 +1162,7 @@ def harvest( for pkg in s.packages: pkg_to_service_roles.setdefault(pkg, []).append(s.role_name) - # Alias -> role mapping used as a fallback when dpkg ownership is missing. + # Alias -> role mapping used as a fallback when package ownership is missing. # Prefer service roles over package roles when both would match. alias_ranked: Dict[str, tuple[int, str]] = {} @@ -1124,8 +1193,8 @@ def harvest( per service. Resolution order: - 1) dpkg owner -> service role (if any service references the package) - 2) dpkg owner -> package role (manual package role exists) + 1) package owner -> service role (if any service references the package) + 2) package owner -> package role (manual package role exists) 3) basename/stem alias match -> preferred role """ if path.startswith("/etc/logrotate.d/"): @@ -1147,7 +1216,7 @@ def harvest( seen.add(c) uniq.append(c) - pkg = dpkg_owner(path) + pkg = backend.owner_of_path(path) if pkg: svc_roles = sorted(set(pkg_to_service_roles.get(pkg, []))) if svc_roles: @@ -1226,7 +1295,7 @@ def harvest( for dirpath, _, filenames in os.walk("/etc"): for fn in filenames: path = os.path.join(dirpath, fn) - if path.startswith("/etc/apt/"): + if backend.is_pkg_config_path(path): continue if path in already: continue @@ -1413,13 +1482,22 @@ def harvest( ) state = { - "host": {"hostname": os.uname().nodename, "os": "debian"}, + "enroll": { + "version": get_enroll_version(), + }, + "host": { + "hostname": os.uname().nodename, + "os": platform.os_family, + "pkg_backend": backend.name, + "os_release": platform.os_release, + }, "users": asdict(users_snapshot), 
"services": [asdict(s) for s in service_snaps], "manual_packages": manual_pkgs, "manual_packages_skipped": manual_pkgs_skipped, "package_roles": [asdict(p) for p in pkg_snaps], "apt_config": asdict(apt_config_snapshot), + "dnf_config": asdict(dnf_config_snapshot), "etc_custom": asdict(etc_custom_snapshot), "usr_local_custom": asdict(usr_local_custom_snapshot), "extra_paths": asdict(extra_paths_snapshot), diff --git a/enroll/ignore.py b/enroll/ignore.py index ab2cb96..904997f 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -43,6 +43,7 @@ DEFAULT_ALLOW_BINARY_GLOBS = [ "/usr/share/keyrings/*.gpg", "/usr/share/keyrings/*.pgp", "/usr/share/keyrings/*.asc", + "/etc/pki/rpm-gpg/*", ] SENSITIVE_CONTENT_PATTERNS = [ diff --git a/enroll/manifest.py b/enroll/manifest.py index dbc2353..923040f 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -166,6 +166,7 @@ def _write_playbook_all(path: str, roles: List[str]) -> None: pb_lines = [ "---", "- name: Apply all roles on all hosts", + " gather_facts: true", " hosts: all", " become: true", " roles:", @@ -181,6 +182,7 @@ def _write_playbook_host(path: str, fqdn: str, roles: List[str]) -> None: "---", f"- name: Apply all roles on {fqdn}", f" hosts: {fqdn}", + " gather_facts: true", " become: true", " roles:", ] @@ -468,6 +470,51 @@ def _render_generic_files_tasks( """ +def _render_install_packages_tasks(role: str, var_prefix: str) -> str: + """Render cross-distro package installation tasks. + + We generate conditional tasks for apt/dnf/yum, falling back to the + generic `package` module. This keeps generated roles usable on both + Debian-like and RPM-like systems. 
+ """ + return f"""# Generated by enroll + +- name: Install packages for {role} (APT) + ansible.builtin.apt: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + update_cache: true + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') == 'apt' + +- name: Install packages for {role} (DNF5) + ansible.builtin.dnf5: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') == 'dnf5' + +- name: Install packages for {role} (DNF/YUM) + ansible.builtin.dnf: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') in ['dnf', 'yum'] + +- name: Install packages for {role} (generic fallback) + ansible.builtin.package: + name: "{{{{ {var_prefix}_packages | default([]) }}}}" + state: present + when: + - ({var_prefix}_packages | default([])) | length > 0 + - ansible_facts.pkg_mgr | default('') not in ['apt', 'dnf', 'dnf5', 'yum'] + +""" + + def _prepare_bundle_dir( bundle: str, *, @@ -629,6 +676,7 @@ def _manifest_from_bundle_dir( package_roles: List[Dict[str, Any]] = state.get("package_roles", []) users_snapshot: Dict[str, Any] = state.get("users", {}) apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) + dnf_config_snapshot: Dict[str, Any] = state.get("dnf_config", {}) etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) @@ -664,6 +712,7 @@ def _manifest_from_bundle_dir( manifested_users_roles: List[str] = [] manifested_apt_config_roles: List[str] = [] + manifested_dnf_config_roles: List[str] = [] manifested_etc_custom_roles: List[str] = [] manifested_usr_local_custom_roles: List[str] = [] 
manifested_extra_paths_roles: List[str] = [] @@ -1041,6 +1090,157 @@ APT configuration harvested from the system (sources, pinning, and keyrings). manifested_apt_config_roles.append(role) + # ------------------------- + # dnf_config role (DNF/YUM repos, config, and RPM GPG keys) + # ------------------------- + if dnf_config_snapshot and dnf_config_snapshot.get("managed_files"): + role = dnf_config_snapshot.get("role_name", "dnf_config") + role_dir = os.path.join(roles_root, role) + _write_role_scaffold(role_dir) + + var_prefix = role + + managed_files = dnf_config_snapshot.get("managed_files", []) + excluded = dnf_config_snapshot.get("excluded", []) + notes = dnf_config_snapshot.get("notes", []) + + templated, jt_vars = _jinjify_managed_files( + bundle_dir, + role, + role_dir, + managed_files, + jt_exe=jt_exe, + jt_enabled=jt_enabled, + overwrite_templates=not site_mode, + ) + + if site_mode: + _copy_artifacts( + bundle_dir, + role, + _host_role_files_dir(out_dir, fqdn or "", role), + exclude_rels=templated, + ) + else: + _copy_artifacts( + bundle_dir, + role, + os.path.join(role_dir, "files"), + exclude_rels=templated, + ) + + files_var = _build_managed_files_var( + managed_files, + templated, + notify_other=None, + notify_systemd=None, + ) + + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} + vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map = _merge_mappings_overwrite(vars_map, jt_map) + + if site_mode: + _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_hostvars(out_dir, fqdn or "", role, vars_map) + else: + _write_role_defaults(role_dir, vars_map) + + tasks = "---\n" + _render_generic_files_tasks( + var_prefix, include_restart_notify=False + ) + with open( + os.path.join(role_dir, "tasks", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write(tasks.rstrip() + "\n") + + with open( + os.path.join(role_dir, "meta", "main.yml"), "w", encoding="utf-8" + ) as f: + f.write("---\ndependencies: 
[]\n") + + # README: summarise repos and GPG key material + repo_paths: List[str] = [] + key_paths: List[str] = [] + repo_hosts: Set[str] = set() + + url_re = re.compile(r"(?:https?|ftp)://([^/\s]+)", re.IGNORECASE) + file_url_re = re.compile(r"file://(/[^\s]+)") + + for mf in managed_files: + p = str(mf.get("path") or "") + src_rel = str(mf.get("src_rel") or "") + if not p or not src_rel: + continue + + if p.startswith("/etc/yum.repos.d/") and p.endswith(".repo"): + repo_paths.append(p) + art_path = os.path.join(bundle_dir, "artifacts", role, src_rel) + try: + with open(art_path, "r", encoding="utf-8", errors="replace") as rf: + for line in rf: + s = line.strip() + if not s or s.startswith("#") or s.startswith(";"): + continue + # Collect hostnames from URLs (baseurl, mirrorlist, metalink, gpgkey...) + for m in url_re.finditer(s): + repo_hosts.add(m.group(1)) + # Collect local gpgkey file paths referenced as file:///... + for m in file_url_re.finditer(s): + key_paths.append(m.group(1)) + except OSError: + pass # nosec + + if p.startswith("/etc/pki/rpm-gpg/"): + key_paths.append(p) + + repo_paths = sorted(set(repo_paths)) + key_paths = sorted(set(key_paths)) + repos = sorted(repo_hosts) + + readme = ( + """# dnf_config + +DNF/YUM configuration harvested from the system (repos, config files, and RPM GPG keys). 
+ +## Repository hosts +""" + + ("\n".join([f"- {h}" for h in repos]) or "- (none)") + + """\n +## Repo files +""" + + ("\n".join([f"- {p}" for p in repo_paths]) or "- (none)") + + """\n +## GPG keys +""" + + ("\n".join([f"- {p}" for p in key_paths]) or "- (none)") + + """\n +## Managed files +""" + + ( + "\n".join( + [f"- {mf.get('path')} ({mf.get('reason')})" for mf in managed_files] + ) + or "- (none)" + ) + + """\n +## Excluded +""" + + ( + "\n".join([f"- {e.get('path')} ({e.get('reason')})" for e in excluded]) + or "- (none)" + ) + + """\n +## Notes +""" + + ("\n".join([f"- {n}" for n in notes]) or "- (none)") + + """\n""" + ) + with open(os.path.join(role_dir, "README.md"), "w", encoding="utf-8") as f: + f.write(readme) + + manifested_dnf_config_roles.append(role) + # ------------------------- # etc_custom role (unowned /etc not already attributed) # ------------------------- @@ -1457,19 +1657,7 @@ User-requested extra file harvesting. f.write(handlers) task_parts: List[str] = [] - task_parts.append( - f"""--- -# Generated by enroll - -- name: Install packages for {role} - ansible.builtin.apt: - name: "{{{{ {var_prefix}_packages | default([]) }}}}" - state: present - update_cache: true - when: ({var_prefix}_packages | default([])) | length > 0 - -""" - ) + task_parts.append("---\n" + _render_install_packages_tasks(role, var_prefix)) task_parts.append( _render_generic_files_tasks(var_prefix, include_restart_notify=True) @@ -1616,19 +1804,7 @@ Generated from `{unit}`. 
f.write(handlers) task_parts: List[str] = [] - task_parts.append( - f"""--- -# Generated by enroll - -- name: Install packages for {role} - ansible.builtin.apt: - name: "{{{{ {var_prefix}_packages | default([]) }}}}" - state: present - update_cache: true - when: ({var_prefix}_packages | default([])) | length > 0 - -""" - ) + task_parts.append("---\n" + _render_install_packages_tasks(role, var_prefix)) task_parts.append( _render_generic_files_tasks(var_prefix, include_restart_notify=False) ) @@ -1667,6 +1843,7 @@ Generated for package `{pkg}`. manifested_pkg_roles.append(role) all_roles = ( manifested_apt_config_roles + + manifested_dnf_config_roles + manifested_pkg_roles + manifested_service_roles + manifested_etc_custom_roles diff --git a/enroll/platform.py b/enroll/platform.py new file mode 100644 index 0000000..998b83d --- /dev/null +++ b/enroll/platform.py @@ -0,0 +1,261 @@ +from __future__ import annotations + +import shutil +from dataclasses import dataclass +from typing import Dict, List, Optional, Set, Tuple + +from .fsutil import file_md5 + + +def _read_os_release(path: str = "/etc/os-release") -> Dict[str, str]: + out: Dict[str, str] = {} + try: + with open(path, "r", encoding="utf-8", errors="replace") as f: + for raw in f: + line = raw.strip() + if not line or line.startswith("#") or "=" not in line: + continue + k, v = line.split("=", 1) + k = k.strip() + v = v.strip().strip('"') + out[k] = v + except OSError: + return {} + return out + + +@dataclass +class PlatformInfo: + os_family: str # debian|redhat|unknown + pkg_backend: str # dpkg|rpm|unknown + os_release: Dict[str, str] + + +def detect_platform() -> PlatformInfo: + """Detect platform family and package backend. + + Uses /etc/os-release when available, with a conservative fallback to + checking for dpkg/rpm binaries. 
+ """ + + osr = _read_os_release() + os_id = (osr.get("ID") or "").strip().lower() + likes = (osr.get("ID_LIKE") or "").strip().lower().split() + + deb_ids = {"debian", "ubuntu", "linuxmint", "raspbian", "kali"} + rhel_ids = { + "fedora", + "rhel", + "centos", + "rocky", + "almalinux", + "ol", + "oracle", + "scientific", + } + + if os_id in deb_ids or "debian" in likes: + return PlatformInfo(os_family="debian", pkg_backend="dpkg", os_release=osr) + if os_id in rhel_ids or any( + x in likes for x in ("rhel", "fedora", "centos", "redhat") + ): + return PlatformInfo(os_family="redhat", pkg_backend="rpm", os_release=osr) + + # Fallback heuristics. + if shutil.which("dpkg"): + return PlatformInfo(os_family="debian", pkg_backend="dpkg", os_release=osr) + if shutil.which("rpm"): + return PlatformInfo(os_family="redhat", pkg_backend="rpm", os_release=osr) + return PlatformInfo(os_family="unknown", pkg_backend="unknown", os_release=osr) + + +class PackageBackend: + """Backend abstraction for package ownership, config detection, and manual package lists.""" + + name: str + pkg_config_prefixes: Tuple[str, ...] 
+ + def owner_of_path(self, path: str) -> Optional[str]: # pragma: no cover + raise NotImplementedError + + def list_manual_packages(self) -> List[str]: # pragma: no cover + raise NotImplementedError + + def build_etc_index( + self, + ) -> Tuple[ + Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]] + ]: # pragma: no cover + raise NotImplementedError + + def specific_paths_for_hints(self, hints: Set[str]) -> List[str]: + return [] + + def is_pkg_config_path(self, path: str) -> bool: + for pfx in self.pkg_config_prefixes: + if path == pfx or path.startswith(pfx): + return True + return False + + def modified_paths(self, pkg: str, etc_paths: List[str]) -> Dict[str, str]: + """Return a mapping of modified file paths -> reason label.""" + return {} + + +class DpkgBackend(PackageBackend): + name = "dpkg" + pkg_config_prefixes = ("/etc/apt/",) + + def __init__(self) -> None: + from .debian import parse_status_conffiles + + self._conffiles_by_pkg = parse_status_conffiles() + + def owner_of_path(self, path: str) -> Optional[str]: + from .debian import dpkg_owner + + return dpkg_owner(path) + + def list_manual_packages(self) -> List[str]: + from .debian import list_manual_packages + + return list_manual_packages() + + def build_etc_index(self): + from .debian import build_dpkg_etc_index + + return build_dpkg_etc_index() + + def specific_paths_for_hints(self, hints: Set[str]) -> List[str]: + paths: List[str] = [] + for h in hints: + paths.extend( + [ + f"/etc/default/{h}", + f"/etc/init.d/{h}", + f"/etc/sysctl.d/{h}.conf", + ] + ) + return paths + + def modified_paths(self, pkg: str, etc_paths: List[str]) -> Dict[str, str]: + from .debian import read_pkg_md5sums + + out: Dict[str, str] = {} + conff = self._conffiles_by_pkg.get(pkg, {}) + md5sums = read_pkg_md5sums(pkg) + + for path in etc_paths: + if not path.startswith("/etc/"): + continue + if self.is_pkg_config_path(path): + continue + if path in conff: + try: + current = file_md5(path) + except OSError: + 
continue + if current != conff[path]: + out[path] = "modified_conffile" + continue + + rel = path.lstrip("/") + baseline = md5sums.get(rel) + if baseline: + try: + current = file_md5(path) + except OSError: + continue + if current != baseline: + out[path] = "modified_packaged_file" + return out + + +class RpmBackend(PackageBackend): + name = "rpm" + pkg_config_prefixes = ( + "/etc/dnf/", + "/etc/yum/", + "/etc/yum.repos.d/", + "/etc/yum.conf", + ) + + def __init__(self) -> None: + self._modified_cache: Dict[str, Set[str]] = {} + self._config_cache: Dict[str, Set[str]] = {} + + def owner_of_path(self, path: str) -> Optional[str]: + from .rpm import rpm_owner + + return rpm_owner(path) + + def list_manual_packages(self) -> List[str]: + from .rpm import list_manual_packages + + return list_manual_packages() + + def build_etc_index(self): + from .rpm import build_rpm_etc_index + + return build_rpm_etc_index() + + def specific_paths_for_hints(self, hints: Set[str]) -> List[str]: + paths: List[str] = [] + for h in hints: + paths.extend( + [ + f"/etc/sysconfig/{h}", + f"/etc/sysconfig/{h}.conf", + f"/etc/sysctl.d/{h}.conf", + ] + ) + return paths + + def _config_files(self, pkg: str) -> Set[str]: + if pkg in self._config_cache: + return self._config_cache[pkg] + from .rpm import rpm_config_files + + s = rpm_config_files(pkg) + self._config_cache[pkg] = s + return s + + def _modified_files(self, pkg: str) -> Set[str]: + if pkg in self._modified_cache: + return self._modified_cache[pkg] + from .rpm import rpm_modified_files + + s = rpm_modified_files(pkg) + self._modified_cache[pkg] = s + return s + + def modified_paths(self, pkg: str, etc_paths: List[str]) -> Dict[str, str]: + out: Dict[str, str] = {} + modified = self._modified_files(pkg) + if not modified: + return out + config = self._config_files(pkg) + + for path in etc_paths: + if not path.startswith("/etc/"): + continue + if self.is_pkg_config_path(path): + continue + if path not in modified: + continue + out[path] 
= ( + "modified_conffile" if path in config else "modified_packaged_file" + ) + return out + + +def get_backend(info: Optional[PlatformInfo] = None) -> PackageBackend: + info = info or detect_platform() + if info.pkg_backend == "dpkg": + return DpkgBackend() + if info.pkg_backend == "rpm": + return RpmBackend() + # Unknown: be conservative and use an rpm backend if rpm exists, otherwise dpkg. + if shutil.which("rpm"): + return RpmBackend() + return DpkgBackend() diff --git a/enroll/rpm.py b/enroll/rpm.py new file mode 100644 index 0000000..947617c --- /dev/null +++ b/enroll/rpm.py @@ -0,0 +1,266 @@ +from __future__ import annotations + +import os +import re +import shutil +import subprocess # nosec +from typing import Dict, List, Optional, Set, Tuple + + +def _run( + cmd: list[str], *, allow_fail: bool = False, merge_err: bool = False +) -> tuple[int, str]: + """Run a command and return (rc, stdout). + + If merge_err is True, stderr is merged into stdout to preserve ordering. + """ + p = subprocess.run( + cmd, + check=False, + text=True, + stdout=subprocess.PIPE, + stderr=(subprocess.STDOUT if merge_err else subprocess.PIPE), + ) # nosec + out = p.stdout or "" + if (not allow_fail) and p.returncode != 0: + err = "" if merge_err else (p.stderr or "") + raise RuntimeError(f"Command failed: {cmd}\n{err}{out}") + return p.returncode, out + + +def rpm_owner(path: str) -> Optional[str]: + """Return owning package name for a path, or None if unowned.""" + if not path: + return None + rc, out = _run( + ["rpm", "-qf", "--qf", "%{NAME}\n", path], allow_fail=True, merge_err=True + ) + if rc != 0: + return None + for line in out.splitlines(): + line = line.strip() + if not line: + continue + if "is not owned" in line: + return None + # With --qf we expect just the package name. + if re.match(r"^[A-Za-z0-9_.+:-]+$", line): + # Strip any accidental epoch/name-version-release output. 
+ return line.split(":", 1)[-1].strip() if line else None + return None + + +_ARCH_SUFFIXES = { + "noarch", + "x86_64", + "i686", + "aarch64", + "armv7hl", + "ppc64le", + "s390x", + "riscv64", +} + + +def _strip_arch(token: str) -> str: + """Strip a trailing .ARCH from a yum/dnf package token.""" + t = token.strip() + if "." not in t: + return t + head, tail = t.rsplit(".", 1) + if tail in _ARCH_SUFFIXES: + return head + return t + + +def list_manual_packages() -> List[str]: + """Return packages considered "user-installed" on RPM-based systems. + + Best-effort: + 1) dnf repoquery --userinstalled + 2) dnf history userinstalled + 3) yum history userinstalled + + If none are available, returns an empty list. + """ + + def _dedupe(pkgs: List[str]) -> List[str]: + return sorted({p for p in (pkgs or []) if p}) + + if shutil.which("dnf"): + # Prefer a machine-friendly output. + for cmd in ( + ["dnf", "-q", "repoquery", "--userinstalled", "--qf", "%{name}\n"], + ["dnf", "-q", "repoquery", "--userinstalled"], + ): + rc, out = _run(cmd, allow_fail=True, merge_err=True) + if rc == 0 and out.strip(): + pkgs = [] + for line in out.splitlines(): + line = line.strip() + if not line or line.startswith("Loaded plugins"): + continue + pkgs.append(_strip_arch(line.split()[0])) + if pkgs: + return _dedupe(pkgs) + + # Fallback: human-oriented output. 
+ rc, out = _run( + ["dnf", "-q", "history", "userinstalled"], allow_fail=True, merge_err=True + ) + if rc == 0 and out.strip(): + pkgs = [] + for line in out.splitlines(): + line = line.strip() + if not line or line.startswith("Installed") or line.startswith("Last"): + continue + # Often: "vim-enhanced.x86_64" + tok = line.split()[0] + pkgs.append(_strip_arch(tok)) + if pkgs: + return _dedupe(pkgs) + + if shutil.which("yum"): + rc, out = _run( + ["yum", "-q", "history", "userinstalled"], allow_fail=True, merge_err=True + ) + if rc == 0 and out.strip(): + pkgs = [] + for line in out.splitlines(): + line = line.strip() + if ( + not line + or line.startswith("Installed") + or line.startswith("Loaded") + ): + continue + tok = line.split()[0] + pkgs.append(_strip_arch(tok)) + if pkgs: + return _dedupe(pkgs) + + return [] + + +def _walk_etc_files() -> List[str]: + out: List[str] = [] + for dirpath, _, filenames in os.walk("/etc"): + for fn in filenames: + p = os.path.join(dirpath, fn) + if os.path.islink(p) or not os.path.isfile(p): + continue + out.append(p) + return out + + +def build_rpm_etc_index() -> ( + Tuple[Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]]] +): + """Best-effort equivalent of build_dpkg_etc_index for RPM systems. + + This builds indexes by walking the live /etc tree and querying RPM ownership + for each file. + + Returns: + owned_etc_paths: set of /etc paths owned by rpm + etc_owner_map: /etc/path -> pkg + topdir_to_pkgs: "nginx" -> {"nginx", ...} based on /etc/<topdir>/... + pkg_to_etc_paths: pkg -> list of owned /etc paths + """ + + owned: Set[str] = set() + owner: Dict[str, str] = {} + topdir_to_pkgs: Dict[str, Set[str]] = {} + pkg_to_etc: Dict[str, List[str]] = {} + + paths = _walk_etc_files() + + # Query in chunks to avoid excessive process spawns.
+ chunk_size = 250 + + not_owned_re = re.compile( + r"^file\s+(?P<path>.+?)\s+is\s+not\s+owned\s+by\s+any\s+package", re.IGNORECASE + ) + + for i in range(0, len(paths), chunk_size): + chunk = paths[i : i + chunk_size] + rc, out = _run( + ["rpm", "-qf", "--qf", "%{NAME}\n", *chunk], + allow_fail=True, + merge_err=True, + ) + + lines = [ln.strip() for ln in out.splitlines() if ln.strip()] + # Heuristic: rpm prints one output line per input path. If that isn't + # true (warnings/errors), fall back to per-file queries for this chunk. + if len(lines) != len(chunk): + for p in chunk: + pkg = rpm_owner(p) + if not pkg: + continue + owned.add(p) + owner.setdefault(p, pkg) + pkg_to_etc.setdefault(pkg, []).append(p) + parts = p.split("/", 3) + if len(parts) >= 3 and parts[2]: + topdir_to_pkgs.setdefault(parts[2], set()).add(pkg) + continue + + for pth, line in zip(chunk, lines): + if not line: + continue + if not_owned_re.match(line) or "is not owned" in line: + continue + pkg = line.split()[0].strip() + if not pkg: + continue + owned.add(pth) + owner.setdefault(pth, pkg) + pkg_to_etc.setdefault(pkg, []).append(pth) + parts = pth.split("/", 3) + if len(parts) >= 3 and parts[2]: + topdir_to_pkgs.setdefault(parts[2], set()).add(pkg) + + for k, v in list(pkg_to_etc.items()): + pkg_to_etc[k] = sorted(set(v)) + + return owned, owner, topdir_to_pkgs, pkg_to_etc + + +def rpm_config_files(pkg: str) -> Set[str]: + """Return config files for a package (rpm -qc).""" + rc, out = _run(["rpm", "-qc", pkg], allow_fail=True, merge_err=True) + if rc != 0: + return set() + files: Set[str] = set() + for line in out.splitlines(): + line = line.strip() + if line.startswith("/"): + files.add(line) + return files + + +def rpm_modified_files(pkg: str) -> Set[str]: + """Return files reported as modified by rpm verification (rpm -V). + + rpm -V only prints lines for differences/missing files.
+ """ + rc, out = _run(["rpm", "-V", pkg], allow_fail=True, merge_err=True) + # rc is non-zero when there are differences; we still want the output. + files: Set[str] = set() + for raw in out.splitlines(): + line = raw.strip() + if not line: + continue + # Typical forms: + # S.5....T. c /etc/foo.conf + # missing /etc/bar + m = re.search(r"\s(/\S+)$", line) + if m: + files.add(m.group(1)) + continue + if line.startswith("missing"): + parts = line.split() + if parts and parts[-1].startswith("/"): + files.add(parts[-1]) + return files diff --git a/tests/test_debian.py b/tests/test_debian.py index 333afc1..abad361 100644 --- a/tests/test_debian.py +++ b/tests/test_debian.py @@ -1,6 +1,5 @@ from __future__ import annotations -import hashlib from pathlib import Path @@ -97,58 +96,3 @@ def test_parse_status_conffiles_handles_continuations(tmp_path: Path): assert m["nginx"]["/etc/nginx/nginx.conf"] == "abcdef" assert m["nginx"]["/etc/nginx/mime.types"] == "123456" assert "other" not in m - - -def test_read_pkg_md5sums_and_file_md5(tmp_path: Path, monkeypatch): - import enroll.debian as d - - # Patch /var/lib/dpkg/info/.md5sums lookup to a tmp file. 
- md5_file = tmp_path / "pkg.md5sums" - md5_file.write_text("0123456789abcdef etc/foo.conf\n", encoding="utf-8") - - def fake_exists(path: str) -> bool: - return path.endswith("/var/lib/dpkg/info/p1.md5sums") - - real_open = open - - def fake_open(path: str, *args, **kwargs): - if path.endswith("/var/lib/dpkg/info/p1.md5sums"): - return real_open(md5_file, *args, **kwargs) - return real_open(path, *args, **kwargs) - - monkeypatch.setattr(d.os.path, "exists", fake_exists) - monkeypatch.setattr("builtins.open", fake_open) - - m = d.read_pkg_md5sums("p1") - assert m == {"etc/foo.conf": "0123456789abcdef"} - - content = b"hello world\n" - p = tmp_path / "x" - p.write_bytes(content) - assert d.file_md5(str(p)) == hashlib.md5(content).hexdigest() - - -def test_stat_triplet_fallbacks(tmp_path: Path, monkeypatch): - import enroll.debian as d - import sys - - p = tmp_path / "f" - p.write_text("x", encoding="utf-8") - - class FakePwdMod: - @staticmethod - def getpwuid(_): # pragma: no cover - raise KeyError - - class FakeGrpMod: - @staticmethod - def getgrgid(_): # pragma: no cover - raise KeyError - - # stat_triplet imports pwd/grp inside the function, so patch sys.modules. 
- monkeypatch.setitem(sys.modules, "pwd", FakePwdMod) - monkeypatch.setitem(sys.modules, "grp", FakeGrpMod) - owner, group, mode = d.stat_triplet(str(p)) - assert owner.isdigit() - assert group.isdigit() - assert mode.isdigit() and len(mode) == 4 diff --git a/tests/test_fsutil.py b/tests/test_fsutil.py new file mode 100644 index 0000000..ebe2224 --- /dev/null +++ b/tests/test_fsutil.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import hashlib +import os +from pathlib import Path + +from enroll.fsutil import file_md5, stat_triplet + + +def test_file_md5_matches_hashlib(tmp_path: Path): + p = tmp_path / "x" + p.write_bytes(b"hello world") + expected = hashlib.md5(b"hello world").hexdigest() # nosec + assert file_md5(str(p)) == expected + + +def test_stat_triplet_reports_mode(tmp_path: Path): + p = tmp_path / "x" + p.write_text("x", encoding="utf-8") + os.chmod(p, 0o600) + + owner, group, mode = stat_triplet(str(p)) + assert mode == "0600" + assert owner # non-empty string + assert group # non-empty string diff --git a/tests/test_harvest.py b/tests/test_harvest.py index fa796f0..a0d22ec 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -2,6 +2,7 @@ import json from pathlib import Path import enroll.harvest as h +from enroll.platform import PlatformInfo from enroll.systemd import UnitInfo @@ -10,6 +11,64 @@ class AllowAllPolicy: return None +class FakeBackend: + """Minimal backend stub for harvest tests. + + The real backends (dpkg/rpm) enumerate the live system (dpkg status, rpm + databases, etc). These tests instead control all backend behaviour. + """ + + def __init__( + self, + *, + name: str, + owned_etc: set[str], + etc_owner_map: dict[str, str], + topdir_to_pkgs: dict[str, set[str]], + pkg_to_etc_paths: dict[str, list[str]], + manual_pkgs: list[str], + owner_fn, + modified_by_pkg: dict[str, dict[str, str]] | None = None, + pkg_config_prefixes: tuple[str, ...] 
= ("/etc/apt/",), + ): + self.name = name + self.pkg_config_prefixes = pkg_config_prefixes + self._owned_etc = owned_etc + self._etc_owner_map = etc_owner_map + self._topdir_to_pkgs = topdir_to_pkgs + self._pkg_to_etc_paths = pkg_to_etc_paths + self._manual = manual_pkgs + self._owner_fn = owner_fn + self._modified_by_pkg = modified_by_pkg or {} + + def build_etc_index(self): + return ( + self._owned_etc, + self._etc_owner_map, + self._topdir_to_pkgs, + self._pkg_to_etc_paths, + ) + + def owner_of_path(self, path: str): + return self._owner_fn(path) + + def list_manual_packages(self): + return list(self._manual) + + def specific_paths_for_hints(self, hints: set[str]): + return [] + + def is_pkg_config_path(self, path: str) -> bool: + for pfx in self.pkg_config_prefixes: + if path == pfx or path.startswith(pfx): + return True + return False + + def modified_paths(self, pkg: str, etc_paths: list[str]): + # Test-controlled; ignore etc_paths. + return dict(self._modified_by_pkg.get(pkg, {})) + + def test_harvest_dedup_manual_packages_and_builds_etc_custom( monkeypatch, tmp_path: Path ): @@ -22,7 +81,7 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( real_exists = os.path.exists real_islink = os.path.islink - # Fake filesystem: two /etc files exist, only one is dpkg-owned. + # Fake filesystem: two /etc files exist, only one is package-owned. # Also include some /usr/local files to populate usr_local_custom. files = { "/etc/openvpn/server.conf": b"server", @@ -93,6 +152,7 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( # Avoid real system access monkeypatch.setattr(h, "list_enabled_services", lambda: ["openvpn.service"]) + monkeypatch.setattr(h, "list_enabled_timers", lambda: []) monkeypatch.setattr( h, "get_unit_info", @@ -109,29 +169,30 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( ), ) - # Debian package index: openvpn owns /etc/openvpn/server.conf; keyboard is unowned. 
- def fake_build_index(): - owned_etc = {"/etc/openvpn/server.conf"} - etc_owner_map = {"/etc/openvpn/server.conf": "openvpn"} - topdir_to_pkgs = {"openvpn": {"openvpn"}} - pkg_to_etc_paths = {"openvpn": ["/etc/openvpn/server.conf"], "curl": []} - return owned_etc, etc_owner_map, topdir_to_pkgs, pkg_to_etc_paths + # Package index: openvpn owns /etc/openvpn/server.conf; keyboard is unowned. + owned_etc = {"/etc/openvpn/server.conf"} + etc_owner_map = {"/etc/openvpn/server.conf": "openvpn"} + topdir_to_pkgs = {"openvpn": {"openvpn"}} + pkg_to_etc_paths = {"openvpn": ["/etc/openvpn/server.conf"], "curl": []} - monkeypatch.setattr(h, "build_dpkg_etc_index", fake_build_index) - - # openvpn conffile hash mismatch => should be captured under service role - monkeypatch.setattr( - h, - "parse_status_conffiles", - lambda: {"openvpn": {"/etc/openvpn/server.conf": "old"}}, + backend = FakeBackend( + name="dpkg", + owned_etc=owned_etc, + etc_owner_map=etc_owner_map, + topdir_to_pkgs=topdir_to_pkgs, + pkg_to_etc_paths=pkg_to_etc_paths, + manual_pkgs=["openvpn", "curl"], + owner_fn=lambda p: "openvpn" if "openvpn" in (p or "") else None, + modified_by_pkg={ + "openvpn": {"/etc/openvpn/server.conf": "modified_conffile"}, + }, ) - monkeypatch.setattr(h, "read_pkg_md5sums", lambda pkg: {}) - monkeypatch.setattr(h, "file_md5", lambda path: "new") monkeypatch.setattr( - h, "dpkg_owner", lambda p: "openvpn" if "openvpn" in p else None + h, "detect_platform", lambda: PlatformInfo("debian", "dpkg", {}) ) - monkeypatch.setattr(h, "list_manual_packages", lambda: ["openvpn", "curl"]) + monkeypatch.setattr(h, "get_backend", lambda info=None: backend) + monkeypatch.setattr(h, "collect_non_system_users", lambda: []) def fake_stat_triplet(p: str): @@ -207,6 +268,7 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( monkeypatch.setattr( h, "list_enabled_services", lambda: ["apparmor.service", "ntpsec.service"] ) + monkeypatch.setattr(h, "list_enabled_timers", lambda: []) 
def fake_unit_info(unit: str) -> UnitInfo: if unit == "apparmor.service": @@ -235,31 +297,35 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( monkeypatch.setattr(h, "get_unit_info", fake_unit_info) - # Dpkg /etc index: no owned /etc paths needed for this test. - monkeypatch.setattr( - h, - "build_dpkg_etc_index", - lambda: (set(), {}, {}, {}), - ) - monkeypatch.setattr(h, "parse_status_conffiles", lambda: {}) - monkeypatch.setattr(h, "read_pkg_md5sums", lambda pkg: {}) - monkeypatch.setattr(h, "file_md5", lambda path: "x") - monkeypatch.setattr(h, "list_manual_packages", lambda: []) - monkeypatch.setattr(h, "collect_non_system_users", lambda: []) - # Make apparmor *also* claim the ntpsec package (simulates overly-broad # package inference). The snippet routing should still prefer role 'ntpsec'. - def fake_dpkg_owner(p: str): + def fake_owner(p: str): if p == "/etc/cron.d/ntpsec": return "ntpsec" - if "apparmor" in p: + if "apparmor" in (p or ""): return "ntpsec" # intentionally misleading - if "ntpsec" in p or "ntpd" in p: + if "ntpsec" in (p or "") or "ntpd" in (p or ""): return "ntpsec" return None - monkeypatch.setattr(h, "dpkg_owner", fake_dpkg_owner) + backend = FakeBackend( + name="dpkg", + owned_etc=set(), + etc_owner_map={}, + topdir_to_pkgs={}, + pkg_to_etc_paths={}, + manual_pkgs=[], + owner_fn=fake_owner, + modified_by_pkg={}, + ) + + monkeypatch.setattr( + h, "detect_platform", lambda: PlatformInfo("debian", "dpkg", {}) + ) + monkeypatch.setattr(h, "get_backend", lambda info=None: backend) + monkeypatch.setattr(h, "stat_triplet", lambda p: ("root", "root", "0644")) + monkeypatch.setattr(h, "collect_non_system_users", lambda: []) def fake_copy(bundle_dir: str, role_name: str, abs_path: str, src_rel: str): dst = Path(bundle_dir) / "artifacts" / role_name / src_rel @@ -268,11 +334,7 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( monkeypatch.setattr(h, "_copy_into_bundle", fake_copy) - class AllowAll: - 
def deny_reason(self, path: str): - return None - - state_path = h.harvest(str(bundle), policy=AllowAll()) + state_path = h.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) # Cron snippet should end up attached to the ntpsec role, not apparmor. diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 92c3dfc..cbfc208 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -322,3 +322,96 @@ def test_copy2_replace_overwrites_readonly_destination(tmp_path: Path): assert dst.read_text(encoding="utf-8") == "new" mode = stat.S_IMODE(dst.stat().st_mode) assert mode & stat.S_IWUSR # destination should remain mergeable + + +def test_manifest_includes_dnf_config_role_when_present(tmp_path: Path): + bundle = tmp_path / "bundle" + out = tmp_path / "ansible" + + # Create a dnf_config artifact. + (bundle / "artifacts" / "dnf_config" / "etc" / "dnf").mkdir( + parents=True, exist_ok=True + ) + (bundle / "artifacts" / "dnf_config" / "etc" / "dnf" / "dnf.conf").write_text( + "[main]\n", encoding="utf-8" + ) + + state = { + "host": {"hostname": "test", "os": "redhat", "pkg_backend": "rpm"}, + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "package_roles": [], + "manual_packages": [], + "manual_packages_skipped": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [ + { + "path": "/etc/dnf/dnf.conf", + "src_rel": "etc/dnf/dnf.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "dnf_config", + } + ], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": 
{ + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + } + + bundle.mkdir(parents=True, exist_ok=True) + (bundle / "state.json").write_text(json.dumps(state, indent=2), encoding="utf-8") + + manifest(str(bundle), str(out)) + + pb = (out / "playbook.yml").read_text(encoding="utf-8") + assert "- dnf_config" in pb + + tasks = (out / "roles" / "dnf_config" / "tasks" / "main.yml").read_text( + encoding="utf-8" + ) + # Ensure the role exists and contains some file deployment logic. + assert "Deploy any other managed files" in tasks + + +def test_render_install_packages_tasks_contains_dnf_branch(): + from enroll.manifest import _render_install_packages_tasks + + txt = _render_install_packages_tasks("role", "role") + assert "ansible.builtin.apt" in txt + assert "ansible.builtin.dnf" in txt + assert "ansible.builtin.package" in txt + assert "pkg_mgr" in txt diff --git a/tests/test_platform.py b/tests/test_platform.py new file mode 100644 index 0000000..7ff66c6 --- /dev/null +++ b/tests/test_platform.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from pathlib import Path + +import enroll.platform as platform + + +def test_read_os_release_parses_kv_and_strips_quotes(tmp_path: Path): + p = tmp_path / "os-release" + p.write_text( + """ +# comment +ID=fedora +ID_LIKE=\"rhel centos\" +NAME=\"Fedora Linux\" +EMPTY= +NOEQUALS +""", + encoding="utf-8", + ) + + osr = platform._read_os_release(str(p)) + assert osr["ID"] == "fedora" + assert osr["ID_LIKE"] == "rhel centos" + assert osr["NAME"] == "Fedora Linux" + assert osr["EMPTY"] == "" + assert "NOEQUALS" not in osr + + +def test_detect_platform_prefers_os_release(monkeypatch): + monkeypatch.setattr( + platform, + "_read_os_release", + lambda path="/etc/os-release": {"ID": "fedora", "ID_LIKE": "rhel"}, + ) + # If os-release is decisive we shouldn't need which() + monkeypatch.setattr(platform.shutil, "which", lambda exe: None) + + 
info = platform.detect_platform() + assert info.os_family == "redhat" + assert info.pkg_backend == "rpm" + + +def test_detect_platform_fallbacks_to_dpkg_when_unknown(monkeypatch): + monkeypatch.setattr(platform, "_read_os_release", lambda path="/etc/os-release": {}) + monkeypatch.setattr( + platform.shutil, "which", lambda exe: "/usr/bin/dpkg" if exe == "dpkg" else None + ) + + info = platform.detect_platform() + assert info.os_family == "debian" + assert info.pkg_backend == "dpkg" + + +def test_get_backend_unknown_prefers_rpm_if_present(monkeypatch): + monkeypatch.setattr( + platform.shutil, "which", lambda exe: "/usr/bin/rpm" if exe == "rpm" else None + ) + + b = platform.get_backend( + platform.PlatformInfo(os_family="unknown", pkg_backend="unknown", os_release={}) + ) + assert isinstance(b, platform.RpmBackend) + + +def test_rpm_backend_modified_paths_labels_conffiles(monkeypatch): + b = platform.RpmBackend() + + # Pretend rpm -V says both files changed, but only one is a config file. + monkeypatch.setattr(b, "_modified_files", lambda pkg: {"/etc/foo.conf", "/etc/bar"}) + monkeypatch.setattr(b, "_config_files", lambda pkg: {"/etc/foo.conf"}) + + out = b.modified_paths("mypkg", ["/etc/foo.conf", "/etc/bar", "/etc/dnf/dnf.conf"]) + assert out["/etc/foo.conf"] == "modified_conffile" + assert out["/etc/bar"] == "modified_packaged_file" + # Package-manager config paths are excluded. 
+ assert "/etc/dnf/dnf.conf" not in out + + +def test_specific_paths_for_hints_differs_between_backends(): + # We can exercise this without instantiating DpkgBackend (which reads dpkg status) + class Dummy(platform.PackageBackend): + name = "dummy" + pkg_config_prefixes = ("/etc/apt/",) + + d = Dummy() + assert d.is_pkg_config_path("/etc/apt/sources.list") + assert not d.is_pkg_config_path("/etc/ssh/sshd_config") + + r = platform.RpmBackend() + paths = set(r.specific_paths_for_hints({"nginx"})) + assert "/etc/sysconfig/nginx" in paths + assert "/etc/sysconfig/nginx.conf" in paths diff --git a/tests/test_rpm.py b/tests/test_rpm.py new file mode 100644 index 0000000..ea97c12 --- /dev/null +++ b/tests/test_rpm.py @@ -0,0 +1,131 @@ +from __future__ import annotations + +import enroll.rpm as rpm + + +def test_rpm_owner_returns_none_when_unowned(monkeypatch): + monkeypatch.setattr( + rpm, + "_run", + lambda cmd, allow_fail=False, merge_err=False: ( + 1, + "file /etc/x is not owned by any package\n", + ), + ) + assert rpm.rpm_owner("/etc/x") is None + + +def test_rpm_owner_parses_name(monkeypatch): + monkeypatch.setattr( + rpm, "_run", lambda cmd, allow_fail=False, merge_err=False: (0, "bash\n") + ) + assert rpm.rpm_owner("/bin/bash") == "bash" + + +def test_strip_arch_strips_known_arches(): + assert rpm._strip_arch("vim-enhanced.x86_64") == "vim-enhanced" + assert rpm._strip_arch("foo.noarch") == "foo" + assert rpm._strip_arch("weird.token") == "weird.token" + + +def test_list_manual_packages_prefers_dnf_repoquery(monkeypatch): + monkeypatch.setattr( + rpm.shutil, "which", lambda exe: "/usr/bin/dnf" if exe == "dnf" else None + ) + + def fake_run(cmd, allow_fail=False, merge_err=False): + # First repoquery form returns usable output. 
+ if cmd[:3] == ["dnf", "-q", "repoquery"]: + return 0, "vim-enhanced.x86_64\nhtop\nvim-enhanced.x86_64\n" + raise AssertionError(f"unexpected cmd: {cmd}") + + monkeypatch.setattr(rpm, "_run", fake_run) + + pkgs = rpm.list_manual_packages() + assert pkgs == ["htop", "vim-enhanced"] + + +def test_list_manual_packages_falls_back_to_history(monkeypatch): + monkeypatch.setattr( + rpm.shutil, "which", lambda exe: "/usr/bin/dnf" if exe == "dnf" else None + ) + + def fake_run(cmd, allow_fail=False, merge_err=False): + # repoquery fails + if cmd[:3] == ["dnf", "-q", "repoquery"]: + return 1, "" + if cmd[:3] == ["dnf", "-q", "history"]: + return ( + 0, + "Installed Packages\nvim-enhanced.x86_64\nLast metadata expiration check: 0:01:00 ago\n", + ) + raise AssertionError(f"unexpected cmd: {cmd}") + + monkeypatch.setattr(rpm, "_run", fake_run) + + pkgs = rpm.list_manual_packages() + assert pkgs == ["vim-enhanced"] + + +def test_build_rpm_etc_index_uses_fallback_when_rpm_output_mismatches(monkeypatch): + # Two files in /etc, one owned, one unowned. + monkeypatch.setattr( + rpm, "_walk_etc_files", lambda: ["/etc/owned.conf", "/etc/unowned.conf"] + ) + + # Simulate chunk query producing unexpected extra line (mismatch) -> triggers per-file fallback. 
+ monkeypatch.setattr( + rpm, + "_run", + lambda cmd, allow_fail=False, merge_err=False: (0, "ownedpkg\nEXTRA\nTHIRD\n"), + ) + monkeypatch.setattr( + rpm, "rpm_owner", lambda p: "ownedpkg" if p == "/etc/owned.conf" else None + ) + + owned, owner_map, topdir_to_pkgs, pkg_to_etc = rpm.build_rpm_etc_index() + + assert owned == {"/etc/owned.conf"} + assert owner_map["/etc/owned.conf"] == "ownedpkg" + assert "owned.conf" in topdir_to_pkgs + assert pkg_to_etc["ownedpkg"] == ["/etc/owned.conf"] + + +def test_build_rpm_etc_index_parses_chunk_output(monkeypatch): + monkeypatch.setattr( + rpm, "_walk_etc_files", lambda: ["/etc/ssh/sshd_config", "/etc/notowned"] + ) + + def fake_run(cmd, allow_fail=False, merge_err=False): + # One output line per input path. + return 0, "openssh-server\nfile /etc/notowned is not owned by any package\n" + + monkeypatch.setattr(rpm, "_run", fake_run) + + owned, owner_map, topdir_to_pkgs, pkg_to_etc = rpm.build_rpm_etc_index() + + assert "/etc/ssh/sshd_config" in owned + assert "/etc/notowned" not in owned + assert owner_map["/etc/ssh/sshd_config"] == "openssh-server" + assert "ssh" in topdir_to_pkgs + assert "openssh-server" in topdir_to_pkgs["ssh"] + assert pkg_to_etc["openssh-server"] == ["/etc/ssh/sshd_config"] + + +def test_rpm_config_files_and_modified_files_parsing(monkeypatch): + monkeypatch.setattr( + rpm, + "_run", + lambda cmd, allow_fail=False, merge_err=False: ( + 0, + "/etc/foo.conf\n/usr/bin/tool\n", + ), + ) + assert rpm.rpm_config_files("mypkg") == {"/etc/foo.conf", "/usr/bin/tool"} + + # rpm -V returns only changed/missing files + out = "S.5....T. 
c /etc/foo.conf\nmissing /etc/bar\n" + monkeypatch.setattr( + rpm, "_run", lambda cmd, allow_fail=False, merge_err=False: (1, out) + ) + assert rpm.rpm_modified_files("mypkg") == {"/etc/foo.conf", "/etc/bar"} From 043802e80034b998c84a711e77ae8c69c8e0d137 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 16:10:27 +1100 Subject: [PATCH 055/115] Refactor state structure and capture versions of packages --- enroll/debian.py | 44 ++++++++++++++++++ enroll/diff.py | 112 +++++++++++++++++++++++++++++++++++++-------- enroll/harvest.py | 75 ++++++++++++++++++++++++++---- enroll/manifest.py | 27 ++++++----- enroll/platform.py | 21 +++++++++ enroll/rpm.py | 57 +++++++++++++++++++++++ 6 files changed, 294 insertions(+), 42 deletions(-) diff --git a/enroll/debian.py b/enroll/debian.py index 7e1ee2d..9bf847e 100644 --- a/enroll/debian.py +++ b/enroll/debian.py @@ -63,6 +63,50 @@ def list_manual_packages() -> List[str]: return sorted(set(pkgs)) +def list_installed_packages() -> Dict[str, List[Dict[str, str]]]: + """Return mapping of installed package name -> installed instances. + + Uses dpkg-query and is expected to work on Debian/Ubuntu-like systems. + + Output format: + {"pkg": [{"version": "...", "arch": "..."}, ...], ...} + """ + + try: + p = subprocess.run( + [ + "dpkg-query", + "-W", + "-f=${Package}\t${Version}\t${Architecture}\n", + ], + text=True, + capture_output=True, + check=False, + ) # nosec + except Exception: + return {} + + out: Dict[str, List[Dict[str, str]]] = {} + for raw in (p.stdout or "").splitlines(): + line = raw.strip("\n") + if not line: + continue + parts = line.split("\t") + if len(parts) < 3: + continue + name, ver, arch = parts[0].strip(), parts[1].strip(), parts[2].strip() + if not name: + continue + out.setdefault(name, []).append({"version": ver, "arch": arch}) + + # Stable ordering for deterministic JSON dumps. 
+ for k in list(out.keys()): + out[k] = sorted( + out[k], key=lambda x: (x.get("arch") or "", x.get("version") or "") + ) + return out + + def build_dpkg_etc_index( info_dir: str = "/var/lib/dpkg/info", ) -> Tuple[Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]]]: diff --git a/enroll/diff.py b/enroll/diff.py index 0110d17..5ad0eac 100644 --- a/enroll/diff.py +++ b/enroll/diff.py @@ -126,18 +126,62 @@ def _load_state(bundle_dir: Path) -> Dict[str, Any]: return json.load(f) +def _packages_inventory(state: Dict[str, Any]) -> Dict[str, Any]: + return (state.get("inventory") or {}).get("packages") or {} + + def _all_packages(state: Dict[str, Any]) -> List[str]: - pkgs = set(state.get("manual_packages", []) or []) - pkgs |= set(state.get("manual_packages_skipped", []) or []) - for s in state.get("services", []) or []: - for p in s.get("packages", []) or []: - pkgs.add(p) - return sorted(pkgs) + return sorted(_packages_inventory(state).keys()) + + +def _roles(state: Dict[str, Any]) -> Dict[str, Any]: + return state.get("roles") or {} + + +def _pkg_version_key(entry: Dict[str, Any]) -> Optional[str]: + """Return a stable string used for version comparison.""" + installs = entry.get("installations") or [] + if isinstance(installs, list) and installs: + parts: List[str] = [] + for inst in installs: + if not isinstance(inst, dict): + continue + arch = str(inst.get("arch") or "") + ver = str(inst.get("version") or "") + if not ver: + continue + parts.append(f"{arch}:{ver}" if arch else ver) + if parts: + return "|".join(sorted(parts)) + v = entry.get("version") + if v: + return str(v) + return None + + +def _pkg_version_display(entry: Dict[str, Any]) -> Optional[str]: + v = entry.get("version") + if v: + return str(v) + installs = entry.get("installations") or [] + if isinstance(installs, list) and installs: + parts: List[str] = [] + for inst in installs: + if not isinstance(inst, dict): + continue + arch = str(inst.get("arch") or "") + ver = 
str(inst.get("version") or "") + if not ver: + continue + parts.append(f"{ver} ({arch})" if arch else ver) + if parts: + return ", ".join(sorted(parts)) + return None def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: out: Dict[str, Dict[str, Any]] = {} - for s in state.get("services", []) or []: + for s in _roles(state).get("services") or []: unit = s.get("unit") if unit: out[str(unit)] = s @@ -145,7 +189,7 @@ def _service_units(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: def _users_by_name(state: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: - users = (state.get("users") or {}).get("users") or [] + users = (_roles(state).get("users") or {}).get("users") or [] out: Dict[str, Dict[str, Any]] = {} for u in users: name = u.get("name") @@ -167,43 +211,43 @@ class FileRec: def _iter_managed_files(state: Dict[str, Any]) -> Iterable[Tuple[str, Dict[str, Any]]]: # Services - for s in state.get("services", []) or []: + for s in _roles(state).get("services") or []: role = s.get("role_name") or "unknown" for mf in s.get("managed_files", []) or []: yield str(role), mf # Package roles - for p in state.get("package_roles", []) or []: + for p in _roles(state).get("packages") or []: role = p.get("role_name") or "unknown" for mf in p.get("managed_files", []) or []: yield str(role), mf # Users - u = state.get("users") or {} + u = _roles(state).get("users") or {} u_role = u.get("role_name") or "users" for mf in u.get("managed_files", []) or []: yield str(u_role), mf # apt_config - ac = state.get("apt_config") or {} + ac = _roles(state).get("apt_config") or {} ac_role = ac.get("role_name") or "apt_config" for mf in ac.get("managed_files", []) or []: yield str(ac_role), mf # etc_custom - ec = state.get("etc_custom") or {} + ec = _roles(state).get("etc_custom") or {} ec_role = ec.get("role_name") or "etc_custom" for mf in ec.get("managed_files", []) or []: yield str(ec_role), mf # usr_local_custom - ul = state.get("usr_local_custom") or {} + ul = 
_roles(state).get("usr_local_custom") or {} ul_role = ul.get("role_name") or "usr_local_custom" for mf in ul.get("managed_files", []) or []: yield str(ul_role), mf # extra_paths - xp = state.get("extra_paths") or {} + xp = _roles(state).get("extra_paths") or {} xp_role = xp.get("role_name") or "extra_paths" for mf in xp.get("managed_files", []) or []: yield str(xp_role), mf @@ -261,12 +305,28 @@ def compare_harvests( old_state = _load_state(old_b.dir) new_state = _load_state(new_b.dir) - old_pkgs = set(_all_packages(old_state)) - new_pkgs = set(_all_packages(new_state)) + old_inv = _packages_inventory(old_state) + new_inv = _packages_inventory(new_state) + + old_pkgs = set(old_inv.keys()) + new_pkgs = set(new_inv.keys()) pkgs_added = sorted(new_pkgs - old_pkgs) pkgs_removed = sorted(old_pkgs - new_pkgs) + pkgs_version_changed: List[Dict[str, Any]] = [] + for pkg in sorted(old_pkgs & new_pkgs): + a = old_inv.get(pkg) or {} + b = new_inv.get(pkg) or {} + if _pkg_version_key(a) != _pkg_version_key(b): + pkgs_version_changed.append( + { + "package": pkg, + "old": _pkg_version_display(a), + "new": _pkg_version_display(b), + } + ) + old_units = _service_units(old_state) new_units = _service_units(new_state) units_added = sorted(set(new_units) - set(old_units)) @@ -380,6 +440,7 @@ def compare_harvests( [ pkgs_added, pkgs_removed, + pkgs_version_changed, units_added, units_removed, units_changed, @@ -413,7 +474,11 @@ def compare_harvests( "state_mtime": _mtime_iso(new_b.state_path), "host": (new_state.get("host") or {}).get("hostname"), }, - "packages": {"added": pkgs_added, "removed": pkgs_removed}, + "packages": { + "added": pkgs_added, + "removed": pkgs_removed, + "version_changed": pkgs_version_changed, + }, "services": { "enabled_added": units_added, "enabled_removed": units_removed, @@ -471,10 +536,13 @@ def _report_text(report: Dict[str, Any]) -> str: lines.append("\nPackages") lines.append(f" added: {len(pk.get('added', []) or [])}") lines.append(f" removed: 
{len(pk.get('removed', []) or [])}") + lines.append(f" version_changed: {len(pk.get('version_changed', []) or [])}") for p in pk.get("added", []) or []: lines.append(f" + {p}") for p in pk.get("removed", []) or []: lines.append(f" - {p}") + for ch in pk.get("version_changed", []) or []: + lines.append(f" ~ {ch.get('package')}: {ch.get('old')} -> {ch.get('new')}") sv = report.get("services", {}) lines.append("\nServices (enabled systemd units)") @@ -542,6 +610,7 @@ def _report_text(report: Dict[str, Any]) -> str: [ (pk.get("added") or []), (pk.get("removed") or []), + (pk.get("version_changed") or []), (sv.get("enabled_added") or []), (sv.get("enabled_removed") or []), (sv.get("changed") or []), @@ -578,6 +647,12 @@ def _report_markdown(report: Dict[str, Any]) -> str: for p in pk.get("removed", []) or []: out.append(f" - `- {p}`\n") + out.append(f"- Version changed: {len(pk.get('version_changed', []) or [])}\n") + for ch in pk.get("version_changed", []) or []: + out.append( + f" - `~ {ch.get('package')}`: `{ch.get('old')}` → `{ch.get('new')}`\n" + ) + sv = report.get("services", {}) out.append("## Services (enabled systemd units)\n") if sv.get("enabled_added"): @@ -672,6 +747,7 @@ def _report_markdown(report: Dict[str, Any]) -> str: [ (pk.get("added") or []), (pk.get("removed") or []), + (pk.get("version_changed") or []), (sv.get("enabled_added") or []), (sv.get("enabled_removed") or []), (sv.get("changed") or []), diff --git a/enroll/harvest.py b/enroll/harvest.py index bb706b1..4ca3984 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -5,6 +5,7 @@ import json import os import re import shutil +import time from dataclasses import dataclass, asdict from typing import Dict, List, Optional, Set @@ -1481,9 +1482,60 @@ def harvest( notes=extra_notes, ) + # ------------------------- + # Inventory: packages (SBOM-ish) + # ------------------------- + installed = backend.installed_packages() or {} + + manual_set: Set[str] = set(manual_pkgs or []) + + pkg_units: 
Dict[str, Set[str]] = {} + pkg_roles_map: Dict[str, Set[str]] = {} + + for svc in service_snaps: + for p in svc.packages: + pkg_units.setdefault(p, set()).add(svc.unit) + pkg_roles_map.setdefault(p, set()).add(svc.role_name) + + pkg_role_names: Dict[str, List[str]] = {} + for ps in pkg_snaps: + pkg_roles_map.setdefault(ps.package, set()).add(ps.role_name) + pkg_role_names.setdefault(ps.package, []).append(ps.role_name) + + pkg_names: Set[str] = set() + pkg_names |= manual_set + pkg_names |= set(pkg_units.keys()) + pkg_names |= {ps.package for ps in pkg_snaps} + + packages_inventory: Dict[str, Dict[str, object]] = {} + for pkg in sorted(pkg_names): + installs = installed.get(pkg, []) or [] + arches = sorted({i.get("arch") for i in installs if i.get("arch")}) + vers = sorted({i.get("version") for i in installs if i.get("version")}) + version: Optional[str] = vers[0] if len(vers) == 1 else None + + observed: List[Dict[str, str]] = [] + if pkg in manual_set: + observed.append({"kind": "user_installed"}) + for unit in sorted(pkg_units.get(pkg, set())): + observed.append({"kind": "systemd_unit", "ref": unit}) + for rn in sorted(set(pkg_role_names.get(pkg, []))): + observed.append({"kind": "package_role", "ref": rn}) + + roles = sorted(pkg_roles_map.get(pkg, set())) + + packages_inventory[pkg] = { + "version": version, + "arches": arches, + "installations": installs, + "observed_via": observed, + "roles": roles, + } + state = { "enroll": { "version": get_enroll_version(), + "harvest_time": time.time_ns(), }, "host": { "hostname": os.uname().nodename, @@ -1491,16 +1543,19 @@ def harvest( "pkg_backend": backend.name, "os_release": platform.os_release, }, - "users": asdict(users_snapshot), - "services": [asdict(s) for s in service_snaps], - "manual_packages": manual_pkgs, - "manual_packages_skipped": manual_pkgs_skipped, - "package_roles": [asdict(p) for p in pkg_snaps], - "apt_config": asdict(apt_config_snapshot), - "dnf_config": asdict(dnf_config_snapshot), - "etc_custom": 
asdict(etc_custom_snapshot), - "usr_local_custom": asdict(usr_local_custom_snapshot), - "extra_paths": asdict(extra_paths_snapshot), + "inventory": { + "packages": packages_inventory, + }, + "roles": { + "users": asdict(users_snapshot), + "services": [asdict(s) for s in service_snaps], + "packages": [asdict(p) for p in pkg_snaps], + "apt_config": asdict(apt_config_snapshot), + "dnf_config": asdict(dnf_config_snapshot), + "etc_custom": asdict(etc_custom_snapshot), + "usr_local_custom": asdict(usr_local_custom_snapshot), + "extra_paths": asdict(extra_paths_snapshot), + }, } state_path = os.path.join(bundle_dir, "state.json") diff --git a/enroll/manifest.py b/enroll/manifest.py index 923040f..8b4008b 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -271,9 +271,7 @@ def _write_hostvars(site_root: str, fqdn: str, role: str, data: Dict[str, Any]) merged = _merge_mappings_overwrite(existing_map, data) - out = "# Generated by enroll (host-specific vars)\n---\n" + _yaml_dump_mapping( - merged, sort_keys=True - ) + out = "---\n" + _yaml_dump_mapping(merged, sort_keys=True) with open(path, "w", encoding="utf-8") as f: f.write(out) @@ -392,7 +390,7 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - return f"""# Generated by enroll + return f""" - name: Deploy any systemd unit files (templates) ansible.builtin.template: @@ -477,7 +475,7 @@ def _render_install_packages_tasks(role: str, var_prefix: str) -> str: generic `package` module. This keeps generated roles usable on both Debian-like and RPM-like systems. 
""" - return f"""# Generated by enroll + return f""" - name: Install packages for {role} (APT) ansible.builtin.apt: @@ -672,14 +670,16 @@ def _manifest_from_bundle_dir( with open(state_path, "r", encoding="utf-8") as f: state = json.load(f) - services: List[Dict[str, Any]] = state.get("services", []) - package_roles: List[Dict[str, Any]] = state.get("package_roles", []) - users_snapshot: Dict[str, Any] = state.get("users", {}) - apt_config_snapshot: Dict[str, Any] = state.get("apt_config", {}) - dnf_config_snapshot: Dict[str, Any] = state.get("dnf_config", {}) - etc_custom_snapshot: Dict[str, Any] = state.get("etc_custom", {}) - usr_local_custom_snapshot: Dict[str, Any] = state.get("usr_local_custom", {}) - extra_paths_snapshot: Dict[str, Any] = state.get("extra_paths", {}) + roles: Dict[str, Any] = state.get("roles") or {} + + services: List[Dict[str, Any]] = roles.get("services", []) + package_roles: List[Dict[str, Any]] = roles.get("packages", []) + users_snapshot: Dict[str, Any] = roles.get("users", {}) + apt_config_snapshot: Dict[str, Any] = roles.get("apt_config", {}) + dnf_config_snapshot: Dict[str, Any] = roles.get("dnf_config", {}) + etc_custom_snapshot: Dict[str, Any] = roles.get("etc_custom", {}) + usr_local_custom_snapshot: Dict[str, Any] = roles.get("usr_local_custom", {}) + extra_paths_snapshot: Dict[str, Any] = roles.get("extra_paths", {}) site_mode = fqdn is not None and fqdn != "" @@ -839,7 +839,6 @@ def _manifest_from_bundle_dir( # tasks (data-driven) users_tasks = """--- -# Generated by enroll - name: Ensure groups exist ansible.builtin.group: diff --git a/enroll/platform.py b/enroll/platform.py index 998b83d..3c1904b 100644 --- a/enroll/platform.py +++ b/enroll/platform.py @@ -81,6 +81,17 @@ class PackageBackend: def list_manual_packages(self) -> List[str]: # pragma: no cover raise NotImplementedError + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: # pragma: no cover + """Return mapping of package name -> installed instances. 
+ + Each instance is a dict with at least: + - version: package version string + - arch: architecture string + + Backends should be best-effort and return an empty mapping on failure. + """ + raise NotImplementedError + def build_etc_index( self, ) -> Tuple[ @@ -121,6 +132,11 @@ class DpkgBackend(PackageBackend): return list_manual_packages() + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: + from .debian import list_installed_packages + + return list_installed_packages() + def build_etc_index(self): from .debian import build_dpkg_etc_index @@ -194,6 +210,11 @@ class RpmBackend(PackageBackend): return list_manual_packages() + def installed_packages(self) -> Dict[str, List[Dict[str, str]]]: + from .rpm import list_installed_packages + + return list_installed_packages() + def build_etc_index(self): from .rpm import build_rpm_etc_index diff --git a/enroll/rpm.py b/enroll/rpm.py index 947617c..9e2892f 100644 --- a/enroll/rpm.py +++ b/enroll/rpm.py @@ -142,6 +142,63 @@ def list_manual_packages() -> List[str]: return [] +def list_installed_packages() -> Dict[str, List[Dict[str, str]]]: + """Return mapping of installed package name -> installed instances. + + Uses `rpm -qa` and is expected to work on RHEL/Fedora-like systems. + + Output format: + {"pkg": [{"version": "...", "arch": "..."}, ...], ...} + + The version string is formatted as: + - "-" for typical packages + - ":-" if a non-zero epoch is present + """ + + try: + _, out = _run( + [ + "rpm", + "-qa", + "--qf", + "%{NAME}\t%{EPOCHNUM}\t%{VERSION}\t%{RELEASE}\t%{ARCH}\n", + ], + allow_fail=False, + merge_err=True, + ) + except Exception: + return {} + + pkgs: Dict[str, List[Dict[str, str]]] = {} + for raw in (out or "").splitlines(): + line = raw.strip("\n") + if not line: + continue + parts = line.split("\t") + if len(parts) < 5: + continue + name, epoch, ver, rel, arch = [p.strip() for p in parts[:5]] + if not name or not ver: + continue + + # Normalise epoch. 
+ epoch = epoch.strip() + if epoch.lower() in ("(none)", "none", ""): + epoch = "0" + + v = f"{ver}-{rel}" if rel else ver + if epoch and epoch.isdigit() and epoch != "0": + v = f"{epoch}:{v}" + + pkgs.setdefault(name, []).append({"version": v, "arch": arch}) + + for k in list(pkgs.keys()): + pkgs[k] = sorted( + pkgs[k], key=lambda x: (x.get("arch") or "", x.get("version") or "") + ) + return pkgs + + def _walk_etc_files() -> List[str]: out: List[str] = [] for dirpath, _, filenames in os.walk("/etc"): From 081739fd19ba4983fa00b28c9d6969e40bef712d Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 16:35:21 +1100 Subject: [PATCH 056/115] Fix tests --- enroll/manifest.py | 15 +- enroll/rpm.py | 2 +- tests/test_diff_usr_local_custom.py | 147 ++++++---- tests/test_harvest.py | 42 ++- tests/test_jinjaturtle.py | 104 ++++--- tests/test_manifest.py | 425 +++++++++++++++++----------- 6 files changed, 457 insertions(+), 278 deletions(-) diff --git a/enroll/manifest.py b/enroll/manifest.py index 8b4008b..bc629bb 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -390,9 +390,7 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... - return f""" - -- name: Deploy any systemd unit files (templates) + return f"""- name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -475,9 +473,7 @@ def _render_install_packages_tasks(role: str, var_prefix: str) -> str: generic `package` module. This keeps generated roles usable on both Debian-like and RPM-like systems. """ - return f""" - -- name: Install packages for {role} (APT) + return f"""- name: Install packages for {role} (APT) ansible.builtin.apt: name: "{{{{ {var_prefix}_packages | default([]) }}}}" state: present @@ -995,7 +991,7 @@ Generated non-system user accounts and SSH public material. 
else: _write_role_defaults(role_dir, vars_map) - tasks = """---\n""" + _render_generic_files_tasks( + tasks = "---\n" + _render_generic_files_tasks( var_prefix, include_restart_notify=False ) with open( @@ -1297,7 +1293,7 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP else: _write_role_defaults(role_dir, vars_map) - tasks = """---\n""" + _render_generic_files_tasks( + tasks = "---\n" + _render_generic_files_tasks( var_prefix, include_restart_notify=False ) with open( @@ -1663,8 +1659,7 @@ User-requested extra file harvesting. ) task_parts.append( - f""" -- name: Probe whether systemd unit exists and is manageable + f"""- name: Probe whether systemd unit exists and is manageable ansible.builtin.systemd: name: "{{{{ {var_prefix}_unit_name }}}}" check_mode: true diff --git a/enroll/rpm.py b/enroll/rpm.py index 9e2892f..0314670 100644 --- a/enroll/rpm.py +++ b/enroll/rpm.py @@ -104,7 +104,7 @@ def list_manual_packages() -> List[str]: if pkgs: return _dedupe(pkgs) - # Fallback: human-oriented output. 
+ # Fallback rc, out = _run( ["dnf", "-q", "history", "userinstalled"], allow_fail=True, merge_err=True ) diff --git a/tests/test_diff_usr_local_custom.py b/tests/test_diff_usr_local_custom.py index 88d594f..28ec57c 100644 --- a/tests/test_diff_usr_local_custom.py +++ b/tests/test_diff_usr_local_custom.py @@ -18,65 +18,106 @@ def test_diff_includes_usr_local_custom_files(tmp_path: Path): new = tmp_path / "new" old_state = { - "host": {"hostname": "h1", "os": "debian"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], - }, - "services": [], - "package_roles": [], - "manual_packages": ["curl"], - "manual_packages_skipped": [], - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", + "schema_version": 3, + "host": {"hostname": "h1", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "curl": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "user_installed"}], + "roles": [], } - ], - "excluded": [], - "notes": [], + } + }, + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + 
"reason": "usr_local_etc_custom", + } + ], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, }, } + new_state = { **old_state, - "manual_packages": ["curl", "htop"], - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", + "inventory": { + "packages": { + **old_state["inventory"]["packages"], + "htop": { + "version": "3.0", + "arches": [], + "installations": [{"version": "3.0", "arch": "amd64"}], + "observed_via": [{"kind": "user_installed"}], + "roles": [], }, - { - "path": "/usr/local/bin/myscript", - "src_rel": "usr/local/bin/myscript", - "owner": "root", - "group": "root", - "mode": "0755", - "reason": "usr_local_bin_script", - }, - ], - "excluded": [], - "notes": [], + } + }, + "roles": { + **old_state["roles"], + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + }, + { + "path": "/usr/local/bin/myscript", + "src_rel": "usr/local/bin/myscript", + "owner": "root", + "group": "root", + "mode": "0755", + "reason": "usr_local_bin_script", + }, + ], + "excluded": [], + "notes": [], + }, }, } diff --git a/tests/test_harvest.py b/tests/test_harvest.py index a0d22ec..1b884aa 100644 --- a/tests/test_harvest.py +++ b/tests/test_harvest.py @@ -30,6 +30,7 @@ class FakeBackend: owner_fn, modified_by_pkg: dict[str, dict[str, str]] | None = None, pkg_config_prefixes: tuple[str, ...] 
= ("/etc/apt/",), + installed: dict[str, list[dict[str, str]]] | None = None, ): self.name = name self.pkg_config_prefixes = pkg_config_prefixes @@ -40,6 +41,7 @@ class FakeBackend: self._manual = manual_pkgs self._owner_fn = owner_fn self._modified_by_pkg = modified_by_pkg or {} + self._installed = installed or {} def build_etc_index(self): return ( @@ -55,6 +57,14 @@ class FakeBackend: def list_manual_packages(self): return list(self._manual) + def installed_packages(self): + """Return mapping package -> installations. + + The real backends return: + {"pkg": [{"version": "...", "arch": "..."}, ...]} + """ + return dict(self._installed) + def specific_paths_for_hints(self, hints: set[str]): return [] @@ -214,26 +224,36 @@ def test_harvest_dedup_manual_packages_and_builds_etc_custom( state_path = h.harvest(str(bundle), policy=AllowAllPolicy()) st = json.loads(Path(state_path).read_text(encoding="utf-8")) - assert "openvpn" in st["manual_packages"] - assert "curl" in st["manual_packages"] - assert "openvpn" in st["manual_packages_skipped"] - assert all(pr["package"] != "openvpn" for pr in st["package_roles"]) - assert any(pr["package"] == "curl" for pr in st["package_roles"]) + inv = st["inventory"]["packages"] + assert "openvpn" in inv + assert "curl" in inv + + # openvpn is managed by the service role, so it should NOT appear as a package role. + pkg_roles = st["roles"]["packages"] + assert all(pr["package"] != "openvpn" for pr in pkg_roles) + assert any(pr["package"] == "curl" for pr in pkg_roles) + + # Inventory provenance: openvpn should be observed via systemd unit. 
+ openvpn_obs = inv["openvpn"]["observed_via"] + assert any( + o.get("kind") == "systemd_unit" and o.get("ref") == "openvpn.service" + for o in openvpn_obs + ) # Service role captured modified conffile - svc = st["services"][0] + svc = st["roles"]["services"][0] assert svc["unit"] == "openvpn.service" assert "openvpn" in svc["packages"] assert any(mf["path"] == "/etc/openvpn/server.conf" for mf in svc["managed_files"]) # Unowned /etc/default/keyboard is attributed to etc_custom only - etc_custom = st["etc_custom"] + etc_custom = st["roles"]["etc_custom"] assert any( mf["path"] == "/etc/default/keyboard" for mf in etc_custom["managed_files"] ) # /usr/local content is attributed to usr_local_custom - ul = st["usr_local_custom"] + ul = st["roles"]["usr_local_custom"] assert any(mf["path"] == "/usr/local/etc/myapp.conf" for mf in ul["managed_files"]) assert any(mf["path"] == "/usr/local/bin/myscript" for mf in ul["managed_files"]) assert all(mf["path"] != "/usr/local/bin/readme.txt" for mf in ul["managed_files"]) @@ -338,10 +358,12 @@ def test_shared_cron_snippet_prefers_matching_role_over_lexicographic( st = json.loads(Path(state_path).read_text(encoding="utf-8")) # Cron snippet should end up attached to the ntpsec role, not apparmor. 
- svc_ntpsec = next(s for s in st["services"] if s["role_name"] == "ntpsec") + svc_ntpsec = next(s for s in st["roles"]["services"] if s["role_name"] == "ntpsec") assert any(mf["path"] == "/etc/cron.d/ntpsec" for mf in svc_ntpsec["managed_files"]) - svc_apparmor = next(s for s in st["services"] if s["role_name"] == "apparmor") + svc_apparmor = next( + s for s in st["roles"]["services"] if s["role_name"] == "apparmor" + ) assert all( mf["path"] != "/etc/cron.d/ntpsec" for mf in svc_apparmor["managed_files"] ) diff --git a/tests/test_jinjaturtle.py b/tests/test_jinjaturtle.py index 68bb04c..c0447b1 100644 --- a/tests/test_jinjaturtle.py +++ b/tests/test_jinjaturtle.py @@ -24,44 +24,78 @@ def test_manifest_uses_jinjaturtle_templates_and_does_not_copy_raw( ) state = { - "host": {"hostname": "test", "os": "debian"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], + "schema_version": 3, + "host": {"hostname": "test", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "foo": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "systemd_unit", "ref": "foo.service"}], + "roles": ["foo"], + } + } }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "services": [ - { - "unit": "foo.service", - "role_name": "foo", - "packages": ["foo"], - "active_state": "inactive", - "sub_state": "dead", - "unit_file_state": "disabled", - "condition_result": "no", - "managed_files": [ - { - "path": "/etc/foo.ini", - "src_rel": "etc/foo.ini", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "modified_conffile", - } - ], + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], "excluded": [], "notes": [], - } - ], - "package_roles": [], + }, + "services": [ + { + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + 
"active_state": "inactive", + "sub_state": "dead", + "unit_file_state": "disabled", + "condition_result": "no", + "managed_files": [ + { + "path": "/etc/foo.ini", + "src_rel": "etc/foo.ini", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": [], + "notes": [], + } + ], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + }, } bundle.mkdir(parents=True, exist_ok=True) diff --git a/tests/test_manifest.py b/tests/test_manifest.py index cbfc208..fec9cc3 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -13,95 +13,136 @@ def test_manifest_writes_roles_and_playbook_with_clean_when(tmp_path: Path): ) state = { - "host": {"hostname": "test", "os": "debian"}, - "users": { - "role_name": "users", - "users": [ + "schema_version": 3, + "host": {"hostname": "test", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "foo": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "systemd_unit", "ref": "foo.service"}], + "roles": ["foo"], + }, + "curl": { + "version": "8.0", + "arches": [], + "installations": [{"version": "8.0", "arch": "amd64"}], + "observed_via": [{"kind": "package_role", "ref": "curl"}], + "roles": ["curl"], + }, + } + }, + "roles": { + "users": { + "role_name": "users", + "users": [ + { + "name": "alice", + "uid": 1000, + "gid": 1000, + "gecos": "Alice", + "home": "/home/alice", + "shell": "/bin/bash", + "primary_group": "alice", + 
"supplementary_groups": ["docker", "qubes"], + } + ], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [ { - "name": "alice", - "uid": 1000, - "gid": 1000, - "gecos": "Alice", - "home": "/home/alice", - "shell": "/bin/bash", - "primary_group": "alice", - "supplementary_groups": ["docker", "qubes"], + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "active_state": "inactive", + "sub_state": "dead", + "unit_file_state": "enabled", + "condition_result": "no", + "managed_files": [ + { + "path": "/etc/foo.conf", + "src_rel": "etc/foo.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": [], + "notes": [], } ], - "managed_files": [], - "excluded": [], - "notes": [], - }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [ + "packages": [ { - "path": "/etc/default/keyboard", - "src_rel": "etc/default/keyboard", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "custom_unowned", + "package": "curl", + "role_name": "curl", + "managed_files": [], + "excluded": [], + "notes": [], } ], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", - }, - { - "path": "/usr/local/bin/myscript", - "src_rel": "usr/local/bin/myscript", - "owner": "root", - "group": "root", - "mode": "0755", - "reason": "usr_local_bin_script", - }, - ], - "excluded": [], - "notes": [], - }, - "services": [ - { - "unit": "foo.service", - "role_name": "foo", - "packages": ["foo"], - "active_state": "inactive", - "sub_state": "dead", - "unit_file_state": "enabled", - "condition_result": "no", + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": 
"dnf_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", "managed_files": [ { - "path": "/etc/foo.conf", - "src_rel": "etc/foo.conf", + "path": "/etc/default/keyboard", + "src_rel": "etc/default/keyboard", "owner": "root", "group": "root", "mode": "0644", - "reason": "modified_conffile", + "reason": "custom_unowned", } ], "excluded": [], "notes": [], - } - ], - "package_roles": [ - { - "package": "curl", - "role_name": "curl", + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + }, + { + "path": "/usr/local/bin/myscript", + "src_rel": "usr/local/bin/myscript", + "owner": "root", + "group": "root", + "mode": "0755", + "reason": "usr_local_bin_script", + }, + ], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], "managed_files": [], "excluded": [], "notes": [], - } - ], + }, + }, } bundle.mkdir(parents=True, exist_ok=True) @@ -189,68 +230,102 @@ def test_manifest_site_mode_creates_host_inventory_and_raw_files(tmp_path: Path) ) state = { - "host": {"hostname": "test", "os": "debian"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], + "schema_version": 3, + "host": {"hostname": "test", "os": "debian", "pkg_backend": "dpkg"}, + "inventory": { + "packages": { + "foo": { + "version": "1.0", + "arches": [], + "installations": [{"version": "1.0", "arch": "amd64"}], + "observed_via": [{"kind": "systemd_unit", "ref": "foo.service"}], + "roles": ["foo"], + } + } }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [ + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + 
"services": [ { - "path": "/etc/default/keyboard", - "src_rel": "etc/default/keyboard", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "custom_unowned", + "unit": "foo.service", + "role_name": "foo", + "packages": ["foo"], + "active_state": "active", + "sub_state": "running", + "unit_file_state": "enabled", + "condition_result": "yes", + "managed_files": [ + { + "path": "/etc/foo.conf", + "src_rel": "etc/foo.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "modified_conffile", + } + ], + "excluded": [], + "notes": [], } ], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [ - { - "path": "/usr/local/etc/myapp.conf", - "src_rel": "usr/local/etc/myapp.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "usr_local_etc_custom", - } - ], - "excluded": [], - "notes": [], - }, - "services": [ - { - "unit": "foo.service", - "role_name": "foo", - "packages": ["foo"], - "active_state": "active", - "sub_state": "running", - "unit_file_state": "enabled", - "condition_result": "yes", + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", "managed_files": [ { - "path": "/etc/foo.conf", - "src_rel": "etc/foo.conf", + "path": "/etc/default/keyboard", + "src_rel": "etc/default/keyboard", "owner": "root", "group": "root", "mode": "0644", - "reason": "modified_conffile", + "reason": "custom_unowned", } ], "excluded": [], "notes": [], - } - ], - "package_roles": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [ + { + "path": "/usr/local/etc/myapp.conf", + "src_rel": "usr/local/etc/myapp.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "usr_local_etc_custom", + } + 
], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + }, } bundle.mkdir(parents=True, exist_ok=True) @@ -337,58 +412,70 @@ def test_manifest_includes_dnf_config_role_when_present(tmp_path: Path): ) state = { + "schema_version": 3, "host": {"hostname": "test", "os": "redhat", "pkg_backend": "rpm"}, - "users": { - "role_name": "users", - "users": [], - "managed_files": [], - "excluded": [], - "notes": [], - }, - "services": [], - "package_roles": [], - "manual_packages": [], - "manual_packages_skipped": [], - "apt_config": { - "role_name": "apt_config", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "dnf_config": { - "role_name": "dnf_config", - "managed_files": [ - { - "path": "/etc/dnf/dnf.conf", - "src_rel": "etc/dnf/dnf.conf", - "owner": "root", - "group": "root", - "mode": "0644", - "reason": "dnf_config", + "inventory": { + "packages": { + "dnf": { + "version": "4.0", + "arches": [], + "installations": [{"version": "4.0", "arch": "x86_64"}], + "observed_via": [{"kind": "dnf_config"}], + "roles": [], } - ], - "excluded": [], - "notes": [], + } }, - "etc_custom": { - "role_name": "etc_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "usr_local_custom": { - "role_name": "usr_local_custom", - "managed_files": [], - "excluded": [], - "notes": [], - }, - "extra_paths": { - "role_name": "extra_paths", - "include_patterns": [], - "exclude_patterns": [], - "managed_files": [], - "excluded": [], - "notes": [], + "roles": { + "users": { + "role_name": "users", + "users": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, + "services": [], + "packages": [], + "apt_config": { + "role_name": "apt_config", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "dnf_config": { + "role_name": "dnf_config", + "managed_files": [ + { + "path": "/etc/dnf/dnf.conf", + 
"src_rel": "etc/dnf/dnf.conf", + "owner": "root", + "group": "root", + "mode": "0644", + "reason": "dnf_config", + } + ], + "excluded": [], + "notes": [], + }, + "etc_custom": { + "role_name": "etc_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "usr_local_custom": { + "role_name": "usr_local_custom", + "managed_files": [], + "excluded": [], + "notes": [], + }, + "extra_paths": { + "role_name": "extra_paths", + "include_patterns": [], + "exclude_patterns": [], + "managed_files": [], + "excluded": [], + "notes": [], + }, }, } From f01603dac484ab5c2d835d60e3edf510577cb6d9 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 17:19:59 +1100 Subject: [PATCH 057/115] Better attribution of config files to parent service/role (not systemd helpers) --- enroll/harvest.py | 108 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 28 deletions(-) diff --git a/enroll/harvest.py b/enroll/harvest.py index 4ca3984..74ac516 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -676,6 +676,10 @@ def harvest( backend.build_etc_index() ) + # Global de-duplication across roles: each absolute path is captured at most once. + # This avoids multiple Ansible roles managing the same destination file. + captured_global: Set[str] = set() + # ------------------------- # Service roles # ------------------------- @@ -685,8 +689,45 @@ def harvest( service_role_aliases: Dict[str, Set[str]] = {} # De-dupe per-role captures (avoids duplicate tasks in manifest generation). seen_by_role: Dict[str, Set[str]] = {} - for unit in list_enabled_services(): + # Managed/excluded lists keyed by role so helper services can attribute shared + # configuration to their parent service role. 
+ managed_by_role: Dict[str, List[ManagedFile]] = {} + excluded_by_role: Dict[str, List[ExcludedFile]] = {} + + enabled_services = list_enabled_services() + enabled_set = set(enabled_services) + + def _service_sort_key(unit: str) -> tuple[int, str, str]: + # Prefer "parent" services over helpers (e.g. NetworkManager.service before + # NetworkManager-dispatcher.service) so shared config lands in the main role. + base = unit.removesuffix(".service") + base = base.split("@", 1)[0] + return (base.count("-"), base.lower(), unit.lower()) + + def _parent_service_unit(unit: str) -> Optional[str]: + # If unit name contains '-' segments, treat dashed prefixes as potential parents. + # Example: NetworkManager-dispatcher.service -> NetworkManager.service (if enabled). + if not unit.endswith(".service"): + return None + base = unit.removesuffix(".service") + base = base.split("@", 1)[0] + parts = base.split("-") + for i in range(len(parts) - 1, 0, -1): + cand = "-".join(parts[:i]) + ".service" + if cand in enabled_set: + return cand + return None + + parent_unit_for: Dict[str, str] = {} + for u in enabled_services: + pu = _parent_service_unit(u) + if pu: + parent_unit_for[u] = pu + + for unit in sorted(enabled_services, key=_service_sort_key): role = _role_name_from_unit(unit) + parent_unit = parent_unit_for.get(unit) + parent_role = _role_name_from_unit(parent_unit) if parent_unit else None try: ui = get_unit_info(unit) @@ -695,6 +736,8 @@ def harvest( # shared snippets can still be attributed to this role by name. 
service_role_aliases.setdefault(role, _hint_names(unit, set()) | {role}) seen_by_role.setdefault(role, set()) + managed = managed_by_role.setdefault(role, []) + excluded = excluded_by_role.setdefault(role, []) service_snaps.append( ServiceSnapshot( unit=unit, @@ -704,8 +747,8 @@ def harvest( sub_state=None, unit_file_state=None, condition_result=None, - managed_files=[], - excluded=[], + managed_files=managed, + excluded=excluded, notes=[str(e)], ) ) @@ -713,8 +756,8 @@ def harvest( pkgs: Set[str] = set() notes: List[str] = [] - excluded: List[ExcludedFile] = [] - managed: List[ManagedFile] = [] + excluded = excluded_by_role.setdefault(role, []) + managed = managed_by_role.setdefault(role, []) candidates: Dict[str, str] = {} if ui.fragment_path: @@ -810,18 +853,31 @@ def harvest( # De-dupe within this role while capturing. This also avoids emitting # duplicate Ansible tasks for the same destination path. - role_seen = seen_by_role.setdefault(role, set()) + # Attribute shared /etc config to the parent service role when this unit looks + # like a helper (e.g. NetworkManager-dispatcher.service -> NetworkManager.service). 
for path, reason in sorted(candidates.items()): + dest_role = role + if ( + parent_role + and path.startswith("/etc/") + and reason not in ("systemd_dropin", "systemd_envfile") + ): + dest_role = parent_role + + dest_managed = managed_by_role.setdefault(dest_role, []) + dest_excluded = excluded_by_role.setdefault(dest_role, []) + dest_seen = seen_by_role.setdefault(dest_role, set()) _capture_file( bundle_dir=bundle_dir, - role_name=role, + role_name=dest_role, abs_path=path, reason=reason, policy=policy, path_filter=path_filter, - managed_out=managed, - excluded_out=excluded, - seen_role=role_seen, + managed_out=dest_managed, + excluded_out=dest_excluded, + seen_role=dest_seen, + seen_global=captured_global, ) service_snaps.append( @@ -857,7 +913,7 @@ def harvest( s.unit: s for s in service_snaps } - for t in enabled_timers: + for t in sorted(enabled_timers): try: ti = get_timer_info(t) except Exception: # nosec @@ -895,6 +951,7 @@ def harvest( managed_out=snap.managed_files, excluded_out=snap.excluded, seen_role=role_seen, + seen_global=captured_global, ) continue @@ -935,7 +992,7 @@ def harvest( manual_pkgs_skipped: List[str] = [] pkg_snaps: List[PackageSnapshot] = [] - for pkg in manual_pkgs: + for pkg in sorted(manual_pkgs): if pkg in covered_by_services: manual_pkgs_skipped.append(pkg) continue @@ -997,6 +1054,7 @@ def harvest( managed_out=managed, excluded_out=excluded, seen_role=role_seen, + seen_global=captured_global, ) if not pkg_to_etc_paths.get(pkg, []) and not managed: @@ -1060,6 +1118,7 @@ def harvest( managed_out=users_managed, excluded_out=users_excluded, seen_role=users_role_seen, + seen_global=captured_global, ) users_snapshot = UsersSnapshot( @@ -1098,6 +1157,7 @@ def harvest( managed_out=apt_managed, excluded_out=apt_excluded, seen_role=apt_role_seen, + seen_global=captured_global, ) elif backend.name == "rpm": dnf_role_seen = seen_by_role.setdefault(dnf_role_name, set()) @@ -1112,6 +1172,7 @@ def harvest( managed_out=dnf_managed, 
excluded_out=dnf_excluded, seen_role=dnf_role_seen, + seen_global=captured_global, ) apt_config_snapshot = AptConfigSnapshot( @@ -1135,20 +1196,9 @@ def harvest( etc_managed: List[ManagedFile] = [] etc_role_name = "etc_custom" - # Build a set of files already captured by other roles. - already: Set[str] = set() - for s in service_snaps: - for mf in s.managed_files: - already.add(mf.path) - for p in pkg_snaps: - for mf in p.managed_files: - already.add(mf.path) - for mf in users_managed: - already.add(mf.path) - for mf in apt_managed: - already.add(mf.path) - for mf in dnf_managed: - already.add(mf.path) + # Files already captured by earlier roles. Use the global set so we never + # end up with the same destination path managed by multiple roles. + already: Set[str] = captured_global # Maps for re-attributing shared snippets (cron.d/logrotate.d) to existing roles. svc_by_role: Dict[str, ServiceSnapshot] = {s.role_name: s for s in service_snaps} @@ -1288,7 +1338,7 @@ def harvest( managed_out=managed_out, excluded_out=excluded_out, seen_role=role_seen, - seen_global=already, + seen_global=captured_global, ) # Walk /etc for remaining unowned config-ish files @@ -1327,7 +1377,7 @@ def harvest( managed_out=managed_out, excluded_out=excluded_out, seen_role=role_seen, - seen_global=already, + seen_global=captured_global, ): scanned += 1 if scanned >= MAX_FILES_CAP: @@ -1396,6 +1446,7 @@ def harvest( managed_out=ul_managed, excluded_out=ul_excluded, seen_role=role_seen, + seen_global=captured_global, metadata=(owner, group, mode), ): already_all.add(path) @@ -1470,6 +1521,7 @@ def harvest( managed_out=extra_managed, excluded_out=extra_excluded, seen_role=extra_role_seen, + seen_global=captured_global, ): already_all.add(path) From e44e4aaf3aa554daf5128d0cfa4720cd9d0f7f03 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Mon, 29 Dec 2025 17:39:39 +1100 Subject: [PATCH 058/115] 0.2.0 --- CHANGELOG.md | 1 + debian/changelog | 9 ++++++++- pyproject.toml | 2 +- rpm/enroll.spec | 
6 +++++- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f92e0b7..49217f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ * Add version CLI arg * Add ability to enroll RH-style systems (DNF5/DNF/RPM) + * Refactor harvest state to track package versions # 0.1.7 diff --git a/debian/changelog b/debian/changelog index eabdefc..f050e7f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,12 @@ -enroll (0.1.7) unstable; urgency=medium +enroll (0.2.0) unstable; urgency=medium + * Add version CLI arg + * Add ability to enroll RH-style systems (DNF5/DNF/RPM) + * Refactor harvest state to track package versions + + -- Miguel Jacq Mon, 29 Dec 2025 17:30:00 +1100 + +enroll (0.1.7) unstable; urgency=medium * Fix an attribution bug for certain files ending up in the wrong package/role. -- Miguel Jacq Sun, 28 Dec 2025 18:30:00 +1100 diff --git a/pyproject.toml b/pyproject.toml index ca875e8..683a9b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.1.7" +version = "0.2.0" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index f63a12c..3beac03 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.1.7 +%global upstream_version 0.2.0 Name: enroll Version: %{upstream_version} @@ -43,6 +43,10 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Mon Dec 29 2025 Miguel Jacq - %{version}-%{release} +- Add version CLI arg +- Add ability to enroll RH-style systems (DNF5/DNF/RPM) +- Refactor harvest state to track package versions * Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} - Fix an attribution bug for certain files ending up in the wrong package/role. 
* Sun Dec 28 2025 Miguel Jacq - %{version}-%{release} From e4887b7add36f3e926f7362e3e159fd9c523beeb Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 1 Jan 2026 11:02:30 +1100 Subject: [PATCH 059/115] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d075951..f4920b5 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ sudo apt update sudo apt install enroll ``` -### Fedora 42 +## Fedora ```bash sudo rpm --import https://mig5.net/static/mig5.asc From 09438246ae0557185c3343c0db6e0101f2d75385 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Thu, 1 Jan 2026 15:24:21 +1100 Subject: [PATCH 060/115] Build for Fedora 43 --- Dockerfile.rpmbuild | 8 +++++--- release.sh | 45 ++++++++++++++++++++++++++++++--------------- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/Dockerfile.rpmbuild b/Dockerfile.rpmbuild index c928cea..f76a673 100644 --- a/Dockerfile.rpmbuild +++ b/Dockerfile.rpmbuild @@ -1,5 +1,6 @@ # syntax=docker/dockerfile:1 -FROM fedora:42 +ARG BASE_IMAGE=fedora:42 +FROM ${BASE_IMAGE} RUN set -eux; \ dnf -y update; \ @@ -34,11 +35,12 @@ SRC="${SRC:-/src}" WORKROOT="${WORKROOT:-/work}" OUT="${OUT:-/out}" DEPS_DIR="${DEPS_DIR:-/deps}" - +VERSION_ID="$(grep VERSION_ID /etc/os-release | cut -d= -f2)" +echo "Version ID is ${VERSION_ID}" # Install jinjaturtle from local rpm # Filter out .src.rpm and debug* subpackages if present. 
if [ -d "${DEPS_DIR}" ] && compgen -G "${DEPS_DIR}/*.rpm" > /dev/null; then - mapfile -t rpms < <(ls -1 "${DEPS_DIR}"/*.rpm | grep -vE '(\.src\.rpm$|-(debuginfo|debugsource)-)') + mapfile -t rpms < <(ls -1 "${DEPS_DIR}"/*.rpm | grep -vE '(\.src\.rpm$|-(debuginfo|debugsource)-)' | grep "${VERSION_ID}") if [ "${#rpms[@]}" -gt 0 ]; then echo "Installing dependency RPMs from ${DEPS_DIR}:" printf ' - %s\n' "${rpms[@]}" diff --git a/release.sh b/release.sh index fdbe771..0a052c7 100755 --- a/release.sh +++ b/release.sh @@ -44,31 +44,46 @@ for dist in ${DISTS[@]}; do done # RPM -sudo apt-get -y install createrepo-c rpm -docker build -f Dockerfile.rpmbuild -t enroll:f42 --progress=plain . -docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll:f42 -sudo chown -R "${USER}" "$PWD/dist" - REPO_ROOT="${HOME}/git/repo_rpm" RPM_REPO="${REPO_ROOT}/rpm/x86_64" BUILD_OUTPUT="${HOME}/git/enroll/dist" REMOTE="letessier.mig5.net:/opt/repo_rpm" KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" -echo "==> Updating RPM repo..." mkdir -p "$RPM_REPO" +sudo apt-get -y install createrepo-c rpm -for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do - rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" +DISTS=( + fedora:43 + fedora:42 +) + +for dist in ${DISTS[@]}; do + release=$(echo ${dist} | cut -d: -f2) + docker build \ + --no-cache \ + -f Dockerfile.rpmbuild \ + -t enroll-rpm:${release} \ + --progress=plain \ + --build-arg BASE_IMAGE=${dist} \ + . + + docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll-rpm:${release} + sudo chown -R "${USER}" "$PWD/dist" + + echo "==> Updating RPM repo..." + for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do + rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" + done + + cp "${BUILD_OUTPUT}/rpm/"*.rpm "$RPM_REPO/" + + createrepo_c "$RPM_REPO" + + echo "==> Signing repomd.xml..." 
+ qubes-gpg-client --local-user "$KEYID" --detach-sign --armor "$RPM_REPO/repodata/repomd.xml" > "$RPM_REPO/repodata/repomd.xml.asc" done -cp "${BUILD_OUTPUT}/rpm/"*.rpm "$RPM_REPO/" - -createrepo_c "$RPM_REPO" - -echo "==> Signing repomd.xml..." -qubes-gpg-client --local-user "$KEYID" --detach-sign --armor "$RPM_REPO/repodata/repomd.xml" > "$RPM_REPO/repodata/repomd.xml.asc" - echo "==> Syncing repo to server..." rsync -aHPvz --exclude=.git --delete "$REPO_ROOT/" "$REMOTE/" From 781efef4678d4ee1d176a264d62423aefe6680b6 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 20:19:47 +1100 Subject: [PATCH 061/115] Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook --- CHANGELOG.md | 4 ++++ enroll/manifest.py | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49217f0..8283b5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# 0.2.1 + + * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook + # 0.2.0 * Add version CLI arg diff --git a/enroll/manifest.py b/enroll/manifest.py index bc629bb..839ebab 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -1551,8 +1551,6 @@ User-requested extra file harvesting. 
manifested_extra_paths_roles.append(role) - manifested_usr_local_custom_roles.append(role) - # ------------------------- # Service roles # ------------------------- From c88405ef01510b554846b55a5d3dd9593bb46352 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 21:10:32 +1100 Subject: [PATCH 062/115] Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files --- CHANGELOG.md | 1 + enroll/fsutil.py | 2 +- enroll/harvest.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++ enroll/ignore.py | 30 ++++++++++++++++++ enroll/manifest.py | 65 +++++++++++++++++++++++++++++++++++--- 5 files changed, 170 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8283b5b..3c41210 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # 0.2.1 * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook + * Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files # 0.2.0 diff --git a/enroll/fsutil.py b/enroll/fsutil.py index 3d18df6..c852b9e 100644 --- a/enroll/fsutil.py +++ b/enroll/fsutil.py @@ -24,7 +24,7 @@ def stat_triplet(path: str) -> Tuple[str, str, str]: mode is a zero-padded octal string (e.g. "0644"). 
""" st = os.stat(path, follow_symlinks=True) - mode = oct(st.st_mode & 0o777)[2:].zfill(4) + mode = oct(st.st_mode & 0o7777)[2:].zfill(4) import grp import pwd diff --git a/enroll/harvest.py b/enroll/harvest.py index 74ac516..98e1404 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -34,6 +34,15 @@ class ManagedFile: reason: str +@dataclass +class ManagedDir: + path: str + owner: str + group: str + mode: str + reason: str + + @dataclass class ExcludedFile: path: str @@ -109,6 +118,7 @@ class ExtraPathsSnapshot: role_name: str include_patterns: List[str] exclude_patterns: List[str] + managed_dirs: List[ManagedDir] managed_files: List[ManagedFile] excluded: List[ExcludedFile] notes: List[str] @@ -1484,12 +1494,78 @@ def harvest( extra_notes: List[str] = [] extra_excluded: List[ExcludedFile] = [] extra_managed: List[ManagedFile] = [] + extra_managed_dirs: List[ManagedDir] = [] + extra_dir_seen: Set[str] = set() + + def _walk_and_capture_dirs(root: str) -> None: + root = os.path.normpath(root) + if not root.startswith("/"): + root = "/" + root + if not os.path.isdir(root) or os.path.islink(root): + return + for dirpath, dirnames, _ in os.walk(root, followlinks=False): + if len(extra_managed_dirs) >= MAX_FILES_CAP: + extra_notes.append( + f"Reached directory cap ({MAX_FILES_CAP}) while scanning {root}." + ) + return + dirpath = os.path.normpath(dirpath) + if not dirpath.startswith("/"): + dirpath = "/" + dirpath + if path_filter.is_excluded(dirpath): + # Prune excluded subtrees. 
+ dirnames[:] = [] + continue + if os.path.islink(dirpath) or not os.path.isdir(dirpath): + dirnames[:] = [] + continue + + if dirpath not in extra_dir_seen: + deny = policy.deny_reason_dir(dirpath) + if not deny: + try: + owner, group, mode = stat_triplet(dirpath) + extra_managed_dirs.append( + ManagedDir( + path=dirpath, + owner=owner, + group=group, + mode=mode, + reason="user_include_dir", + ) + ) + except OSError: + pass + extra_dir_seen.add(dirpath) + + # Prune excluded dirs and symlinks early. + pruned: List[str] = [] + for d in dirnames: + p = os.path.join(dirpath, d) + if os.path.islink(p) or path_filter.is_excluded(p): + continue + pruned.append(d) + dirnames[:] = pruned + extra_role_name = "extra_paths" extra_role_seen = seen_by_role.setdefault(extra_role_name, set()) include_specs = list(include_paths or []) exclude_specs = list(exclude_paths or []) + # If any include pattern points at a directory, capture that directory tree's + # ownership/mode so the manifest can recreate it accurately. 
+ include_pats = path_filter.iter_include_patterns() + for pat in include_pats: + if pat.kind == "prefix": + p = pat.value + if os.path.isdir(p) and not os.path.islink(p): + _walk_and_capture_dirs(p) + elif pat.kind == "glob": + for h in glob.glob(pat.value, recursive=True): + if os.path.isdir(h) and not os.path.islink(h): + _walk_and_capture_dirs(h) + if include_specs: extra_notes.append("User include patterns:") extra_notes.extend([f"- {p}" for p in include_specs]) @@ -1529,6 +1605,7 @@ def harvest( role_name=extra_role_name, include_patterns=include_specs, exclude_patterns=exclude_specs, + managed_dirs=extra_managed_dirs, managed_files=extra_managed, excluded=extra_excluded, notes=extra_notes, diff --git a/enroll/ignore.py b/enroll/ignore.py index 904997f..895c030 100644 --- a/enroll/ignore.py +++ b/enroll/ignore.py @@ -137,3 +137,33 @@ class IgnorePolicy: return "sensitive_content" return None + + def deny_reason_dir(self, path: str) -> Optional[str]: + """Directory-specific deny logic. + + deny_reason() is file-oriented (it rejects directories as "not_regular_file"). + For directory metadata capture (so roles can recreate directory trees), we need + a lighter-weight check: + - apply deny_globs (unless dangerous) + - require the path to be a real directory (no symlink) + - ensure it's stat'able/readable + + No size checks or content scanning are performed for directories. 
+ """ + if not self.dangerous: + for g in self.deny_globs or []: + if fnmatch.fnmatch(path, g): + return "denied_path" + + try: + os.stat(path, follow_symlinks=True) + except OSError: + return "unreadable" + + if os.path.islink(path): + return "symlink" + + if not os.path.isdir(path): + return "not_directory" + + return None diff --git a/enroll/manifest.py b/enroll/manifest.py index 839ebab..a373773 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -344,6 +344,29 @@ def _write_role_defaults(role_dir: str, mapping: Dict[str, Any]) -> None: f.write(out) +def _build_managed_dirs_var( + managed_dirs: List[Dict[str, Any]], +) -> List[Dict[str, Any]]: + """Convert enroll managed_dirs into an Ansible-friendly list of dicts. + + Each dict drives a role task loop and is safe across hosts. + """ + out: List[Dict[str, Any]] = [] + for d in managed_dirs: + dest = d.get("path") or "" + if not dest: + continue + out.append( + { + "dest": dest, + "owner": d.get("owner") or "root", + "group": d.get("group") or "root", + "mode": d.get("mode") or "0755", + } + ) + return out + + def _build_managed_files_var( managed_files: List[Dict[str, Any]], templated_src_rels: Set[str], @@ -390,7 +413,22 @@ def _render_generic_files_tasks( # Using first_found makes roles work in both modes: # - site-mode: inventory/host_vars///.files/... # - non-site: roles//files/... 
- return f"""- name: Deploy any systemd unit files (templates) + return f"""- name: Ensure managed directories exist (preserve owner/group/mode) + ansible.builtin.file: + path: "{{{{ item.dest }}}}" + state: directory + owner: "{{{{ item.owner }}}}" + group: "{{{{ item.group }}}}" + mode: "{{{{ item.mode }}}}" + loop: "{{{{ {var_prefix}_managed_dirs | default([]) }}}}" + +- name: Ensure destination directories exist + ansible.builtin.file: + path: "{{{{ item.dest | dirname }}}}" + state: directory + loop: "{{{{ {var_prefix}_managed_files | default([]) }}}}" + +- name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" dest: "{{{{ item.dest }}}}" @@ -1444,13 +1482,17 @@ Unowned /etc config files not attributed to packages or services. # ------------------------- # extra_paths role (user-requested includes) # ------------------------- - if extra_paths_snapshot and extra_paths_snapshot.get("managed_files"): + if extra_paths_snapshot and ( + extra_paths_snapshot.get("managed_files") + or extra_paths_snapshot.get("managed_dirs") + ): role = extra_paths_snapshot.get("role_name", "extra_paths") role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) var_prefix = role + managed_dirs = extra_paths_snapshot.get("managed_dirs", []) or [] managed_files = extra_paths_snapshot.get("managed_files", []) excluded = extra_paths_snapshot.get("excluded", []) notes = extra_paths_snapshot.get("notes", []) @@ -1489,12 +1531,23 @@ Unowned /etc config files not attributed to packages or services. 
notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_dirs": dirs_var, + f"{var_prefix}_managed_files": files_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + { + f"{var_prefix}_managed_dirs": [], + f"{var_prefix}_managed_files": [], + }, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1530,6 +1583,10 @@ User-requested extra file harvesting. """ + ("\n".join([f"- {p}" for p in exclude_pats]) or "- (none)") + """\n +## Managed directories +""" + + ("\n".join([f"- {d.get('path')}" for d in managed_dirs]) or "- (none)") + + """\n ## Managed files """ + ("\n".join([f"- {mf.get('path')}" for mf in managed_files]) or "- (none)") From 29b52d451d4d477ea2f9d05fdc5c85fe8f8ecd16 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 21:29:16 +1100 Subject: [PATCH 063/115] 0.2.1 --- debian/changelog | 7 +++++++ pyproject.toml | 2 +- rpm/enroll.spec | 5 ++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index f050e7f..dbc7548 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.2.1) unstable; urgency=medium + + * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook + * Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files + + -- Miguel Jacq Fri, 01 Jan 2026 21:30:00 +1100 + enroll (0.2.0) unstable; urgency=medium * Add version CLI arg diff --git a/pyproject.toml b/pyproject.toml index 683a9b2..34f411e 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.2.0" +version = "0.2.1" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 3beac03..8fc8cac 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.2.0 +%global upstream_version 0.2.1 Name: enroll Version: %{upstream_version} @@ -43,6 +43,9 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Fri Jan 01 2026 Miguel Jacq - %{version}-%{release} +- Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook +- Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files * Mon Dec 29 2025 Miguel Jacq - %{version}-%{release} - Add version CLI arg - Add ability to enroll RH-style systems (DNF5/DNF/RPM) From 824010b2ab15865b0c1845d8cc9e67a80c7accf2 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 3 Jan 2026 11:39:57 +1100 Subject: [PATCH 064/115] Several bug fixes and prep for 0.2.2 - Fix stat() of parent directory so that we set directory perms correct on --include paths. 
- Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty --- CHANGELOG.md | 5 ++ README.md | 2 +- debian/changelog | 7 ++ enroll/harvest.py | 171 +++++++++++++++++++++++++++++++++++++++------ enroll/manifest.py | 68 ++++++++++++++---- enroll/remote.py | 34 +++++---- pyproject.toml | 2 +- release.sh | 16 ++--- rpm/enroll.spec | 5 +- 9 files changed, 249 insertions(+), 61 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c41210..0740cb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +# 0.2.2 + + * Fix stat() of parent directory so that we set directory perms correct on --include paths. + * Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty + # 0.2.1 * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook diff --git a/README.md b/README.md index f4920b5..e399633 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ sudo rpm --import https://mig5.net/static/mig5.asc sudo tee /etc/yum.repos.d/mig5.repo > /dev/null << 'EOF' [mig5] name=mig5 Repository -baseurl=https://rpm.mig5.net/rpm/$basearch +baseurl=https://rpm.mig5.net/rpm/$releasever/$basearch enabled=1 gpgcheck=1 repo_gpgcheck=1 diff --git a/debian/changelog b/debian/changelog index dbc7548..8c2f4b9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +enroll (0.2.2) unstable; urgency=medium + + * Fix stat() of parent directory so that we set directory perms correct on --include paths. 
+ * Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty + + -- Miguel Jacq Sat, 02 Jan 2026 09:56:00 +1100 + enroll (0.2.1) unstable; urgency=medium * Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook diff --git a/enroll/harvest.py b/enroll/harvest.py index 98e1404..7aba7c6 100644 --- a/enroll/harvest.py +++ b/enroll/harvest.py @@ -6,7 +6,7 @@ import os import re import shutil import time -from dataclasses import dataclass, asdict +from dataclasses import dataclass, asdict, field from typing import Dict, List, Optional, Set from .systemd import ( @@ -58,59 +58,66 @@ class ServiceSnapshot: sub_state: Optional[str] unit_file_state: Optional[str] condition_result: Optional[str] - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class PackageSnapshot: package: str role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class UsersSnapshot: role_name: str users: List[dict] - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class AptConfigSnapshot: role_name: str - managed_files: List[ManagedFile] - 
excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class DnfConfigSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class EtcCustomSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass class UsrLocalCustomSnapshot: role_name: str - managed_files: List[ManagedFile] - excluded: List[ExcludedFile] - notes: List[str] + managed_dirs: List[ManagedDir] = field(default_factory=list) + managed_files: List[ManagedFile] = field(default_factory=list) + excluded: List[ExcludedFile] = field(default_factory=list) + notes: List[str] = field(default_factory=list) @dataclass @@ -149,6 +156,71 @@ ALLOWED_UNOWNED_EXTS = { MAX_FILES_CAP = 4000 MAX_UNOWNED_FILES_PER_ROLE = 500 + +def _merge_parent_dirs( + existing_dirs: List[ManagedDir], + managed_files: List[ManagedFile], + *, + policy: IgnorePolicy, +) -> List[ManagedDir]: + """Ensure parent directories for managed_files are present in managed_dirs. + + This is used so the Ansible manifest can create destination directories with + explicit owner/group/mode (ansible-lint friendly) without needing a separate + "mkdir without perms" task. 
+ + We only add the immediate parent directory for each managed file. For + explicit directory includes (extra_paths), existing_dirs will already + contain the walked directory tree. + """ + by_path: Dict[str, ManagedDir] = { + d.path: d for d in (existing_dirs or []) if d.path + } + + for mf in managed_files or []: + p = str(mf.path or "").rstrip("/") + if not p: + continue + dpath = os.path.dirname(p) + if not dpath or dpath == "/": + continue + if dpath in by_path: + continue + + # Directory-deny logic: newer IgnorePolicy implementations provide + # deny_reason_dir(). Older/simple policies (including unit tests) may + # only implement deny_reason(), which is file-oriented and may return + # "not_regular_file" for directories. + deny = None + deny_dir = getattr(policy, "deny_reason_dir", None) + if callable(deny_dir): + deny = deny_dir(dpath) + else: + deny = policy.deny_reason(dpath) + if deny in ("not_regular_file", "not_file", "not_regular"): + deny = None + if deny: + # If the file itself was captured, its parent directory is likely safe, + # but still respect deny globs for directories to avoid managing + # sensitive/forbidden trees. + continue + + try: + owner, group, mode = stat_triplet(dpath) + except OSError: + continue + + by_path[dpath] = ManagedDir( + path=dpath, + owner=owner, + group=group, + mode=mode, + reason="parent_of_managed_file", + ) + + return [by_path[k] for k in sorted(by_path)] + + # Directories that are shared across many packages. # Never attribute all unowned files in these trees # to one single package. 
@@ -1521,7 +1593,14 @@ def harvest( continue if dirpath not in extra_dir_seen: - deny = policy.deny_reason_dir(dirpath) + deny = None + deny_dir = getattr(policy, "deny_reason_dir", None) + if callable(deny_dir): + deny = deny_dir(dirpath) + else: + deny = policy.deny_reason(dirpath) + if deny in ("not_regular_file", "not_file", "not_regular"): + deny = None if not deny: try: owner, group, mode = stat_triplet(dirpath) @@ -1661,6 +1740,52 @@ def harvest( "roles": roles, } + # Ensure every role has explicit managed_dirs for parent directories of managed files. + # This lets the manifest create directories with owner/group/mode (ansible-lint friendly) + # without a separate "mkdir without perms" task. + users_snapshot.managed_dirs = _merge_parent_dirs( + users_snapshot.managed_dirs, users_snapshot.managed_files, policy=policy + ) + for s in service_snaps: + s.managed_dirs = _merge_parent_dirs( + s.managed_dirs, s.managed_files, policy=policy + ) + for p in pkg_snaps: + p.managed_dirs = _merge_parent_dirs( + p.managed_dirs, p.managed_files, policy=policy + ) + + if apt_config_snapshot: + apt_config_snapshot.managed_dirs = _merge_parent_dirs( + apt_config_snapshot.managed_dirs, + apt_config_snapshot.managed_files, + policy=policy, + ) + if dnf_config_snapshot: + dnf_config_snapshot.managed_dirs = _merge_parent_dirs( + dnf_config_snapshot.managed_dirs, + dnf_config_snapshot.managed_files, + policy=policy, + ) + if etc_custom_snapshot: + etc_custom_snapshot.managed_dirs = _merge_parent_dirs( + etc_custom_snapshot.managed_dirs, + etc_custom_snapshot.managed_files, + policy=policy, + ) + if usr_local_custom_snapshot: + usr_local_custom_snapshot.managed_dirs = _merge_parent_dirs( + usr_local_custom_snapshot.managed_dirs, + usr_local_custom_snapshot.managed_files, + policy=policy, + ) + if extra_paths_snapshot: + extra_paths_snapshot.managed_dirs = _merge_parent_dirs( + extra_paths_snapshot.managed_dirs, + extra_paths_snapshot.managed_files, + policy=policy, + ) + state = { 
"enroll": { "version": get_enroll_version(), diff --git a/enroll/manifest.py b/enroll/manifest.py index a373773..f30e5f3 100644 --- a/enroll/manifest.py +++ b/enroll/manifest.py @@ -422,12 +422,6 @@ def _render_generic_files_tasks( mode: "{{{{ item.mode }}}}" loop: "{{{{ {var_prefix}_managed_dirs | default([]) }}}}" -- name: Ensure destination directories exist - ansible.builtin.file: - path: "{{{{ item.dest | dirname }}}}" - state: directory - loop: "{{{{ {var_prefix}_managed_files | default([]) }}}}" - - name: Deploy any systemd unit files (templates) ansible.builtin.template: src: "{{{{ item.src_rel }}}}.j2" @@ -983,6 +977,7 @@ Generated non-system user accounts and SSH public material. var_prefix = role managed_files = apt_config_snapshot.get("managed_files", []) + managed_dirs = apt_config_snapshot.get("managed_dirs", []) or [] excluded = apt_config_snapshot.get("excluded", []) notes = apt_config_snapshot.get("notes", []) @@ -1019,12 +1014,20 @@ Generated non-system user accounts and SSH public material. notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1134,6 +1137,7 @@ APT configuration harvested from the system (sources, pinning, and keyrings). 
var_prefix = role managed_files = dnf_config_snapshot.get("managed_files", []) + managed_dirs = dnf_config_snapshot.get("managed_dirs", []) or [] excluded = dnf_config_snapshot.get("excluded", []) notes = dnf_config_snapshot.get("notes", []) @@ -1169,12 +1173,20 @@ APT configuration harvested from the system (sources, pinning, and keyrings). notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1285,6 +1297,7 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP var_prefix = role managed_files = etc_custom_snapshot.get("managed_files", []) + managed_dirs = etc_custom_snapshot.get("managed_dirs", []) or [] excluded = etc_custom_snapshot.get("excluded", []) notes = etc_custom_snapshot.get("notes", []) @@ -1321,12 +1334,20 @@ DNF/YUM configuration harvested from the system (repos, config files, and RPM GP notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + 
role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1395,6 +1416,7 @@ Unowned /etc config files not attributed to packages or services. var_prefix = role managed_files = usr_local_custom_snapshot.get("managed_files", []) + managed_dirs = usr_local_custom_snapshot.get("managed_dirs", []) or [] excluded = usr_local_custom_snapshot.get("excluded", []) notes = usr_local_custom_snapshot.get("notes", []) @@ -1431,12 +1453,20 @@ Unowned /etc config files not attributed to packages or services. notify_systemd=None, ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} - vars_map: Dict[str, Any] = {f"{var_prefix}_managed_files": files_var} + vars_map: Dict[str, Any] = { + f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, + } vars_map = _merge_mappings_overwrite(vars_map, jt_map) if site_mode: - _write_role_defaults(role_dir, {f"{var_prefix}_managed_files": []}) + _write_role_defaults( + role_dir, + {f"{var_prefix}_managed_files": [], f"{var_prefix}_managed_dirs": []}, + ) _write_hostvars(out_dir, fqdn or "", role, vars_map) else: _write_role_defaults(role_dir, vars_map) @@ -1616,6 +1646,7 @@ User-requested extra file harvesting. unit = svc["unit"] pkgs = svc.get("packages", []) or [] managed_files = svc.get("managed_files", []) or [] + managed_dirs = svc.get("managed_dirs", []) or [] role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) @@ -1660,11 +1691,14 @@ User-requested extra file harvesting. 
notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} base_vars: Dict[str, Any] = { f"{var_prefix}_unit_name": unit, f"{var_prefix}_packages": pkgs, f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, f"{var_prefix}_manage_unit": True, f"{var_prefix}_systemd_enabled": bool(enabled_at_harvest), f"{var_prefix}_systemd_state": desired_state, @@ -1679,6 +1713,7 @@ User-requested extra file harvesting. f"{var_prefix}_unit_name": unit, f"{var_prefix}_packages": [], f"{var_prefix}_managed_files": [], + f"{var_prefix}_managed_dirs": [], f"{var_prefix}_manage_unit": False, f"{var_prefix}_systemd_enabled": False, f"{var_prefix}_systemd_state": "stopped", @@ -1782,6 +1817,7 @@ Generated from `{unit}`. role = pr["role_name"] pkg = pr.get("package") or "" managed_files = pr.get("managed_files", []) or [] + managed_dirs = pr.get("managed_dirs", []) or [] role_dir = os.path.join(roles_root, role) _write_role_scaffold(role_dir) @@ -1823,10 +1859,13 @@ Generated from `{unit}`. notify_systemd="Run systemd daemon-reload", ) + dirs_var = _build_managed_dirs_var(managed_dirs) + jt_map = _yaml_load_mapping(jt_vars) if jt_vars.strip() else {} base_vars: Dict[str, Any] = { f"{var_prefix}_packages": pkgs, f"{var_prefix}_managed_files": files_var, + f"{var_prefix}_managed_dirs": dirs_var, } base_vars = _merge_mappings_overwrite(base_vars, jt_map) @@ -1836,6 +1875,7 @@ Generated from `{unit}`. { f"{var_prefix}_packages": [], f"{var_prefix}_managed_files": [], + f"{var_prefix}_managed_dirs": [], }, ) _write_hostvars(out_dir, fqdn or "", role, base_vars) diff --git a/enroll/remote.py b/enroll/remote.py index 9618512..b86cd08 100644 --- a/enroll/remote.py +++ b/enroll/remote.py @@ -16,7 +16,6 @@ def _safe_extract_tar(tar: tarfile.TarFile, dest: Path) -> None: Protects against path traversal (e.g. entries containing ../). 
""" - # Note: tar member names use POSIX separators regardless of platform. dest = dest.resolve() @@ -80,9 +79,18 @@ def _build_enroll_pyz(tmpdir: Path) -> Path: return pyz_path -def _ssh_run(ssh, cmd: str) -> tuple[int, str, str]: - """Run a command over a Paramiko SSHClient.""" - _stdin, stdout, stderr = ssh.exec_command(cmd) +def _ssh_run(ssh, cmd: str, *, get_pty: bool = False) -> tuple[int, str, str]: + """Run a command over a Paramiko SSHClient. + + Paramiko's exec_command runs commands without a TTY by default. + Some hosts have sudoers "requiretty" enabled, which causes sudo to + fail even when passwordless sudo is configured. For those commands, + request a PTY. + + We do not request a PTY for commands that stream binary data + (e.g. tar/gzip output), as a PTY can corrupt the byte stream. + """ + _stdin, stdout, stderr = ssh.exec_command(cmd, get_pty=get_pty) out = stdout.read().decode("utf-8", errors="replace") err = stderr.read().decode("utf-8", errors="replace") rc = stdout.channel.recv_exit_status() @@ -105,7 +113,6 @@ def remote_harvest( Returns the local path to state.json inside local_out_dir. """ - try: import paramiko # type: ignore except Exception as e: @@ -182,34 +189,35 @@ def remote_harvest( for p in exclude_paths or []: argv.extend(["--exclude-path", str(p)]) - _cmd = " ".join(shlex.quote(a) for a in argv) - if not no_sudo: - cmd = f"sudo {_cmd}" - else: - cmd = _cmd - rc, out, err = _ssh_run(ssh, cmd) + _cmd = " ".join(map(shlex.quote, argv)) + cmd = f"sudo {_cmd}" if not no_sudo else _cmd + + # PTY for sudo commands (helps sudoers requiretty). + rc, out, err = _ssh_run(ssh, cmd, get_pty=(not no_sudo)) if rc != 0: raise RuntimeError( "Remote harvest failed.\n" f"Command: {cmd}\n" f"Exit code: {rc}\n" + f"Stdout: {out.strip()}\n" f"Stderr: {err.strip()}" ) if not no_sudo: - # Ensure user can read the files, before we tar it + # Ensure user can read the files, before we tar it. 
if not resolved_user: raise RuntimeError( "Unable to determine remote username for chown. " "Pass --remote-user explicitly or use --no-sudo." ) cmd = f"sudo chown -R {resolved_user} {rbundle}" - rc, out, err = _ssh_run(ssh, cmd) + rc, out, err = _ssh_run(ssh, cmd, get_pty=True) if rc != 0: raise RuntimeError( "chown of harvest failed.\n" f"Command: {cmd}\n" f"Exit code: {rc}\n" + f"Stdout: {out.strip()}\n" f"Stderr: {err.strip()}" ) diff --git a/pyproject.toml b/pyproject.toml index 34f411e..72dd732 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "enroll" -version = "0.2.1" +version = "0.2.2" description = "Enroll a server's running state retrospectively into Ansible" authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/release.sh b/release.sh index 0a052c7..db3f27b 100755 --- a/release.sh +++ b/release.sh @@ -44,14 +44,11 @@ for dist in ${DISTS[@]}; do done # RPM -REPO_ROOT="${HOME}/git/repo_rpm" -RPM_REPO="${REPO_ROOT}/rpm/x86_64" -BUILD_OUTPUT="${HOME}/git/enroll/dist" -REMOTE="letessier.mig5.net:/opt/repo_rpm" -KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" - -mkdir -p "$RPM_REPO" sudo apt-get -y install createrepo-c rpm +BUILD_OUTPUT="${HOME}/git/enroll/dist" +KEYID="00AE817C24A10C2540461A9C1D7CDE0234DB458D" +REPO_ROOT="${HOME}/git/repo_rpm" +REMOTE="letessier.mig5.net:/opt/repo_rpm" DISTS=( fedora:43 @@ -60,6 +57,10 @@ DISTS=( for dist in ${DISTS[@]}; do release=$(echo ${dist} | cut -d: -f2) + REPO_RELEASE_ROOT="${REPO_ROOT}/fc${release}" + RPM_REPO="${REPO_RELEASE_ROOT}/rpm/x86_64" + mkdir -p "$RPM_REPO" + docker build \ --no-cache \ -f Dockerfile.rpmbuild \ @@ -71,7 +72,6 @@ for dist in ${DISTS[@]}; do docker run --rm -v "$PWD":/src -v "$PWD/dist/rpm":/out -v "$HOME/git/jinjaturtle/dist/rpm":/deps:ro enroll-rpm:${release} sudo chown -R "${USER}" "$PWD/dist" - echo "==> Updating RPM repo..." 
for file in `ls -1 "${BUILD_OUTPUT}/rpm"`; do rpmsign --addsign "${BUILD_OUTPUT}/rpm/$file" done diff --git a/rpm/enroll.spec b/rpm/enroll.spec index 8fc8cac..12286fa 100644 --- a/rpm/enroll.spec +++ b/rpm/enroll.spec @@ -1,4 +1,4 @@ -%global upstream_version 0.2.1 +%global upstream_version 0.2.2 Name: enroll Version: %{upstream_version} @@ -43,6 +43,9 @@ Enroll a server's running state retrospectively into Ansible. %{_bindir}/enroll %changelog +* Sat Jan 02 2026 Miguel Jacq - %{version}-%{release} +- Fix stat() of parent directory so that we set directory perms correct on --include paths. +- Set pty for remote calls when sudo is required, to help systems with limits on sudo without pty * Fri Jan 01 2026 Miguel Jacq - %{version}-%{release} - Don't accidentally add extra_paths role to usr_local_custom list, resulting in extra_paths appearing twice in manifested playbook - Ensure directories in the tree of anything included with --include are defined in the state and manifest so we make dirs before we try to create files From 6c3275b44a9ca1ebeac4caec02cb650e996837c5 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Sat, 3 Jan 2026 11:46:40 +1100 Subject: [PATCH 065/115] Fix tests --- tests/test_cli_config_and_sops.py | 189 +++++++++++++++++ tests/test_more_coverage.py | 323 ++++++++++++++++++++++++++++++ tests/test_remote.py | 18 +- 3 files changed, 525 insertions(+), 5 deletions(-) create mode 100644 tests/test_cli_config_and_sops.py create mode 100644 tests/test_more_coverage.py diff --git a/tests/test_cli_config_and_sops.py b/tests/test_cli_config_and_sops.py new file mode 100644 index 0000000..7e3fe5b --- /dev/null +++ b/tests/test_cli_config_and_sops.py @@ -0,0 +1,189 @@ +from __future__ import annotations + +import argparse +import configparser +import tarfile +from pathlib import Path + + +def test_discover_config_path_precedence(monkeypatch, tmp_path: Path): + from enroll.cli import _discover_config_path + + cfg = tmp_path / "cfg.ini" + 
cfg.write_text("[enroll]\n", encoding="utf-8") + + # --no-config always wins + monkeypatch.setenv("ENROLL_CONFIG", str(cfg)) + assert _discover_config_path(["--no-config", "harvest"]) is None + + # explicit --config wins + assert _discover_config_path(["--config", str(cfg), "harvest"]) == cfg + + # env var used when present + assert _discover_config_path(["harvest"]) == cfg + + +def test_discover_config_path_finds_local_and_xdg(monkeypatch, tmp_path: Path): + from enroll.cli import _discover_config_path + + # local file in cwd + cwd = tmp_path / "cwd" + cwd.mkdir() + local = cwd / "enroll.ini" + local.write_text("[enroll]\n", encoding="utf-8") + + monkeypatch.chdir(cwd) + monkeypatch.delenv("ENROLL_CONFIG", raising=False) + monkeypatch.delenv("XDG_CONFIG_HOME", raising=False) + assert _discover_config_path(["harvest"]) == local + + # xdg config fallback + monkeypatch.chdir(tmp_path) + xdg = tmp_path / "xdg" + (xdg / "enroll").mkdir(parents=True) + xcfg = xdg / "enroll" / "enroll.ini" + xcfg.write_text("[enroll]\n", encoding="utf-8") + monkeypatch.setenv("XDG_CONFIG_HOME", str(xdg)) + assert _discover_config_path(["harvest"]) == xcfg + + +def test_section_to_argv_supports_bool_append_count_and_unknown(monkeypatch, capsys): + from enroll.cli import _section_to_argv + + ap = argparse.ArgumentParser(add_help=False) + ap.add_argument("--flag", action="store_true") + ap.add_argument("--no-flag", action="store_false", dest="flag2") + ap.add_argument("--item", action="append", default=[]) + ap.add_argument("-v", action="count", default=0) + + cfg = configparser.ConfigParser() + cfg.read_dict( + { + "enroll": { + "flag": "true", + "no_flag": "false", + "item": "a,b", + "v": "2", + "unknown_key": "zzz", + } + } + ) + + argv = _section_to_argv(ap, cfg, "enroll") + + # bools set + assert "--flag" in argv + assert "--no-flag" in argv + + # append expanded + assert argv.count("--item") == 2 + assert "a" in argv and "b" in argv + + # count flag expanded + assert argv.count("-v") 
== 2 + + # unknown key prints warning + err = capsys.readouterr().err + assert "unknown option" in err + + +def test_inject_config_argv_inserts_global_and_command_tokens(tmp_path: Path): + from enroll.cli import _inject_config_argv + + root = argparse.ArgumentParser(add_help=False) + root.add_argument("--root-flag", action="store_true") + sub = root.add_subparsers(dest="cmd", required=True) + p_h = sub.add_parser("harvest", add_help=False) + p_h.add_argument("--dangerous", action="store_true") + p_h.add_argument("--include-path", action="append", default=[]) + + cfg_path = tmp_path / "enroll.ini" + cfg_path.write_text( + """[enroll] +root-flag = true + +[harvest] +dangerous = true +include-path = /etc/one,/etc/two +""", + encoding="utf-8", + ) + + argv = ["harvest", "--include-path", "/etc/cli"] + injected = _inject_config_argv( + argv, + cfg_path=cfg_path, + root_parser=root, + subparsers={"harvest": p_h}, + ) + + # global inserted before cmd, subcommand tokens right after cmd + assert injected[:2] == ["--root-flag", "harvest"] + # include-path from config inserted before CLI include-path (CLI wins later if duplicates) + joined = " ".join(injected) + assert "--include-path /etc/one" in joined + assert "--include-path /etc/cli" in joined + + +def test_resolve_sops_out_file_and_encrypt_path(monkeypatch, tmp_path: Path): + from enroll import cli + + # directory output should yield harvest.tar.gz.sops inside + out_dir = tmp_path / "o" + out_dir.mkdir() + assert ( + cli._resolve_sops_out_file(str(out_dir), hint="h").name == "harvest.tar.gz.sops" + ) + + # file-like output retained + out_file = tmp_path / "x.sops" + assert cli._resolve_sops_out_file(str(out_file), hint="h") == out_file + + # None uses cache dir + class HC: + def __init__(self, d: Path): + self.dir = d + + monkeypatch.setattr( + cli, "new_harvest_cache_dir", lambda hint: HC(tmp_path / "cache") + ) + p = cli._resolve_sops_out_file(None, hint="h") + assert str(p).endswith("harvest.tar.gz.sops") + + # Cover 
_tar_dir_to quickly (writes a tarball) + bundle = tmp_path / "bundle" + bundle.mkdir() + (bundle / "state.json").write_text("{}", encoding="utf-8") + tar_path = tmp_path / "b.tar.gz" + cli._tar_dir_to(bundle, tar_path) + assert tar_path.exists() + with tarfile.open(tar_path, "r:gz") as tf: + names = tf.getnames() + assert "state.json" in names or "./state.json" in names + + +def test_encrypt_harvest_dir_to_sops_cleans_up_tmp_tgz(monkeypatch, tmp_path: Path): + from enroll.cli import _encrypt_harvest_dir_to_sops + + bundle = tmp_path / "bundle" + bundle.mkdir() + (bundle / "state.json").write_text("{}", encoding="utf-8") + out_file = tmp_path / "out.sops" + + seen = {} + + def fake_encrypt(src: Path, dst: Path, pgp_fingerprints, mode): # noqa: ARG001 + # write something so we can see output created + seen["src"] = src + dst.write_bytes(b"enc") + + monkeypatch.setattr("enroll.cli.encrypt_file_binary", fake_encrypt) + + # Make os.unlink raise FileNotFoundError to hit the except branch in finally. + monkeypatch.setattr( + "enroll.cli.os.unlink", lambda p: (_ for _ in ()).throw(FileNotFoundError()) + ) + + res = _encrypt_harvest_dir_to_sops(bundle, out_file, fps=["ABC"]) + assert res == out_file + assert out_file.read_bytes() == b"enc" diff --git a/tests/test_more_coverage.py b/tests/test_more_coverage.py new file mode 100644 index 0000000..2c6693a --- /dev/null +++ b/tests/test_more_coverage.py @@ -0,0 +1,323 @@ +from __future__ import annotations + +import json +import os +import subprocess +import sys +import types +from pathlib import Path +from types import SimpleNamespace + +import pytest + + +def test_cache_dir_defaults_to_home_cache(monkeypatch, tmp_path: Path): + # Ensure default path uses ~/.cache when XDG_CACHE_HOME is unset. 
+ from enroll.cache import enroll_cache_dir + + monkeypatch.delenv("XDG_CACHE_HOME", raising=False) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + + p = enroll_cache_dir() + assert str(p).startswith(str(tmp_path)) + assert p.name == "enroll" + + +def test_harvest_cache_state_json_property(tmp_path: Path): + from enroll.cache import HarvestCache + + hc = HarvestCache(tmp_path / "h1") + assert hc.state_json == hc.dir / "state.json" + + +def test_cache_dir_security_rejects_symlink(tmp_path: Path): + from enroll.cache import _ensure_dir_secure + + real = tmp_path / "real" + real.mkdir() + link = tmp_path / "link" + link.symlink_to(real, target_is_directory=True) + + with pytest.raises(RuntimeError, match="Refusing to use symlink"): + _ensure_dir_secure(link) + + +def test_cache_dir_chmod_failures_are_ignored(monkeypatch, tmp_path: Path): + from enroll import cache + + # Make the cache base path deterministic and writable. + monkeypatch.setattr(cache, "enroll_cache_dir", lambda: tmp_path) + + # Force os.chmod to fail to cover the "except OSError: pass" paths. + monkeypatch.setattr( + os, "chmod", lambda *a, **k: (_ for _ in ()).throw(OSError("nope")) + ) + + hc = cache.new_harvest_cache_dir() + assert hc.dir.exists() + assert hc.dir.is_dir() + + +def test_stat_triplet_falls_back_to_numeric_ids(monkeypatch, tmp_path: Path): + from enroll.fsutil import stat_triplet + import pwd + import grp + + p = tmp_path / "x" + p.write_text("x", encoding="utf-8") + + # Force username/group resolution failures. 
+ monkeypatch.setattr( + pwd, "getpwuid", lambda _uid: (_ for _ in ()).throw(KeyError("no user")) + ) + monkeypatch.setattr( + grp, "getgrgid", lambda _gid: (_ for _ in ()).throw(KeyError("no group")) + ) + + owner, group, mode = stat_triplet(str(p)) + assert owner.isdigit() + assert group.isdigit() + assert len(mode) == 4 + + +def test_ignore_policy_iter_effective_lines_removes_block_comments(): + from enroll.ignore import IgnorePolicy + + pol = IgnorePolicy() + data = b"""keep1 +/* +drop me +*/ +keep2 +""" + assert list(pol.iter_effective_lines(data)) == [b"keep1", b"keep2"] + + +def test_ignore_policy_deny_reason_dir_variants(tmp_path: Path): + from enroll.ignore import IgnorePolicy + + pol = IgnorePolicy() + + # denied by glob + assert pol.deny_reason_dir("/etc/shadow") == "denied_path" + + # symlink rejected + d = tmp_path / "d" + d.mkdir() + link = tmp_path / "l" + link.symlink_to(d, target_is_directory=True) + assert pol.deny_reason_dir(str(link)) == "symlink" + + # not a directory + f = tmp_path / "f" + f.write_text("x", encoding="utf-8") + assert pol.deny_reason_dir(str(f)) == "not_directory" + + # ok + assert pol.deny_reason_dir(str(d)) is None + + +def test_run_jinjaturtle_parses_outputs(monkeypatch, tmp_path: Path): + # Fully unit-test enroll.jinjaturtle.run_jinjaturtle by stubbing subprocess.run. + from enroll.jinjaturtle import run_jinjaturtle + + def fake_run(cmd, **kwargs): # noqa: ARG001 + # cmd includes "-d -t