Introduce 'enroll validate' to check that a harvest meets the schema spec, isn't lacking artifacts, and doesn't contain orphaned ones
Some checks failed
CI / test (push) Failing after 1m47s
Lint / test (push) Successful in 31s
Trivy / test (push) Successful in 23s

This commit is contained in:
Miguel Jacq 2026-01-05 21:17:50 +11:00
parent 45e0d9bb16
commit 66d032d981
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
16 changed files with 1426 additions and 26 deletions

View file

@ -2,6 +2,7 @@ from __future__ import annotations
import argparse
import configparser
import json
import os
import sys
import tarfile
@ -16,6 +17,7 @@ from .harvest import harvest
from .manifest import manifest
from .remote import remote_harvest, RemoteSudoPasswordRequired
from .sopsutil import SopsError, encrypt_file_binary
from .validate import validate_harvest
from .version import get_enroll_version
@ -632,6 +634,49 @@ def main() -> None:
help="How many example paths/refs to show per reason.",
)
v = sub.add_parser(
"validate", help="Validate a harvest bundle (state.json + artifacts)"
)
_add_config_args(v)
v.add_argument(
"harvest",
help=(
"Harvest input (directory, a path to state.json, a tarball, or a SOPS-encrypted bundle)."
),
)
v.add_argument(
"--sops",
action="store_true",
help="Treat the input as a SOPS-encrypted bundle (auto-detected if the filename ends with .sops).",
)
v.add_argument(
"--schema",
help=(
"Optional JSON schema source (file path or https:// URL). "
"If omitted, uses the schema vendored in the enroll codebase."
),
)
v.add_argument(
"--no-schema",
action="store_true",
help="Skip JSON schema validation and only perform bundle consistency checks.",
)
v.add_argument(
"--fail-on-warnings",
action="store_true",
help="Exit non-zero if validation produces warnings.",
)
v.add_argument(
"--format",
choices=["text", "json"],
default="text",
help="Output format.",
)
v.add_argument(
"--out",
help="Write the report to this file instead of stdout.",
)
argv = sys.argv[1:]
cfg_path = _discover_config_path(argv)
argv = _inject_config_argv(
@ -644,6 +689,7 @@ def main() -> None:
"single-shot": s,
"diff": d,
"explain": e,
"validate": v,
},
)
args = ap.parse_args(argv)
@ -739,6 +785,33 @@ def main() -> None:
)
sys.stdout.write(out)
elif args.cmd == "validate":
res = validate_harvest(
args.harvest,
sops_mode=bool(getattr(args, "sops", False)),
schema=getattr(args, "schema", None),
no_schema=bool(getattr(args, "no_schema", False)),
)
fmt = str(getattr(args, "format", "text"))
if fmt == "json":
txt = json.dumps(res.to_dict(), indent=2, sort_keys=True) + "\n"
else:
txt = res.to_text()
out_path = getattr(args, "out", None)
if out_path:
p = Path(out_path).expanduser()
p.parent.mkdir(parents=True, exist_ok=True)
p.write_text(txt, encoding="utf-8")
else:
sys.stdout.write(txt)
if res.errors:
raise SystemExit(1)
if res.warnings and bool(getattr(args, "fail_on_warnings", False)):
raise SystemExit(1)
elif args.cmd == "manifest":
out_enc = manifest(
args.harvest,

View file

@ -0,0 +1,4 @@
"""Vendored JSON schemas.
These are used by `enroll validate` so validation can run offline.
"""

View file

@ -0,0 +1,712 @@
{
"$defs": {
"AptConfigSnapshot": {
"allOf": [
{
"$ref": "#/$defs/RoleCommon"
},
{
"properties": {
"role_name": {
"const": "apt_config"
}
},
"type": "object"
}
],
"unevaluatedProperties": false
},
"DnfConfigSnapshot": {
"allOf": [
{
"$ref": "#/$defs/RoleCommon"
},
{
"properties": {
"role_name": {
"const": "dnf_config"
}
},
"type": "object"
}
],
"unevaluatedProperties": false
},
"EtcCustomSnapshot": {
"allOf": [
{
"$ref": "#/$defs/RoleCommon"
},
{
"properties": {
"role_name": {
"const": "etc_custom"
}
},
"type": "object"
}
],
"unevaluatedProperties": false
},
"ExcludedFile": {
"additionalProperties": false,
"properties": {
"path": {
"minLength": 1,
"pattern": "^/.*",
"type": "string"
},
"reason": {
"enum": [
"user_excluded",
"unreadable",
"backup_file",
"log_file",
"denied_path",
"too_large",
"not_regular_file",
"not_symlink",
"binary_like",
"sensitive_content"
],
"type": "string"
}
},
"required": [
"path",
"reason"
],
"type": "object"
},
"ExtraPathsSnapshot": {
"allOf": [
{
"$ref": "#/$defs/RoleCommon"
},
{
"properties": {
"exclude_patterns": {
"items": {
"type": "string"
},
"type": "array"
},
"include_patterns": {
"items": {
"type": "string"
},
"type": "array"
},
"role_name": {
"const": "extra_paths"
}
},
"required": [
"include_patterns",
"exclude_patterns"
],
"type": "object"
}
],
"unevaluatedProperties": false
},
"InstalledPackageInstance": {
"additionalProperties": false,
"properties": {
"arch": {
"minLength": 1,
"type": "string"
},
"version": {
"minLength": 1,
"type": "string"
}
},
"required": [
"version",
"arch"
],
"type": "object"
},
"ManagedDir": {
"additionalProperties": false,
"properties": {
"group": {
"minLength": 1,
"type": "string"
},
"mode": {
"pattern": "^[0-7]{4}$",
"type": "string"
},
"owner": {
"minLength": 1,
"type": "string"
},
"path": {
"minLength": 1,
"pattern": "^/.*",
"type": "string"
},
"reason": {
"enum": [
"parent_of_managed_file",
"user_include_dir"
],
"type": "string"
}
},
"required": [
"path",
"owner",
"group",
"mode",
"reason"
],
"type": "object"
},
"ManagedFile": {
"additionalProperties": false,
"properties": {
"group": {
"minLength": 1,
"type": "string"
},
"mode": {
"pattern": "^[0-7]{4}$",
"type": "string"
},
"owner": {
"minLength": 1,
"type": "string"
},
"path": {
"minLength": 1,
"pattern": "^/.*",
"type": "string"
},
"reason": {
"enum": [
"apt_config",
"apt_keyring",
"apt_signed_by_keyring",
"apt_source",
"authorized_keys",
"cron_snippet",
"custom_specific_path",
"custom_unowned",
"dnf_config",
"logrotate_snippet",
"modified_conffile",
"modified_packaged_file",
"related_timer",
"rpm_gpg_key",
"ssh_public_key",
"system_cron",
"system_firewall",
"system_logrotate",
"system_modprobe",
"system_mounts",
"system_network",
"system_rc",
"system_security",
"system_sysctl",
"systemd_dropin",
"systemd_envfile",
"user_include",
"user_profile",
"user_shell_aliases",
"user_shell_logout",
"user_shell_rc",
"usr_local_bin_script",
"usr_local_etc_custom",
"yum_conf",
"yum_config",
"yum_repo"
],
"type": "string"
},
"src_rel": {
"minLength": 1,
"pattern": "^[^/].*",
"type": "string"
}
},
"required": [
"path",
"src_rel",
"owner",
"group",
"mode",
"reason"
],
"type": "object"
},
"ManagedLink": {
"additionalProperties": false,
"type": "object",
"properties": {
"path": {
"type": "string",
"minLength": 1,
"pattern": "^/.*"
},
"target": {
"type": "string",
"minLength": 1
},
"reason": {
"type": "string",
"enum": [
"enabled_symlink"
]
}
},
"required": [
"path",
"target",
"reason"
]
},
"ObservedVia": {
"oneOf": [
{
"additionalProperties": false,
"properties": {
"kind": {
"const": "user_installed"
}
},
"required": [
"kind"
],
"type": "object"
},
{
"additionalProperties": false,
"properties": {
"kind": {
"const": "systemd_unit"
},
"ref": {
"minLength": 1,
"type": "string"
}
},
"required": [
"kind",
"ref"
],
"type": "object"
},
{
"additionalProperties": false,
"properties": {
"kind": {
"const": "package_role"
},
"ref": {
"minLength": 1,
"type": "string"
}
},
"required": [
"kind",
"ref"
],
"type": "object"
}
]
},
"PackageInventoryEntry": {
"additionalProperties": false,
"properties": {
"arches": {
"items": {
"minLength": 1,
"type": "string"
},
"type": "array"
},
"installations": {
"items": {
"$ref": "#/$defs/InstalledPackageInstance"
},
"type": "array"
},
"observed_via": {
"items": {
"$ref": "#/$defs/ObservedVia"
},
"type": "array"
},
"roles": {
"items": {
"minLength": 1,
"type": "string"
},
"type": "array"
},
"version": {
"type": [
"string",
"null"
]
}
},
"required": [
"version",
"arches",
"installations",
"observed_via",
"roles"
],
"type": "object"
},
"PackageSnapshot": {
"allOf": [
{
"$ref": "#/$defs/RoleCommon"
},
{
"properties": {
"package": {
"minLength": 1,
"type": "string"
}
},
"required": [
"package"
],
"type": "object"
}
],
"unevaluatedProperties": false
},
"RoleCommon": {
"properties": {
"excluded": {
"items": {
"$ref": "#/$defs/ExcludedFile"
},
"type": "array"
},
"managed_dirs": {
"items": {
"$ref": "#/$defs/ManagedDir"
},
"type": "array"
},
"managed_files": {
"items": {
"$ref": "#/$defs/ManagedFile"
},
"type": "array"
},
"managed_links": {
"items": {
"$ref": "#/$defs/ManagedLink"
},
"type": "array"
},
"notes": {
"items": {
"type": "string"
},
"type": "array"
},
"role_name": {
"minLength": 1,
"pattern": "^[A-Za-z0-9_]+$",
"type": "string"
}
},
"required": [
"role_name",
"managed_dirs",
"managed_files",
"excluded",
"notes"
],
"type": "object"
},
"ServiceSnapshot": {
"allOf": [
{
"$ref": "#/$defs/RoleCommon"
},
{
"properties": {
"active_state": {
"type": [
"string",
"null"
]
},
"condition_result": {
"type": [
"string",
"null"
]
},
"packages": {
"items": {
"minLength": 1,
"type": "string"
},
"type": "array"
},
"role_name": {
"minLength": 1,
"pattern": "^[a-z_][a-z0-9_]*$",
"type": "string"
},
"sub_state": {
"type": [
"string",
"null"
]
},
"unit": {
"minLength": 1,
"type": "string"
},
"unit_file_state": {
"type": [
"string",
"null"
]
}
},
"required": [
"unit",
"packages",
"active_state",
"sub_state",
"unit_file_state",
"condition_result"
],
"type": "object"
}
],
"unevaluatedProperties": false
},
"UserEntry": {
"additionalProperties": false,
"properties": {
"gecos": {
"type": "string"
},
"gid": {
"minimum": 0,
"type": "integer"
},
"home": {
"type": "string"
},
"name": {
"minLength": 1,
"type": "string"
},
"primary_group": {
"minLength": 1,
"type": "string"
},
"shell": {
"type": "string"
},
"supplementary_groups": {
"items": {
"minLength": 1,
"type": "string"
},
"type": "array"
},
"uid": {
"minimum": 0,
"type": "integer"
}
},
"required": [
"name",
"uid",
"gid",
"gecos",
"home",
"shell",
"primary_group",
"supplementary_groups"
],
"type": "object"
},
"UsersSnapshot": {
"allOf": [
{
"$ref": "#/$defs/RoleCommon"
},
{
"properties": {
"role_name": {
"const": "users"
},
"users": {
"items": {
"$ref": "#/$defs/UserEntry"
},
"type": "array"
}
},
"required": [
"users"
],
"type": "object"
}
],
"unevaluatedProperties": false
},
"UsrLocalCustomSnapshot": {
"allOf": [
{
"$ref": "#/$defs/RoleCommon"
},
{
"properties": {
"role_name": {
"const": "usr_local_custom"
}
},
"type": "object"
}
],
"unevaluatedProperties": false
}
},
"$id": "https://enroll.sh/schema/state.schema.json",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"additionalProperties": false,
"properties": {
"enroll": {
"additionalProperties": false,
"properties": {
"harvest_time": {
"minimum": 0,
"type": "integer"
},
"version": {
"type": "string"
}
},
"required": [
"version",
"harvest_time"
],
"type": "object"
},
"host": {
"additionalProperties": false,
"properties": {
"hostname": {
"minLength": 1,
"type": "string"
},
"os": {
"enum": [
"debian",
"redhat",
"unknown"
],
"type": "string"
},
"os_release": {
"additionalProperties": {
"type": "string"
},
"type": "object"
},
"pkg_backend": {
"enum": [
"dpkg",
"rpm"
],
"type": "string"
}
},
"required": [
"hostname",
"os",
"pkg_backend",
"os_release"
],
"type": "object"
},
"inventory": {
"additionalProperties": false,
"properties": {
"packages": {
"additionalProperties": {
"$ref": "#/$defs/PackageInventoryEntry"
},
"type": "object"
}
},
"required": [
"packages"
],
"type": "object"
},
"roles": {
"additionalProperties": false,
"properties": {
"apt_config": {
"$ref": "#/$defs/AptConfigSnapshot"
},
"dnf_config": {
"$ref": "#/$defs/DnfConfigSnapshot"
},
"etc_custom": {
"$ref": "#/$defs/EtcCustomSnapshot"
},
"extra_paths": {
"$ref": "#/$defs/ExtraPathsSnapshot"
},
"packages": {
"items": {
"$ref": "#/$defs/PackageSnapshot"
},
"type": "array"
},
"services": {
"items": {
"$ref": "#/$defs/ServiceSnapshot"
},
"type": "array"
},
"users": {
"$ref": "#/$defs/UsersSnapshot"
},
"usr_local_custom": {
"$ref": "#/$defs/UsrLocalCustomSnapshot"
}
},
"required": [
"users",
"services",
"packages",
"apt_config",
"dnf_config",
"etc_custom",
"usr_local_custom",
"extra_paths"
],
"type": "object"
}
},
"required": [
"enroll",
"host",
"inventory",
"roles"
],
"title": "Enroll harvest state.json schema (latest)",
"type": "object"
}

223
enroll/validate.py Normal file
View file

@ -0,0 +1,223 @@
from __future__ import annotations
import json
import urllib.request
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
import jsonschema
from .diff import BundleRef, _bundle_from_input
@dataclass
class ValidationResult:
    """Outcome of validating a harvest bundle.

    ``errors`` and ``warnings`` are lists of human-readable messages. Only
    ``errors`` influence ``ok``; warnings never do.
    """

    errors: List[str]
    warnings: List[str]

    @property
    def ok(self) -> bool:
        """True when no errors were recorded (warnings are ignored)."""
        return not self.errors

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict summary holding copies of both message lists."""
        return dict(
            ok=self.ok,
            errors=[*self.errors],
            warnings=[*self.warnings],
        )

    def to_text(self) -> str:
        """Render a newline-terminated text report.

        The first line is a one-line verdict (OK / WARN / ERROR); it is
        followed by an "Errors:" and/or "Warnings:" section, each preceded by
        a blank line, when the corresponding list is non-empty.
        """
        if self.errors:
            headline = f"ERROR: {len(self.errors)} validation error(s)"
        elif self.warnings:
            headline = f"WARN: {len(self.warnings)} warning(s)"
        else:
            headline = "OK: harvest bundle validated"

        report: List[str] = [headline]
        sections = (("Errors:", self.errors), ("Warnings:", self.warnings))
        for title, messages in sections:
            if not messages:
                continue
            report += ["", title]
            report.extend(f"- {message}" for message in messages)

        return "\n".join(report) + "\n"
def _default_schema_path() -> Path:
# Keep the schema vendored with the codebase so enroll can validate offline.
return Path(__file__).resolve().parent / "schema" / "state.schema.json"
def _load_schema(schema: Optional[str]) -> Dict[str, Any]:
"""Load a JSON schema.
If schema is None, load the vendored schema.
If schema begins with http(s)://, fetch it.
Otherwise, treat it as a local file path.
"""
if not schema:
p = _default_schema_path()
with open(p, "r", encoding="utf-8") as f:
return json.load(f)
if schema.startswith("http://") or schema.startswith("https://"):
with urllib.request.urlopen(schema, timeout=10) as resp: # nosec
data = resp.read()
return json.loads(data.decode("utf-8"))
p = Path(schema).expanduser()
with open(p, "r", encoding="utf-8") as f:
return json.load(f)
def _json_pointer(err: jsonschema.ValidationError) -> str:
# Build a JSON pointer-ish path that is easy to read.
if err.absolute_path:
parts = [str(p) for p in err.absolute_path]
return "/" + "/".join(parts)
return "/"
def _iter_managed_files(state: Dict[str, Any]) -> List[Tuple[str, Dict[str, Any]]]:
"""Return (role_name, managed_file_dict) tuples across all roles."""
roles = state.get("roles") or {}
out: List[Tuple[str, Dict[str, Any]]] = []
# Singleton roles
for rn in [
"users",
"apt_config",
"dnf_config",
"etc_custom",
"usr_local_custom",
"extra_paths",
]:
snap = roles.get(rn) or {}
for mf in snap.get("managed_files") or []:
if isinstance(mf, dict):
out.append((rn, mf))
# Array roles
for s in roles.get("services") or []:
if not isinstance(s, dict):
continue
role_name = str(s.get("role_name") or "unknown")
for mf in s.get("managed_files") or []:
if isinstance(mf, dict):
out.append((role_name, mf))
for p in roles.get("packages") or []:
if not isinstance(p, dict):
continue
role_name = str(p.get("role_name") or "unknown")
for mf in p.get("managed_files") or []:
if isinstance(mf, dict):
out.append((role_name, mf))
return out
def validate_harvest(
    harvest_input: str,
    *,
    sops_mode: bool = False,
    schema: Optional[str] = None,
    no_schema: bool = False,
) -> ValidationResult:
    """Validate an enroll harvest bundle.

    Checks:
    - state.json exists and parses as JSON
    - state.json validates against the schema (unless no_schema)
    - every managed_file src_rel exists as a regular file at
      artifacts/<role>/<src_rel>, and neither the role name nor src_rel can
      point outside the artifacts directory
    - (warning only) files under artifacts/<role>/ that no managed_file
      references

    Args:
        harvest_input: Harvest location, handed to ``_bundle_from_input``.
        sops_mode: Forwarded to ``_bundle_from_input`` as ``sops_mode``.
        schema: Optional schema source (local path or http(s) URL); when
            omitted the vendored schema is used.
        no_schema: Skip schema validation and only run the artifact checks.

    Returns:
        A ValidationResult with all collected errors and warnings. Problems
        found in the bundle are reported there rather than raised; exceptions
        from ``_bundle_from_input`` itself are not caught here.
    """
    errors: List[str] = []
    warnings: List[str] = []

    bundle: BundleRef = _bundle_from_input(harvest_input, sops_mode=sops_mode)
    try:
        state_path = bundle.state_path
        if not state_path.exists():
            return ValidationResult(
                errors=[f"missing state.json at {state_path}"], warnings=[]
            )

        try:
            state = json.loads(state_path.read_text(encoding="utf-8"))
        except Exception as e:  # noqa: BLE001
            return ValidationResult(
                errors=[f"failed to parse state.json: {e!r}"], warnings=[]
            )

        if not no_schema:
            try:
                sch = _load_schema(schema)
                validator = jsonschema.Draft202012Validator(sch)
                for err in sorted(validator.iter_errors(state), key=str):
                    ptr = _json_pointer(err)
                    msg = err.message
                    errors.append(f"schema {ptr}: {msg}")
            except Exception as e:  # noqa: BLE001
                # Best effort: a broken or unreachable schema is reported as an
                # error, but the artifact checks below still run.
                errors.append(f"failed to load/validate schema: {e!r}")

        # The artifact checks need a JSON object; valid JSON can still be a
        # list, string, number, etc.
        if not isinstance(state, dict):
            errors.append(
                "state.json top-level value is not a JSON object; "
                "artifact checks skipped"
            )
            return ValidationResult(errors=errors, warnings=warnings)

        # A structurally malformed roles section must surface as a validation
        # error, not as an unhandled exception from the validator itself.
        try:
            managed_files = _iter_managed_files(state)
        except (AttributeError, TypeError) as e:
            errors.append(
                f"malformed roles section in state.json; artifact checks skipped: {e!r}"
            )
            return ValidationResult(errors=errors, warnings=warnings)

        # Artifact existence checks
        artifacts_dir = bundle.dir / "artifacts"
        referenced: Set[Tuple[str, str]] = set()
        for role_name, mf in managed_files:
            src_rel = str(mf.get("src_rel") or "")
            if not src_rel:
                errors.append(
                    f"managed_file missing src_rel for role {role_name} (path={mf.get('path')!r})"
                )
                continue
            if src_rel.startswith("/") or ".." in src_rel.split("/"):
                errors.append(
                    f"managed_file has suspicious src_rel for role {role_name}: {src_rel!r}"
                )
                continue
            # role_name also comes from state.json and becomes a single path
            # component; an absolute or ".." value would make the lookup below
            # leave the artifacts directory.
            if (
                role_name in ("", ".", "..")
                or "/" in role_name
                or "\\" in role_name
            ):
                errors.append(
                    f"managed_file has suspicious role name {role_name!r} (src_rel={src_rel!r})"
                )
                continue

            referenced.add((role_name, src_rel))
            p = artifacts_dir / role_name / src_rel
            if not p.exists():
                errors.append(
                    f"missing artifact for role {role_name}: artifacts/{role_name}/{src_rel}"
                )
                continue
            if not p.is_file():
                errors.append(
                    f"artifact is not a file for role {role_name}: artifacts/{role_name}/{src_rel}"
                )

        # Warn if there are extra files in artifacts not referenced.
        if artifacts_dir.exists() and artifacts_dir.is_dir():
            for fp in artifacts_dir.rglob("*"):
                if not fp.is_file():
                    continue
                try:
                    rel = fp.relative_to(artifacts_dir)
                except ValueError:
                    continue
                parts = rel.parts
                # Files directly under artifacts/ have no role directory and
                # cannot be matched to a (role, src_rel) pair.
                if len(parts) < 2:
                    continue
                role_name = parts[0]
                src_rel = "/".join(parts[1:])
                if (role_name, src_rel) not in referenced:
                    warnings.append(
                        f"unreferenced artifact present: artifacts/{role_name}/{src_rel}"
                    )

        return ValidationResult(errors=errors, warnings=warnings)
    finally:
        # Ensure any temp extraction dirs are cleaned up.
        if bundle.tempdir is not None:
            bundle.tempdir.cleanup()