from __future__ import annotations
import os
import shlex
import shutil
import tarfile
import tempfile
import zipapp
from pathlib import Path
from pathlib import PurePosixPath
from typing import Optional
def _safe_extract_tar(tar: tarfile.TarFile, dest: Path) -> None:
"""Safely extract a tar archive into dest.
Protects against path traversal (e.g. entries containing ../).
"""
# Note: tar member names use POSIX separators regardless of platform.
dest = dest.resolve()
for m in tar.getmembers():
name = m.name
# Some tar implementations include a top-level '.' entry when created
# with `tar -C
.`. That's harmless and should be allowed.
if name in {".", "./"}:
continue
# Reject absolute paths and any '..' components up front.
p = PurePosixPath(name)
if p.is_absolute() or ".." in p.parts:
raise RuntimeError(f"Unsafe tar member path: {name}")
# Refuse to extract links or device nodes from an untrusted archive.
# (A symlink can be used to redirect subsequent writes outside dest.)
if m.issym() or m.islnk() or m.isdev():
raise RuntimeError(f"Refusing to extract special tar member: {name}")
member_path = (dest / Path(*p.parts)).resolve()
if member_path != dest and not str(member_path).startswith(str(dest) + os.sep):
raise RuntimeError(f"Unsafe tar member path: {name}")
# Extract members one-by-one after validation.
for m in tar.getmembers():
if m.name in {".", "./"}:
continue
tar.extract(m, path=dest)
def _build_enroll_pyz(tmpdir: Path) -> Path:
"""Build a self-contained enroll zipapp (pyz) on the local machine.
The resulting file is stdlib-only and can be executed on the remote host
as long as it has Python 3 available.
"""
import enroll as pkg
pkg_dir = Path(pkg.__file__).resolve().parent
stage = tmpdir / "stage"
(stage / "enroll").mkdir(parents=True, exist_ok=True)
def _ignore(d: str, names: list[str]) -> set[str]:
return {
n
for n in names
if n in {"__pycache__", ".pytest_cache"} or n.endswith(".pyc")
}
shutil.copytree(pkg_dir, stage / "enroll", dirs_exist_ok=True, ignore=_ignore)
pyz_path = tmpdir / "enroll.pyz"
zipapp.create_archive(
stage,
target=pyz_path,
main="enroll.cli:main",
compressed=True,
)
return pyz_path
def _ssh_run(ssh, cmd: str) -> tuple[int, str, str]:
"""Run a command over a Paramiko SSHClient."""
_stdin, stdout, stderr = ssh.exec_command(cmd)
out = stdout.read().decode("utf-8", errors="replace")
err = stderr.read().decode("utf-8", errors="replace")
rc = stdout.channel.recv_exit_status()
return rc, out, err
def remote_harvest(
*,
local_out_dir: Path,
remote_host: str,
remote_port: int = 22,
remote_user: Optional[str] = None,
remote_python: str = "python3",
dangerous: bool = False,
no_sudo: bool = False,
include_paths: Optional[list[str]] = None,
exclude_paths: Optional[list[str]] = None,
) -> Path:
"""Run enroll harvest on a remote host via SSH and pull the bundle locally.
Returns the local path to state.json inside local_out_dir.
"""
try:
import paramiko # type: ignore
except Exception as e:
raise RuntimeError(
"Remote harvesting requires the 'paramiko' package. "
"Install it with: pip install paramiko"
) from e
local_out_dir = Path(local_out_dir)
local_out_dir.mkdir(parents=True, exist_ok=True)
try:
os.chmod(local_out_dir, 0o700)
except OSError:
pass
# Build a zipapp locally and upload it to the remote.
with tempfile.TemporaryDirectory(prefix="enroll-remote-") as td:
td_path = Path(td)
pyz = _build_enroll_pyz(td_path)
local_tgz = td_path / "bundle.tgz"
ssh = paramiko.SSHClient()
ssh.load_system_host_keys()
# Default: refuse unknown host keys.
# Users should add the key to known_hosts.
ssh.set_missing_host_key_policy(paramiko.RejectPolicy())
ssh.connect(
hostname=remote_host,
port=int(remote_port),
username=remote_user,
allow_agent=True,
look_for_keys=True,
)
# If no username was explicitly provided, SSH may have selected a default.
# We need a concrete username for the (sudo) chown step below.
resolved_user = remote_user
if not resolved_user:
rc, out, err = _ssh_run(ssh, "id -un")
if rc == 0 and out.strip():
resolved_user = out.strip()
sftp = ssh.open_sftp()
rtmp: Optional[str] = None
try:
rc, out, err = _ssh_run(ssh, "mktemp -d")
if rc != 0:
raise RuntimeError(f"Remote mktemp failed: {err.strip()}")
rtmp = out.strip()
# Be explicit: restrict the remote staging area to the current user.
rc, out, err = _ssh_run(ssh, f"chmod 700 {rtmp}")
if rc != 0:
raise RuntimeError(f"Remote chmod failed: {err.strip()}")
rapp = f"{rtmp}/enroll.pyz"
rbundle = f"{rtmp}/bundle"
sftp.put(str(pyz), rapp)
# Run remote harvest.
argv: list[str] = [
remote_python,
rapp,
"harvest",
"--out",
rbundle,
]
if dangerous:
argv.append("--dangerous")
for p in include_paths or []:
argv.extend(["--include-path", str(p)])
for p in exclude_paths or []:
argv.extend(["--exclude-path", str(p)])
_cmd = " ".join(shlex.quote(a) for a in argv)
if not no_sudo:
cmd = f"sudo {_cmd}"
else:
cmd = _cmd
rc, out, err = _ssh_run(ssh, cmd)
if rc != 0:
raise RuntimeError(
"Remote harvest failed.\n"
f"Command: {cmd}\n"
f"Exit code: {rc}\n"
f"Stderr: {err.strip()}"
)
if not no_sudo:
# Ensure user can read the files, before we tar it
if not resolved_user:
raise RuntimeError(
"Unable to determine remote username for chown. "
"Pass --remote-user explicitly or use --no-sudo."
)
cmd = f"sudo chown -R {resolved_user} {rbundle}"
rc, out, err = _ssh_run(ssh, cmd)
if rc != 0:
raise RuntimeError(
"chown of harvest failed.\n"
f"Command: {cmd}\n"
f"Exit code: {rc}\n"
f"Stderr: {err.strip()}"
)
# Stream a tarball back to the local machine (avoid creating a tar file on the remote).
cmd = f"tar -cz -C {rbundle} ."
_stdin, stdout, stderr = ssh.exec_command(cmd) # nosec
with open(local_tgz, "wb") as f:
while True:
chunk = stdout.read(1024 * 128)
if not chunk:
break
f.write(chunk)
rc = stdout.channel.recv_exit_status()
err_text = stderr.read().decode("utf-8", errors="replace")
if rc != 0:
raise RuntimeError(
"Remote tar stream failed.\n"
f"Command: {cmd}\n"
f"Exit code: {rc}\n"
f"Stderr: {err_text.strip()}"
)
# Extract into the destination.
with tarfile.open(local_tgz, mode="r:gz") as tf:
_safe_extract_tar(tf, local_out_dir)
finally:
# Cleanup remote tmpdir even on failure.
if rtmp:
_ssh_run(ssh, f"rm -rf {rtmp}")
try:
sftp.close()
ssh.close()
except Exception:
ssh.close()
raise RuntimeError("Something went wrong generating the harvest")
return local_out_dir / "state.json"