diff --git a/CHANGELOG.md b/CHANGELOG.md
index 319821a..e147d0a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
 
  * Fix prog name
  * Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash)
+ * Fix detection of Python for AppImage if it needs to install browsers via playwright
 
 ## 0.1.0
 
diff --git a/src/cspresso/ensure_playwright.py b/src/cspresso/ensure_playwright.py
index e8230b7..12fd650 100644
--- a/src/cspresso/ensure_playwright.py
+++ b/src/cspresso/ensure_playwright.py
@@ -1,14 +1,18 @@
 from __future__ import annotations
 
 import os
+import shutil
+import subprocess  # nosec
 import sys
+import tempfile
 import time
-import subprocess  # nosec
 from dataclasses import dataclass
 from pathlib import Path
 
 from playwright.async_api import async_playwright, Error as PlaywrightError
 
+__all__ = ["EnsureResult", "ensure_chromium_installed"]
+
 
 @dataclass(frozen=True)
 class EnsureResult:
@@ -16,9 +20,90 @@ class EnsureResult:
     installed: bool
 
 
+def _user_cache_dir() -> Path:
+    """
+    Cross-platform cache dir without extra deps.
+    Linux: $XDG_CACHE_HOME or ~/.cache
+    macOS: ~/Library/Caches
+    Windows: %LOCALAPPDATA%
+    """
+    if os.name == "nt":
+        base = os.environ.get("LOCALAPPDATA") or str(Path.home() / "AppData" / "Local")
+        return Path(base)
+
+    if sys.platform == "darwin":
+        return Path.home() / "Library" / "Caches"
+
+    # `or` (not a .get() default): an empty XDG_CACHE_HOME must count as unset.
+    return Path(os.environ.get("XDG_CACHE_HOME") or Path.home() / ".cache")
+
+
 def _default_browsers_path() -> Path:
-    # Project-local by default. Override with PLAYWRIGHT_BROWSERS_PATH or CLI flag.
-    return Path(__file__).resolve().parents[2] / ".pw-browsers"
+    """
+    If PLAYWRIGHT_BROWSERS_PATH is set, honor it (Playwright-standard).
+    Otherwise use a user-writable cache path (safe for AppImage/pip installs).
+    An empty value or "0" is treated as unset.
+    """
+    env = os.environ.get("PLAYWRIGHT_BROWSERS_PATH", "").strip()
+    if env and env != "0":
+        return Path(env).expanduser()
+
+    return _user_cache_dir() / "cspresso" / "pw-browsers"
+
+
+def _looks_like_python(path: str) -> bool:
+    """Return True if *path* is an executable file named like a Python binary."""
+    p = Path(path)
+    return (
+        p.is_file()
+        and os.access(p, os.X_OK)
+        and p.name.lower().startswith("python")
+    )
+
+
+def _find_python_executable() -> str:
+    """
+    Return the path of a Python interpreter to run the Playwright installer with.
+
+    In AppImage bundles, sys.executable may be the AppImage itself.
+    We need the embedded python binary so we can run: python -m playwright install chromium
+    """
+    # 1) Normal venv/system case
+    if _looks_like_python(sys.executable):
+        return sys.executable
+
+    # 2) Sometimes present
+    base = getattr(sys, "_base_executable", None)
+    if base and _looks_like_python(base):
+        return base
+
+    major, minor = sys.version_info[:2]
+    names = (f"python{major}.{minor}", f"python{major}", "python3", "python")
+
+    # 3) Embedded python typically lives under sys.prefix/bin.
+    #    Windows: <prefix>\Scripts\python.exe (venv) or <prefix>\python.exe.
+    if os.name == "nt":
+        subdirs, suffix = ("Scripts", ""), ".exe"
+    else:
+        subdirs, suffix = ("bin",), ""
+    candidates = [
+        Path(prefix) / subdir / f"{name}{suffix}"
+        for prefix in (sys.prefix, sys.base_prefix)
+        for subdir in subdirs
+        for name in names
+    ]
+    for c in candidates:
+        if _looks_like_python(str(c)):
+            return str(c)
+
+    # 4) Last resort: host python on PATH
+    for name in names:
+        p = shutil.which(name)
+        if p and _looks_like_python(p):
+            return p
+
+    # Fallback (won't fix AppImage, but avoids crashing)
+    return sys.executable
 
 
 def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]:
@@ -27,6 +112,18 @@ def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]:
     return env
 
 
+def _is_writable_dir(path: Path) -> bool:
+    """Create *path* if needed and return True if a file can be created inside it."""
+    try:
+        path.mkdir(parents=True, exist_ok=True)
+        # A uniquely named temp file avoids clashes between concurrent processes.
+        with tempfile.TemporaryFile(dir=path):
+            pass
+        return True
+    except OSError:
+        return False
+
+
 def _acquire_install_lock(
     lock_path: Path, timeout_s: float = 120.0, poll_s: float = 0.2
 ) -> None:
@@ -49,14 +146,16 @@ def _acquire_install_lock(
 
 def _release_install_lock(lock_path: Path) -> None:
     try:
-        lock_path.unlink(missing_ok=True)  # Python 3.8+
+        lock_path.unlink(missing_ok=True)
     except Exception:
         pass  # nosec
 
 
 def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None:
     env = _env_with_browsers_path(browsers_path)
-    cmd = [sys.executable, "-m", "playwright", "install"]
+    py = _find_python_executable()
+
+    cmd = [py, "-m", "playwright", "install"]
     if with_deps:
         cmd.append("--with-deps")
     cmd.append("chromium")
@@ -82,19 +181,41 @@ async def ensure_chromium_installed(
     with_deps: bool = False,
     lock_timeout_s: float = 120.0,
 ) -> EnsureResult:
-    """Ensure Playwright's Chromium is installed and launchable.
-
-    Strategy:
-    - Attempt a tiny headless launch.
-    - If it fails, acquire a lock and run `python -m playwright install chromium` (optionally --with-deps).
-    - Retry launch once.
     """
-    bp = browsers_path or _default_browsers_path()
-    bp.mkdir(parents=True, exist_ok=True)
+    Ensure Playwright Chromium is installed and launchable.
+
+    - Honors PLAYWRIGHT_BROWSERS_PATH if set.
+    - Defaults to a user cache dir (safe for AppImage readonly mounts).
+    - Uses embedded python to run playwright installer when sys.executable is the AppImage.
+
+    Strategy: attempt a tiny headless launch; if it fails, take the install
+    lock, run `python -m playwright install chromium` (optionally --with-deps)
+    and retry the launch once.
+
+    Raises OSError if an explicitly given browsers_path is not writable, and
+    RuntimeError if Chromium still cannot be launched after installing.
+    """
 
+    explicit = browsers_path is not None
+    bp = browsers_path or _default_browsers_path()
+
+    # If it already works, do nothing.
     if await _can_launch_chromium(bp):
         return EnsureResult(browsers_path=bp, installed=False)
 
+    # If we need to install and the chosen dir isn't writable, fall back (unless explicit).
+    if not explicit and not _is_writable_dir(bp):
+        bp = _user_cache_dir() / "cspresso" / "pw-browsers"
+        if not _is_writable_dir(bp):
+            bp = Path(tempfile.gettempdir()) / "cspresso" / "pw-browsers"
+            bp.mkdir(parents=True, exist_ok=True)
+
+    if explicit and not _is_writable_dir(bp):
+        raise OSError(
+            f"Browsers path is not writable: {bp}\n"
+            "Choose a writable directory via --browsers-path or set PLAYWRIGHT_BROWSERS_PATH."
+        )
+
     lock_path = bp / ".install.lock"
     _acquire_install_lock(lock_path, timeout_s=lock_timeout_s)
     try:
@@ -106,7 +227,7 @@ async def ensure_chromium_installed(
 
         if not await _can_launch_chromium(bp):
             raise RuntimeError(
-                "Playwright Chromium install completed, but Chromium still failed to launch. "
+                "Chromium install completed, but Chromium still failed to launch. "
                 "On Linux, you may need additional system dependencies."
             )
 