Fix detection of Python for AppImage if it needs to install browsers via playwright
Some checks failed
CI / test (push) Successful in 2m29s
Lint / test (push) Failing after 29s
Trivy / test (push) Successful in 23s

This commit is contained in:
Miguel Jacq 2026-01-02 10:50:53 +11:00
parent bfa16a145a
commit 2f2eccf053
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
2 changed files with 131 additions and 22 deletions

View file

@ -2,6 +2,7 @@
* Fix prog name * Fix prog name
* Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash) * Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash)
* Fix detection of Python for AppImage if it needs to install browsers via playwright
## 0.1.0 ## 0.1.0

View file

@ -1,14 +1,18 @@
from __future__ import annotations from __future__ import annotations
import os import os
import shutil
import subprocess
import sys import sys
import tempfile
import time import time
import subprocess # nosec
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from playwright.async_api import async_playwright, Error as PlaywrightError from playwright.async_api import async_playwright, Error as PlaywrightError
__all__ = ["EnsureResult", "ensure_chromium_installed"]
@dataclass(frozen=True) @dataclass(frozen=True)
class EnsureResult: class EnsureResult:
@ -16,9 +20,93 @@ class EnsureResult:
installed: bool installed: bool
def _user_cache_dir() -> Path:
"""
Cross-platform cache dir without extra deps.
Linux: $XDG_CACHE_HOME or ~/.cache
macOS: ~/Library/Caches
Windows: %LOCALAPPDATA%
"""
if os.name == "nt":
base = os.environ.get("LOCALAPPDATA") or str(Path.home() / "AppData" / "Local")
return Path(base)
if sys.platform == "darwin":
return Path.home() / "Library" / "Caches"
return Path(os.environ.get("XDG_CACHE_HOME", str(Path.home() / ".cache")))
def _default_browsers_path() -> Path: def _default_browsers_path() -> Path:
# Project-local by default. Override with PLAYWRIGHT_BROWSERS_PATH or CLI flag. """
return Path(__file__).resolve().parents[2] / ".pw-browsers" If PLAYWRIGHT_BROWSERS_PATH is set, honor it (Playwright-standard).
Otherwise use a user-writable cache path (safe for AppImage/pip installs).
"""
env = os.environ.get("PLAYWRIGHT_BROWSERS_PATH")
if env and env.strip() and env.strip() != "0":
return Path(env).expanduser()
return _user_cache_dir() / "cspresso" / "pw-browsers"
def _looks_like_python(path: str) -> bool:
p = Path(path)
name = p.name.lower()
return (
p.exists()
and os.access(str(p), os.X_OK)
and (
name == "python" or name.startswith("python3") or name.startswith("python")
)
)
def _find_python_executable() -> str:
    """
    Locate a Python interpreter that can run ``python -m playwright install``.

    In AppImage bundles, sys.executable may be the AppImage itself, so the
    embedded python binary has to be found another way. Search order:

    1. sys.executable (normal venv/system case)
    2. sys._base_executable, when the attribute is present
    3. versioned and unversioned python names under sys.prefix and then
       sys.base_prefix, in their "bin" ("Scripts" on Windows) directory
    4. a host python found on PATH

    Returns the first candidate accepted by _looks_like_python. If nothing
    matches, falls back to sys.executable (won't fix AppImage, but avoids
    crashing here), or to the bare name "python3" when sys.executable is unset.
    """
    major, minor = sys.version_info.major, sys.version_info.minor

    # 1) + 2) sys.executable can be an empty string or None when Python cannot
    # determine its own path, so filter falsy values before probing them.
    for exe in (sys.executable, getattr(sys, "_base_executable", None)):
        if exe and _looks_like_python(exe):
            return exe

    # 3) Embedded python typically lives under <prefix>/bin.
    bindir = "Scripts" if os.name == "nt" else "bin"
    names = (f"python{major}.{minor}", f"python{major}", "python3", "python")
    for prefix in (sys.prefix, sys.base_prefix):
        for name in names:
            candidate = Path(prefix) / bindir / name
            if _looks_like_python(str(candidate)):
                return str(candidate)

    # 4) Last resort: host python on PATH.
    for name in (f"python{major}.{minor}", "python3", "python"):
        found = shutil.which(name)
        if found and _looks_like_python(found):
            return found

    # Keep the declared return type honest even if sys.executable is unset;
    # a missing "python3" then fails later with a clear "not found" error.
    return sys.executable or "python3"
def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]: def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]:
@ -27,14 +115,20 @@ def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]:
return env return env
def _is_writable_dir(path: Path) -> bool:
try:
path.mkdir(parents=True, exist_ok=True)
probe = path / ".write_probe"
probe.write_text("x", encoding="utf-8")
probe.unlink(missing_ok=True)
return True
except OSError:
return False
def _acquire_install_lock( def _acquire_install_lock(
lock_path: Path, timeout_s: float = 120.0, poll_s: float = 0.2 lock_path: Path, timeout_s: float = 120.0, poll_s: float = 0.2
) -> None: ) -> None:
"""Very small cross-platform lock using atomic file creation.
Avoids concurrent Playwright installs when multiple processes start at once.
Not perfect, but good enough for most CLI usage.
"""
start = time.time() start = time.time()
while True: while True:
try: try:
@ -49,14 +143,16 @@ def _acquire_install_lock(
def _release_install_lock(lock_path: Path) -> None: def _release_install_lock(lock_path: Path) -> None:
try: try:
lock_path.unlink(missing_ok=True) # Python 3.8+ lock_path.unlink(missing_ok=True)
except Exception: except Exception:
pass # nosec pass
def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None: def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None:
env = _env_with_browsers_path(browsers_path) env = _env_with_browsers_path(browsers_path)
cmd = [sys.executable, "-m", "playwright", "install"] py = _find_python_executable()
cmd = [py, "-m", "playwright", "install"]
if with_deps: if with_deps:
cmd.append("--with-deps") cmd.append("--with-deps")
cmd.append("chromium") cmd.append("chromium")
@ -65,7 +161,6 @@ def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None:
async def _can_launch_chromium(browsers_path: Path) -> bool: async def _can_launch_chromium(browsers_path: Path) -> bool:
# Ensure this process uses the same path too.
os.environ["PLAYWRIGHT_BROWSERS_PATH"] = str(browsers_path) os.environ["PLAYWRIGHT_BROWSERS_PATH"] = str(browsers_path)
try: try:
async with async_playwright() as p: async with async_playwright() as p:
@ -82,23 +177,36 @@ async def ensure_chromium_installed(
with_deps: bool = False, with_deps: bool = False,
lock_timeout_s: float = 120.0, lock_timeout_s: float = 120.0,
) -> EnsureResult: ) -> EnsureResult:
"""Ensure Playwright's Chromium is installed and launchable.
Strategy:
- Attempt a tiny headless launch.
- If it fails, acquire a lock and run `python -m playwright install chromium` (optionally --with-deps).
- Retry launch once.
""" """
bp = browsers_path or _default_browsers_path() Ensure Playwright Chromium is installed and launchable.
bp.mkdir(parents=True, exist_ok=True)
- Honors PLAYWRIGHT_BROWSERS_PATH if set.
- Defaults to a user cache dir (safe for AppImage readonly mounts).
- Uses embedded python to run playwright installer when sys.executable is the AppImage.
"""
explicit = browsers_path is not None
bp = browsers_path or _default_browsers_path()
# If it already works, do nothing.
if await _can_launch_chromium(bp): if await _can_launch_chromium(bp):
return EnsureResult(browsers_path=bp, installed=False) return EnsureResult(browsers_path=bp, installed=False)
# If we need to install and the chosen dir isn't writable, fall back (unless explicit).
if not explicit and not _is_writable_dir(bp):
bp = _user_cache_dir() / "cspresso" / "pw-browsers"
if not _is_writable_dir(bp):
bp = Path(tempfile.gettempdir()) / "cspresso" / "pw-browsers"
bp.mkdir(parents=True, exist_ok=True)
if explicit and not _is_writable_dir(bp):
raise OSError(
f"Browsers path is not writable: {bp}\n"
"Choose a writable directory via --browsers-path or set PLAYWRIGHT_BROWSERS_PATH."
)
lock_path = bp / ".install.lock" lock_path = bp / ".install.lock"
_acquire_install_lock(lock_path, timeout_s=lock_timeout_s) _acquire_install_lock(lock_path, timeout_s=lock_timeout_s)
try: try:
# Another process might have installed while we waited; check again.
if await _can_launch_chromium(bp): if await _can_launch_chromium(bp):
return EnsureResult(browsers_path=bp, installed=False) return EnsureResult(browsers_path=bp, installed=False)
@ -106,7 +214,7 @@ async def ensure_chromium_installed(
if not await _can_launch_chromium(bp): if not await _can_launch_chromium(bp):
raise RuntimeError( raise RuntimeError(
"Playwright Chromium install completed, but Chromium still failed to launch. " "Chromium install completed, but Chromium still failed to launch. "
"On Linux, you may need additional system dependencies." "On Linux, you may need additional system dependencies."
) )