Fix detection of Python for AppImage if it needs to install browsers via playwright
This commit is contained in:
parent
bfa16a145a
commit
2f2eccf053
2 changed files with 131 additions and 22 deletions
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
* Fix prog name
|
||||
* Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash)
|
||||
* Fix detection of Python for AppImage if it needs to install browsers via playwright
|
||||
|
||||
## 0.1.0
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,18 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import subprocess # nosec
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from playwright.async_api import async_playwright, Error as PlaywrightError
|
||||
|
||||
__all__ = ["EnsureResult", "ensure_chromium_installed"]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class EnsureResult:
|
||||
|
|
@ -16,9 +20,93 @@ class EnsureResult:
|
|||
installed: bool
|
||||
|
||||
|
||||
def _user_cache_dir() -> Path:
    """Return the per-user cache directory without extra dependencies.

    Resolution by platform:

    * Windows: ``%LOCALAPPDATA%``, falling back to ``~/AppData/Local``.
    * macOS: ``~/Library/Caches``.
    * Everything else: ``$XDG_CACHE_HOME``, falling back to ``~/.cache``.

    The directory is only computed here, never created.
    """
    if os.name == "nt":
        local_app_data = os.environ.get("LOCALAPPDATA")
        if local_app_data:
            return Path(local_app_data)
        return Path.home() / "AppData" / "Local"

    if sys.platform == "darwin":
        return Path.home() / "Library" / "Caches"

    # The XDG Base Directory spec treats an empty XDG_CACHE_HOME like an unset
    # one and says relative values must be ignored. A plain
    # ``environ.get(name, default)`` would turn "" into Path(""), i.e. the
    # current working directory.
    xdg_cache_home = os.environ.get("XDG_CACHE_HOME", "")
    if xdg_cache_home and os.path.isabs(xdg_cache_home):
        return Path(xdg_cache_home)
    return Path.home() / ".cache"
|
||||
|
||||
|
||||
def _default_browsers_path() -> Path:
    """Return the default directory for Playwright browser downloads.

    If PLAYWRIGHT_BROWSERS_PATH is set, honor it (Playwright-standard).
    Otherwise use a user-writable cache path (safe for AppImage/pip installs).

    A blank value, or the value ``"0"``, is deliberately not treated as a
    path; both fall through to the cache location.
    """
    # Strip once and reuse the stripped value, so the path that is returned is
    # the same string that passed the checks.
    configured = os.environ.get("PLAYWRIGHT_BROWSERS_PATH", "").strip()
    if configured and configured != "0":
        return Path(configured).expanduser()

    return _user_cache_dir() / "cspresso" / "pw-browsers"
|
||||
|
||||
|
||||
def _looks_like_python(path: str) -> bool:
    """Return True if *path* is an executable file named like a Python interpreter.

    The check is name-based: the file name must start with ``python``
    (case-insensitive), which covers ``python``, ``python3``, ``python3.12``
    and ``python.exe``. Directories are rejected even though they normally
    carry the execute bit.

    An empty or ``None`` *path* yields False; ``sys.executable`` may be either
    when the interpreter cannot determine its own location.
    """
    if not path:
        return False

    candidate = Path(path)
    return (
        candidate.name.lower().startswith("python")
        # is_file() rather than exists(): e.g. ".../lib/python3.11" is a
        # directory and must not be mistaken for an interpreter.
        and candidate.is_file()
        and os.access(candidate, os.X_OK)
    )
|
||||
|
||||
|
||||
def _find_python_executable() -> str:
    """Pick the interpreter used to run ``python -m playwright install chromium``.

    In AppImage bundles ``sys.executable`` may be the AppImage itself, so the
    embedded python binary has to be located instead. Search order:

    1. ``sys.executable``, when it looks like a Python interpreter.
    2. ``sys._base_executable``, when present and valid.
    3. Versioned and unversioned names under the ``bin`` (``Scripts`` on
       Windows) directory of ``sys.prefix``, then of ``sys.base_prefix``.
    4. A host interpreter found on ``PATH``.

    Returns:
        The first match, or ``sys.executable`` unchanged if nothing matched
        (this does not fix the AppImage case, but avoids crashing here).
    """
    major = sys.version_info.major
    minor = sys.version_info.minor

    # 1) Normal venv/system case
    if _looks_like_python(sys.executable):
        return sys.executable

    # 2) Sometimes present
    base_executable = getattr(sys, "_base_executable", None)
    if base_executable and _looks_like_python(base_executable):
        return base_executable

    # 3) Embedded python typically lives under <prefix>/bin
    script_dir = "Scripts" if os.name == "nt" else "bin"
    bundled_names = (
        f"python{major}.{minor}",
        f"python{major}",
        "python3",
        "python",
    )
    for root in (sys.prefix, sys.base_prefix):
        for exe_name in bundled_names:
            bundled = str(Path(root) / script_dir / exe_name)
            if _looks_like_python(bundled):
                return bundled

    # 4) Last resort: host python on PATH
    for exe_name in (f"python{major}.{minor}", "python3", "python"):
        on_path = shutil.which(exe_name)
        if on_path and _looks_like_python(on_path):
            return on_path

    # Fallback (won't fix AppImage, but avoids crashing)
    return sys.executable
|
||||
|
||||
|
||||
def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]:
|
||||
|
|
@ -27,14 +115,20 @@ def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]:
|
|||
return env
|
||||
|
||||
|
||||
def _is_writable_dir(path: Path) -> bool:
    """Return True if *path* is a directory that files can be created in.

    Side effect: *path* (and any missing parents) is created if it does not
    exist yet.

    Args:
        path: Directory to probe.

    Returns:
        False if the directory cannot be created or written to (any
        ``OSError``), True otherwise.
    """
    try:
        path.mkdir(parents=True, exist_ok=True)
        # A uniquely named probe that removes itself on close. A fixed name
        # would overwrite and delete an existing file of that name, and two
        # processes probing at once could remove each other's probe.
        with tempfile.NamedTemporaryFile(dir=path, prefix=".write_probe_") as probe:
            probe.write(b"x")
            probe.flush()
    except OSError:
        return False
    return True
|
||||
|
||||
|
||||
def _acquire_install_lock(
|
||||
lock_path: Path, timeout_s: float = 120.0, poll_s: float = 0.2
|
||||
) -> None:
|
||||
"""Very small cross-platform lock using atomic file creation.
|
||||
Avoids concurrent Playwright installs when multiple processes start at once.
|
||||
|
||||
Not perfect, but good enough for most CLI usage.
|
||||
"""
|
||||
start = time.time()
|
||||
while True:
|
||||
try:
|
||||
|
|
@ -49,14 +143,16 @@ def _acquire_install_lock(
|
|||
|
||||
def _release_install_lock(lock_path: Path) -> None:
    """Remove the install lock file, ignoring filesystem errors.

    A missing lock file is not an error.

    Args:
        lock_path: Path of the lock file to remove.
    """
    try:
        lock_path.unlink(missing_ok=True)
    except OSError:
        # Best effort: failing to remove the lock file must not mask the
        # outcome of the install that was running under the lock.
        pass
|
||||
|
||||
|
||||
def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None:
|
||||
env = _env_with_browsers_path(browsers_path)
|
||||
cmd = [sys.executable, "-m", "playwright", "install"]
|
||||
py = _find_python_executable()
|
||||
|
||||
cmd = [py, "-m", "playwright", "install"]
|
||||
if with_deps:
|
||||
cmd.append("--with-deps")
|
||||
cmd.append("chromium")
|
||||
|
|
@ -65,7 +161,6 @@ def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None:
|
|||
|
||||
|
||||
async def _can_launch_chromium(browsers_path: Path) -> bool:
|
||||
# Ensure this process uses the same path too.
|
||||
os.environ["PLAYWRIGHT_BROWSERS_PATH"] = str(browsers_path)
|
||||
try:
|
||||
async with async_playwright() as p:
|
||||
|
|
@ -82,23 +177,36 @@ async def ensure_chromium_installed(
|
|||
with_deps: bool = False,
|
||||
lock_timeout_s: float = 120.0,
|
||||
) -> EnsureResult:
|
||||
"""Ensure Playwright's Chromium is installed and launchable.
|
||||
|
||||
Strategy:
|
||||
- Attempt a tiny headless launch.
|
||||
- If it fails, acquire a lock and run `python -m playwright install chromium` (optionally --with-deps).
|
||||
- Retry launch once.
|
||||
"""
|
||||
bp = browsers_path or _default_browsers_path()
|
||||
bp.mkdir(parents=True, exist_ok=True)
|
||||
Ensure Playwright Chromium is installed and launchable.
|
||||
|
||||
- Honors PLAYWRIGHT_BROWSERS_PATH if set.
|
||||
- Defaults to a user cache dir (safe for AppImage readonly mounts).
|
||||
- Uses embedded python to run playwright installer when sys.executable is the AppImage.
|
||||
"""
|
||||
explicit = browsers_path is not None
|
||||
bp = browsers_path or _default_browsers_path()
|
||||
|
||||
# If it already works, do nothing.
|
||||
if await _can_launch_chromium(bp):
|
||||
return EnsureResult(browsers_path=bp, installed=False)
|
||||
|
||||
# If we need to install and the chosen dir isn't writable, fall back (unless explicit).
|
||||
if not explicit and not _is_writable_dir(bp):
|
||||
bp = _user_cache_dir() / "cspresso" / "pw-browsers"
|
||||
if not _is_writable_dir(bp):
|
||||
bp = Path(tempfile.gettempdir()) / "cspresso" / "pw-browsers"
|
||||
bp.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if explicit and not _is_writable_dir(bp):
|
||||
raise OSError(
|
||||
f"Browsers path is not writable: {bp}\n"
|
||||
"Choose a writable directory via --browsers-path or set PLAYWRIGHT_BROWSERS_PATH."
|
||||
)
|
||||
|
||||
lock_path = bp / ".install.lock"
|
||||
_acquire_install_lock(lock_path, timeout_s=lock_timeout_s)
|
||||
try:
|
||||
# Another process might have installed while we waited; check again.
|
||||
if await _can_launch_chromium(bp):
|
||||
return EnsureResult(browsers_path=bp, installed=False)
|
||||
|
||||
|
|
@ -106,7 +214,7 @@ async def ensure_chromium_installed(
|
|||
|
||||
if not await _can_launch_chromium(bp):
|
||||
raise RuntimeError(
|
||||
"Playwright Chromium install completed, but Chromium still failed to launch. "
|
||||
"Chromium install completed, but Chromium still failed to launch. "
|
||||
"On Linux, you may need additional system dependencies."
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue