Fix detection of Python for AppImage if it needs to install browsers via playwright
This commit is contained in:
parent
bfa16a145a
commit
2f2eccf053
2 changed files with 131 additions and 22 deletions
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
* Fix prog name
|
* Fix prog name
|
||||||
* Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash)
|
* Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash)
|
||||||
|
* Fix detection of Python for AppImage if it needs to install browsers via playwright
|
||||||
|
|
||||||
## 0.1.0
|
## 0.1.0
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,18 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import tempfile
|
||||||
import time
|
import time
|
||||||
import subprocess # nosec
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from playwright.async_api import async_playwright, Error as PlaywrightError
|
from playwright.async_api import async_playwright, Error as PlaywrightError
|
||||||
|
|
||||||
|
__all__ = ["EnsureResult", "ensure_chromium_installed"]
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class EnsureResult:
|
class EnsureResult:
|
||||||
|
|
@ -16,9 +20,93 @@ class EnsureResult:
|
||||||
installed: bool
|
installed: bool
|
||||||
|
|
||||||
|
|
||||||
|
def _user_cache_dir() -> Path:
    """Return the per-user cache base directory using only the standard library.

    Resolution by platform:

    * Windows: ``%LOCALAPPDATA%`` when set and non-empty, otherwise
      ``~/AppData/Local``.
    * macOS: ``~/Library/Caches``.
    * Anything else: ``$XDG_CACHE_HOME`` when it holds an absolute path,
      otherwise ``~/.cache``.

    Returns:
        The cache base directory. The directory is not created here.
    """
    if os.name == "nt":
        base = os.environ.get("LOCALAPPDATA") or str(Path.home() / "AppData" / "Local")
        return Path(base)

    if sys.platform == "darwin":
        return Path.home() / "Library" / "Caches"

    # An empty XDG_CACHE_HOME would turn into Path("") -- the current working
    # directory -- and the XDG Base Directory spec says empty or relative
    # values must be ignored in favour of the default.
    xdg_cache = os.environ.get("XDG_CACHE_HOME", "")
    if xdg_cache and os.path.isabs(xdg_cache):
        return Path(xdg_cache)
    return Path.home() / ".cache"
|
||||||
|
|
||||||
|
|
||||||
def _default_browsers_path() -> Path:
    """Pick the directory Playwright browsers are stored in by default.

    ``PLAYWRIGHT_BROWSERS_PATH`` wins when it is set to something other than
    blank text or ``"0"``; its value is used as given, with ``~`` expanded.
    Otherwise the location is ``<user cache dir>/cspresso/pw-browsers``, which
    is user-writable and therefore usable from AppImage or pip installs.

    Returns:
        The browsers directory. The directory is not created here.
    """
    configured = os.environ.get("PLAYWRIGHT_BROWSERS_PATH")
    if configured is not None and configured.strip() not in ("", "0"):
        return Path(configured).expanduser()

    return _user_cache_dir() / "cspresso" / "pw-browsers"
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_python(path: str) -> bool:
    """Report whether *path* is an executable file named like a Python binary.

    Args:
        path: Filesystem path to inspect.

    Returns:
        True when *path* is a regular file (symlinks are followed), the
        current process may execute it, and its file name starts with
        ``python`` (case-insensitive, so ``python``, ``python3``,
        ``python3.12`` and ``Python.exe`` all qualify).
    """
    candidate = Path(path)

    # is_file() rather than exists(): directories normally carry the execute
    # bit as well, so a directory called "python3" would otherwise be accepted
    # and later handed to subprocess as the interpreter.
    if not candidate.is_file():
        return False
    if not os.access(str(candidate), os.X_OK):
        return False

    # One prefix test covers "python", "python3" and "python3.x" alike.
    return candidate.name.lower().startswith("python")
|
||||||
|
|
||||||
|
|
||||||
|
def _find_python_executable() -> str:
    """Locate a Python interpreter able to run ``python -m playwright install``.

    In AppImage bundles ``sys.executable`` may be the AppImage itself, so the
    embedded interpreter has to be searched for. Probe order:

    1. ``sys.executable``.
    2. ``sys._base_executable`` when the attribute exists and is non-empty.
    3. Versioned and unversioned ``python`` names in the ``bin`` directory
       (``Scripts`` on Windows) of ``sys.prefix``, then of ``sys.base_prefix``.
    4. A host interpreter found on ``PATH``.

    Returns:
        The first candidate accepted by ``_looks_like_python``. When none is
        accepted, ``sys.executable`` is returned unchanged.
    """
    versioned = f"python{sys.version_info.major}.{sys.version_info.minor}"
    major_only = f"python{sys.version_info.major}"

    # 1) Ordinary venv / system interpreter.
    if _looks_like_python(sys.executable):
        return sys.executable

    # 2) Private attribute that is only sometimes present.
    base_executable = getattr(sys, "_base_executable", None)
    if base_executable and _looks_like_python(base_executable):
        return base_executable

    # 3) Embedded interpreter next to the running prefix, then the base prefix.
    scripts_dir = "Scripts" if os.name == "nt" else "bin"
    for root in (sys.prefix, sys.base_prefix):
        for exe_name in (versioned, major_only, "python3", "python"):
            located = str(Path(root) / scripts_dir / exe_name)
            if _looks_like_python(located):
                return located

    # 4) Last resort: whatever Python the host exposes on PATH.
    for exe_name in (versioned, "python3", "python"):
        on_path = shutil.which(exe_name)
        if on_path and _looks_like_python(on_path):
            return on_path

    # Fallback (won't fix AppImage, but avoids crashing)
    return sys.executable
|
||||||
|
|
||||||
|
|
||||||
def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]:
|
def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]:
|
||||||
|
|
@ -27,14 +115,20 @@ def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]:
|
||||||
return env
|
return env
|
||||||
|
|
||||||
|
|
||||||
|
def _is_writable_dir(path: Path) -> bool:
    """Check that *path* can be used as a writable directory.

    The directory (and any missing parents) is created, then a small
    ``.write_probe`` file is written inside it and removed again.

    Args:
        path: Directory to test; it is created when it does not exist.

    Returns:
        True when creating the directory, writing the probe and deleting the
        probe all succeed; False when any of those steps raises ``OSError``.
    """
    try:
        path.mkdir(parents=True, exist_ok=True)
        marker = path / ".write_probe"
        marker.write_text("x", encoding="utf-8")
        marker.unlink(missing_ok=True)
    except OSError:
        writable = False
    else:
        writable = True
    return writable
|
||||||
|
|
||||||
|
|
||||||
def _acquire_install_lock(
|
def _acquire_install_lock(
|
||||||
lock_path: Path, timeout_s: float = 120.0, poll_s: float = 0.2
|
lock_path: Path, timeout_s: float = 120.0, poll_s: float = 0.2
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Very small cross-platform lock using atomic file creation.
|
|
||||||
Avoids concurrent Playwright installs when multiple processes start at once.
|
|
||||||
|
|
||||||
Not perfect, but good enough for most CLI usage.
|
|
||||||
"""
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
|
@ -49,14 +143,16 @@ def _acquire_install_lock(
|
||||||
|
|
||||||
def _release_install_lock(lock_path: Path) -> None:
    """Delete the install lock file on a best-effort basis.

    Args:
        lock_path: Path of the lock file to remove. A missing file is not an
            error.

    Filesystem failures (``OSError`` and its subclasses, e.g. permission
    problems) are swallowed so that releasing the lock never raises for them.
    Any other exception indicates a programming error and is allowed to
    propagate instead of being silently hidden.
    """
    try:
        lock_path.unlink(missing_ok=True)
    except OSError:
        # Best effort: failing to remove the lock file must not raise.
        pass
|
||||||
|
|
||||||
|
|
||||||
def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None:
|
def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None:
|
||||||
env = _env_with_browsers_path(browsers_path)
|
env = _env_with_browsers_path(browsers_path)
|
||||||
cmd = [sys.executable, "-m", "playwright", "install"]
|
py = _find_python_executable()
|
||||||
|
|
||||||
|
cmd = [py, "-m", "playwright", "install"]
|
||||||
if with_deps:
|
if with_deps:
|
||||||
cmd.append("--with-deps")
|
cmd.append("--with-deps")
|
||||||
cmd.append("chromium")
|
cmd.append("chromium")
|
||||||
|
|
@ -65,7 +161,6 @@ def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None:
|
||||||
|
|
||||||
|
|
||||||
async def _can_launch_chromium(browsers_path: Path) -> bool:
|
async def _can_launch_chromium(browsers_path: Path) -> bool:
|
||||||
# Ensure this process uses the same path too.
|
|
||||||
os.environ["PLAYWRIGHT_BROWSERS_PATH"] = str(browsers_path)
|
os.environ["PLAYWRIGHT_BROWSERS_PATH"] = str(browsers_path)
|
||||||
try:
|
try:
|
||||||
async with async_playwright() as p:
|
async with async_playwright() as p:
|
||||||
|
|
@ -82,23 +177,36 @@ async def ensure_chromium_installed(
|
||||||
with_deps: bool = False,
|
with_deps: bool = False,
|
||||||
lock_timeout_s: float = 120.0,
|
lock_timeout_s: float = 120.0,
|
||||||
) -> EnsureResult:
|
) -> EnsureResult:
|
||||||
"""Ensure Playwright's Chromium is installed and launchable.
|
|
||||||
|
|
||||||
Strategy:
|
|
||||||
- Attempt a tiny headless launch.
|
|
||||||
- If it fails, acquire a lock and run `python -m playwright install chromium` (optionally --with-deps).
|
|
||||||
- Retry launch once.
|
|
||||||
"""
|
"""
|
||||||
bp = browsers_path or _default_browsers_path()
|
Ensure Playwright Chromium is installed and launchable.
|
||||||
bp.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
|
- Honors PLAYWRIGHT_BROWSERS_PATH if set.
|
||||||
|
- Defaults to a user cache dir (safe for AppImage readonly mounts).
|
||||||
|
- Uses embedded python to run playwright installer when sys.executable is the AppImage.
|
||||||
|
"""
|
||||||
|
explicit = browsers_path is not None
|
||||||
|
bp = browsers_path or _default_browsers_path()
|
||||||
|
|
||||||
|
# If it already works, do nothing.
|
||||||
if await _can_launch_chromium(bp):
|
if await _can_launch_chromium(bp):
|
||||||
return EnsureResult(browsers_path=bp, installed=False)
|
return EnsureResult(browsers_path=bp, installed=False)
|
||||||
|
|
||||||
|
# If we need to install and the chosen dir isn't writable, fall back (unless explicit).
|
||||||
|
if not explicit and not _is_writable_dir(bp):
|
||||||
|
bp = _user_cache_dir() / "cspresso" / "pw-browsers"
|
||||||
|
if not _is_writable_dir(bp):
|
||||||
|
bp = Path(tempfile.gettempdir()) / "cspresso" / "pw-browsers"
|
||||||
|
bp.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
if explicit and not _is_writable_dir(bp):
|
||||||
|
raise OSError(
|
||||||
|
f"Browsers path is not writable: {bp}\n"
|
||||||
|
"Choose a writable directory via --browsers-path or set PLAYWRIGHT_BROWSERS_PATH."
|
||||||
|
)
|
||||||
|
|
||||||
lock_path = bp / ".install.lock"
|
lock_path = bp / ".install.lock"
|
||||||
_acquire_install_lock(lock_path, timeout_s=lock_timeout_s)
|
_acquire_install_lock(lock_path, timeout_s=lock_timeout_s)
|
||||||
try:
|
try:
|
||||||
# Another process might have installed while we waited; check again.
|
|
||||||
if await _can_launch_chromium(bp):
|
if await _can_launch_chromium(bp):
|
||||||
return EnsureResult(browsers_path=bp, installed=False)
|
return EnsureResult(browsers_path=bp, installed=False)
|
||||||
|
|
||||||
|
|
@ -106,7 +214,7 @@ async def ensure_chromium_installed(
|
||||||
|
|
||||||
if not await _can_launch_chromium(bp):
|
if not await _can_launch_chromium(bp):
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"Playwright Chromium install completed, but Chromium still failed to launch. "
|
"Chromium install completed, but Chromium still failed to launch. "
|
||||||
"On Linux, you may need additional system dependencies."
|
"On Linux, you may need additional system dependencies."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue