From 09aa2ded5ebd67f36e6b0f00a8a75f5b2e41413a Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 10:28:46 +1100 Subject: [PATCH 01/10] Fix prog name --- src/cspresso/crawl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cspresso/crawl.py b/src/cspresso/crawl.py index cd7df5b..e24539c 100644 --- a/src/cspresso/crawl.py +++ b/src/cspresso/crawl.py @@ -499,7 +499,7 @@ async def crawl_and_generate_csp( def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: ap = argparse.ArgumentParser( - prog="csp-crawl", + prog="cspresso", description="Crawl up to N pages (same-origin) with Playwright and generate a draft CSP.", ) ap.add_argument("url", help="Start URL (e.g. https://example.com)") From bfa16a145abc40edc3f2a9342f0132c38007fd3c Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 10:41:57 +1100 Subject: [PATCH 02/10] Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash) --- CHANGELOG.md | 8 ++++++++ README.md | 7 ++++--- src/cspresso/crawl.py | 21 ++++++++++++++++++++- 3 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..319821a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,8 @@ +## 0.1.1 + + * Fix prog name + * Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash) + +## 0.1.0 + + * Initial release diff --git a/README.md b/README.md index dafe7aa..97d066b 100644 --- a/README.md +++ b/README.md @@ -81,9 +81,9 @@ poetry run cspresso https://example.com --json ## Full usage info ``` -usage: csp-crawl [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval] - [--upgrade-insecure-requests] 
[--include-sourcemaps] [--json] - url +usage: cspresso [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval] + [--upgrade-insecure-requests] [--include-sourcemaps] [--ignore-non-html] [--json] + url Crawl up to N pages (same-origin) with Playwright and generate a draft CSP. @@ -108,5 +108,6 @@ options: --upgrade-insecure-requests Add upgrade-insecure-requests directive --include-sourcemaps Analyze JS/CSS for sourceMappingURL and add map origins to connect-src + --ignore-non-html Ignore non-HTML pages that get crawled (which might trigger Chromium's word-wrap hash: https://stackoverflow.com/a/69838710) --json Output JSON instead of a header line ``` diff --git a/src/cspresso/crawl.py b/src/cspresso/crawl.py index e24539c..6f9c8bc 100644 --- a/src/cspresso/crawl.py +++ b/src/cspresso/crawl.py @@ -307,6 +307,7 @@ async def crawl_and_generate_csp( allow_unsafe_eval: bool = False, upgrade_insecure_requests: bool = False, include_sourcemaps: bool = False, + ignore_non_html: bool = False, ) -> CrawlResult: start_url, _ = urldefrag(start_url) base_origin = origin_of(start_url) @@ -413,7 +414,18 @@ async def crawl_and_generate_csp( page.on("response", on_response) try: - await page.goto(url, wait_until="networkidle", timeout=timeout_ms) + resp = await page.goto( + url, wait_until="networkidle", timeout=timeout_ms + ) + + ct = "" + if resp is not None: + ct = (await resp.header_value("content-type") or "").lower() + + is_html = ("text/html" in ct) or ("application/xhtml+xml" in ct) + if not is_html and ignore_non_html: + # Still count as visited, but don't hash inline attrs / don't extract links. + continue # Give the page a moment to run hydration / delayed fetches. 
if settle_ms > 0: @@ -565,6 +577,12 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: default=False, help="Analyze JS/CSS for sourceMappingURL and add map origins to connect-src", ) + ap.add_argument( + "--ignore-non-html", + action="store_true", + default=False, + help="Ignore non-HTML pages that get crawled (which might trigger Chromium's word-wrap hash: https://stackoverflow.com/a/69838710)", + ) ap.add_argument( "--json", action="store_true", help="Output JSON instead of a header line" ) @@ -589,6 +607,7 @@ def main(argv: list[str] | None = None) -> None: allow_unsafe_eval=args.unsafe_eval, upgrade_insecure_requests=args.upgrade_insecure_requests, include_sourcemaps=args.include_sourcemaps, + ignore_non_html=args.ignore_non_html, ) ) From 2f2eccf053746e62da4133efa2827ba6a2226eb2 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 10:50:53 +1100 Subject: [PATCH 03/10] Fix detection of Python for AppImage if it needs to install browsers via playwright --- CHANGELOG.md | 1 + src/cspresso/ensure_playwright.py | 152 +++++++++++++++++++++++++----- 2 files changed, 131 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 319821a..e147d0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ * Fix prog name * Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash) + * Fix detection of Python for AppImage if it needs to install browsers via playwright ## 0.1.0 diff --git a/src/cspresso/ensure_playwright.py b/src/cspresso/ensure_playwright.py index e8230b7..12fd650 100644 --- a/src/cspresso/ensure_playwright.py +++ b/src/cspresso/ensure_playwright.py @@ -1,14 +1,18 @@ from __future__ import annotations import os +import shutil +import subprocess import sys +import tempfile import time -import subprocess # nosec from dataclasses import dataclass from pathlib import Path from playwright.async_api import 
async_playwright, Error as PlaywrightError +__all__ = ["EnsureResult", "ensure_chromium_installed"] + @dataclass(frozen=True) class EnsureResult: @@ -16,9 +20,93 @@ class EnsureResult: installed: bool +def _user_cache_dir() -> Path: + """ + Cross-platform cache dir without extra deps. + Linux: $XDG_CACHE_HOME or ~/.cache + macOS: ~/Library/Caches + Windows: %LOCALAPPDATA% + """ + if os.name == "nt": + base = os.environ.get("LOCALAPPDATA") or str(Path.home() / "AppData" / "Local") + return Path(base) + + if sys.platform == "darwin": + return Path.home() / "Library" / "Caches" + + return Path(os.environ.get("XDG_CACHE_HOME", str(Path.home() / ".cache"))) + + def _default_browsers_path() -> Path: - # Project-local by default. Override with PLAYWRIGHT_BROWSERS_PATH or CLI flag. - return Path(__file__).resolve().parents[2] / ".pw-browsers" + """ + If PLAYWRIGHT_BROWSERS_PATH is set, honor it (Playwright-standard). + Otherwise use a user-writable cache path (safe for AppImage/pip installs). + """ + env = os.environ.get("PLAYWRIGHT_BROWSERS_PATH") + if env and env.strip() and env.strip() != "0": + return Path(env).expanduser() + + return _user_cache_dir() / "cspresso" / "pw-browsers" + + +def _looks_like_python(path: str) -> bool: + p = Path(path) + name = p.name.lower() + return ( + p.exists() + and os.access(str(p), os.X_OK) + and ( + name == "python" or name.startswith("python3") or name.startswith("python") + ) + ) + + +def _find_python_executable() -> str: + """ + In AppImage bundles, sys.executable may be the AppImage itself. 
+ We need the embedded python binary so we can run: python -m playwright install chromium + """ + # 1) Normal venv/system case + if _looks_like_python(sys.executable): + return sys.executable + + # 2) Sometimes present + base = getattr(sys, "_base_executable", None) + if base and _looks_like_python(base): + return base + + # 3) Embedded python typically lives under sys.prefix/bin + bindir = "Scripts" if os.name == "nt" else "bin" + candidates = [ + Path(sys.prefix) + / bindir + / f"python{sys.version_info.major}.{sys.version_info.minor}", + Path(sys.prefix) / bindir / f"python{sys.version_info.major}", + Path(sys.prefix) / bindir / "python3", + Path(sys.prefix) / bindir / "python", + Path(sys.base_prefix) + / bindir + / f"python{sys.version_info.major}.{sys.version_info.minor}", + Path(sys.base_prefix) / bindir / f"python{sys.version_info.major}", + Path(sys.base_prefix) / bindir / "python3", + Path(sys.base_prefix) / bindir / "python", + ] + for c in candidates: + if _looks_like_python(str(c)): + return str(c) + + # 4) Last resort: host python on PATH + for name in ( + f"python{sys.version_info.major}.{sys.version_info.minor}", + "python3", + "python", + ): + p = shutil.which(name) + if p and _looks_like_python(p): + return p + + # Fallback (won't fix AppImage, but avoids crashing) + return sys.executable def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]: @@ -27,14 +115,20 @@ def _env_with_browsers_path(browsers_path: Path) -> dict[str, str]: return env +def _is_writable_dir(path: Path) -> bool: + try: + path.mkdir(parents=True, exist_ok=True) + probe = path / ".write_probe" + probe.write_text("x", encoding="utf-8") + probe.unlink(missing_ok=True) + return True + except OSError: + return False + + def _acquire_install_lock( lock_path: Path, timeout_s: float = 120.0, poll_s: float = 0.2 ) -> None: - """Very small cross-platform lock using atomic file creation. - Avoids concurrent Playwright installs when multiple processes start at once. 
- - Not perfect, but good enough for most CLI usage. - """ start = time.time() while True: try: @@ -49,14 +143,16 @@ def _acquire_install_lock( def _release_install_lock(lock_path: Path) -> None: try: - lock_path.unlink(missing_ok=True) # Python 3.8+ + lock_path.unlink(missing_ok=True) except Exception: - pass # nosec + pass def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None: env = _env_with_browsers_path(browsers_path) - cmd = [sys.executable, "-m", "playwright", "install"] + py = _find_python_executable() + + cmd = [py, "-m", "playwright", "install"] if with_deps: cmd.append("--with-deps") cmd.append("chromium") @@ -65,7 +161,6 @@ def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None: async def _can_launch_chromium(browsers_path: Path) -> bool: - # Ensure this process uses the same path too. os.environ["PLAYWRIGHT_BROWSERS_PATH"] = str(browsers_path) try: async with async_playwright() as p: @@ -82,23 +177,36 @@ async def ensure_chromium_installed( with_deps: bool = False, lock_timeout_s: float = 120.0, ) -> EnsureResult: - """Ensure Playwright's Chromium is installed and launchable. - - Strategy: - - Attempt a tiny headless launch. - - If it fails, acquire a lock and run `python -m playwright install chromium` (optionally --with-deps). - - Retry launch once. """ - bp = browsers_path or _default_browsers_path() - bp.mkdir(parents=True, exist_ok=True) + Ensure Playwright Chromium is installed and launchable. + - Honors PLAYWRIGHT_BROWSERS_PATH if set. + - Defaults to a user cache dir (safe for AppImage readonly mounts). + - Uses embedded python to run playwright installer when sys.executable is the AppImage. + """ + explicit = browsers_path is not None + bp = browsers_path or _default_browsers_path() + + # If it already works, do nothing. if await _can_launch_chromium(bp): return EnsureResult(browsers_path=bp, installed=False) + # If we need to install and the chosen dir isn't writable, fall back (unless explicit). 
+ if not explicit and not _is_writable_dir(bp): + bp = _user_cache_dir() / "cspresso" / "pw-browsers" + if not _is_writable_dir(bp): + bp = Path(tempfile.gettempdir()) / "cspresso" / "pw-browsers" + bp.mkdir(parents=True, exist_ok=True) + + if explicit and not _is_writable_dir(bp): + raise OSError( + f"Browsers path is not writable: {bp}\n" + "Choose a writable directory via --browsers-path or set PLAYWRIGHT_BROWSERS_PATH." + ) + lock_path = bp / ".install.lock" _acquire_install_lock(lock_path, timeout_s=lock_timeout_s) try: - # Another process might have installed while we waited; check again. if await _can_launch_chromium(bp): return EnsureResult(browsers_path=bp, installed=False) @@ -106,7 +214,7 @@ async def ensure_chromium_installed( if not await _can_launch_chromium(bp): raise RuntimeError( - "Playwright Chromium install completed, but Chromium still failed to launch. " + "Chromium install completed, but Chromium still failed to launch. " "On Linux, you may need additional system dependencies." ) From 9c9ab92a8d218bcc5807a92f58191177a22b5967 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 10:53:18 +1100 Subject: [PATCH 04/10] update README --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 97d066b..305a7fc 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,10 @@ This is meant as a **starting point**. Review and tighten the resulting policy b ## Install +If using my artifacts from the Releases page, you may wish to verify the GPG signatures with the key. + +It can be found at https://mig5.net/static/mig5.asc . The fingerprint is `00AE817C24A10C2540461A9C1D7CDE0234DB458D`. 
+ ### Poetry ```bash @@ -42,7 +46,7 @@ Download the CSPresso.AppImage from the releases page, make it executable with ` ## Run ```bash -poetry run cspresso https://example.com --max-pages 10 +cspresso https://example.com --max-pages 10 ``` The tool will: From 25d50c375be94091f1d1b4f8cf9d7bb1b7d608dd Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 10:55:08 +1100 Subject: [PATCH 05/10] nosec --- src/cspresso/ensure_playwright.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cspresso/ensure_playwright.py b/src/cspresso/ensure_playwright.py index 12fd650..1f8def6 100644 --- a/src/cspresso/ensure_playwright.py +++ b/src/cspresso/ensure_playwright.py @@ -2,7 +2,7 @@ from __future__ import annotations import os import shutil -import subprocess +import subprocess # nosec import sys import tempfile import time @@ -145,7 +145,7 @@ def _release_install_lock(lock_path: Path) -> None: try: lock_path.unlink(missing_ok=True) except Exception: - pass + pass # nosec def _install_chromium(browsers_path: Path, with_deps: bool = False) -> None: From 052187d30857bf1f2614c39c37f92824cd30d597 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 10:56:05 +1100 Subject: [PATCH 06/10] 0.1.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 18e0b6b..bd5d4e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cspresso" -version = "0.1.0" +version = "0.1.1" description = "Crawl a website with a headless browser and generate a draft Content-Security-Policy (CSP)." 
authors = ["Miguel Jacq "] readme = "README.md" From 16cd1e4b4075b6439cb6caf941439dd91e8ffe6f Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 11:04:39 +1100 Subject: [PATCH 07/10] Update README --- README.md | 1 - src/cspresso/crawl.py | 1 - 2 files changed, 2 deletions(-) diff --git a/README.md b/README.md index 305a7fc..71a58a9 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,6 @@ This is meant as a **starting point**. Review and tighten the resulting policy b ## Requirements - Python 3.10+ -- Poetry - Playwright's Chromium browser binaries (auto-installed by this tool if missing) ## Install diff --git a/src/cspresso/crawl.py b/src/cspresso/crawl.py index 6f9c8bc..0b74b99 100644 --- a/src/cspresso/crawl.py +++ b/src/cspresso/crawl.py @@ -403,7 +403,6 @@ async def crawl_and_generate_csp( directives.setdefault("connect-src", set()).add(o) except Exception: - # If you want to debug failures, print(traceback.format_exc()) return def on_response(resp): From 55a815564fe14dbea37122bd51cd5c3d4dbdb402 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 14:09:56 +1100 Subject: [PATCH 08/10] * Add `--bypass-csp` option to ignore an existing enforcing CSP to avoid it skewing results * Add `--evaluate` option to test a proposed CSP without needing to install it (best to use in conjunction with `--bypass-csp`) --- CHANGELOG.md | 5 ++ README.md | 68 +++++++++++++++- pyproject.toml | 2 +- src/cspresso/__main__.py | 3 +- src/cspresso/crawl.py | 164 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 234 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e147d0a..66ca668 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.1.2 + + * Add `--bypass-csp` option to ignore an existing enforcing CSP to avoid it skewing results + * Add `--evaluate` option to test a proposed CSP without needing to install it (best to use in conjunction with `--bypass-csp`) + ## 0.1.1 * Fix prog name diff --git a/README.md
b/README.md index 71a58a9..6c4cb50 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,15 @@ The tool will: 3) crawl same-origin links up to the page limit 4) print the visited URLs and a CSP header +### Avoiding an existing enforcing CSP header during analysis + +**NOTE**: If you have an existing CSP header in place on your site, this could negatively influence +`cspresso`'s ability to evaluate what's on the page. Consider adding `--bypass-csp` to ignore the +current CSP (noting that if your site is compromised, doing so could put your machine at risk if +it evaluates malicious JavaScript/CSS etc.). + +See also the `--evaluate` option below. + ## Where Playwright installs browsers By default, this project installs Playwright browsers into a local folder: `./.pw-browsers`. @@ -66,7 +75,7 @@ You can override with `--browsers-path` or by setting `PLAYWRIGHT_BROWSERS_PATH` If Chromium fails to start due to missing system libraries, try: ```bash -poetry run cspresso https://example.com --with-deps +cspresso https://example.com --with-deps ``` That runs `python -m playwright install --with-deps chromium` (may require sudo depending on your environment). @@ -78,14 +87,65 @@ Default output is a single CSP header line. For JSON: ```bash -poetry run cspresso https://example.com --json +cspresso https://example.com --json +``` + + +## Evaluate a proposed CSP without installing it + +You can use `cspresso` to evaluate a *proposed* CSP against a site. When you do this, cspresso rewrites +the response from the website to inject a `Content-Security-Policy-Report-Only` header containing the CSP +you supplied to `--evaluate`. If it detects any violations, it will report them and exit with code 1, +which may be useful in CI. + +**NOTE**: It is highly recommended to use `--bypass-csp` in addition to `--evaluate`, so that your +results are not influenced by any existing CSP's enforcement.
+ +**Example:** + +```bash +❯ poetry run cspresso https://mig5.net --evaluate "default-src 'none'" --bypass-csp --json +{ + "csp": "base-uri 'self'; default-src 'self'; form-action 'self'; frame-ancestors 'self'; object-src 'none'; style-src 'self' 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' 'unsafe-hashes'; style-src-attr 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' 'unsafe-hashes';", + "directives": {}, + "evaluated_policy": "default-src 'none'", + "nonce_detected": false, + "notes": [ + "Detected inline attribute code (style=\"...\" and/or on*=\"...\"). Hashes for these require 'unsafe-hashes' (and modern browsers may use style-src-attr/script-src-attr)." + ], + "violations": [ + { + "console": true, + "disposition": "report", + "documentURI": "https://mig5.net/", + "text": "Loading the stylesheet 'https://mig5.net/style.css' violates the following Content Security Policy directive: \"default-src 'none'\". Note that 'style-src-elem' was not explicitly set, so 'default-src' is used as a fallback. The policy is report-only, so the violation has been logged but no further action has been taken.", + "type": "info" + }, + { + "console": true, + "disposition": "report", + "documentURI": "https://mig5.net/static/mig5.asc", + "text": "Applying inline style violates the following Content Security Policy directive 'default-src 'none''. Either the 'unsafe-inline' keyword, a hash ('sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ='), or a nonce ('nonce-...') is required to enable inline execution. Note that hashes do not apply to event handlers, style attributes and javascript: navigations unless the 'unsafe-hashes' keyword is present. Note also that 'style-src' was not explicitly set, so 'default-src' is used as a fallback. 
The policy is report-only, so the violation has been logged but no further action has been taken.", + "type": "info" + } + ], + "visited": [ + "https://mig5.net", + "https://mig5.net/", + "https://mig5.net/static/mig5.asc" + ] +} + +cspresso on  main [!] via 🐍 v3.13.5 took 18s +❯ echo $? +1 ``` ## Full usage info ``` usage: cspresso [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval] - [--upgrade-insecure-requests] [--include-sourcemaps] [--ignore-non-html] [--json] + [--upgrade-insecure-requests] [--include-sourcemaps] [--bypass-csp] [--evaluate CSP] [--ignore-non-html] [--json] url Crawl up to N pages (same-origin) with Playwright and generate a draft CSP. @@ -111,6 +171,8 @@ options: --upgrade-insecure-requests Add upgrade-insecure-requests directive --include-sourcemaps Analyze JS/CSS for sourceMappingURL and add map origins to connect-src + --bypass-csp Strip any existing CSP/CSP-Report-Only response headers from HTML documents (useful for discovery or evaluation). + --evaluate CSP Inject the provided CSP string as Content-Security-Policy-Report-Only on HTML documents and exit 1 if any Report-Only violations are detected. Quote the value. --ignore-non-html Ignore non-HTML pages that get crawled (which might trigger Chromium's word-wrap hash: https://stackoverflow.com/a/69838710) --json Output JSON instead of a header line ``` diff --git a/pyproject.toml b/pyproject.toml index bd5d4e3..64ef944 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cspresso" -version = "0.1.1" +version = "0.1.2" description = "Crawl a website with a headless browser and generate a draft Content-Security-Policy (CSP)." 
authors = ["Miguel Jacq "] readme = "README.md" diff --git a/src/cspresso/__main__.py b/src/cspresso/__main__.py index 8f2db72..84cb2d2 100644 --- a/src/cspresso/__main__.py +++ b/src/cspresso/__main__.py @@ -1,4 +1,5 @@ +import sys from .crawl import main if __name__ == "__main__": - main() + sys.exit(main()) diff --git a/src/cspresso/crawl.py b/src/cspresso/crawl.py index 0b74b99..55992cd 100644 --- a/src/cspresso/crawl.py +++ b/src/cspresso/crawl.py @@ -48,6 +48,13 @@ def sha256_base64(s: str) -> str: return base64.b64encode(h).decode("ascii") +def normalize_csp_string(csp: str) -> str: + s = (csp or "").strip() + if not s: + return s + return s if s.endswith(";") else s + ";" + + async def collect_inline(page, *, max_attr_hashes: int = 2000): """ Collect inline