From 55a815564fe14dbea37122bd51cd5c3d4dbdb402 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Fri, 2 Jan 2026 14:09:56 +1100 Subject: [PATCH] * Add `--bypass-csp` option to ignore an existing enforcing CSP to avoid it skewing results * Add `--evaluate` option to test a proposed CSP without needing to install it (best to use in conjunction with `--bypass-csp`) --- CHANGELOG.md | 5 ++ README.md | 68 +++++++++++++++- pyproject.toml | 2 +- src/cspresso/__main__.py | 3 +- src/cspresso/crawl.py | 164 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 234 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e147d0a..66ca668 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.1.2 + + * Add `--bypass-csp` option to ignore an existing enforcing CSP to avoid it skewing results + * Add `--evaluate` option to test a proposed CSP without needing to install it (best to use in conjunction with `--bypass-csp`) + ## 0.1.1 * Fix prog name diff --git a/README.md b/README.md index 71a58a9..6c4cb50 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,15 @@ The tool will: 3) crawl same-origin links up to the page limit 4) print the visited URLs and a CSP header +### Avoiding an existing enforcing CSP header during analysis + +**NOTE**: If you have an existing CSP header in place on your site, this could negatively influence +`cspresso`'s ability to evaluate what's on the page. Consider adding `--bypass-csp` to ignore the +current CSP (noting that if your site is compromised, doing so could put your machine at risk if +it evaluates malicious JavaScript/CSS, etc.). + +See also the `--evaluate` option below. + ## Where Playwright installs browsers By default, this project installs Playwright browsers into a local folder: `./.pw-browsers`. 
@@ -66,7 +75,7 @@ You can override with `--browsers-path` or by setting `PLAYWRIGHT_BROWSERS_PATH` If Chromium fails to start due to missing system libraries, try: ```bash -poetry run cspresso https://example.com --with-deps +cspresso https://example.com --with-deps ``` That runs `python -m playwright install --with-deps chromium` (may require sudo depending on your environment). @@ -78,14 +87,65 @@ Default output is a single CSP header line. For JSON: ```bash -poetry run cspresso https://example.com --json +cspresso https://example.com --json +``` + + +## Evaluate a proposed CSP without installing it + +You can use `cspresso` to evaluate a *proposed* CSP against a site. When you do this, cspresso rewrites +the website's responses to inject `Content-Security-Policy-Report-Only` headers using the CSP +you supplied to `--evaluate`. If it detects any violations, it will report them and exit with code 1, +which may be useful for CI. + +**NOTE**: It is highly recommended to use `--bypass-csp` in addition to `--evaluate`, so that your +results are not influenced by any existing CSP's enforcement. + +**Example:** + +```bash +❯ poetry run cspresso https://mig5.net --evaluate "default-src 'none'" --bypass-csp --json +{ + "csp": "base-uri 'self'; default-src 'self'; form-action 'self'; frame-ancestors 'self'; object-src 'none'; style-src 'self' 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' 'unsafe-hashes'; style-src-attr 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' 'unsafe-hashes';", + "directives": {}, + "evaluated_policy": "default-src 'none'", + "nonce_detected": false, + "notes": [ + "Detected inline attribute code (style=\"...\" and/or on*=\"...\"). Hashes for these require 'unsafe-hashes' (and modern browsers may use style-src-attr/script-src-attr)." 
+ ], + "violations": [ + { + "console": true, + "disposition": "report", + "documentURI": "https://mig5.net/", + "text": "Loading the stylesheet 'https://mig5.net/style.css' violates the following Content Security Policy directive: \"default-src 'none'\". Note that 'style-src-elem' was not explicitly set, so 'default-src' is used as a fallback. The policy is report-only, so the violation has been logged but no further action has been taken.", + "type": "info" + }, + { + "console": true, + "disposition": "report", + "documentURI": "https://mig5.net/static/mig5.asc", + "text": "Applying inline style violates the following Content Security Policy directive 'default-src 'none''. Either the 'unsafe-inline' keyword, a hash ('sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ='), or a nonce ('nonce-...') is required to enable inline execution. Note that hashes do not apply to event handlers, style attributes and javascript: navigations unless the 'unsafe-hashes' keyword is present. Note also that 'style-src' was not explicitly set, so 'default-src' is used as a fallback. The policy is report-only, so the violation has been logged but no further action has been taken.", + "type": "info" + } + ], + "visited": [ + "https://mig5.net", + "https://mig5.net/", + "https://mig5.net/static/mig5.asc" + ] +} + +cspresso on  main [!] via 🐍 v3.13.5 took 18s +❯ echo $? +1 ``` ## Full usage info ``` usage: cspresso [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval] - [--upgrade-insecure-requests] [--include-sourcemaps] [--ignore-non-html] [--json] + [--upgrade-insecure-requests] [--include-sourcemaps] [--bypass-csp] [--evaluate CSP] [--ignore-non-html] [--json] url Crawl up to N pages (same-origin) with Playwright and generate a draft CSP. 
@@ -111,6 +171,8 @@ options: --upgrade-insecure-requests Add upgrade-insecure-requests directive --include-sourcemaps Analyze JS/CSS for sourceMappingURL and add map origins to connect-src + --bypass-csp Strip any existing CSP/CSP-Report-Only response headers from HTML documents (useful for discovery or evaluation). + --evaluate CSP Inject the provided CSP string as Content-Security-Policy-Report-Only on HTML documents and exit 1 if any Report-Only violations are detected. Quote the value. --ignore-non-html Ignore non-HTML pages that get crawled (which might trigger Chromium's word-wrap hash: https://stackoverflow.com/a/69838710) --json Output JSON instead of a header line ``` diff --git a/pyproject.toml b/pyproject.toml index bd5d4e3..64ef944 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cspresso" -version = "0.1.1" +version = "0.1.2" description = "Crawl a website with a headless browser and generate a draft Content-Security-Policy (CSP)." authors = ["Miguel Jacq "] readme = "README.md" diff --git a/src/cspresso/__main__.py b/src/cspresso/__main__.py index 8f2db72..84cb2d2 100644 --- a/src/cspresso/__main__.py +++ b/src/cspresso/__main__.py @@ -1,4 +1,5 @@ +import sys from .crawl import main if __name__ == "__main__": - main() + sys.exit(main()) diff --git a/src/cspresso/crawl.py b/src/cspresso/crawl.py index 0b74b99..55992cd 100644 --- a/src/cspresso/crawl.py +++ b/src/cspresso/crawl.py @@ -48,6 +48,13 @@ def sha256_base64(s: str) -> str: return base64.b64encode(h).decode("ascii") +def normalize_csp_string(csp: str) -> str: + s = (csp or "").strip() + if not s: + return s + return s if s.endswith(";") else s + ";" + + async def collect_inline(page, *, max_attr_hashes: int = 2000): """ Collect inline