diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..66ca668 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,14 @@ +## 0.1.2 + + * Add `--bypass-csp` option to ignore an existing enforcing CSP to avoid it skewing results + * Add `--evaluate` option to test a proposed CSP without needing to install it (best to use in conjunction with `--bypass-csp`) + +## 0.1.1 + + * Fix prog name + * Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash) + * Fix detection of Python for AppImage if it needs to install browsers via playwright + +## 0.1.0 + + * Initial release diff --git a/README.md b/README.md index dafe7aa..6c4cb50 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,14 @@ This is meant as a **starting point**. Review and tighten the resulting policy b ## Requirements - Python 3.10+ -- Poetry - Playwright's Chromium browser binaries (auto-installed by this tool if missing) ## Install +If using my artifacts from the Releases page, you may wish to verify the GPG signatures with the key. + +It can be found at https://mig5.net/static/mig5.asc . The fingerprint is `00AE817C24A10C2540461A9C1D7CDE0234DB458D`. + ### Poetry ```bash @@ -42,7 +45,7 @@ Download the CSPresso.AppImage from the releases page, make it executable with ` ## Run ```bash -poetry run cspresso https://example.com --max-pages 10 +cspresso https://example.com --max-pages 10 ``` The tool will: @@ -51,6 +54,15 @@ The tool will: 3) crawl same-origin links up to the page limit 4) print the visited URLs and a CSP header +### Avoiding an existing enforcing CSP header during analysis + +**NOTE**: If you have an existing CSP header in place on your site, this could negatively influence +`cspresso`'s ability to evaluate what's on the page. 
Consider adding `--bypass-csp` to ignore the +current CSP (noting that if your site is compromised, doing so could put your machine at risk if +it evaluates malicious JavaScript/CSS, etc.). + +See also the `--evaluate` option below. + ## Where Playwright installs browsers By default, this project installs Playwright browsers into a local folder: `./.pw-browsers`. @@ -63,7 +75,7 @@ You can override with `--browsers-path` or by setting `PLAYWRIGHT_BROWSERS_PATH` If Chromium fails to start due to missing system libraries, try: ```bash -poetry run cspresso https://example.com --with-deps +cspresso https://example.com --with-deps ``` That runs `python -m playwright install --with-deps chromium` (may require sudo depending on your environment). @@ -75,15 +87,66 @@ Default output is a single CSP header line. For JSON: ```bash -poetry run cspresso https://example.com --json +cspresso https://example.com --json +``` + + +## Evaluate a proposed CSP without installing it + +You can use `cspresso` to evaluate a *proposed* CSP against a site. When you do this, cspresso rewrites +the response from the website to inject `Content-Security-Policy-Report-Only` headers using the CSP +you supplied to `--evaluate`. If it detects any violations, it will report them and exit with code 1, +which may be useful for CI. + +**NOTE**: It is highly recommended to use `--bypass-csp` in addition to `--evaluate`, so that your +results are not influenced by any existing CSP's enforcement. 
+ +**Example:** + +```bash +❯ poetry run cspresso https://mig5.net --evaluate "default-src 'none'" --bypass-csp --json +{ + "csp": "base-uri 'self'; default-src 'self'; form-action 'self'; frame-ancestors 'self'; object-src 'none'; style-src 'self' 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' 'unsafe-hashes'; style-src-attr 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' 'unsafe-hashes';", + "directives": {}, + "evaluated_policy": "default-src 'none'", + "nonce_detected": false, + "notes": [ + "Detected inline attribute code (style=\"...\" and/or on*=\"...\"). Hashes for these require 'unsafe-hashes' (and modern browsers may use style-src-attr/script-src-attr)." + ], + "violations": [ + { + "console": true, + "disposition": "report", + "documentURI": "https://mig5.net/", + "text": "Loading the stylesheet 'https://mig5.net/style.css' violates the following Content Security Policy directive: \"default-src 'none'\". Note that 'style-src-elem' was not explicitly set, so 'default-src' is used as a fallback. The policy is report-only, so the violation has been logged but no further action has been taken.", + "type": "info" + }, + { + "console": true, + "disposition": "report", + "documentURI": "https://mig5.net/static/mig5.asc", + "text": "Applying inline style violates the following Content Security Policy directive 'default-src 'none''. Either the 'unsafe-inline' keyword, a hash ('sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ='), or a nonce ('nonce-...') is required to enable inline execution. Note that hashes do not apply to event handlers, style attributes and javascript: navigations unless the 'unsafe-hashes' keyword is present. Note also that 'style-src' was not explicitly set, so 'default-src' is used as a fallback. 
The policy is report-only, so the violation has been logged but no further action has been taken.", + "type": "info" + } + ], + "visited": [ + "https://mig5.net", + "https://mig5.net/", + "https://mig5.net/static/mig5.asc" + ] +} + +cspresso on  main [!] via 🐍 v3.13.5 took 18s +❯ echo $? +1 ``` ## Full usage info ``` -usage: csp-crawl [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval] - [--upgrade-insecure-requests] [--include-sourcemaps] [--json] - url +usage: cspresso [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval] + [--upgrade-insecure-requests] [--include-sourcemaps] [--bypass-csp] [--evaluate CSP] [--ignore-non-html] [--json] + url Crawl up to N pages (same-origin) with Playwright and generate a draft CSP. @@ -108,5 +171,8 @@ options: --upgrade-insecure-requests Add upgrade-insecure-requests directive --include-sourcemaps Analyze JS/CSS for sourceMappingURL and add map origins to connect-src + --bypass-csp Strip any existing CSP/CSP-Report-Only response headers from HTML documents (useful for discovery or evaluation). + --evaluate CSP Inject the provided CSP string as Content-Security-Policy-Report-Only on HTML documents and exit 1 if any Report-Only violations are detected. Quote the value. + --ignore-non-html Ignore non-HTML pages that get crawled (which might trigger Chromium's word-wrap hash: https://stackoverflow.com/a/69838710) --json Output JSON instead of a header line ``` diff --git a/pyproject.toml b/pyproject.toml index 18e0b6b..5c29369 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,12 @@ [tool.poetry] name = "cspresso" -version = "0.1.0" +version = "0.1.2" description = "Crawl a website with a headless browser and generate a draft Content-Security-Policy (CSP)." 
authors = ["Miguel Jacq "] readme = "README.md" packages = [{ include = "cspresso", from = "src" }] license = "GPL-3.0-or-later" +homepage = "https://cspresso.cafe" repository = "https://git.mig5.net/mig5/cspresso" [tool.poetry.dependencies] diff --git a/src/cspresso/__main__.py b/src/cspresso/__main__.py index 8f2db72..84cb2d2 100644 --- a/src/cspresso/__main__.py +++ b/src/cspresso/__main__.py @@ -1,4 +1,5 @@ +import sys from .crawl import main if __name__ == "__main__": - main() + sys.exit(main()) diff --git a/src/cspresso/crawl.py b/src/cspresso/crawl.py index cd7df5b..44c96ad 100644 --- a/src/cspresso/crawl.py +++ b/src/cspresso/crawl.py @@ -48,6 +48,13 @@ def sha256_base64(s: str) -> str: return base64.b64encode(h).decode("ascii") +def normalize_csp_string(csp: str) -> str: + s = (csp or "").strip() + if not s: + return s + return s if s.endswith(";") else s + ";" + + async def collect_inline(page, *, max_attr_hashes: int = 2000): """ Collect inline