diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index 66ca668..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,14 +0,0 @@ -## 0.1.2 - - * Add `--bypass-csp` option to ignore an existing enforcing CSP to avoid it skewing results - * Add `--evaluate` option to test a proposed CSP without needing to install it (best to use in conjunction with `--bypass-csp`) - -## 0.1.1 - - * Fix prog name - * Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash) - * Fix detection of Python for AppImage if it needs to install browsers via playwright - -## 0.1.0 - - * Initial release diff --git a/README.md b/README.md index 6c4cb50..dafe7aa 100644 --- a/README.md +++ b/README.md @@ -18,14 +18,11 @@ This is meant as a **starting point**. Review and tighten the resulting policy b ## Requirements - Python 3.10+ +- Poetry - Playwright's Chromium browser binaries (auto-installed by this tool if missing) ## Install -If using my artifacts from the Releases page, you may wish to verify the GPG signatures with the key. - -It can be found at https://mig5.net/static/mig5.asc . The fingerprint is `00AE817C24A10C2540461A9C1D7CDE0234DB458D`. - ### Poetry ```bash @@ -45,7 +42,7 @@ Download the CSPresso.AppImage from the releases page, make it executable with ` ## Run ```bash -cspresso https://example.com --max-pages 10 +poetry run cspresso https://example.com --max-pages 10 ``` The tool will: @@ -54,15 +51,6 @@ The tool will: 3) crawl same-origin links up to the page limit 4) print the visited URLs and a CSP header -### Avoiding an existing enforcing CSP header during analysis - -**NOTE**: If you have an existing CSP header in place on your site, this could negatively influence -`cspresso`'s ability to evaluate what's on the page. 
Consider adding `--bypass-csp` to ignore the -current CSP (noting that if your site is compromised, doing so could put your machine at risk if -it evaluates malicious javascript/css etc). - -See also the `--evaluate` option below. - ## Where Playwright installs browsers By default, this project installs Playwright browsers into a local folder: `./.pw-browsers`. @@ -75,7 +63,7 @@ You can override with `--browsers-path` or by setting `PLAYWRIGHT_BROWSERS_PATH` If Chromium fails to start due to missing system libraries, try: ```bash -cspresso https://example.com --with-deps +poetry run cspresso https://example.com --with-deps ``` That runs `python -m playwright install --with-deps chromium` (may require sudo depending on your environment). @@ -87,66 +75,15 @@ Default output is a single CSP header line. For JSON: ```bash -cspresso https://example.com --json -``` - - -## Evaluate a proposed CSP without installing it - -You can use `cspresso` to evaluate a *proposed* CSP against a site. When you do this, cspresso converts -the response from the website to implant `Content-Security-Policy-Report-Only` headers using the CSP -you supplied to `--evaluate`. If it detects any violations, it will report them and exit with code 1, -which may be useful for CI. - -**NOTE**: It is highly recommended to use `--bypass-csp` in addition to `--evaluate`, so that your -results are not influenced by any existing CSP's enforcement. 
- -**Example:** - -```bash -❯ poetry run cspresso https://mig5.net --evaluate "default-src 'none'" --bypass-csp --json -{ - "csp": "base-uri 'self'; default-src 'self'; form-action 'self'; frame-ancestors 'self'; object-src 'none'; style-src 'self' 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' 'unsafe-hashes'; style-src-attr 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' 'unsafe-hashes';", - "directives": {}, - "evaluated_policy": "default-src 'none'", - "nonce_detected": false, - "notes": [ - "Detected inline attribute code (style=\"...\" and/or on*=\"...\"). Hashes for these require 'unsafe-hashes' (and modern browsers may use style-src-attr/script-src-attr)." - ], - "violations": [ - { - "console": true, - "disposition": "report", - "documentURI": "https://mig5.net/", - "text": "Loading the stylesheet 'https://mig5.net/style.css' violates the following Content Security Policy directive: \"default-src 'none'\". Note that 'style-src-elem' was not explicitly set, so 'default-src' is used as a fallback. The policy is report-only, so the violation has been logged but no further action has been taken.", - "type": "info" - }, - { - "console": true, - "disposition": "report", - "documentURI": "https://mig5.net/static/mig5.asc", - "text": "Applying inline style violates the following Content Security Policy directive 'default-src 'none''. Either the 'unsafe-inline' keyword, a hash ('sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ='), or a nonce ('nonce-...') is required to enable inline execution. Note that hashes do not apply to event handlers, style attributes and javascript: navigations unless the 'unsafe-hashes' keyword is present. Note also that 'style-src' was not explicitly set, so 'default-src' is used as a fallback. 
The policy is report-only, so the violation has been logged but no further action has been taken.", - "type": "info" - } - ], - "visited": [ - "https://mig5.net", - "https://mig5.net/", - "https://mig5.net/static/mig5.asc" - ] -} - -cspresso on  main [!] via 🐍 v3.13.5 took 18s -❯ echo $? -1 +poetry run cspresso https://example.com --json ``` ## Full usage info ``` -usage: cspresso [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval] - [--upgrade-insecure-requests] [--include-sourcemaps] [--bypass-csp] [--evaluate CSP] [--ignore-non-html] [--json] - url +usage: csp-crawl [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval] + [--upgrade-insecure-requests] [--include-sourcemaps] [--json] + url Crawl up to N pages (same-origin) with Playwright and generate a draft CSP. @@ -171,8 +108,5 @@ options: --upgrade-insecure-requests Add upgrade-insecure-requests directive --include-sourcemaps Analyze JS/CSS for sourceMappingURL and add map origins to connect-src - --bypass-csp Strip any existing CSP/CSP-Report-Only response headers from HTML documents (useful for discovery or evaluation). - --evaluate CSP Inject the provided CSP string as Content-Security-Policy-Report-Only on HTML documents and exit 1 if any Report-Only violations are detected. Quote the value. 
- --ignore-non-html Ignore non-HTML pages that get crawled (which might trigger Chromium's word-wrap hash: https://stackoverflow.com/a/69838710) --json Output JSON instead of a header line ``` diff --git a/pyproject.toml b/pyproject.toml index 5c29369..18e0b6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,11 @@ [tool.poetry] name = "cspresso" -version = "0.1.2" +version = "0.1.0" description = "Crawl a website with a headless browser and generate a draft Content-Security-Policy (CSP)." authors = ["Miguel Jacq "] readme = "README.md" packages = [{ include = "cspresso", from = "src" }] license = "GPL-3.0-or-later" -homepage = "https://cspresso.cafe" repository = "https://git.mig5.net/mig5/cspresso" [tool.poetry.dependencies] diff --git a/src/cspresso/__main__.py b/src/cspresso/__main__.py index 84cb2d2..8f2db72 100644 --- a/src/cspresso/__main__.py +++ b/src/cspresso/__main__.py @@ -1,5 +1,4 @@ -import sys from .crawl import main if __name__ == "__main__": - sys.exit(main()) + main() diff --git a/src/cspresso/crawl.py b/src/cspresso/crawl.py index 44c96ad..cd7df5b 100644 --- a/src/cspresso/crawl.py +++ b/src/cspresso/crawl.py @@ -48,13 +48,6 @@ def sha256_base64(s: str) -> str: return base64.b64encode(h).decode("ascii") -def normalize_csp_string(csp: str) -> str: - s = (csp or "").strip() - if not s: - return s - return s if s.endswith(";") else s + ";" - - async def collect_inline(page, *, max_attr_hashes: int = 2000): """ Collect inline