Compare commits
No commits in common. "main" and "0.1.1" have entirely different histories.
5 changed files with 10 additions and 235 deletions
|
|
@ -1,8 +1,3 @@
|
|||
## 0.1.2
|
||||
|
||||
* Add `--bypass-csp` option to ignore an existing enforcing CSP to avoid it skewing results
|
||||
* Add `--evaluate` option to test a proposed CSP without needing to install it (best to use in conjunction with `--bypass-csp`)
|
||||
|
||||
## 0.1.1
|
||||
|
||||
* Fix prog name
|
||||
|
|
|
|||
69
README.md
69
README.md
|
|
@ -18,6 +18,7 @@ This is meant as a **starting point**. Review and tighten the resulting policy b
|
|||
## Requirements
|
||||
|
||||
- Python 3.10+
|
||||
- Poetry
|
||||
- Playwright's Chromium browser binaries (auto-installed by this tool if missing)
|
||||
|
||||
## Install
|
||||
|
|
@ -54,15 +55,6 @@ The tool will:
|
|||
3) crawl same-origin links up to the page limit
|
||||
4) print the visited URLs and a CSP header
|
||||
|
||||
### Avoiding an existing enforcing CSP header during analysis
|
||||
|
||||
**NOTE**: If you have an existing CSP header in place on your site, this could negatively influence
|
||||
`cspresso`'s ability to evaluate what's on the page. Consider adding `--bypass-csp` to ignore the
|
||||
current CSP (noting that if your site is compromised, doing so could put your machine at risk if
|
||||
it evaluates malicious JavaScript/CSS, etc.).
|
||||
|
||||
See also the `--evaluate` option below.
|
||||
|
||||
## Where Playwright installs browsers
|
||||
|
||||
By default, this project installs Playwright browsers into a local folder: `./.pw-browsers`.
|
||||
|
|
@ -75,7 +67,7 @@ You can override with `--browsers-path` or by setting `PLAYWRIGHT_BROWSERS_PATH`
|
|||
If Chromium fails to start due to missing system libraries, try:
|
||||
|
||||
```bash
|
||||
cspresso https://example.com --with-deps
|
||||
poetry run cspresso https://example.com --with-deps
|
||||
```
|
||||
|
||||
That runs `python -m playwright install --with-deps chromium` (may require sudo depending on your environment).
|
||||
|
|
@ -87,65 +79,14 @@ Default output is a single CSP header line.
|
|||
For JSON:
|
||||
|
||||
```bash
|
||||
cspresso https://example.com --json
|
||||
```
|
||||
|
||||
|
||||
## Evaluate a proposed CSP without installing it
|
||||
|
||||
You can use `cspresso` to evaluate a *proposed* CSP against a site. When you do this, `cspresso` rewrites
|
||||
the document responses from the website to inject a `Content-Security-Policy-Report-Only` header containing the CSP
|
||||
you supplied to `--evaluate`. If it detects any violations, it will report them and exit with code 1,
|
||||
which may be useful in automated checks such as CI.
|
||||
|
||||
**NOTE**: It is highly recommended to use `--bypass-csp` in addition to `--evaluate`, so that your
|
||||
results are not influenced by any existing CSP's enforcement.
|
||||
|
||||
**Example:**
|
||||
|
||||
```bash
|
||||
❯ poetry run cspresso https://mig5.net --evaluate "default-src 'none'" --bypass-csp --json
|
||||
{
|
||||
"csp": "base-uri 'self'; default-src 'self'; form-action 'self'; frame-ancestors 'self'; object-src 'none'; style-src 'self' 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' 'unsafe-hashes'; style-src-attr 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' 'unsafe-hashes';",
|
||||
"directives": {},
|
||||
"evaluated_policy": "default-src 'none'",
|
||||
"nonce_detected": false,
|
||||
"notes": [
|
||||
"Detected inline attribute code (style=\"...\" and/or on*=\"...\"). Hashes for these require 'unsafe-hashes' (and modern browsers may use style-src-attr/script-src-attr)."
|
||||
],
|
||||
"violations": [
|
||||
{
|
||||
"console": true,
|
||||
"disposition": "report",
|
||||
"documentURI": "https://mig5.net/",
|
||||
"text": "Loading the stylesheet 'https://mig5.net/style.css' violates the following Content Security Policy directive: \"default-src 'none'\". Note that 'style-src-elem' was not explicitly set, so 'default-src' is used as a fallback. The policy is report-only, so the violation has been logged but no further action has been taken.",
|
||||
"type": "info"
|
||||
},
|
||||
{
|
||||
"console": true,
|
||||
"disposition": "report",
|
||||
"documentURI": "https://mig5.net/static/mig5.asc",
|
||||
"text": "Applying inline style violates the following Content Security Policy directive 'default-src 'none''. Either the 'unsafe-inline' keyword, a hash ('sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ='), or a nonce ('nonce-...') is required to enable inline execution. Note that hashes do not apply to event handlers, style attributes and javascript: navigations unless the 'unsafe-hashes' keyword is present. Note also that 'style-src' was not explicitly set, so 'default-src' is used as a fallback. The policy is report-only, so the violation has been logged but no further action has been taken.",
|
||||
"type": "info"
|
||||
}
|
||||
],
|
||||
"visited": [
|
||||
"https://mig5.net",
|
||||
"https://mig5.net/",
|
||||
"https://mig5.net/static/mig5.asc"
|
||||
]
|
||||
}
|
||||
|
||||
cspresso on main [!] via 🐍 v3.13.5 took 18s
|
||||
❯ echo $?
|
||||
1
|
||||
poetry run cspresso https://example.com --json
|
||||
```
|
||||
|
||||
## Full usage info
|
||||
|
||||
```
|
||||
usage: cspresso [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval]
|
||||
[--upgrade-insecure-requests] [--include-sourcemaps] [--bypass-csp] [--evaluate CSP] [--ignore-non-html] [--json]
|
||||
[--upgrade-insecure-requests] [--include-sourcemaps] [--ignore-non-html] [--json]
|
||||
url
|
||||
|
||||
Crawl up to N pages (same-origin) with Playwright and generate a draft CSP.
|
||||
|
|
@ -171,8 +112,6 @@ options:
|
|||
--upgrade-insecure-requests
|
||||
Add upgrade-insecure-requests directive
|
||||
--include-sourcemaps Analyze JS/CSS for sourceMappingURL and add map origins to connect-src
|
||||
--bypass-csp Strip any existing CSP/CSP-Report-Only response headers from HTML documents (useful for discovery or evaluation).
|
||||
--evaluate CSP Inject the provided CSP string as Content-Security-Policy-Report-Only on HTML documents and exit 1 if any Report-Only violations are detected. Quote the value.
|
||||
--ignore-non-html Ignore non-HTML pages that get crawled (which might trigger Chromium's word-wrap hash: https://stackoverflow.com/a/69838710)
|
||||
--json Output JSON instead of a header line
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,12 +1,11 @@
|
|||
[tool.poetry]
|
||||
name = "cspresso"
|
||||
version = "0.1.2"
|
||||
version = "0.1.1"
|
||||
description = "Crawl a website with a headless browser and generate a draft Content-Security-Policy (CSP)."
|
||||
authors = ["Miguel Jacq <mig@mig5.net>"]
|
||||
readme = "README.md"
|
||||
packages = [{ include = "cspresso", from = "src" }]
|
||||
license = "GPL-3.0-or-later"
|
||||
homepage = "https://cspresso.cafe"
|
||||
repository = "https://git.mig5.net/mig5/cspresso"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import sys
|
||||
from .crawl import main
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
main()
|
||||
|
|
|
|||
|
|
@ -48,13 +48,6 @@ def sha256_base64(s: str) -> str:
|
|||
return base64.b64encode(h).decode("ascii")
|
||||
|
||||
|
||||
def normalize_csp_string(csp: str) -> str:
    """Return *csp* as a single-line policy string terminated by ``;``.

    Leading and trailing whitespace is removed, and every internal run of
    whitespace (including newlines from a multi-line shell argument) is
    collapsed to a single space. The result is injected as an HTTP response
    header value, which must not contain line breaks; CSP tokens are
    whitespace-separated, so collapsing does not change the policy's meaning.

    An empty, whitespace-only, or ``None`` input yields ``""`` (no trailing
    semicolon is added to an empty policy).
    """
    # str.split() with no separator drops surrounding whitespace and splits
    # on any whitespace run, so re-joining gives the collapsed form.
    policy = " ".join((csp or "").split())
    if not policy:
        return policy
    return policy if policy.endswith(";") else policy + ";"
|
||||
|
||||
|
||||
async def collect_inline(page, *, max_attr_hashes: int = 2000):
|
||||
"""
|
||||
Collect inline <script> (no src), <style> blocks, plus:
|
||||
|
|
@ -298,7 +291,6 @@ class CrawlResult:
|
|||
nonce_detected: bool
|
||||
directives: dict[str, list[str]]
|
||||
notes: list[str]
|
||||
violations: list[dict]
|
||||
|
||||
|
||||
async def crawl_and_generate_csp(
|
||||
|
|
@ -316,8 +308,6 @@ async def crawl_and_generate_csp(
|
|||
upgrade_insecure_requests: bool = False,
|
||||
include_sourcemaps: bool = False,
|
||||
ignore_non_html: bool = False,
|
||||
bypass_csp: bool = False,
|
||||
evaluate: str | None = None, # CSP string to inject as Report-Only and evaluate
|
||||
) -> CrawlResult:
|
||||
start_url, _ = urldefrag(start_url)
|
||||
base_origin = origin_of(start_url)
|
||||
|
|
@ -345,48 +335,10 @@ async def crawl_and_generate_csp(
|
|||
allow_data_font = False
|
||||
notes: list[str] = []
|
||||
|
||||
evaluate_policy = normalize_csp_string(evaluate) if evaluate else None
|
||||
# Captured CSP violations (Report-Only) when --evaluate is used.
|
||||
violations: list[dict] = []
|
||||
|
||||
async with async_playwright() as p:
|
||||
browser = await p.chromium.launch(headless=headless)
|
||||
context = await browser.new_context()
|
||||
|
||||
# Optionally strip any existing CSP headers, and/or inject a Report-Only CSP for evaluation.
|
||||
# NOTE: This operates on *document response headers* only.
|
||||
if bypass_csp or evaluate_policy:
|
||||
|
||||
async def _route_handler(route, request):
|
||||
try:
|
||||
if request.resource_type != "document":
|
||||
return await route.continue_()
|
||||
|
||||
resp = await route.fetch()
|
||||
hdrs = {k.lower(): v for k, v in (resp.headers or {}).items()}
|
||||
|
||||
if bypass_csp:
|
||||
hdrs.pop("content-security-policy", None)
|
||||
hdrs.pop("content-security-policy-report-only", None)
|
||||
|
||||
if evaluate_policy:
|
||||
hdrs["content-security-policy-report-only"] = evaluate_policy
|
||||
|
||||
try:
|
||||
return await route.fulfill(response=resp, headers=hdrs)
|
||||
except TypeError:
|
||||
body = await resp.body()
|
||||
return await route.fulfill(
|
||||
status=resp.status, headers=hdrs, body=body
|
||||
)
|
||||
except Exception:
|
||||
try:
|
||||
return await route.continue_()
|
||||
except Exception:
|
||||
return
|
||||
|
||||
await context.route("**/*", _route_handler)
|
||||
|
||||
def on_request(req):
|
||||
"""
|
||||
Playwright sometimes classifies "connect-like" activity as resource_type == "other".
|
||||
|
|
@ -428,59 +380,6 @@ async def crawl_and_generate_csp(
|
|||
|
||||
page = await context.new_page()
|
||||
|
||||
# If evaluating a candidate CSP, capture Report-Only violations.
|
||||
if evaluate_policy:
|
||||
|
||||
def _record_violation(_source, payload):
|
||||
try:
|
||||
if (
|
||||
isinstance(payload, dict)
|
||||
and payload.get("disposition") == "report"
|
||||
):
|
||||
violations.append(payload)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
try:
|
||||
await page.expose_binding("__cspresso_violation", _record_violation)
|
||||
await page.add_init_script(
|
||||
"() => { try { window.addEventListener('securitypolicyviolation', (e) => { "
|
||||
"const payload = {documentURI:e.documentURI, referrer:e.referrer, blockedURI:e.blockedURI, "
|
||||
"violatedDirective:e.violatedDirective, effectiveDirective:e.effectiveDirective, originalPolicy:e.originalPolicy, "
|
||||
"disposition:e.disposition, sourceFile:e.sourceFile, lineNumber:e.lineNumber, columnNumber:e.columnNumber, "
|
||||
"statusCode:e.statusCode, sample:e.sample}; "
|
||||
"if (typeof window.__cspresso_violation === 'function') { window.__cspresso_violation(payload); }"
|
||||
"}, true); } catch(_){} }"
|
||||
)
|
||||
except Exception:
|
||||
pass # nosec
|
||||
|
||||
def _on_console(msg):
|
||||
try:
|
||||
t = msg.text or ""
|
||||
tl = t.lower()
|
||||
if (
|
||||
"content security policy" in tl
|
||||
or "content-security-policy" in tl
|
||||
) and (
|
||||
"would violate" in tl
|
||||
or "report-only" in tl
|
||||
or "report only" in tl
|
||||
):
|
||||
violations.append(
|
||||
{
|
||||
"console": True,
|
||||
"type": msg.type,
|
||||
"text": t,
|
||||
"documentURI": page.url,
|
||||
"disposition": "report",
|
||||
}
|
||||
)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
page.on("console", _on_console)
|
||||
|
||||
pending: set[asyncio.Task] = set()
|
||||
|
||||
if include_sourcemaps:
|
||||
|
|
@ -504,6 +403,7 @@ async def crawl_and_generate_csp(
|
|||
directives.setdefault("connect-src", set()).add(o)
|
||||
|
||||
except Exception:
|
||||
# If you want to debug failures, print(traceback.format_exc())
|
||||
return
|
||||
|
||||
def on_response(resp):
|
||||
|
|
@ -600,35 +500,12 @@ async def crawl_and_generate_csp(
|
|||
)
|
||||
|
||||
directives_out = {k: sorted(v) for k, v in directives.items() if v}
|
||||
|
||||
# De-duplicate violations (same doc+directive+blocked URI) to keep output stable.
|
||||
if violations:
|
||||
seen = set()
|
||||
uniq: list[dict] = []
|
||||
for v in violations:
|
||||
if not isinstance(v, dict):
|
||||
continue
|
||||
key = (
|
||||
v.get("documentURI"),
|
||||
v.get("effectiveDirective") or v.get("violatedDirective"),
|
||||
v.get("blockedURI"),
|
||||
v.get("sourceFile"),
|
||||
v.get("lineNumber"),
|
||||
v.get("columnNumber"),
|
||||
)
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
uniq.append(v)
|
||||
violations = uniq
|
||||
|
||||
return CrawlResult(
|
||||
visited=sorted(visited),
|
||||
csp=csp,
|
||||
nonce_detected=nonce_detected,
|
||||
directives=directives_out,
|
||||
notes=notes,
|
||||
violations=violations,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -700,18 +577,6 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
|
|||
default=False,
|
||||
help="Analyze JS/CSS for sourceMappingURL and add map origins to connect-src",
|
||||
)
|
||||
|
||||
ap.add_argument(
|
||||
"--bypass-csp",
|
||||
action="store_true",
|
||||
help="Strip any existing CSP/CSP-Report-Only response headers from HTML documents (useful for discovery or evaluation).",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--evaluate",
|
||||
metavar="CSP",
|
||||
default=None,
|
||||
help="Inject the provided CSP string as Content-Security-Policy-Report-Only on HTML documents and exit 1 if any Report-Only violations are detected. Quote the value.",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--ignore-non-html",
|
||||
action="store_true",
|
||||
|
|
@ -724,7 +589,7 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
|
|||
return ap.parse_args(argv)
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
def main(argv: list[str] | None = None) -> None:
|
||||
args = _parse_args(argv)
|
||||
browsers_path = Path(args.browsers_path).resolve() if args.browsers_path else None
|
||||
|
||||
|
|
@ -742,8 +607,6 @@ def main(argv: list[str] | None = None) -> int:
|
|||
allow_unsafe_eval=args.unsafe_eval,
|
||||
upgrade_insecure_requests=args.upgrade_insecure_requests,
|
||||
include_sourcemaps=args.include_sourcemaps,
|
||||
bypass_csp=args.bypass_csp,
|
||||
evaluate=args.evaluate,
|
||||
ignore_non_html=args.ignore_non_html,
|
||||
)
|
||||
)
|
||||
|
|
@ -757,14 +620,12 @@ def main(argv: list[str] | None = None) -> int:
|
|||
"csp": result.csp,
|
||||
"directives": result.directives,
|
||||
"notes": result.notes,
|
||||
"violations": result.violations,
|
||||
"evaluated_policy": args.evaluate,
|
||||
},
|
||||
indent=2,
|
||||
sort_keys=True,
|
||||
)
|
||||
)
|
||||
return 1 if (args.evaluate and result.violations) else 0
|
||||
return
|
||||
|
||||
# Default: print header + visited pages as comments.
|
||||
for u in result.visited:
|
||||
|
|
@ -773,24 +634,6 @@ def main(argv: list[str] | None = None) -> int:
|
|||
print(f"# NOTE: {n}")
|
||||
print("Content-Security-Policy:", result.csp)
|
||||
|
||||
if args.evaluate:
|
||||
if result.violations:
|
||||
print("# CSP Report-Only violations detected:")
|
||||
for v in result.violations:
|
||||
try:
|
||||
blocked = v.get("blockedURI")
|
||||
eff = v.get("effectiveDirective") or v.get("violatedDirective")
|
||||
doc = v.get("documentURI")
|
||||
print(f"# - {eff} blocked={blocked} on {doc}")
|
||||
except Exception:
|
||||
print(f"# - {v}")
|
||||
return 1
|
||||
return 0
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
||||
sys.exit(main())
|
||||
main()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue