Compare commits

..

No commits in common. "bfa16a145abc40edc3f2a9342f0132c38007fd3c" and "7d6c2257d52c8166ce26e184211970c3020cd5be" have entirely different histories.

3 changed files with 5 additions and 33 deletions

View file

@@ -1,8 +0,0 @@
## 0.1.1
* Fix prog name
* Add --ignore-non-html option to skip pages that weren't HTML (which might trigger Chromium's 'sha256-4Su6mBWzEIFnH4pAGMOuaeBrstwJN4Z3pq/s1Kn4/KQ=' hash)
## 0.1.0
* Initial release

View file

@@ -81,8 +81,8 @@ poetry run cspresso https://example.com --json
## Full usage info
```
usage: cspresso [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval]
[--upgrade-insecure-requests] [--include-sourcemaps] [--ignore-non-html] [--json]
usage: csp-crawl [-h] [--max-pages MAX_PAGES] [--timeout-ms TIMEOUT_MS] [--settle-ms SETTLE_MS] [--headed] [--no-install] [--with-deps] [--browsers-path BROWSERS_PATH] [--allow-blob] [--unsafe-eval]
[--upgrade-insecure-requests] [--include-sourcemaps] [--json]
url
Crawl up to N pages (same-origin) with Playwright and generate a draft CSP.
@@ -108,6 +108,5 @@ options:
--upgrade-insecure-requests
Add upgrade-insecure-requests directive
--include-sourcemaps Analyze JS/CSS for sourceMappingURL and add map origins to connect-src
--ignore-non-html Ignore non-HTML pages that get crawled (which might trigger Chromium's word-wrap hash: https://stackoverflow.com/a/69838710)
--json Output JSON instead of a header line
```

View file

@@ -307,7 +307,6 @@ async def crawl_and_generate_csp(
allow_unsafe_eval: bool = False,
upgrade_insecure_requests: bool = False,
include_sourcemaps: bool = False,
ignore_non_html: bool = False,
) -> CrawlResult:
start_url, _ = urldefrag(start_url)
base_origin = origin_of(start_url)
@@ -414,18 +413,7 @@ async def crawl_and_generate_csp(
page.on("response", on_response)
try:
resp = await page.goto(
url, wait_until="networkidle", timeout=timeout_ms
)
ct = ""
if resp is not None:
ct = (await resp.header_value("content-type") or "").lower()
is_html = ("text/html" in ct) or ("application/xhtml+xml" in ct)
if not is_html and ignore_non_html:
# Still count as visited, but don't hash inline attrs / don't extract links.
continue
await page.goto(url, wait_until="networkidle", timeout=timeout_ms)
# Give the page a moment to run hydration / delayed fetches.
if settle_ms > 0:
@@ -511,7 +499,7 @@ async def crawl_and_generate_csp(
def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
ap = argparse.ArgumentParser(
prog="cspresso",
prog="csp-crawl",
description="Crawl up to N pages (same-origin) with Playwright and generate a draft CSP.",
)
ap.add_argument("url", help="Start URL (e.g. https://example.com)")
@@ -577,12 +565,6 @@ def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
default=False,
help="Analyze JS/CSS for sourceMappingURL and add map origins to connect-src",
)
ap.add_argument(
"--ignore-non-html",
action="store_true",
default=False,
help="Ignore non-HTML pages that get crawled (which might trigger Chromium's word-wrap hash: https://stackoverflow.com/a/69838710)",
)
ap.add_argument(
"--json", action="store_true", help="Output JSON instead of a header line"
)
@@ -607,7 +589,6 @@ def main(argv: list[str] | None = None) -> None:
allow_unsafe_eval=args.unsafe_eval,
upgrade_insecure_requests=args.upgrade_insecure_requests,
include_sourcemaps=args.include_sourcemaps,
ignore_non_html=args.ignore_non_html,
)
)