Compare commits

2 commits: 022990a337 ... 8b8a95a796

| Author | SHA1 | Date |
|---|---|---|
|  | 8b8a95a796 |  |
|  | 24f7dbea02 |  |

5 changed files with 658 additions and 6 deletions

@@ -25,10 +25,14 @@ stdout. However, it is possible to output the results to new files.

## What sort of config files can it handle?

TOML, YAML, INI and JSON style config files should be okay. There are always
TOML, YAML, INI, JSON and XML-style config files should be okay. There are always
going to be some edge cases in very complex files that are difficult to work
with, though, so you may still find that you need to tweak the results.

The tool does not do anything intelligent like detect common sections that
could practically be turned into 'for' loops in Jinja. You'd have to do those
sorts of optimisations yourself.

The goal here is really to *speed up* converting files into Ansible/Jinja2,
but not necessarily to make it perfect.

@@ -68,7 +72,7 @@ jinjaturtle php.ini \

## Full usage info

```
usage: jinjaturtle [-h] -r ROLE_NAME [-f {json,ini,toml,yaml}] [-d DEFAULTS_OUTPUT] [-t TEMPLATE_OUTPUT] config
usage: jinjaturtle [-h] -r ROLE_NAME [-f {json,ini,toml,yaml,xml}] [-d DEFAULTS_OUTPUT] [-t TEMPLATE_OUTPUT] config

Convert a config file into an Ansible defaults file and Jinja2 template.

@@ -30,7 +30,7 @@ def _build_arg_parser() -> argparse.ArgumentParser:
    ap.add_argument(
        "-f",
        "--format",
        choices=["ini", "json", "toml", "yaml"],
        choices=["ini", "json", "toml", "yaml", "xml"],
        help="Force config format instead of auto-detecting from filename.",
    )
    ap.add_argument(

@@ -2,9 +2,12 @@ from __future__ import annotations

import configparser
import json
import xml.etree.ElementTree as ET
import yaml

from collections import Counter, defaultdict
from pathlib import Path
from typing import Any, Iterable
import yaml

try:
    import tomllib  # Python 3.11+

@@ -46,7 +49,7 @@ _TurtleDumper.add_representer(None, _fallback_str_representer)

def detect_format(path: Path, explicit: str | None = None) -> str:
    """
    Determine config format (toml, yaml, json, ini-ish) from argument or filename.
    Determine config format (toml, yaml, json, ini-ish, xml) from argument or filename.
    """
    if explicit:
        return explicit

@@ -60,6 +63,8 @@ def detect_format(path: Path, explicit: str | None = None) -> str:
        return "json"
    if suffix in {".ini", ".cfg", ".conf"} or name.endswith(".ini"):
        return "ini"
    if suffix == ".xml":
        return "xml"
    # Fallback: treat as INI-ish
    return "ini"

@@ -96,9 +101,74 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]:
            parser.read_file(f)
        return fmt, parser

    if fmt == "xml":
        text = path.read_text(encoding="utf-8")
        parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=False))
        root = ET.fromstring(text, parser=parser)
        return fmt, root

    raise ValueError(f"Unsupported config format: {fmt}")


def _flatten_xml(root: ET.Element) -> list[tuple[tuple[str, ...], Any]]:
    """
    Flatten an XML tree into (path, value) pairs.

    Path conventions:
    - Root element's children are treated as top-level (root tag is *not* included).
    - Element text:
        <foo>bar</foo>            -> path ("foo",) value "bar"
        <foo attr="x">bar</foo>   -> path ("foo", "value") value "bar"
        <foo><bar>baz</bar></foo> -> ("foo", "bar") / etc.
    - Attributes:
        <server host="localhost">
          -> path ("server", "@host") value "localhost"
    - Repeated sibling elements:
        <endpoint>/a</endpoint>
        <endpoint>/b</endpoint>
          -> ("endpoint", "0") "/a"
             ("endpoint", "1") "/b"
    """
    items: list[tuple[tuple[str, ...], Any]] = []

    def walk(elem: ET.Element, path: tuple[str, ...]) -> None:
        # Attributes
        for attr_name, attr_val in elem.attrib.items():
            attr_path = path + (f"@{attr_name}",)
            items.append((attr_path, attr_val))

        # Children
        children = [c for c in list(elem) if isinstance(c.tag, str)]

        # Text content
        text = (elem.text or "").strip()
        if text:
            if not elem.attrib and not children:
                # Simple <foo>bar</foo>
                items.append((path, text))
            else:
                # Text alongside attrs/children
                items.append((path + ("value",), text))

        # Repeated siblings get an index; singletons just use the tag
        counts = Counter(child.tag for child in children)
        index_counters: dict[str, int] = defaultdict(int)

        for child in children:
            tag = child.tag
            if counts[tag] > 1:
                idx = index_counters[tag]
                index_counters[tag] += 1
                child_path = path + (tag, str(idx))
            else:
                child_path = path + (tag,)
            walk(child, child_path)

    # Treat root as a container: its children are top-level
    walk(root, ())
    return items


def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
    """
    Flatten parsed config into a list of (path_tuple, value).

@@ -141,6 +211,12 @@ def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
                else:
                    processed = raw
                items.append(((section, key), processed))

    elif fmt == "xml":
        if not isinstance(parsed, ET.Element):
            raise TypeError("XML parser result must be an Element")
        items = _flatten_xml(parsed)

    else:  # pragma: no cover
        raise ValueError(f"Unsupported format: {fmt}")

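As a quick illustration of the flattening conventions documented in `_flatten_xml` above, here is a minimal sketch (not part of the diff) that runs the new XML path end to end. The file name and contents are invented for illustration; the import path and function names are the ones the new tests use.

```python
# Illustrative only: assumes jinjaturtle is installed and exposes the same
# public helpers the tests use (parse_config, flatten_config).
import tempfile
from pathlib import Path

from jinjaturtle.core import parse_config, flatten_config

xml = """<settings>
  <server host="localhost"><port>8080</port></server>
  <endpoint>/a</endpoint>
  <endpoint>/b</endpoint>
</settings>
"""

with tempfile.TemporaryDirectory() as tmp:
    path = Path(tmp) / "settings.xml"   # hypothetical file, .xml suffix drives detection
    path.write_text(xml, encoding="utf-8")
    fmt, parsed = parse_config(path)    # fmt == "xml"
    items = flatten_config(fmt, parsed)

# Expected pairs, following the docstring's conventions:
#   (("server", "@host"), "localhost")
#   (("server", "port"), "8080")
#   (("endpoint", "0"), "/a")
#   (("endpoint", "1"), "/b")
print(items)
```
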
@@ -677,6 +753,135 @@ def _generate_json_template(role_prefix: str, data: Any) -> str:
    return json.dumps(templated, indent=2, ensure_ascii=False) + "\n"


def _split_xml_prolog(text: str) -> tuple[str, str]:
    """
    Split an XML document into (prolog, body), where prolog includes:
    - XML declaration (<?xml ...?>)
    - top-level comments
    - DOCTYPE
    The body starts at the root element.
    """
    i = 0
    n = len(text)
    prolog_parts: list[str] = []

    while i < n:
        # Preserve leading whitespace
        while i < n and text[i].isspace():
            prolog_parts.append(text[i])
            i += 1
        if i >= n:
            break

        if text.startswith("<?", i):
            end = text.find("?>", i + 2)
            if end == -1:
                break
            prolog_parts.append(text[i : end + 2])
            i = end + 2
            continue

        if text.startswith("<!--", i):
            end = text.find("-->", i + 4)
            if end == -1:
                break
            prolog_parts.append(text[i : end + 3])
            i = end + 3
            continue

        if text.startswith("<!DOCTYPE", i):
            end = text.find(">", i + 9)
            if end == -1:
                break
            prolog_parts.append(text[i : end + 1])
            i = end + 1
            continue

        if text[i] == "<":
            # Assume root element starts here
            break

        # Unexpected content: stop treating as prolog
        break

    return "".join(prolog_parts), text[i:]


def _apply_jinja_to_xml_tree(role_prefix: str, root: ET.Element) -> None:
    """
    Mutate the XML tree in-place, replacing scalar values with Jinja
    expressions based on the same paths used in _flatten_xml.
    """

    def walk(elem: ET.Element, path: tuple[str, ...]) -> None:
        # Attributes
        for attr_name in list(elem.attrib.keys()):
            attr_path = path + (f"@{attr_name}",)
            var_name = make_var_name(role_prefix, attr_path)
            elem.set(attr_name, f"{{{{ {var_name} }}}}")

        # Children
        children = [c for c in list(elem) if isinstance(c.tag, str)]

        # Text content
        text = (elem.text or "").strip()
        if text:
            if not elem.attrib and not children:
                text_path = path
            else:
                text_path = path + ("value",)
            var_name = make_var_name(role_prefix, text_path)
            elem.text = f"{{{{ {var_name} }}}}"

        # Repeated children get indexes just like in _flatten_xml
        counts = Counter(child.tag for child in children)
        index_counters: dict[str, int] = defaultdict(int)

        for child in children:
            tag = child.tag
            if counts[tag] > 1:
                idx = index_counters[tag]
                index_counters[tag] += 1
                child_path = path + (tag, str(idx))
            else:
                child_path = path + (tag,)
            walk(child, child_path)

    walk(root, ())


def _generate_xml_template_from_text(role_prefix: str, text: str) -> str:
    """
    Generate a Jinja2 template for an XML file, preserving comments and prolog.

    - Attributes become Jinja placeholders:
        <server host="localhost" />
          -> <server host="{{ prefix_server_host }}" />

    - Text nodes become placeholders:
        <port>8080</port>
          -> <port>{{ prefix_port }}</port>

      but if the element also has attributes/children, the value path
      gets a trailing "value" component, matching flattening.
    """
    prolog, body = _split_xml_prolog(text)

    # Parse with comments included so <!-- --> are preserved
    parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
    root = ET.fromstring(body, parser=parser)

    _apply_jinja_to_xml_tree(role_prefix, root)

    # Pretty indentation if available (Python 3.9+)
    indent = getattr(ET, "indent", None)
    if indent is not None:
        indent(root, space=" ")  # type: ignore[arg-type]

    xml_body = ET.tostring(root, encoding="unicode")
    return prolog + xml_body


def generate_template(
    fmt: str,
    parsed: Any,

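The template side works the same way. The following sketch (again not part of the diff) shows how the new helpers behave on a tiny document; `_split_xml_prolog` and `_generate_xml_template_from_text` are private helpers, accessed here exactly as the new tests access them, and the `myrole` prefix is just an example.

```python
# Illustrative only: mirrors what the new tests do with the private helpers.
import jinjaturtle.core as core

text = """<?xml version="1.0"?>
<!-- example -->
<server host="localhost">
  <port>8080</port>
</server>
"""

prolog, body = core._split_xml_prolog(text)
# prolog keeps the XML declaration and the top-level comment;
# body starts at the <server> root element.

tmpl = core._generate_xml_template_from_text("myrole", text)
# Expected placeholders (variable names built by make_var_name):
#   host="{{ myrole_host }}"         root attribute -> ("@host",)
#   <port>{{ myrole_port }}</port>   child text     -> ("port",)
print(tmpl)
```
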
@@ -698,11 +903,13 @@ generate_template
            return _generate_ini_template_from_text(role_prefix, original_text)
        if fmt == "yaml":
            return _generate_yaml_template_from_text(role_prefix, original_text)
        if fmt == "xml":
            return _generate_xml_template_from_text(role_prefix, original_text)
        # For JSON we ignore original_text and reconstruct from parsed structure below
        if fmt != "json":
            raise ValueError(f"Unsupported format: {fmt}")

    # Fallback: previous behaviour (no comments preserved)
    # Fallback: no comments preserved
    if fmt == "toml":
        if not isinstance(parsed, dict):
            raise TypeError("TOML parser result must be a dict")

@@ -721,4 +928,9 @@ generate_template
        if not isinstance(parsed, (dict, list)):
            raise TypeError("JSON parser result must be a dict or list")
        return _generate_json_template(role_prefix, parsed)
    if fmt == "xml":
        if not isinstance(parsed, ET.Element):
            raise TypeError("XML parser result must be an Element")
        xml_str = ET.tostring(parsed, encoding="unicode")
        return _generate_xml_template_from_text(role_prefix, xml_str)
    raise ValueError(f"Unsupported format: {fmt}")

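Putting the pieces together, the new sample file added below can be converted in a few calls. This is an illustrative sketch that mirrors the round-trip test further down (function names and the `generate_template` keyword argument are taken from that test), not a documented API guarantee.

```python
# Illustrative end-to-end run against the new sample file, following the
# roundtrip test in this change set.
from pathlib import Path

from jinjaturtle.core import (
    parse_config,
    flatten_config,
    generate_defaults_yaml,
    generate_template,
)

xml_path = Path("tests/samples/ossec.xml")

fmt, parsed = parse_config(xml_path)          # -> "xml"
flat_items = flatten_config(fmt, parsed)      # [(path_tuple, value), ...]

defaults_yaml = generate_defaults_yaml("ossec", flat_items)
template = generate_template(
    fmt, parsed, "ossec", original_text=xml_path.read_text(encoding="utf-8")
)

# defaults_yaml holds ossec_* variables (e.g. ossec_name: "web,accesslog,");
# template is the same XML with values replaced by {{ ossec_* }} placeholders,
# with comments and prolog preserved.
```
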
tests/samples/ossec.xml (new file, 225 lines)

@@ -0,0 +1,225 @@
<!-- @(#) $Id: ./etc/rules/web_rules.xml, 2013/02/28 dcid Exp $

  -
  - Official Web access rules for OSSEC.
  -
  - Copyright (C) 2009 Trend Micro Inc.
  - All rights reserved.
  -
  - This program is a free software; you can redistribute it
  - and/or modify it under the terms of the GNU General Public
  - License (version 2) as published by the FSF - Free Software
  - Foundation.
  -
  - License details: http://www.ossec.net/en/licensing.html
-->


<group name="web,accesslog,">
  <rule id="31100" level="0">
    <category>web-log</category>
    <description>Access log messages grouped.</description>
  </rule>

  <rule id="31108" level="0">
    <if_sid>31100</if_sid>
    <id_pcre2>^2|^3</id_pcre2>
    <compiled_rule>is_simple_http_request</compiled_rule>
    <description>Ignored URLs (simple queries).</description>
  </rule>

  <rule id="31101" level="5">
    <if_sid>31100</if_sid>
    <id_pcre2>^4</id_pcre2>
    <description>Web server 400 error code.</description>
  </rule>

  <rule id="31102" level="0">
    <if_sid>31101</if_sid>
    <url_pcre2>\.jpg$|\.gif$|favicon\.ico$|\.png$|robots\.txt$|\.css$|\.js$|\.jpeg$</url_pcre2>
    <compiled_rule>is_simple_http_request</compiled_rule>
    <description>Ignored extensions on 400 error codes.</description>
  </rule>

  <rule id="31103" level="6">
    <if_sid>31100,31108</if_sid>
    <url_pcre2>=select%20|select\+|insert%20|%20from%20|%20where%20|union%20|</url_pcre2>
    <url_pcre2>union\+|where\+|null,null|xp_cmdshell</url_pcre2>
    <description>SQL injection attempt.</description>
    <group>attack,sql_injection,</group>
  </rule>

  <rule id="31104" level="6">
    <if_sid>31100</if_sid>

    <!-- Attempt to do directory transversal, simple sql injections,
      - or access to the etc or bin directory (unix). -->
    <url_pcre2>%027|%00|%01|%7f|%2E%2E|%0A|%0D|\.\./\.\.|\.\.\\\.\.|echo;|</url_pcre2>
    <url_pcre2>cmd\.exe|root\.exe|_mem_bin|msadc|/winnt/|/boot\.ini|</url_pcre2>
    <url_pcre2>/x90/|default\.ida|/sumthin|nsiislog\.dll|chmod%|wget%|cd%20|</url_pcre2>
    <url_pcre2>exec%20|\.\./\.\.//|%5C\.\./%5C|\./\./\./\./|2e%2e%5c%2e|\\x5C\\x5C</url_pcre2>
    <description>Common web attack.</description>
    <group>attack,</group>
  </rule>

  <rule id="31105" level="6">
    <if_sid>31100</if_sid>
    <url_pcre2>%3Cscript|%3C%2Fscript|script>|script%3E|SRC=javascript|IMG%20|</url_pcre2>
    <url_pcre2>%20ONLOAD=|INPUT%20|iframe%20</url_pcre2>
    <description>XSS (Cross Site Scripting) attempt.</description>
    <group>attack,</group>
  </rule>

  <rule id="31106" level="6">
    <if_sid>31103, 31104, 31105</if_sid>
    <id_pcre2>^200</id_pcre2>
    <description>A web attack returned code 200 (success).</description>
    <group>attack,</group>
  </rule>

  <rule id="31110" level="6">
    <if_sid>31100</if_sid>
    <url_pcre2>\?-d|\?-s|\?-a|\?-b|\?-w</url_pcre2>
    <description>PHP CGI-bin vulnerability attempt.</description>
    <group>attack,</group>
  </rule>

  <rule id="31109" level="6">
    <if_sid>31100</if_sid>
    <url_pcre2>\+as\+varchar</url_pcre2>
    <pcre2>%2Bchar\(\d+\)%2Bchar\(\d+\)%2Bchar\(\d+\)%2Bchar\(\d+\)%2Bchar\(\d+\)%2Bchar\(\d+\)</pcre2>
    <description>MSSQL Injection attempt (/ur.php, urchin.js)</description>
    <group>attack,</group>
  </rule>


  <!-- If your site have a search engine, you may need to ignore
    - it in here.
    -->
  <rule id="31107" level="0">
    <if_sid>31103, 31104, 31105</if_sid>
    <url_pcre2>^/search\.php\?search=|^/index\.php\?searchword=</url_pcre2>
    <description>Ignored URLs for the web attacks</description>
  </rule>

  <rule id="31115" level="13" maxsize="7900">
    <if_sid>31100</if_sid>
    <description>URL too long. Higher than allowed on most </description>
    <description>browsers. Possible attack.</description>
    <group>invalid_access,</group>
  </rule>


  <!-- 500 error codes, server error
    - http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
    -->
  <rule id="31120" level="5">
    <if_sid>31100</if_sid>
    <id_pcre2>^50</id_pcre2>
    <description>Web server 500 error code (server error).</description>
  </rule>

  <rule id="31121" level="4">
    <if_sid>31120</if_sid>
    <id_pcre2>^501</id_pcre2>
    <description>Web server 501 error code (Not Implemented).</description>
  </rule>

  <rule id="31122" level="5">
    <if_sid>31120</if_sid>
    <id_pcre2>^500</id_pcre2>
    <options>alert_by_email</options>
    <description>Web server 500 error code (Internal Error).</description>
    <group>system_error,</group>
  </rule>

  <rule id="31123" level="4">
    <if_sid>31120</if_sid>
    <id_pcre2>^503</id_pcre2>
    <options>alert_by_email</options>
    <description>Web server 503 error code (Service unavailable).</description>
  </rule>


  <!-- Rules to ignore crawlers -->
  <rule id="31140" level="0">
    <if_sid>31101</if_sid>
    <compiled_rule>is_valid_crawler</compiled_rule>
    <description>Ignoring google/msn/yahoo bots.</description>
  </rule>

  <!-- Ignoring nginx 499's -->
  <rule id="31141" level="0">
    <if_sid>31101</if_sid>
    <id_pcre2>^499</id_pcre2>
    <description>Ignored 499's on nginx.</description>
  </rule>


  <rule id="31151" level="10" frequency="12" timeframe="90">
    <if_matched_sid>31101</if_matched_sid>
    <same_source_ip />
    <description>Multiple web server 400 error codes </description>
    <description>from same source ip.</description>
    <group>web_scan,recon,</group>
  </rule>

  <rule id="31152" level="10" frequency="6" timeframe="120">
    <if_matched_sid>31103</if_matched_sid>
    <same_source_ip />
    <description>Multiple SQL injection attempts from same </description>
    <description>source ip.</description>
    <group>attack,sql_injection,</group>
  </rule>

  <rule id="31153" level="10" frequency="8" timeframe="120">
    <if_matched_sid>31104</if_matched_sid>
    <same_source_ip />
    <description>Multiple common web attacks from same source ip.</description>
    <group>attack,</group>
  </rule>

  <rule id="31154" level="10" frequency="8" timeframe="120">
    <if_matched_sid>31105</if_matched_sid>
    <same_source_ip />
    <description>Multiple XSS (Cross Site Scripting) attempts </description>
    <description>from same source ip.</description>
    <group>attack,</group>
  </rule>

  <rule id="31161" level="10" frequency="12" timeframe="120">
    <if_matched_sid>31121</if_matched_sid>
    <same_source_ip />
    <description>Multiple web server 501 error code (Not Implemented).</description>
    <group>web_scan,recon,</group>
  </rule>

  <rule id="31162" level="10" frequency="12" timeframe="120">
    <if_matched_sid>31122</if_matched_sid>
    <same_source_ip />
    <description>Multiple web server 500 error code (Internal Error).</description>
    <group>system_error,</group>
  </rule>

  <rule id="31163" level="10" frequency="12" timeframe="120">
    <if_matched_sid>31123</if_matched_sid>
    <same_source_ip />
    <description>Multiple web server 503 error code (Service unavailable).</description>
    <group>web_scan,recon,</group>
  </rule>

  <rule id="31164" level="6">
    <if_sid>31100</if_sid>
    <url_pcre2>=%27|select%2B|insert%2B|%2Bfrom%2B|%2Bwhere%2B|%2Bunion%2B</url_pcre2>
    <description>SQL injection attempt.</description>
    <group>attack,sqlinjection,</group>
  </rule>

  <rule id="31165" level="6">
    <if_sid>31100</if_sid>
    <url_pcre2>%EF%BC%87|%EF%BC%87|%EF%BC%87|%2531|%u0053%u0045</url_pcre2>
    <description>SQL injection attempt.</description>
    <group>attack,sqlinjection,</group>
  </rule>

</group> <!-- Web access log -->

@@ -5,6 +5,7 @@ import configparser
import pytest
import textwrap
import yaml
import xml.etree.ElementTree as ET

import jinjaturtle.core as core
from jinjaturtle.core import (

@@ -147,12 +148,15 @@ def test_formats_match_expected_extensions():
    """
    toml_path = SAMPLES_DIR / "tom.toml"
    ini_path = SAMPLES_DIR / "php.ini"
    xml_path = SAMPLES_DIR / "ossec.xml"

    fmt_toml, _ = parse_config(toml_path)
    fmt_ini, _ = parse_config(ini_path)
    fmt_xml, _ = parse_config(xml_path)

    assert fmt_toml == "toml"
    assert fmt_ini == "ini"
    assert fmt_xml == "xml"


def test_parse_config_toml_missing_tomllib(monkeypatch):

@@ -442,3 +446,210 @@ def test_fallback_str_representer_for_unknown_type():

    # It should serialize without error, and the string form should appear.
    assert "weird-value" in dumped


def test_xml_roundtrip_ossec_web_rules():
    xml_path = SAMPLES_DIR / "ossec.xml"
    assert xml_path.is_file(), f"Missing sample XML file: {xml_path}"

    fmt, parsed = parse_config(xml_path)
    assert fmt == "xml"

    flat_items = flatten_config(fmt, parsed)
    assert flat_items, "Expected at least one flattened item from XML sample"

    defaults_yaml = generate_defaults_yaml("ossec", flat_items)
    defaults = yaml.safe_load(defaults_yaml)

    # defaults should be a non-empty dict
    assert isinstance(defaults, dict)
    assert defaults, "Expected non-empty defaults for XML sample"

    # all keys should be lowercase, start with prefix, and have no spaces
    for key in defaults:
        assert key.startswith("ossec_")
        assert key == key.lower()
        assert " " not in key

    # Root <group name="web,accesslog,"> attribute should flatten to ossec_name
    assert defaults["ossec_name"] == "web,accesslog,"

    # There should be at least one default for rule id="31100"
    id_keys = [k for k, v in defaults.items() if v == "31100"]
    assert id_keys, "Expected to find a default for rule id 31100"

    # At least one of them should be the rule *id* attribute
    assert any(
        key.startswith("ossec_rule_") and key.endswith("_id") for key in id_keys
    ), f"Expected at least one *_id var for value 31100, got: {id_keys}"

    # Template generation (preserving comments)
    original_text = xml_path.read_text(encoding="utf-8")
    template = generate_template(fmt, parsed, "ossec", original_text=original_text)
    assert isinstance(template, str)
    assert template.strip(), "Template for XML sample should not be empty"

    # Top-of-file and mid-file comments should be preserved
    assert "Official Web access rules for OSSEC." in template
    assert "Rules to ignore crawlers" in template

    # Each default variable name should appear in the template as a Jinja placeholder
    for var_name in defaults:
        assert (
            var_name in template
        ), f"Variable {var_name} not referenced in XML template"


def test_generate_xml_template_from_text_edge_cases():
    """
    Exercise XML text edge cases:
    - XML declaration and DOCTYPE in prolog
    - top-level and inner comments
    - repeated child elements (indexing)
    - attributes and text content
    """
    text = textwrap.dedent(
        """\
        <?xml version="1.0"?>
        <!-- top comment -->
        <!DOCTYPE something>
        <root attr="1">
            <!-- inner comment -->
            <child attr="2">text</child>
            <child>other</child>
        </root>
        """
    )

    tmpl = core._generate_xml_template_from_text("role", text)

    # Prolog and comments preserved
    assert "<?xml version" in tmpl
    assert "top comment" in tmpl
    assert "inner comment" in tmpl

    # Root attribute becomes a variable (path ("@attr",) -> role_attr)
    assert "role_attr" in tmpl

    # Repeated <child> elements should be indexed in both attr and text
    assert "role_child_0_attr" in tmpl
    assert "role_child_0" in tmpl
    assert "role_child_1" in tmpl


def test_generate_template_xml_type_error():
    """
    Wrong type for XML in generate_template should raise TypeError.
    """
    with pytest.raises(TypeError):
        generate_template("xml", parsed="not an element", role_prefix="role")


def test_flatten_config_xml_type_error():
    """
    Wrong type for XML in flatten_config should raise TypeError.
    """
    with pytest.raises(TypeError):
        flatten_config("xml", parsed="not-an-element")


def test_generate_template_xml_structural_fallback():
    """
    When original_text is not provided for XML, generate_template should use
    the structural fallback path (ET.tostring + _generate_xml_template_from_text).
    """
    xml_text = textwrap.dedent(
        """\
        <root attr="1">
            <child>2</child>
            <node attr="x">text</node>
        </root>
        """
    )
    parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=False))
    root = ET.fromstring(xml_text, parser=parser)

    tmpl = generate_template("xml", parsed=root, role_prefix="role")

    # Root attribute path ("@attr",) -> role_attr
    assert "role_attr" in tmpl

    # Simple child element text ("child",) -> role_child
    assert "role_child" in tmpl

    # Element with both attr and text:
    # - attr -> ("node", "@attr") -> role_node_attr
    # - text -> ("node", "value") -> role_node_value
    assert "role_node_attr" in tmpl
    assert "role_node_value" in tmpl


def test_split_xml_prolog_only_whitespace():
    """
    Whitespace-only input: prolog is the whitespace, body is empty.
    Exercises the 'if i >= n: break' path.
    """
    text = " \n\t"
    prolog, body = core._split_xml_prolog(text)
    assert prolog == text
    assert body == ""


def test_split_xml_prolog_unterminated_declaration():
    """
    Unterminated XML declaration should hit the 'end == -1' branch and
    treat the whole string as body.
    """
    text = "<?xml version='1.0'"
    prolog, body = core._split_xml_prolog(text)
    assert prolog == ""
    assert body == text


def test_split_xml_prolog_unterminated_comment():
    """
    Unterminated comment should likewise hit its 'end == -1' branch.
    """
    text = "<!-- no end"
    prolog, body = core._split_xml_prolog(text)
    assert prolog == ""
    assert body == text


def test_split_xml_prolog_unterminated_doctype():
    """
    Unterminated DOCTYPE should hit the DOCTYPE 'end == -1' branch.
    """
    text = "<!DOCTYPE foo"
    prolog, body = core._split_xml_prolog(text)
    assert prolog == ""
    assert body == text


def test_split_xml_prolog_unexpected_content():
    """
    Non-XML content at the start should trigger the 'unexpected content'
    break and be returned entirely as body.
    """
    text = "garbage<root/>"
    prolog, body = core._split_xml_prolog(text)
    assert prolog == ""
    assert body == text


def test_flatten_xml_text_with_attributes_uses_value_suffix():
    """
    When an element has both attributes and text, _flatten_xml should store
    the text at path + ('value',), not just path.
    """
    xml_text = "<root><node attr='x'>text</node></root>"
    parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=False))
    root = ET.fromstring(xml_text, parser=parser)

    items = flatten_config("xml", root)

    # Attribute path: ("node", "@attr") -> "x"
    assert (("node", "@attr"), "x") in items

    # Text-with-attrs path: ("node", "value") -> "text"
    assert (("node", "value"), "text") in items