Add support for XML

This commit is contained in:
Miguel Jacq 2025-11-27 14:26:48 +11:00
parent 022990a337
commit 24f7dbea02
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
5 changed files with 662 additions and 6 deletions

225
tests/samples/ossec.xml Normal file
View file

@ -0,0 +1,225 @@
<!-- @(#) $Id: ./etc/rules/web_rules.xml, 2013/02/28 dcid Exp $
-
- Official Web access rules for OSSEC.
-
- Copyright (C) 2009 Trend Micro Inc.
- All rights reserved.
-
- This program is a free software; you can redistribute it
- and/or modify it under the terms of the GNU General Public
- License (version 2) as published by the FSF - Free Software
- Foundation.
-
- License details: http://www.ossec.net/en/licensing.html
-->
<group name="web,accesslog,">
<rule id="31100" level="0">
<category>web-log</category>
<description>Access log messages grouped.</description>
</rule>
<rule id="31108" level="0">
<if_sid>31100</if_sid>
<id_pcre2>^2|^3</id_pcre2>
<compiled_rule>is_simple_http_request</compiled_rule>
<description>Ignored URLs (simple queries).</description>
</rule>
<rule id="31101" level="5">
<if_sid>31100</if_sid>
<id_pcre2>^4</id_pcre2>
<description>Web server 400 error code.</description>
</rule>
<rule id="31102" level="0">
<if_sid>31101</if_sid>
<url_pcre2>\.jpg$|\.gif$|favicon\.ico$|\.png$|robots\.txt$|\.css$|\.js$|\.jpeg$</url_pcre2>
<compiled_rule>is_simple_http_request</compiled_rule>
<description>Ignored extensions on 400 error codes.</description>
</rule>
<rule id="31103" level="6">
<if_sid>31100,31108</if_sid>
<url_pcre2>=select%20|select\+|insert%20|%20from%20|%20where%20|union%20|</url_pcre2>
<url_pcre2>union\+|where\+|null,null|xp_cmdshell</url_pcre2>
<description>SQL injection attempt.</description>
<group>attack,sql_injection,</group>
</rule>
<rule id="31104" level="6">
<if_sid>31100</if_sid>
<!-- Attempt to do directory traversal, simple sql injections,
- or access to the etc or bin directory (unix). -->
<url_pcre2>%027|%00|%01|%7f|%2E%2E|%0A|%0D|\.\./\.\.|\.\.\\\.\.|echo;|</url_pcre2>
<url_pcre2>cmd\.exe|root\.exe|_mem_bin|msadc|/winnt/|/boot\.ini|</url_pcre2>
<url_pcre2>/x90/|default\.ida|/sumthin|nsiislog\.dll|chmod%|wget%|cd%20|</url_pcre2>
<url_pcre2>exec%20|\.\./\.\.//|%5C\.\./%5C|\./\./\./\./|2e%2e%5c%2e|\\x5C\\x5C</url_pcre2>
<description>Common web attack.</description>
<group>attack,</group>
</rule>
<rule id="31105" level="6">
<if_sid>31100</if_sid>
<url_pcre2>%3Cscript|%3C%2Fscript|script>|script%3E|SRC=javascript|IMG%20|</url_pcre2>
<url_pcre2>%20ONLOAD=|INPUT%20|iframe%20</url_pcre2>
<description>XSS (Cross Site Scripting) attempt.</description>
<group>attack,</group>
</rule>
<rule id="31106" level="6">
<if_sid>31103, 31104, 31105</if_sid>
<id_pcre2>^200</id_pcre2>
<description>A web attack returned code 200 (success).</description>
<group>attack,</group>
</rule>
<rule id="31110" level="6">
<if_sid>31100</if_sid>
<url_pcre2>\?-d|\?-s|\?-a|\?-b|\?-w</url_pcre2>
<description>PHP CGI-bin vulnerability attempt.</description>
<group>attack,</group>
</rule>
<rule id="31109" level="6">
<if_sid>31100</if_sid>
<url_pcre2>\+as\+varchar</url_pcre2>
<pcre2>%2Bchar\(\d+\)%2Bchar\(\d+\)%2Bchar\(\d+\)%2Bchar\(\d+\)%2Bchar\(\d+\)%2Bchar\(\d+\)</pcre2>
<description>MSSQL Injection attempt (/ur.php, urchin.js)</description>
<group>attack,</group>
</rule>
<!-- If your site has a search engine, you may need to ignore
- it in here.
-->
<rule id="31107" level="0">
<if_sid>31103, 31104, 31105</if_sid>
<url_pcre2>^/search\.php\?search=|^/index\.php\?searchword=</url_pcre2>
<description>Ignored URLs for the web attacks</description>
</rule>
<rule id="31115" level="13" maxsize="7900">
<if_sid>31100</if_sid>
<description>URL too long. Higher than allowed on most </description>
<description>browsers. Possible attack.</description>
<group>invalid_access,</group>
</rule>
<!-- 500 error codes, server error
- http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
-->
<rule id="31120" level="5">
<if_sid>31100</if_sid>
<id_pcre2>^50</id_pcre2>
<description>Web server 500 error code (server error).</description>
</rule>
<rule id="31121" level="4">
<if_sid>31120</if_sid>
<id_pcre2>^501</id_pcre2>
<description>Web server 501 error code (Not Implemented).</description>
</rule>
<rule id="31122" level="5">
<if_sid>31120</if_sid>
<id_pcre2>^500</id_pcre2>
<options>alert_by_email</options>
<description>Web server 500 error code (Internal Error).</description>
<group>system_error,</group>
</rule>
<rule id="31123" level="4">
<if_sid>31120</if_sid>
<id_pcre2>^503</id_pcre2>
<options>alert_by_email</options>
<description>Web server 503 error code (Service unavailable).</description>
</rule>
<!-- Rules to ignore crawlers -->
<rule id="31140" level="0">
<if_sid>31101</if_sid>
<compiled_rule>is_valid_crawler</compiled_rule>
<description>Ignoring google/msn/yahoo bots.</description>
</rule>
<!-- Ignoring nginx 499's -->
<rule id="31141" level="0">
<if_sid>31101</if_sid>
<id_pcre2>^499</id_pcre2>
<description>Ignored 499's on nginx.</description>
</rule>
<rule id="31151" level="10" frequency="12" timeframe="90">
<if_matched_sid>31101</if_matched_sid>
<same_source_ip />
<description>Multiple web server 400 error codes </description>
<description>from same source ip.</description>
<group>web_scan,recon,</group>
</rule>
<rule id="31152" level="10" frequency="6" timeframe="120">
<if_matched_sid>31103</if_matched_sid>
<same_source_ip />
<description>Multiple SQL injection attempts from same </description>
<description>source ip.</description>
<group>attack,sql_injection,</group>
</rule>
<rule id="31153" level="10" frequency="8" timeframe="120">
<if_matched_sid>31104</if_matched_sid>
<same_source_ip />
<description>Multiple common web attacks from same source ip.</description>
<group>attack,</group>
</rule>
<rule id="31154" level="10" frequency="8" timeframe="120">
<if_matched_sid>31105</if_matched_sid>
<same_source_ip />
<description>Multiple XSS (Cross Site Scripting) attempts </description>
<description>from same source ip.</description>
<group>attack,</group>
</rule>
<rule id="31161" level="10" frequency="12" timeframe="120">
<if_matched_sid>31121</if_matched_sid>
<same_source_ip />
<description>Multiple web server 501 error code (Not Implemented).</description>
<group>web_scan,recon,</group>
</rule>
<rule id="31162" level="10" frequency="12" timeframe="120">
<if_matched_sid>31122</if_matched_sid>
<same_source_ip />
<description>Multiple web server 500 error code (Internal Error).</description>
<group>system_error,</group>
</rule>
<rule id="31163" level="10" frequency="12" timeframe="120">
<if_matched_sid>31123</if_matched_sid>
<same_source_ip />
<description>Multiple web server 503 error code (Service unavailable).</description>
<group>web_scan,recon,</group>
</rule>
<rule id="31164" level="6">
<if_sid>31100</if_sid>
<url_pcre2>=%27|select%2B|insert%2B|%2Bfrom%2B|%2Bwhere%2B|%2Bunion%2B</url_pcre2>
<description>SQL injection attempt.</description>
<group>attack,sqlinjection,</group>
</rule>
<rule id="31165" level="6">
<if_sid>31100</if_sid>
<url_pcre2>%EF%BC%87|%EF%BC%87|%EF%BC%87|%2531|%u0053%u0045</url_pcre2>
<description>SQL injection attempt.</description>
<group>attack,sqlinjection,</group>
</rule>
</group> <!-- Web access log -->

View file

@ -5,6 +5,7 @@ import configparser
import pytest
import textwrap
import yaml
import xml.etree.ElementTree as ET
import jinjaturtle.core as core
from jinjaturtle.core import (
@ -147,12 +148,15 @@ def test_formats_match_expected_extensions():
"""
toml_path = SAMPLES_DIR / "tom.toml"
ini_path = SAMPLES_DIR / "php.ini"
xml_path = SAMPLES_DIR / "ossec.xml"
fmt_toml, _ = parse_config(toml_path)
fmt_ini, _ = parse_config(ini_path)
fmt_xml, _ = parse_config(xml_path)
assert fmt_toml == "toml"
assert fmt_ini == "ini"
assert fmt_xml == "xml"
def test_parse_config_toml_missing_tomllib(monkeypatch):
@ -442,3 +446,210 @@ def test_fallback_str_representer_for_unknown_type():
# It should serialize without error, and the string form should appear.
assert "weird-value" in dumped
def test_xml_roundtrip_ossec_web_rules():
    """
    Run the OSSEC sample through the whole pipeline: parse, flatten,
    generate defaults YAML, then generate a template from the original text
    and check that comments and every default variable show up in it.
    """
    sample = SAMPLES_DIR / "ossec.xml"
    assert sample.is_file(), f"Missing sample XML file: {sample}"

    fmt, tree = parse_config(sample)
    assert fmt == "xml"

    flattened = flatten_config(fmt, tree)
    assert flattened, "Expected at least one flattened item from XML sample"

    defaults = yaml.safe_load(generate_defaults_yaml("ossec", flattened))

    # The defaults document has to load as a populated mapping.
    assert isinstance(defaults, dict)
    assert defaults, "Expected non-empty defaults for XML sample"

    # Variable names: carry the role prefix, lowercase only, no spaces.
    for name in defaults:
        assert name.startswith("ossec_")
        assert name == name.lower()
        assert " " not in name

    # The name attribute on the root <group> element flattens to ossec_name.
    assert defaults["ossec_name"] == "web,accesslog,"

    # Some default must carry the value of rule id="31100" ...
    matching = [name for name, value in defaults.items() if value == "31100"]
    assert matching, "Expected to find a default for rule id 31100"

    # ... and at least one of those must be a rule *id* attribute variable.
    id_attr_vars = [
        name
        for name in matching
        if name.startswith("ossec_rule_") and name.endswith("_id")
    ]
    assert (
        id_attr_vars
    ), f"Expected at least one *_id var for value 31100, got: {matching}"

    # Build the template from the raw file text so comments can be kept.
    raw_xml = sample.read_text(encoding="utf-8")
    template = generate_template(fmt, tree, "ossec", original_text=raw_xml)
    assert isinstance(template, str)
    assert template.strip(), "Template for XML sample should not be empty"

    # One comment from the file header and one from the middle of the file.
    for comment_text in (
        "Official Web access rules for OSSEC.",
        "Rules to ignore crawlers",
    ):
        assert comment_text in template

    # Every generated default has to be referenced somewhere in the template.
    for var_name in defaults:
        assert (
            var_name in template
        ), f"Variable {var_name} not referenced in XML template"
def test_generate_xml_template_from_text_edge_cases():
    """
    Exercise XML text edge cases:
    - XML declaration and DOCTYPE in prolog
    - top-level and inner comments
    - repeated child elements (indexing)
    - attributes and text content
    """
    text = textwrap.dedent(
        """\
        <?xml version="1.0"?>
        <!-- top comment -->
        <!DOCTYPE something>
        <root attr="1">
        <!-- inner comment -->
        <child attr="2">text</child>
        <child>other</child>
        </root>
        """
    )
    tmpl = core._generate_xml_template_from_text("role", text)

    # Prolog and comments preserved
    assert "<?xml version" in tmpl
    assert "top comment" in tmpl
    assert "inner comment" in tmpl

    # Root attribute becomes a variable (path ("@attr",) -> role_attr)
    assert "role_attr" in tmpl

    # Repeated <child> elements should be indexed in both attr and text.
    assert "role_child_0_attr" in tmpl
    # The first <child> has an attribute *and* text, so its text is expected
    # under the "value" suffix (same convention the other XML tests in this
    # module check for). A bare "role_child_0" check would be a substring of
    # the "_attr" name above and could never fail by itself.
    assert "role_child_0_value" in tmpl
    # The second <child> has text only, so its variable is the indexed path.
    assert "role_child_1" in tmpl
def test_generate_template_xml_type_error():
    """
    generate_template must raise TypeError when the "xml" format is given
    a parsed value of the wrong type (here a plain string).
    """
    wrong_type_value = "not an element"
    with pytest.raises(TypeError):
        generate_template("xml", parsed=wrong_type_value, role_prefix="role")
def test_flatten_config_xml_type_error():
    """
    flatten_config must raise TypeError when the "xml" format is given
    a parsed value of the wrong type (here a plain string).
    """
    wrong_type_value = "not-an-element"
    with pytest.raises(TypeError):
        flatten_config("xml", parsed=wrong_type_value)
def test_generate_template_xml_structural_fallback():
    """
    Call generate_template for XML without original_text, which is meant to
    take the structural fallback path (ET.tostring +
    _generate_xml_template_from_text), and check the variable names produced.
    """
    source = textwrap.dedent(
        """\
        <root attr="1">
        <child>2</child>
        <node attr="x">text</node>
        </root>
        """
    )
    comment_free_parser = ET.XMLParser(
        target=ET.TreeBuilder(insert_comments=False)
    )
    tree = ET.fromstring(source, parser=comment_free_parser)

    rendered = generate_template("xml", parsed=tree, role_prefix="role")

    expected_vars = (
        # Root attribute path ("@attr",)
        "role_attr",
        # Simple child element text ("child",)
        "role_child",
        # Element with both attr and text: attr at ("node", "@attr") ...
        "role_node_attr",
        # ... and text at ("node", "value")
        "role_node_value",
    )
    for var_name in expected_vars:
        assert var_name in rendered
def test_split_xml_prolog_only_whitespace():
    """
    Input made only of whitespace: all of it is returned as the prolog and
    the body is empty. Targets the 'if i >= n: break' path.
    """
    blank = " \n\t"
    prolog, body = core._split_xml_prolog(blank)
    assert (prolog, body) == (blank, "")
def test_split_xml_prolog_unterminated_declaration():
    """
    An XML declaration with no closing marker: targets the 'end == -1'
    branch, where the entire input comes back as body with an empty prolog.
    """
    broken = "<?xml version='1.0'"
    prolog, body = core._split_xml_prolog(broken)
    assert (prolog, body) == ("", broken)
def test_split_xml_prolog_unterminated_comment():
    """
    A comment that is never closed: targets the comment 'end == -1' branch,
    where the entire input comes back as body with an empty prolog.
    """
    broken = "<!-- no end"
    prolog, body = core._split_xml_prolog(broken)
    assert (prolog, body) == ("", broken)
def test_split_xml_prolog_unterminated_doctype():
    """
    A DOCTYPE that is never closed: targets the DOCTYPE 'end == -1' branch,
    where the entire input comes back as body with an empty prolog.
    """
    broken = "<!DOCTYPE foo"
    prolog, body = core._split_xml_prolog(broken)
    assert (prolog, body) == ("", broken)
def test_split_xml_prolog_unexpected_content():
    """
    Input that starts with non-XML content: targets the 'unexpected content'
    break, where the entire input comes back as body with an empty prolog.
    """
    junk_first = "garbage<root/>"
    prolog, body = core._split_xml_prolog(junk_first)
    assert (prolog, body) == ("", junk_first)
def test_flatten_xml_text_with_attributes_uses_value_suffix():
    """
    For an element carrying both an attribute and text, the flattened output
    must hold the text under path + ('value',) rather than the bare path.
    """
    comment_free_parser = ET.XMLParser(
        target=ET.TreeBuilder(insert_comments=False)
    )
    tree = ET.fromstring(
        "<root><node attr='x'>text</node></root>", parser=comment_free_parser
    )

    flattened = flatten_config("xml", tree)

    expected_entries = (
        # Attribute of <node>
        (("node", "@attr"), "x"),
        # Text of <node>, stored under the "value" suffix
        (("node", "value"), "text"),
    )
    for entry in expected_entries:
        assert entry in flattened