From 85f21e739d4d7d1ce89746a92773f7afa7af7cb3 Mon Sep 17 00:00:00 2001
From: Miguel Jacq
Date: Thu, 27 Nov 2025 20:41:10 +1100
Subject: [PATCH] Refactor handlers to be in their own classes for easier
 maintainability

---
 pyproject.toml                       |   2 +-
 src/jinjaturtle/core.py              | 854 ++-------------------------
 src/jinjaturtle/handlers/__init__.py |  19 +
 src/jinjaturtle/handlers/base.py     |  79 +++
 src/jinjaturtle/handlers/dict.py     |  31 +
 src/jinjaturtle/handlers/ini.py      | 153 +++++
 src/jinjaturtle/handlers/json.py     |  47 ++
 src/jinjaturtle/handlers/toml.py     | 205 +++++++
 src/jinjaturtle/handlers/xml.py      | 230 ++++++++
 src/jinjaturtle/handlers/yaml.py     | 179 ++++++
 tests.sh                             |   8 +
 tests/test_base_handler.py           |  34 ++
 tests/test_core.py                   | 653 --------------------
 tests/test_core_utils.py             | 202 +++++++
 tests/test_ini_handler.py            |  93 +++
 tests/test_json_handler.py           |  56 ++
 tests/test_toml_handler.py           | 114 ++++
 tests/test_xml_handler.py            | 230 ++++++++
 tests/test_yaml_handler.py           | 100 ++++
 19 files changed, 1826 insertions(+), 1463 deletions(-)
 create mode 100644 src/jinjaturtle/handlers/__init__.py
 create mode 100644 src/jinjaturtle/handlers/base.py
 create mode 100644 src/jinjaturtle/handlers/dict.py
 create mode 100644 src/jinjaturtle/handlers/ini.py
 create mode 100644 src/jinjaturtle/handlers/json.py
 create mode 100644 src/jinjaturtle/handlers/toml.py
 create mode 100644 src/jinjaturtle/handlers/xml.py
 create mode 100644 src/jinjaturtle/handlers/yaml.py
 create mode 100644 tests/test_base_handler.py
 delete mode 100644 tests/test_core.py
 create mode 100644 tests/test_core_utils.py
 create mode 100644 tests/test_ini_handler.py
 create mode 100644 tests/test_json_handler.py
 create mode 100644 tests/test_toml_handler.py
 create mode 100644 tests/test_xml_handler.py
 create mode 100644 tests/test_yaml_handler.py

diff --git a/pyproject.toml b/pyproject.toml
index 01b192b..a54c5c4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "jinjaturtle"
-version = "0.1.3"
+version = "0.1.4"
 description = "Convert config files into Ansible defaults and Jinja2 templates."
authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/src/jinjaturtle/core.py b/src/jinjaturtle/core.py index 53bc9fb..3fc46c5 100644 --- a/src/jinjaturtle/core.py +++ b/src/jinjaturtle/core.py @@ -1,21 +1,18 @@ from __future__ import annotations -import configparser -import json -import xml.etree.ElementTree as ET # nosec -import yaml - -from collections import Counter, defaultdict from pathlib import Path from typing import Any, Iterable -try: - import tomllib # Python 3.11+ -except ModuleNotFoundError: # pragma: no cover - try: - import tomli as tomllib # type: ignore - except ModuleNotFoundError: # pragma: no cover - tomllib = None # type: ignore +import yaml + +from .handlers import ( + BaseHandler, + IniHandler, + JsonHandler, + TomlHandler, + YamlHandler, + XmlHandler, +) class QuotedString(str): @@ -45,6 +42,27 @@ def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString): _TurtleDumper.add_representer(QuotedString, _quoted_str_representer) # Use our fallback for any unknown object types _TurtleDumper.add_representer(None, _fallback_str_representer) +_HANDLERS: dict[str, BaseHandler] = {} + +_INI_HANDLER = IniHandler() +_JSON_HANDLER = JsonHandler() +_TOML_HANDLER = TomlHandler() +_YAML_HANDLER = YamlHandler() +_XML_HANDLER = XmlHandler() +_HANDLERS["ini"] = _INI_HANDLER +_HANDLERS["json"] = _JSON_HANDLER +_HANDLERS["toml"] = _TOML_HANDLER +_HANDLERS["yaml"] = _YAML_HANDLER +_HANDLERS["xml"] = _XML_HANDLER + + +def make_var_name(role_prefix: str, path: Iterable[str]) -> str: + """Wrapper for :meth:`BaseHandler.make_var_name`. + + This keeps the public API (and tests) working while the implementation + lives on the BaseHandler class. + """ + return BaseHandler.make_var_name(role_prefix, path) def detect_format(path: Path, explicit: str | None = None) -> str: @@ -71,202 +89,25 @@ def detect_format(path: Path, explicit: str | None = None) -> str: def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]: """ - Parse config file into a Python object + Parse config file into a Python object. """ fmt = detect_format(path, fmt) - - if fmt == "toml": - if tomllib is None: - raise RuntimeError( - "tomllib/tomli is required to parse TOML files but is not installed" - ) - with path.open("rb") as f: - data = tomllib.load(f) - return fmt, data - - if fmt == "yaml": - text = path.read_text(encoding="utf-8") - data = yaml.safe_load(text) or {} - return fmt, data - - if fmt == "json": - with path.open("r", encoding="utf-8") as f: - data = json.load(f) - return fmt, data - - if fmt == "ini": - parser = configparser.ConfigParser() - parser.optionxform = str # preserve key case - with path.open("r", encoding="utf-8") as f: - parser.read_file(f) - return fmt, parser - - if fmt == "xml": - text = path.read_text(encoding="utf-8") - root = ET.fromstring(text) # nosec B314 - return fmt, root - - raise ValueError(f"Unsupported config format: {fmt}") - - -def _flatten_xml(root: ET.Element) -> list[tuple[tuple[str, ...], Any]]: - """ - Flatten an XML tree into (path, value) pairs. - - Path conventions: - - Root element's children are treated as top-level (root tag is *not* included). - - Element text: - bar -> path ("foo",) value "bar" - bar -> path ("foo", "value") value "bar" - baz -> ("foo", "bar") / etc. 
- - Attributes: - - -> path ("server", "@host") value "localhost" - - Repeated sibling elements: - /a - /b - -> ("endpoint", "0") "/a" - ("endpoint", "1") "/b" - """ - items: list[tuple[tuple[str, ...], Any]] = [] - - def walk(elem: ET.Element, path: tuple[str, ...]) -> None: - # Attributes - for attr_name, attr_val in elem.attrib.items(): - attr_path = path + (f"@{attr_name}",) - items.append((attr_path, attr_val)) - - # Children - children = [c for c in list(elem) if isinstance(c.tag, str)] - - # Text content - text = (elem.text or "").strip() - if text: - if not elem.attrib and not children: - # Simple bar - items.append((path, text)) - else: - # Text alongside attrs/children - items.append((path + ("value",), text)) - - # Repeated siblings get an index; singletons just use the tag - counts = Counter(child.tag for child in children) - index_counters: dict[str, int] = defaultdict(int) - - for child in children: - tag = child.tag - if counts[tag] > 1: - idx = index_counters[tag] - index_counters[tag] += 1 - child_path = path + (tag, str(idx)) - else: - child_path = path + (tag,) - walk(child, child_path) - - # Treat root as a container: its children are top-level - walk(root, ()) - return items + handler = _HANDLERS.get(fmt) + if handler is None: + raise ValueError(f"Unsupported config format: {fmt}") + parsed = handler.parse(path) + return fmt, parsed def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]: """ Flatten parsed config into a list of (path_tuple, value). - - Examples: - TOML: [server.tls] enabled = true - -> (("server", "tls", "enabled"), True) - - INI: [somesection] foo = "bar" - -> (("somesection", "foo"), "bar") - - For INI, values are processed as strings (quotes stripped when obvious). """ - items: list[tuple[tuple[str, ...], Any]] = [] - - if fmt in {"toml", "yaml", "json"}: - - def _walk(obj: Any, path: tuple[str, ...] = ()) -> None: - if isinstance(obj, dict): - for k, v in obj.items(): - _walk(v, path + (str(k),)) - elif isinstance(obj, list) and fmt in {"yaml", "json"}: - # for YAML/JSON, flatten lists so each element can be templated; - # TOML still treats list as a single scalar (ports = [..]) which is fine. - for i, v in enumerate(obj): - _walk(v, path + (str(i),)) - else: - items.append((path, obj)) - - _walk(parsed) - - elif fmt == "ini": - parser: configparser.ConfigParser = parsed - for section in parser.sections(): - for key, value in parser.items(section, raw=True): - raw = value.strip() - # Strip surrounding quotes from INI values for defaults - if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in {'"', "'"}: - processed: Any = raw[1:-1] - else: - processed = raw - items.append(((section, key), processed)) - - elif fmt == "xml": - if not isinstance(parsed, ET.Element): - raise TypeError("XML parser result must be an Element") - items = _flatten_xml(parsed) - - else: # pragma: no cover + handler = _HANDLERS.get(fmt) + if handler is None: + # preserve previous ValueError for unsupported formats raise ValueError(f"Unsupported format: {fmt}") - - return items - - -def make_var_name(role_prefix: str, path: Iterable[str]) -> str: - """ - Build an Ansible var name like: - role_prefix_section_subsection_key - - Sanitises parts to lowercase [a-z0-9_] and strips extras. 
- """ - role_prefix = role_prefix.strip().lower() - clean_parts: list[str] = [] - - for part in path: - part = str(part).strip() - part = part.replace(" ", "_") - cleaned_chars: list[str] = [] - for c in part: - if c.isalnum() or c == "_": - cleaned_chars.append(c.lower()) - else: - cleaned_chars.append("_") - cleaned_part = "".join(cleaned_chars).strip("_") - if cleaned_part: - clean_parts.append(cleaned_part) - - if clean_parts: - return role_prefix + "_" + "_".join(clean_parts) - return role_prefix - - -def _split_inline_comment(text: str, comment_chars: set[str]) -> tuple[str, str]: - """ - Split 'value # comment' into (value_part, comment_part), where - comment_part starts at the first unquoted comment character. - - comment_chars is e.g. {'#'} for TOML/YAML, {'#', ';'} for INI. - """ - in_single = False - in_double = False - for i, ch in enumerate(text): - if ch == "'" and not in_double: - in_single = not in_single - elif ch == '"' and not in_single: - in_double = not in_double - elif ch in comment_chars and not in_single and not in_double: - return text[:i], text[i:] - return text, "" + return handler.flatten(parsed) def _normalize_default_value(value: Any) -> Any: @@ -312,577 +153,6 @@ def generate_defaults_yaml( ) -def _generate_toml_template(role_prefix: str, data: dict[str, Any]) -> str: - """ - Generate a TOML Jinja2 template from parsed TOML dict. - - Values become Jinja placeholders, with quoting preserved for strings: - foo = "bar" -> foo = "{{ prefix_foo }}" - port = 8080 -> port = {{ prefix_port }} - """ - lines: list[str] = [] - - def emit_kv(path: tuple[str, ...], key: str, value: Any) -> None: - var_name = make_var_name(role_prefix, path + (key,)) - if isinstance(value, str): - lines.append(f'{key} = "{{{{ {var_name} }}}}"') - else: - lines.append(f"{key} = {{{{ {var_name} }}}}") - - def walk(obj: dict[str, Any], path: tuple[str, ...] = ()) -> None: - scalar_items = {k: v for k, v in obj.items() if not isinstance(v, dict)} - nested_items = {k: v for k, v in obj.items() if isinstance(v, dict)} - - if path: - header = ".".join(path) - lines.append(f"[{header}]") - - for key, val in scalar_items.items(): - emit_kv(path, str(key), val) - - if scalar_items: - lines.append("") - - for key, val in nested_items.items(): - walk(val, path + (str(key),)) - - # Root scalars (no table header) - root_scalars = {k: v for k, v in data.items() if not isinstance(v, dict)} - for key, val in root_scalars.items(): - emit_kv((), str(key), val) - if root_scalars: - lines.append("") - - # Tables - for key, val in data.items(): - if isinstance(val, dict): - walk(val, (str(key),)) - - return "\n".join(lines).rstrip() + "\n" - - -def _generate_ini_template(role_prefix: str, parser: configparser.ConfigParser) -> str: - """ - Generate an INI-style Jinja2 template from a ConfigParser. 
- - Quoting heuristic: - foo = "bar" -> foo = "{{ prefix_section_foo }}" - num = 42 -> num = {{ prefix_section_num }} - """ - lines: list[str] = [] - - for section in parser.sections(): - lines.append(f"[{section}]") - for key, value in parser.items(section, raw=True): - path = (section, key) - var_name = make_var_name(role_prefix, path) - value = value.strip() - if len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}: - lines.append(f'{key} = "{{{{ {var_name} }}}}"') - else: - lines.append(f"{key} = {{{{ {var_name} }}}}") - lines.append("") - - return "\n".join(lines).rstrip() + "\n" - - -def _generate_ini_template_from_text(role_prefix: str, text: str) -> str: - """ - Generate a Jinja2 template for an INI/php.ini-style file, preserving - comments, blank lines, and section headers by patching values in-place. - """ - lines = text.splitlines(keepends=True) - current_section: str | None = None - out_lines: list[str] = [] - - for raw_line in lines: - line = raw_line - stripped = line.lstrip() - - # Blank or pure comment: keep as-is - if not stripped or stripped[0] in {"#", ";"}: - out_lines.append(raw_line) - continue - - # Section header - if stripped.startswith("[") and "]" in stripped: - header_inner = stripped[1 : stripped.index("]")] - current_section = header_inner.strip() - out_lines.append(raw_line) - continue - - # Work without newline so we can re-attach it exactly - newline = "" - content = raw_line - if content.endswith("\r\n"): - newline = "\r\n" - content = content[:-2] - elif content.endswith("\n"): - newline = content[-1] - content = content[:-1] - - eq_index = content.find("=") - if eq_index == -1: - # Not a simple key=value line: leave untouched - out_lines.append(raw_line) - continue - - before_eq = content[:eq_index] - after_eq = content[eq_index + 1 :] - - key = before_eq.strip() - if not key: - out_lines.append(raw_line) - continue - - # Whitespace after '=' - value_ws_len = len(after_eq) - len(after_eq.lstrip(" \t")) - leading_ws = after_eq[:value_ws_len] - value_and_comment = after_eq[value_ws_len:] - - value_part, comment_part = _split_inline_comment(value_and_comment, {"#", ";"}) - raw_value = value_part.strip() - - path = (key,) if current_section is None else (current_section, key) - var_name = make_var_name(role_prefix, path) - - # Was the original value quoted? - use_quotes = ( - len(raw_value) >= 2 - and raw_value[0] == raw_value[-1] - and raw_value[0] in {'"', "'"} - ) - - if use_quotes: - quote_char = raw_value[0] - replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}" - else: - replacement_value = f"{{{{ {var_name} }}}}" - - new_content = before_eq + "=" + leading_ws + replacement_value + comment_part - out_lines.append(new_content + newline) - - return "".join(out_lines) - - -def _generate_toml_template_from_text(role_prefix: str, text: str) -> str: - """ - Generate a Jinja2 template for a TOML file, preserving comments, - blank lines, and table headers by patching values in-place. - - Handles inline tables like: - temp_targets = { cpu = 79.5, case = 72.0 } - - by mapping them to: - temp_targets = { cpu = {{ prefix_database_temp_targets_cpu }}, - case = {{ prefix_database_temp_targets_case }} } - """ - lines = text.splitlines(keepends=True) - current_table: tuple[str, ...] 
= () - out_lines: list[str] = [] - - for raw_line in lines: - line = raw_line - stripped = line.lstrip() - - # Blank or pure comment - if not stripped or stripped.startswith("#"): - out_lines.append(raw_line) - continue - - # Table header: [server] or [server.tls] or [[array.of.tables]] - if stripped.startswith("[") and "]" in stripped: - header = stripped - first_bracket = header.find("[") - closing_bracket = header.find("]", first_bracket + 1) - if first_bracket != -1 and closing_bracket != -1: - inner = header[first_bracket + 1 : closing_bracket].strip() - inner = inner.strip("[]") # handle [[table]] as well - parts = [p.strip() for p in inner.split(".") if p.strip()] - current_table = tuple(parts) - out_lines.append(raw_line) - continue - - # Try key = value - newline = "" - content = raw_line - if content.endswith("\r\n"): - newline = "\r\n" - content = content[:-2] - elif content.endswith("\n"): - newline = content[-1] - content = content[:-1] - - eq_index = content.find("=") - if eq_index == -1: - out_lines.append(raw_line) - continue - - before_eq = content[:eq_index] - after_eq = content[eq_index + 1 :] - - key = before_eq.strip() - if not key: - out_lines.append(raw_line) - continue - - # Whitespace after '=' - value_ws_len = len(after_eq) - len(after_eq.lstrip(" \t")) - leading_ws = after_eq[:value_ws_len] - value_and_comment = after_eq[value_ws_len:] - - value_part, comment_part = _split_inline_comment(value_and_comment, {"#"}) - raw_value = value_part.strip() - - # Path for this key (table + key) - path = current_table + (key,) - - # Special case: inline table - if ( - raw_value.startswith("{") - and raw_value.endswith("}") - and tomllib is not None - ): - try: - # Parse the inline table as a tiny TOML document - mini_source = "table = " + raw_value + "\n" - mini_data = tomllib.loads(mini_source)["table"] - except Exception: - mini_data = None - - if isinstance(mini_data, dict): - inner_bits: list[str] = [] - for sub_key, sub_val in mini_data.items(): - nested_path = path + (sub_key,) - nested_var = make_var_name(role_prefix, nested_path) - if isinstance(sub_val, str): - inner_bits.append(f'{sub_key} = "{{{{ {nested_var} }}}}"') - else: - inner_bits.append(f"{sub_key} = {{{{ {nested_var} }}}}") - replacement_value = "{ " + ", ".join(inner_bits) + " }" - new_content = ( - before_eq + "=" + leading_ws + replacement_value + comment_part - ) - out_lines.append(new_content + newline) - continue - # If parsing fails, fall through to normal handling - - # Normal scalar value handling (including bools, numbers, strings) - var_name = make_var_name(role_prefix, path) - use_quotes = ( - len(raw_value) >= 2 - and raw_value[0] == raw_value[-1] - and raw_value[0] in {'"', "'"} - ) - - if use_quotes: - quote_char = raw_value[0] - replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}" - else: - replacement_value = f"{{{{ {var_name} }}}}" - - new_content = before_eq + "=" + leading_ws + replacement_value + comment_part - out_lines.append(new_content + newline) - - return "".join(out_lines) - - -def _generate_yaml_template_from_text( - role_prefix: str, - text: str, -) -> str: - """ - Generate a Jinja2 template for a YAML file, preserving comments and - blank lines by patching scalar values in-place. - - This handles common "config-ish" YAML: - - top-level and nested mappings - - lists of scalars - - lists of small mapping objects - It does *not* aim to support all YAML edge cases (anchors, tags, etc.). 
- """ - lines = text.splitlines(keepends=True) - out_lines: list[str] = [] - - # Simple indentation-based context stack: (indent, path, kind) - # kind is "map" or "seq". - stack: list[tuple[int, tuple[str, ...], str]] = [] - - # Track index per parent path for sequences - seq_counters: dict[tuple[str, ...], int] = {} - - def current_path() -> tuple[str, ...]: - return stack[-1][1] if stack else () - - for raw_line in lines: - stripped = raw_line.lstrip() - indent = len(raw_line) - len(stripped) - - # Blank or pure comment lines unchanged - if not stripped or stripped.startswith("#"): - out_lines.append(raw_line) - continue - - # Adjust stack based on indent - while stack and indent < stack[-1][0]: - stack.pop() - - # --- Handle mapping key lines: "key:" or "key: value" - if ":" in stripped and not stripped.lstrip().startswith("- "): - # separate key and rest - key_part, rest = stripped.split(":", 1) - key = key_part.strip() - if not key: - out_lines.append(raw_line) - continue - - # Is this just "key:" or "key: value"? - rest_stripped = rest.lstrip(" \t") - - # Use the same inline-comment splitter to see if there's any real value - value_candidate, _ = _split_inline_comment(rest_stripped, {"#"}) - has_value = bool(value_candidate.strip()) - - # Update stack/context: current mapping at this indent - # Replace any existing mapping at same indent - if stack and stack[-1][0] == indent and stack[-1][2] == "map": - stack.pop() - path = current_path() + (key,) - stack.append((indent, path, "map")) - - if not has_value: - # Just "key:" -> collection or nested structure begins on following lines. - out_lines.append(raw_line) - continue - - # We have an inline scalar value on this same line. - - # Separate value from inline comment - value_part, comment_part = _split_inline_comment(rest_stripped, {"#"}) - raw_value = value_part.strip() - var_name = make_var_name(role_prefix, path) - - # Keep quote-style if original was quoted - use_quotes = ( - len(raw_value) >= 2 - and raw_value[0] == raw_value[-1] - and raw_value[0] in {'"', "'"} - ) - - if use_quotes: - q = raw_value[0] - replacement = f"{q}{{{{ {var_name} }}}}{q}" - else: - replacement = f"{{{{ {var_name} }}}}" - - leading = rest[: len(rest) - len(rest.lstrip(" \t"))] - new_stripped = f"{key}: {leading}{replacement}{comment_part}" - out_lines.append( - " " * indent + new_stripped + ("\n" if raw_line.endswith("\n") else "") - ) - continue - - # --- Handle list items: "- value" or "- key: value" - if stripped.startswith("- "): - # Determine parent path - # If top of stack isn't sequence at this indent, push one using current path - if not stack or stack[-1][0] != indent or stack[-1][2] != "seq": - parent_path = current_path() - stack.append((indent, parent_path, "seq")) - - parent_path = stack[-1][1] - content = stripped[2:] # after "- " - parent_path = stack[-1][1] - content = stripped[2:] # after "- " - - # Determine index for this parent path - index = seq_counters.get(parent_path, 0) - seq_counters[parent_path] = index + 1 - - path = parent_path + (str(index),) - - value_part, comment_part = _split_inline_comment(content, {"#"}) - raw_value = value_part.strip() - var_name = make_var_name(role_prefix, path) - - # If it's of the form "key: value" inside the list, we could try to - # support that, but a simple scalar is the common case: - use_quotes = ( - len(raw_value) >= 2 - and raw_value[0] == raw_value[-1] - and raw_value[0] in {'"', "'"} - ) - - if use_quotes: - q = raw_value[0] - replacement = f"{q}{{{{ {var_name} }}}}{q}" - else: - 
replacement = f"{{{{ {var_name} }}}}" - - new_stripped = f"- {replacement}{comment_part}" - out_lines.append( - " " * indent + new_stripped + ("\n" if raw_line.endswith("\n") else "") - ) - continue - - # Anything else (multi-line scalars, weird YAML): leave untouched - out_lines.append(raw_line) - - return "".join(out_lines) - - -def _generate_json_template(role_prefix: str, data: Any) -> str: - """ - Generate a JSON Jinja2 template from parsed JSON data. - - All scalar values are replaced with Jinja expressions whose names are - derived from the path, similar to TOML/YAML. - """ - - def _walk(obj: Any, path: tuple[str, ...] = ()) -> Any: - if isinstance(obj, dict): - return {k: _walk(v, path + (str(k),)) for k, v in obj.items()} - if isinstance(obj, list): - return [_walk(v, path + (str(i),)) for i, v in enumerate(obj)] - # scalar - var_name = make_var_name(role_prefix, path) - return f"{{{{ {var_name} }}}}" - - templated = _walk(data) - return json.dumps(templated, indent=2, ensure_ascii=False) + "\n" - - -def _split_xml_prolog(text: str) -> tuple[str, str]: - """ - Split an XML document into (prolog, body), where prolog includes: - - XML declaration () - - top-level comments - - DOCTYPE - The body starts at the root element. - """ - i = 0 - n = len(text) - prolog_parts: list[str] = [] - - while i < n: - # Preserve leading whitespace - while i < n and text[i].isspace(): - prolog_parts.append(text[i]) - i += 1 - if i >= n: - break - - if text.startswith("", i + 2) - if end == -1: - break - prolog_parts.append(text[i : end + 2]) - i = end + 2 - continue - - if text.startswith("", i + 4) - if end == -1: - break - prolog_parts.append(text[i : end + 3]) - i = end + 3 - continue - - if text.startswith("", i + 9) - if end == -1: - break - prolog_parts.append(text[i : end + 1]) - i = end + 1 - continue - - if text[i] == "<": - # Assume root element starts here - break - - # Unexpected content: stop treating as prolog - break - - return "".join(prolog_parts), text[i:] - - -def _apply_jinja_to_xml_tree(role_prefix: str, root: ET.Element) -> None: - """ - Mutate the XML tree in-place, replacing scalar values with Jinja - expressions based on the same paths used in _flatten_xml. - """ - - def walk(elem: ET.Element, path: tuple[str, ...]) -> None: - # Attributes - for attr_name in list(elem.attrib.keys()): - attr_path = path + (f"@{attr_name}",) - var_name = make_var_name(role_prefix, attr_path) - elem.set(attr_name, f"{{{{ {var_name} }}}}") - - # Children - children = [c for c in list(elem) if isinstance(c.tag, str)] - - # Text content - text = (elem.text or "").strip() - if text: - if not elem.attrib and not children: - text_path = path - else: - text_path = path + ("value",) - var_name = make_var_name(role_prefix, text_path) - elem.text = f"{{{{ {var_name} }}}}" - - # Repeated children get indexes just like in _flatten_xml - counts = Counter(child.tag for child in children) - index_counters: dict[str, int] = defaultdict(int) - - for child in children: - tag = child.tag - if counts[tag] > 1: - idx = index_counters[tag] - index_counters[tag] += 1 - child_path = path + (tag, str(idx)) - else: - child_path = path + (tag,) - walk(child, child_path) - - walk(root, ()) - - -def _generate_xml_template_from_text(role_prefix: str, text: str) -> str: - """ - Generate a Jinja2 template for an XML file, preserving comments and prolog. 
- - - Attributes become Jinja placeholders: - - -> - - - Text nodes become placeholders: - 8080 - -> {{ prefix_port }} - - but if the element also has attributes/children, the value path - gets a trailing "value" component, matching flattening. - """ - prolog, body = _split_xml_prolog(text) - - # Parse with comments included so are preserved - # defusedxml.defuse_stdlib() is called in CLI entrypoint - parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True)) # nosec B314 - parser.feed(body) - root = parser.close() - - _apply_jinja_to_xml_tree(role_prefix, root) - - # Pretty indentation if available (Python 3.9+) - indent = getattr(ET, "indent", None) - if indent is not None: - indent(root, space=" ") # type: ignore[arg-type] - - xml_body = ET.tostring(root, encoding="unicode") - return prolog + xml_body - - def generate_template( fmt: str, parsed: Any, @@ -897,41 +167,7 @@ def generate_template( the parsed structure (no comments). JSON of course does not support comments. """ - if original_text is not None: - if fmt == "toml": - return _generate_toml_template_from_text(role_prefix, original_text) - if fmt == "ini": - return _generate_ini_template_from_text(role_prefix, original_text) - if fmt == "yaml": - return _generate_yaml_template_from_text(role_prefix, original_text) - if fmt == "xml": - return _generate_xml_template_from_text(role_prefix, original_text) - # For JSON we ignore original_text and reconstruct from parsed structure below - if fmt != "json": - raise ValueError(f"Unsupported format: {fmt}") - - # Fallback: no comments preserved - if fmt == "toml": - if not isinstance(parsed, dict): - raise TypeError("TOML parser result must be a dict") - return _generate_toml_template(role_prefix, parsed) - if fmt == "ini": - if not isinstance(parsed, configparser.ConfigParser): - raise TypeError("INI parser result must be a ConfigParser") - return _generate_ini_template(role_prefix, parsed) - if fmt == "yaml": - if not isinstance(parsed, (dict, list)): - raise TypeError("YAML parser result must be a dict or list") - return _generate_yaml_template_from_text( - role_prefix, yaml.safe_dump(parsed, sort_keys=False) - ) - if fmt == "json": - if not isinstance(parsed, (dict, list)): - raise TypeError("JSON parser result must be a dict or list") - return _generate_json_template(role_prefix, parsed) - if fmt == "xml": - if not isinstance(parsed, ET.Element): - raise TypeError("XML parser result must be an Element") - xml_str = ET.tostring(parsed, encoding="unicode") - return _generate_xml_template_from_text(role_prefix, xml_str) - raise ValueError(f"Unsupported format: {fmt}") + handler = _HANDLERS.get(fmt) + if handler is None: + raise ValueError(f"Unsupported format: {fmt}") + return handler.generate_template(parsed, role_prefix, original_text=original_text) diff --git a/src/jinjaturtle/handlers/__init__.py b/src/jinjaturtle/handlers/__init__.py new file mode 100644 index 0000000..6bbcba1 --- /dev/null +++ b/src/jinjaturtle/handlers/__init__.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from .base import BaseHandler +from .dict import DictLikeHandler +from .ini import IniHandler +from .json import JsonHandler +from .toml import TomlHandler +from .yaml import YamlHandler +from .xml import XmlHandler + +__all__ = [ + "BaseHandler", + "DictLikeHandler", + "IniHandler", + "JsonHandler", + "TomlHandler", + "YamlHandler", + "XmlHandler", +] diff --git a/src/jinjaturtle/handlers/base.py b/src/jinjaturtle/handlers/base.py new file mode 100644 index 0000000..f427b76 --- /dev/null 
+++ b/src/jinjaturtle/handlers/base.py
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Iterable
+
+
+class BaseHandler:
+    """
+    Base class for a config format handler.
+
+    Each handler is responsible for:
+      - parse(path) -> parsed object
+      - flatten(parsed) -> list[(path_tuple, value)]
+      - generate_template(parsed, role_prefix, original_text=None) -> str
+    """
+
+    fmt: str  # e.g. "ini", "yaml", ...
+
+    def parse(self, path: Path) -> Any:
+        raise NotImplementedError
+
+    def flatten(self, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
+        raise NotImplementedError
+
+    def generate_template(
+        self,
+        parsed: Any,
+        role_prefix: str,
+        original_text: str | None = None,
+    ) -> str:
+        raise NotImplementedError
+
+    def _split_inline_comment(
+        self, text: str, comment_chars: set[str]
+    ) -> tuple[str, str]:
+        """
+        Split 'value # comment' into (value_part, comment_part), where
+        comment_part starts at the first unquoted comment character.
+
+        comment_chars is e.g. {'#'} for TOML/YAML, {'#', ';'} for INI.
+        """
+        in_single = False
+        in_double = False
+        for i, ch in enumerate(text):
+            if ch == "'" and not in_double:
+                in_single = not in_single
+            elif ch == '"' and not in_single:
+                in_double = not in_double
+            elif ch in comment_chars and not in_single and not in_double:
+                return text[:i], text[i:]
+        return text, ""
+
+    @staticmethod
+    def make_var_name(role_prefix: str, path: Iterable[str]) -> str:
+        """
+        Build an Ansible var name like:
+            role_prefix_section_subsection_key
+
+        Sanitises parts to lowercase [a-z0-9_] and strips extras.
+        """
+        role_prefix = role_prefix.strip().lower()
+        clean_parts: list[str] = []
+
+        for part in path:
+            part = str(part).strip()
+            part = part.replace(" ", "_")
+            cleaned_chars: list[str] = []
+            for c in part:
+                if c.isalnum() or c == "_":
+                    cleaned_chars.append(c.lower())
+                else:
+                    cleaned_chars.append("_")
+            cleaned_part = "".join(cleaned_chars).strip("_")
+            if cleaned_part:
+                clean_parts.append(cleaned_part)
+
+        if clean_parts:
+            return role_prefix + "_" + "_".join(clean_parts)
+        return role_prefix
diff --git a/src/jinjaturtle/handlers/dict.py b/src/jinjaturtle/handlers/dict.py
new file mode 100644
index 0000000..eb8d926
--- /dev/null
+++ b/src/jinjaturtle/handlers/dict.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+from typing import Any
+
+from . import BaseHandler
+
+
+class DictLikeHandler(BaseHandler):
+    """
+    Base for TOML/YAML/JSON: nested dict/list structures.
+
+    Subclasses control whether lists are flattened.
+    """
+
+    flatten_lists: bool = False  # override in subclasses
+
+    def flatten(self, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
+        items: list[tuple[tuple[str, ...], Any]] = []
+
+        def _walk(obj: Any, path: tuple[str, ...] = ()) -> None:
+            if isinstance(obj, dict):
+                for k, v in obj.items():
+                    _walk(v, path + (str(k),))
+            elif isinstance(obj, list) and self.flatten_lists:
+                for i, v in enumerate(obj):
+                    _walk(v, path + (str(i),))
+            else:
+                items.append((path, obj))
+
+        _walk(parsed)
+        return items
diff --git a/src/jinjaturtle/handlers/ini.py b/src/jinjaturtle/handlers/ini.py
new file mode 100644
index 0000000..24bf44f
--- /dev/null
+++ b/src/jinjaturtle/handlers/ini.py
@@ -0,0 +1,153 @@
+from __future__ import annotations
+
+import configparser
+from pathlib import Path
+from typing import Any
+
+from . import BaseHandler
+
+
+class IniHandler(BaseHandler):
+    fmt = "ini"
+
+    def parse(self, path: Path) -> configparser.ConfigParser:
+        parser = configparser.ConfigParser()
+        parser.optionxform = str  # preserve key case
+        with path.open("r", encoding="utf-8") as f:
+            parser.read_file(f)
+        return parser
+
+    def flatten(self, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
+        if not isinstance(parsed, configparser.ConfigParser):
+            raise TypeError("INI parser result must be a ConfigParser")
+        parser: configparser.ConfigParser = parsed
+        items: list[tuple[tuple[str, ...], Any]] = []
+        for section in parser.sections():
+            for key, value in parser.items(section, raw=True):
+                raw = value.strip()
+                if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in {'"', "'"}:
+                    processed: Any = raw[1:-1]
+                else:
+                    processed = raw
+                items.append(((section, key), processed))
+        return items
+
+    def generate_template(
+        self,
+        parsed: Any,
+        role_prefix: str,
+        original_text: str | None = None,
+    ) -> str:
+        if original_text is not None:
+            return self._generate_ini_template_from_text(role_prefix, original_text)
+        if not isinstance(parsed, configparser.ConfigParser):
+            raise TypeError("INI parser result must be a ConfigParser")
+        return self._generate_ini_template(role_prefix, parsed)
+
+    def _generate_ini_template(
+        self, role_prefix: str, parser: configparser.ConfigParser
+    ) -> str:
+        """
+        Generate an INI-style Jinja2 template from a ConfigParser.
+
+        Quoting heuristic:
+            foo = "bar"  -> foo = "{{ prefix_section_foo }}"
+            num = 42     -> num = {{ prefix_section_num }}
+        """
+        lines: list[str] = []
+
+        for section in parser.sections():
+            lines.append(f"[{section}]")
+            for key, value in parser.items(section, raw=True):
+                path = (section, key)
+                var_name = self.make_var_name(role_prefix, path)
+                value = value.strip()
+                if len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}:
+                    lines.append(f'{key} = "{{{{ {var_name} }}}}"')
+                else:
+                    lines.append(f"{key} = {{{{ {var_name} }}}}")
+            lines.append("")
+
+        return "\n".join(lines).rstrip() + "\n"
+
+    def _generate_ini_template_from_text(self, role_prefix: str, text: str) -> str:
+        """
+        Generate a Jinja2 template for an INI/php.ini-style file, preserving
+        comments, blank lines, and section headers by patching values in-place.
+ """ + lines = text.splitlines(keepends=True) + current_section: str | None = None + out_lines: list[str] = [] + + for raw_line in lines: + line = raw_line + stripped = line.lstrip() + + # Blank or pure comment: keep as-is + if not stripped or stripped[0] in {"#", ";"}: + out_lines.append(raw_line) + continue + + # Section header + if stripped.startswith("[") and "]" in stripped: + header_inner = stripped[1 : stripped.index("]")] + current_section = header_inner.strip() + out_lines.append(raw_line) + continue + + # Work without newline so we can re-attach it exactly + newline = "" + content = raw_line + if content.endswith("\r\n"): + newline = "\r\n" + content = content[:-2] + elif content.endswith("\n"): + newline = content[-1] + content = content[:-1] + + eq_index = content.find("=") + if eq_index == -1: + # Not a simple key=value line: leave untouched + out_lines.append(raw_line) + continue + + before_eq = content[:eq_index] + after_eq = content[eq_index + 1 :] + + key = before_eq.strip() + if not key: + out_lines.append(raw_line) + continue + + # Whitespace after '=' + value_ws_len = len(after_eq) - len(after_eq.lstrip(" \t")) + leading_ws = after_eq[:value_ws_len] + value_and_comment = after_eq[value_ws_len:] + + value_part, comment_part = self._split_inline_comment( + value_and_comment, {"#", ";"} + ) + raw_value = value_part.strip() + + path = (key,) if current_section is None else (current_section, key) + var_name = self.make_var_name(role_prefix, path) + + # Was the original value quoted? + use_quotes = ( + len(raw_value) >= 2 + and raw_value[0] == raw_value[-1] + and raw_value[0] in {'"', "'"} + ) + + if use_quotes: + quote_char = raw_value[0] + replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}" + else: + replacement_value = f"{{{{ {var_name} }}}}" + + new_content = ( + before_eq + "=" + leading_ws + replacement_value + comment_part + ) + out_lines.append(new_content + newline) + + return "".join(out_lines) diff --git a/src/jinjaturtle/handlers/json.py b/src/jinjaturtle/handlers/json.py new file mode 100644 index 0000000..5149238 --- /dev/null +++ b/src/jinjaturtle/handlers/json.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from . import DictLikeHandler + + +class JsonHandler(DictLikeHandler): + fmt = "json" + flatten_lists = True + + def parse(self, path: Path) -> Any: + with path.open("r", encoding="utf-8") as f: + return json.load(f) + + def generate_template( + self, + parsed: Any, + role_prefix: str, + original_text: str | None = None, + ) -> str: + if not isinstance(parsed, (dict, list)): + raise TypeError("JSON parser result must be a dict or list") + # As before: ignore original_text and rebuild structurally + return self._generate_json_template(role_prefix, parsed) + + def _generate_json_template(self, role_prefix: str, data: Any) -> str: + """ + Generate a JSON Jinja2 template from parsed JSON data. + + All scalar values are replaced with Jinja expressions whose names are + derived from the path, similar to TOML/YAML. + """ + + def _walk(obj: Any, path: tuple[str, ...] 
+            if isinstance(obj, dict):
+                return {k: _walk(v, path + (str(k),)) for k, v in obj.items()}
+            if isinstance(obj, list):
+                return [_walk(v, path + (str(i),)) for i, v in enumerate(obj)]
+            # scalar
+            var_name = self.make_var_name(role_prefix, path)
+            return f"{{{{ {var_name} }}}}"
+
+        templated = _walk(data)
+        return json.dumps(templated, indent=2, ensure_ascii=False) + "\n"
diff --git a/src/jinjaturtle/handlers/toml.py b/src/jinjaturtle/handlers/toml.py
new file mode 100644
index 0000000..b70a9c8
--- /dev/null
+++ b/src/jinjaturtle/handlers/toml.py
@@ -0,0 +1,205 @@
+from __future__ import annotations
+
+import tomllib
+from pathlib import Path
+from typing import Any
+
+from . import DictLikeHandler
+
+
+class TomlHandler(DictLikeHandler):
+    fmt = "toml"
+    flatten_lists = False  # keep lists as scalars
+
+    def parse(self, path: Path) -> Any:
+        if tomllib is None:
+            raise RuntimeError(
+                "tomllib/tomli is required to parse TOML files but is not installed"
+            )
+        with path.open("rb") as f:
+            return tomllib.load(f)
+
+    def generate_template(
+        self,
+        parsed: Any,
+        role_prefix: str,
+        original_text: str | None = None,
+    ) -> str:
+        if original_text is not None:
+            return self._generate_toml_template_from_text(role_prefix, original_text)
+        if not isinstance(parsed, dict):
+            raise TypeError("TOML parser result must be a dict")
+        return self._generate_toml_template(role_prefix, parsed)
+
+    def _generate_toml_template(self, role_prefix: str, data: dict[str, Any]) -> str:
+        """
+        Generate a TOML Jinja2 template from parsed TOML dict.
+
+        Values become Jinja placeholders, with quoting preserved for strings:
+            foo = "bar"  -> foo = "{{ prefix_foo }}"
+            port = 8080  -> port = {{ prefix_port }}
+        """
+        lines: list[str] = []
+
+        def emit_kv(path: tuple[str, ...], key: str, value: Any) -> None:
+            var_name = self.make_var_name(role_prefix, path + (key,))
+            if isinstance(value, str):
+                lines.append(f'{key} = "{{{{ {var_name} }}}}"')
+            else:
+                lines.append(f"{key} = {{{{ {var_name} }}}}")
+
+        def walk(obj: dict[str, Any], path: tuple[str, ...] = ()) -> None:
+            scalar_items = {k: v for k, v in obj.items() if not isinstance(v, dict)}
+            nested_items = {k: v for k, v in obj.items() if isinstance(v, dict)}
+
+            if path:
+                header = ".".join(path)
+                lines.append(f"[{header}]")
+
+            for key, val in scalar_items.items():
+                emit_kv(path, str(key), val)
+
+            if scalar_items:
+                lines.append("")
+
+            for key, val in nested_items.items():
+                walk(val, path + (str(key),))
+
+        # Root scalars (no table header)
+        root_scalars = {k: v for k, v in data.items() if not isinstance(v, dict)}
+        for key, val in root_scalars.items():
+            emit_kv((), str(key), val)
+        if root_scalars:
+            lines.append("")
+
+        # Tables
+        for key, val in data.items():
+            if isinstance(val, dict):
+                walk(val, (str(key),))
+
+        return "\n".join(lines).rstrip() + "\n"
+
+    def _generate_toml_template_from_text(self, role_prefix: str, text: str) -> str:
+        """
+        Generate a Jinja2 template for a TOML file, preserving comments,
+        blank lines, and table headers by patching values in-place.
+
+        Handles inline tables like:
+            temp_targets = { cpu = 79.5, case = 72.0 }
+
+        by mapping them to:
+            temp_targets = { cpu = {{ prefix_database_temp_targets_cpu }},
+                             case = {{ prefix_database_temp_targets_case }} }
+        """
+        lines = text.splitlines(keepends=True)
+        current_table: tuple[str, ...] = ()
+        out_lines: list[str] = []
+
+        for raw_line in lines:
+            line = raw_line
+            stripped = line.lstrip()
+
+            # Blank or pure comment
+            if not stripped or stripped.startswith("#"):
+                out_lines.append(raw_line)
+                continue
+
+            # Table header: [server] or [server.tls] or [[array.of.tables]]
+            if stripped.startswith("[") and "]" in stripped:
+                header = stripped
+                first_bracket = header.find("[")
+                closing_bracket = header.find("]", first_bracket + 1)
+                if first_bracket != -1 and closing_bracket != -1:
+                    inner = header[first_bracket + 1 : closing_bracket].strip()
+                    inner = inner.strip("[]")  # handle [[table]] as well
+                    parts = [p.strip() for p in inner.split(".") if p.strip()]
+                    current_table = tuple(parts)
+                out_lines.append(raw_line)
+                continue
+
+            # Try key = value
+            newline = ""
+            content = raw_line
+            if content.endswith("\r\n"):
+                newline = "\r\n"
+                content = content[:-2]
+            elif content.endswith("\n"):
+                newline = content[-1]
+                content = content[:-1]
+
+            eq_index = content.find("=")
+            if eq_index == -1:
+                out_lines.append(raw_line)
+                continue
+
+            before_eq = content[:eq_index]
+            after_eq = content[eq_index + 1 :]
+
+            key = before_eq.strip()
+            if not key:
+                out_lines.append(raw_line)
+                continue
+
+            # Whitespace after '='
+            value_ws_len = len(after_eq) - len(after_eq.lstrip(" \t"))
+            leading_ws = after_eq[:value_ws_len]
+            value_and_comment = after_eq[value_ws_len:]
+
+            value_part, comment_part = self._split_inline_comment(
+                value_and_comment, {"#"}
+            )
+            raw_value = value_part.strip()
+
+            # Path for this key (table + key)
+            path = current_table + (key,)
+
+            # Special case: inline table
+            if (
+                raw_value.startswith("{")
+                and raw_value.endswith("}")
+                and tomllib is not None
+            ):
+                try:
+                    # Parse the inline table as a tiny TOML document
+                    mini_source = "table = " + raw_value + "\n"
+                    mini_data = tomllib.loads(mini_source)["table"]
+                except Exception:
+                    mini_data = None
+
+                if isinstance(mini_data, dict):
+                    inner_bits: list[str] = []
+                    for sub_key, sub_val in mini_data.items():
+                        nested_path = path + (sub_key,)
+                        nested_var = self.make_var_name(role_prefix, nested_path)
+                        if isinstance(sub_val, str):
+                            inner_bits.append(f'{sub_key} = "{{{{ {nested_var} }}}}"')
+                        else:
+                            inner_bits.append(f"{sub_key} = {{{{ {nested_var} }}}}")
+                    replacement_value = "{ " + ", ".join(inner_bits) + " }"
+                    new_content = (
+                        before_eq + "=" + leading_ws + replacement_value + comment_part
+                    )
+                    out_lines.append(new_content + newline)
+                    continue
+                # If parsing fails, fall through to normal handling
+
+            # Normal scalar value handling (including bools, numbers, strings)
+            var_name = self.make_var_name(role_prefix, path)
+            use_quotes = (
+                len(raw_value) >= 2
+                and raw_value[0] == raw_value[-1]
+                and raw_value[0] in {'"', "'"}
+            )
+
+            if use_quotes:
+                quote_char = raw_value[0]
+                replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}"
+            else:
+                replacement_value = f"{{{{ {var_name} }}}}"
+
+            new_content = (
+                before_eq + "=" + leading_ws + replacement_value + comment_part
+            )
+            out_lines.append(new_content + newline)
+
+        return "".join(out_lines)
diff --git a/src/jinjaturtle/handlers/xml.py b/src/jinjaturtle/handlers/xml.py
new file mode 100644
index 0000000..4d99a7d
--- /dev/null
+++ b/src/jinjaturtle/handlers/xml.py
@@ -0,0 +1,230 @@
+from __future__ import annotations
+
+from collections import Counter, defaultdict
+from pathlib import Path
+from typing import Any
+import xml.etree.ElementTree as ET  # nosec
+
+from . import BaseHandler
+
+
+class XmlHandler(BaseHandler):
+    fmt = "xml"
+
+    def parse(self, path: Path) -> ET.Element:
+        text = path.read_text(encoding="utf-8")
+        # Parse with an explicit XMLParser instance so this stays compatible
+        # with Python versions where xml.etree.ElementTree.fromstring() may
+        # not accept a ``parser=`` keyword argument.
+        # defusedxml.defuse_stdlib() is called in the CLI entrypoint, so using
+        # the stdlib XMLParser here is safe.
+        parser = ET.XMLParser(
+            target=ET.TreeBuilder(insert_comments=False)
+        )  # nosec B314
+        parser.feed(text)
+        root = parser.close()
+        return root
+
+    def flatten(self, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
+        if not isinstance(parsed, ET.Element):
+            raise TypeError("XML parser result must be an Element")
+        return self._flatten_xml(parsed)
+
+    def generate_template(
+        self,
+        parsed: Any,
+        role_prefix: str,
+        original_text: str | None = None,
+    ) -> str:
+        if original_text is not None:
+            return self._generate_xml_template_from_text(role_prefix, original_text)
+        if not isinstance(parsed, ET.Element):
+            raise TypeError("XML parser result must be an Element")
+        xml_str = ET.tostring(parsed, encoding="unicode")
+        return self._generate_xml_template_from_text(role_prefix, xml_str)
+
+    def _flatten_xml(self, root: ET.Element) -> list[tuple[tuple[str, ...], Any]]:
+        """
+        Flatten an XML tree into (path, value) pairs.
+
+        Path conventions:
+          - Root element's children are treated as top-level (root tag is *not* included).
+          - Element text:
+              <foo>bar</foo>            -> path ("foo",) value "bar"
+              <foo attr="x">bar</foo>   -> path ("foo", "value") value "bar"
+              <foo><bar>baz</bar></foo> -> ("foo", "bar") / etc.
+          - Attributes:
+              <server host="localhost"/>
+              -> path ("server", "@host") value "localhost"
+          - Repeated sibling elements:
+              <endpoint>/a</endpoint>
+              <endpoint>/b</endpoint>
+              -> ("endpoint", "0") "/a"
+                 ("endpoint", "1") "/b"
+        """
+        items: list[tuple[tuple[str, ...], Any]] = []
+
+        def walk(elem: ET.Element, path: tuple[str, ...]) -> None:
+            # Attributes
+            for attr_name, attr_val in elem.attrib.items():
+                attr_path = path + (f"@{attr_name}",)
+                items.append((attr_path, attr_val))
+
+            # Children
+            children = [c for c in list(elem) if isinstance(c.tag, str)]
+
+            # Text content
+            text = (elem.text or "").strip()
+            if text:
+                if not elem.attrib and not children:
+                    # Simple <foo>bar</foo>
+                    items.append((path, text))
+                else:
+                    # Text alongside attrs/children
+                    items.append((path + ("value",), text))
+
+            # Repeated siblings get an index; singletons just use the tag
+            counts = Counter(child.tag for child in children)
+            index_counters: dict[str, int] = defaultdict(int)
+
+            for child in children:
+                tag = child.tag
+                if counts[tag] > 1:
+                    idx = index_counters[tag]
+                    index_counters[tag] += 1
+                    child_path = path + (tag, str(idx))
+                else:
+                    child_path = path + (tag,)
+                walk(child, child_path)
+
+        # Treat root as a container: its children are top-level
+        walk(root, ())
+        return items
+
+    def _split_xml_prolog(self, text: str) -> tuple[str, str]:
+        """
+        Split an XML document into (prolog, body), where prolog includes:
+          - XML declaration (<?xml ... ?>)
+          - top-level comments
+          - DOCTYPE
+        The body starts at the root element.
+ """ + i = 0 + n = len(text) + prolog_parts: list[str] = [] + + while i < n: + # Preserve leading whitespace + while i < n and text[i].isspace(): + prolog_parts.append(text[i]) + i += 1 + if i >= n: + break + + if text.startswith("", i + 2) + if end == -1: + break + prolog_parts.append(text[i : end + 2]) + i = end + 2 + continue + + if text.startswith("", i + 4) + if end == -1: + break + prolog_parts.append(text[i : end + 3]) + i = end + 3 + continue + + if text.startswith("", i + 9) + if end == -1: + break + prolog_parts.append(text[i : end + 1]) + i = end + 1 + continue + + if text[i] == "<": + # Assume root element starts here + break + + # Unexpected content: stop treating as prolog + break + + return "".join(prolog_parts), text[i:] + + def _apply_jinja_to_xml_tree(self, role_prefix: str, root: ET.Element) -> None: + """ + Mutate the XML tree in-place, replacing scalar values with Jinja + expressions based on the same paths used in _flatten_xml. + """ + + def walk(elem: ET.Element, path: tuple[str, ...]) -> None: + # Attributes + for attr_name in list(elem.attrib.keys()): + attr_path = path + (f"@{attr_name}",) + var_name = self.make_var_name(role_prefix, attr_path) + elem.set(attr_name, f"{{{{ {var_name} }}}}") + + # Children + children = [c for c in list(elem) if isinstance(c.tag, str)] + + # Text content + text = (elem.text or "").strip() + if text: + if not elem.attrib and not children: + text_path = path + else: + text_path = path + ("value",) + var_name = self.make_var_name(role_prefix, text_path) + elem.text = f"{{{{ {var_name} }}}}" + + # Repeated children get indexes just like in _flatten_xml + counts = Counter(child.tag for child in children) + index_counters: dict[str, int] = defaultdict(int) + + for child in children: + tag = child.tag + if counts[tag] > 1: + idx = index_counters[tag] + index_counters[tag] += 1 + child_path = path + (tag, str(idx)) + else: + child_path = path + (tag,) + walk(child, child_path) + + walk(root, ()) + + def _generate_xml_template_from_text(self, role_prefix: str, text: str) -> str: + """ + Generate a Jinja2 template for an XML file, preserving comments and prolog. + + - Attributes become Jinja placeholders: + + -> + + - Text nodes become placeholders: + 8080 + -> {{ prefix_port }} + + but if the element also has attributes/children, the value path + gets a trailing "value" component, matching flattening. + """ + prolog, body = self._split_xml_prolog(text) + + # Parse with comments included so are preserved + # defusedxml.defuse_stdlib() is called in CLI entrypoint + parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True)) # nosec B314 + parser.feed(body) + root = parser.close() + + self._apply_jinja_to_xml_tree(role_prefix, root) + + # Pretty indentation if available (Python 3.9+) + indent = getattr(ET, "indent", None) + if indent is not None: + indent(root, space=" ") # type: ignore[arg-type] + + xml_body = ET.tostring(root, encoding="unicode") + return prolog + xml_body diff --git a/src/jinjaturtle/handlers/yaml.py b/src/jinjaturtle/handlers/yaml.py new file mode 100644 index 0000000..2ebaf3e --- /dev/null +++ b/src/jinjaturtle/handlers/yaml.py @@ -0,0 +1,179 @@ +from __future__ import annotations + +import yaml +from pathlib import Path +from typing import Any + +from . 
+
+
+class YamlHandler(DictLikeHandler):
+    fmt = "yaml"
+    flatten_lists = True  # flatten YAML lists so each element can be templated
+
+    def parse(self, path: Path) -> Any:
+        text = path.read_text(encoding="utf-8")
+        return yaml.safe_load(text) or {}
+
+    def generate_template(
+        self,
+        parsed: Any,
+        role_prefix: str,
+        original_text: str | None = None,
+    ) -> str:
+        if original_text is not None:
+            return self._generate_yaml_template_from_text(role_prefix, original_text)
+        if not isinstance(parsed, (dict, list)):
+            raise TypeError("YAML parser result must be a dict or list")
+        dumped = yaml.safe_dump(parsed, sort_keys=False)
+        return self._generate_yaml_template_from_text(role_prefix, dumped)
+
+    def _generate_yaml_template_from_text(
+        self,
+        role_prefix: str,
+        text: str,
+    ) -> str:
+        """
+        Generate a Jinja2 template for a YAML file, preserving comments and
+        blank lines by patching scalar values in-place.
+
+        This handles common "config-ish" YAML:
+          - top-level and nested mappings
+          - lists of scalars
+          - lists of small mapping objects
+        It does *not* aim to support all YAML edge cases (anchors, tags, etc.).
+        """
+        lines = text.splitlines(keepends=True)
+        out_lines: list[str] = []
+
+        # Simple indentation-based context stack: (indent, path, kind)
+        # kind is "map" or "seq".
+        stack: list[tuple[int, tuple[str, ...], str]] = []
+
+        # Track index per parent path for sequences
+        seq_counters: dict[tuple[str, ...], int] = {}
+
+        def current_path() -> tuple[str, ...]:
+            return stack[-1][1] if stack else ()
+
+        for raw_line in lines:
+            stripped = raw_line.lstrip()
+            indent = len(raw_line) - len(stripped)
+
+            # Blank or pure comment lines unchanged
+            if not stripped or stripped.startswith("#"):
+                out_lines.append(raw_line)
+                continue
+
+            # Adjust stack based on indent
+            while stack and indent < stack[-1][0]:
+                stack.pop()
+
+            # --- Handle mapping key lines: "key:" or "key: value"
+            if ":" in stripped and not stripped.lstrip().startswith("- "):
+                # separate key and rest
+                key_part, rest = stripped.split(":", 1)
+                key = key_part.strip()
+                if not key:
+                    out_lines.append(raw_line)
+                    continue
+
+                # Is this just "key:" or "key: value"?
+                rest_stripped = rest.lstrip(" \t")
+
+                # Use the same inline-comment splitter to see if there's any real value
+                value_candidate, _ = self._split_inline_comment(rest_stripped, {"#"})
+                has_value = bool(value_candidate.strip())
+
+                # Update stack/context: current mapping at this indent
+                # Replace any existing mapping at same indent
+                if stack and stack[-1][0] == indent and stack[-1][2] == "map":
+                    stack.pop()
+                path = current_path() + (key,)
+                stack.append((indent, path, "map"))
+
+                if not has_value:
+                    # Just "key:" -> collection or nested structure begins on following lines.
+                    out_lines.append(raw_line)
+                    continue
+
+                # We have an inline scalar value on this same line.
+
+                # Separate value from inline comment
+                value_part, comment_part = self._split_inline_comment(
+                    rest_stripped, {"#"}
+                )
+                raw_value = value_part.strip()
+                var_name = self.make_var_name(role_prefix, path)
+
+                # Keep quote-style if original was quoted
+                use_quotes = (
+                    len(raw_value) >= 2
+                    and raw_value[0] == raw_value[-1]
+                    and raw_value[0] in {'"', "'"}
+                )
+
+                if use_quotes:
+                    q = raw_value[0]
+                    replacement = f"{q}{{{{ {var_name} }}}}{q}"
+                else:
+                    replacement = f"{{{{ {var_name} }}}}"
+
+                leading = rest[: len(rest) - len(rest.lstrip(" \t"))]
+                new_stripped = f"{key}: {leading}{replacement}{comment_part}"
+                out_lines.append(
+                    " " * indent
+                    + new_stripped
+                    + ("\n" if raw_line.endswith("\n") else "")
+                )
+                continue
+
+            # --- Handle list items: "- value" or "- key: value"
+            if stripped.startswith("- "):
+                # Determine parent path
+                # If top of stack isn't sequence at this indent, push one using current path
+                if not stack or stack[-1][0] != indent or stack[-1][2] != "seq":
+                    parent_path = current_path()
+                    stack.append((indent, parent_path, "seq"))
+
+                parent_path = stack[-1][1]
+                content = stripped[2:]  # after "- "
+                parent_path = stack[-1][1]
+                content = stripped[2:]  # after "- "
+
+                # Determine index for this parent path
+                index = seq_counters.get(parent_path, 0)
+                seq_counters[parent_path] = index + 1
+
+                path = parent_path + (str(index),)
+
+                value_part, comment_part = self._split_inline_comment(content, {"#"})
+                raw_value = value_part.strip()
+                var_name = self.make_var_name(role_prefix, path)
+
+                # If it's of the form "key: value" inside the list, we could try to
+                # support that, but a simple scalar is the common case:
+                use_quotes = (
+                    len(raw_value) >= 2
+                    and raw_value[0] == raw_value[-1]
+                    and raw_value[0] in {'"', "'"}
+                )
+
+                if use_quotes:
+                    q = raw_value[0]
+                    replacement = f"{q}{{{{ {var_name} }}}}{q}"
+                else:
+                    replacement = f"{{{{ {var_name} }}}}"
+
+                new_stripped = f"- {replacement}{comment_part}"
+                out_lines.append(
+                    " " * indent
+                    + new_stripped
+                    + ("\n" if raw_line.endswith("\n") else "")
+                )
+                continue
+
+            # Anything else (multi-line scalars, weird YAML): leave untouched
+            out_lines.append(raw_line)
+
+        return "".join(out_lines)
diff --git a/tests.sh b/tests.sh
index 3fc2763..056351f 100755
--- a/tests.sh
+++ b/tests.sh
@@ -1,3 +1,11 @@
 #!/bin/bash
+set -eo pipefail
+
+# Run pytests
 poetry run pytest -vvvv --cov=jinjaturtle --cov-report=term-missing --disable-warnings
+
+# Ensure we test the CLI like a human
+for file in `ls -1 tests/samples/*`; do
+  poetry run jinjaturtle -r test $file -d test.yml -t test.j2
+done
diff --git a/tests/test_base_handler.py b/tests/test_base_handler.py
new file mode 100644
index 0000000..cd8b0c1
--- /dev/null
+++ b/tests/test_base_handler.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+
+from jinjaturtle.handlers.base import BaseHandler
+
+
+def test_split_inline_comment_handles_quoted_hash():
+    # The '#' inside quotes should not start a comment; the one outside should.
+    text = " 'foo # not comment' # real"
+    handler = BaseHandler()
+    value, comment = handler._split_inline_comment(text, {"#"})
+    assert "not comment" in value
+    assert comment.strip() == "# real"
+
+
+def test_base_handler_abstract_methods_raise_not_implemented(tmp_path: Path):
+    """
+    Ensure the abstract methods on BaseHandler all raise NotImplementedError.
+    This covers the stub implementations.
+ """ + handler = BaseHandler() + dummy_path = tmp_path / "dummy.cfg" + + with pytest.raises(NotImplementedError): + handler.parse(dummy_path) + + with pytest.raises(NotImplementedError): + handler.flatten(object()) + + with pytest.raises(NotImplementedError): + handler.generate_template(parsed=object(), role_prefix="role") diff --git a/tests/test_core.py b/tests/test_core.py deleted file mode 100644 index 53e979c..0000000 --- a/tests/test_core.py +++ /dev/null @@ -1,653 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -import configparser -import pytest -import textwrap -import yaml -import xml.etree.ElementTree as ET - -import jinjaturtle.core as core -from jinjaturtle.core import ( - detect_format, - parse_config, - flatten_config, - generate_defaults_yaml, - generate_template, - make_var_name, -) - -SAMPLES_DIR = Path(__file__).parent / "samples" - - -def test_make_var_name_basic(): - # simple sanity checks on the naming rules - assert ( - make_var_name("jinjaturtle", ("somesection", "foo")) - == "jinjaturtle_somesection_foo" - ) - assert ( - make_var_name("JinjaTurtle", ("Other-Section", "some value")) - == "jinjaturtle_other_section_some_value" - ) - # no trailing underscores, all lowercase, no spaces - name = make_var_name("MyRole", (" Section Name ", "Key-Name ")) - assert name == name.lower() - assert " " not in name - assert not name.endswith("_") - - -def test_make_var_name_empty_path_returns_prefix(): - # Cover the branch where there are no path components. - assert make_var_name("MyRole", ()) == "myrole" - - -def test_detect_format_explicit_overrides_suffix(tmp_path: Path): - # Explicit format should win over file suffix. - cfg_path = tmp_path / "config.ini" - cfg_path.write_text("[section]\nkey=value\n", encoding="utf-8") - - fmt = detect_format(cfg_path, explicit="toml") - assert fmt == "toml" - - -def test_detect_format_fallback_ini(tmp_path: Path): - # Unknown suffix should fall back to "ini". 
- cfg_path = tmp_path / "weird.cnf" - cfg_path.write_text("[section]\nkey=value\n", encoding="utf-8") - - fmt, parsed = parse_config(cfg_path) # no explicit fmt - assert fmt == "ini" - # parsed should be an INI ConfigParser with our section/key - flat = flatten_config(fmt, parsed) - assert any(path == ("section", "key") for path, _ in flat) - - -def test_toml_sample_roundtrip(): - toml_path = SAMPLES_DIR / "tom.toml" - assert toml_path.is_file(), f"Missing sample TOML file: {toml_path}" - - fmt, parsed = parse_config(toml_path) - assert fmt == "toml" - - flat_items = flatten_config(fmt, parsed) - assert flat_items - - defaults_yaml = generate_defaults_yaml("jinjaturtle", flat_items) - defaults = yaml.safe_load(defaults_yaml) - - # defaults should be a non-empty dict - assert isinstance(defaults, dict) - assert defaults, "Expected non-empty defaults for TOML sample" - - # all keys should be lowercase, start with prefix, and have no spaces - for key in defaults: - assert key.startswith("jinjaturtle_") - assert key == key.lower() - assert " " not in key - - # template generation – **now with original_text** - original_text = toml_path.read_text(encoding="utf-8") - template = generate_template( - fmt, parsed, "jinjaturtle", original_text=original_text - ) - assert isinstance(template, str) - assert template.strip() - - # comments from the original file should now be preserved - assert "# This is a TOML document" in template - - # each default variable name should appear in the template as a Jinja placeholder - for var_name in defaults: - assert ( - var_name in template - ), f"Variable {var_name} not referenced in TOML template" - - -def test_ini_php_sample_roundtrip(): - ini_path = SAMPLES_DIR / "php.ini" - assert ini_path.is_file(), f"Missing sample INI file: {ini_path}" - - fmt, parsed = parse_config(ini_path) - assert fmt == "ini" - - flat_items = flatten_config(fmt, parsed) - assert flat_items, "Expected at least one flattened item from php.ini sample" - - defaults_yaml = generate_defaults_yaml("php", flat_items) - defaults = yaml.safe_load(defaults_yaml) - - # defaults should be a non-empty dict - assert isinstance(defaults, dict) - assert defaults, "Expected non-empty defaults for php.ini sample" - - # all keys should be lowercase, start with prefix, and have no spaces - for key in defaults: - assert key.startswith("php_") - assert key == key.lower() - assert " " not in key - - # template generation - original_text = ini_path.read_text(encoding="utf-8") - template = generate_template(fmt, parsed, "php", original_text=original_text) - assert "; About this file" in template - assert isinstance(template, str) - assert template.strip(), "Template for php.ini sample should not be empty" - - # each default variable name should appear in the template as a Jinja placeholder - for var_name in defaults: - assert ( - var_name in template - ), f"Variable {var_name} not referenced in INI template" - - -def test_formats_match_expected_extensions(): - """ - Sanity check that format detection lines up with the filenames - we’re using for the samples. 
- """ - toml_path = SAMPLES_DIR / "tom.toml" - ini_path = SAMPLES_DIR / "php.ini" - xml_path = SAMPLES_DIR / "ossec.xml" - - fmt_toml, _ = parse_config(toml_path) - fmt_ini, _ = parse_config(ini_path) - fmt_xml, _ = parse_config(xml_path) - - assert fmt_toml == "toml" - assert fmt_ini == "ini" - assert fmt_xml == "xml" - - -def test_parse_config_toml_missing_tomllib(monkeypatch): - """ - Force tomllib to None to hit the RuntimeError branch when parsing TOML. - """ - toml_path = SAMPLES_DIR / "tom.toml" - - # Simulate an environment without tomllib/tomli - monkeypatch.setattr(core, "tomllib", None) - - with pytest.raises(RuntimeError) as exc: - core.parse_config(toml_path, fmt="toml") - assert "tomllib/tomli is required" in str(exc.value) - - -def test_parse_config_unsupported_format(tmp_path: Path): - """ - Hit the ValueError in parse_config when fmt is not a supported format. - """ - cfg_path = tmp_path / "config.whatever" - cfg_path.write_text("", encoding="utf-8") - - with pytest.raises(ValueError): - parse_config(cfg_path, fmt="bogus") - - -def test_generate_template_type_and_format_errors(): - """ - Exercise the error branches in generate_template: - - toml with non-dict parsed - - ini with non-ConfigParser parsed - - yaml with wrong parsed type - - completely unsupported fmt (with and without original_text) - """ - # wrong type for TOML - with pytest.raises(TypeError): - generate_template("toml", parsed="not a dict", role_prefix="role") - - # wrong type for INI - with pytest.raises(TypeError): - generate_template("ini", parsed={"not": "a configparser"}, role_prefix="role") - - # wrong type for YAML - with pytest.raises(TypeError): - generate_template("yaml", parsed=None, role_prefix="role") - - # wrong type for JSON - with pytest.raises(TypeError): - generate_template("json", parsed=None, role_prefix="role") - - # unsupported format, no original_text - with pytest.raises(ValueError): - generate_template("bogusfmt", parsed=None, role_prefix="role") - - # unsupported format, with original_text - with pytest.raises(ValueError): - generate_template( - "bogusfmt", - parsed=None, - role_prefix="role", - original_text="foo=bar", - ) - - -def test_normalize_default_value_true_false_strings(): - # 'true'/'false' strings should be preserved as strings and double-quoted in YAML. - flat_items = [ - (("section", "foo"), "true"), - (("section", "bar"), "FALSE"), - ] - defaults_yaml = generate_defaults_yaml("role", flat_items) - data = yaml.safe_load(defaults_yaml) - assert data["role_section_foo"] == "true" - assert data["role_section_bar"] == "FALSE" - - -def test_split_inline_comment_handles_quoted_hash(): - # The '#' inside quotes should not start a comment; the one outside should. - text = " 'foo # not comment' # real" - value, comment = core._split_inline_comment(text, {"#"}) - assert "not comment" in value - assert comment.strip() == "# real" - - -def test_generate_template_fallback_toml_and_ini(): - # When original_text is not provided, generate_template should use the - # older fallback generators based on the parsed structures. 
- parsed_toml = { - "title": "Example", - "server": {"port": 8080, "host": "127.0.0.1"}, - "logging": { - "file": {"path": "/tmp/app.log"} - }, # nested table to hit recursive walk - } - tmpl_toml = generate_template("toml", parsed=parsed_toml, role_prefix="role") - assert "[server]" in tmpl_toml - assert "role_server_port" in tmpl_toml - assert "[logging]" in tmpl_toml or "[logging.file]" in tmpl_toml - - parser = configparser.ConfigParser() - # foo is quoted in the INI text to hit the "preserve quotes" branch - parser["section"] = {"foo": '"bar"', "num": "42"} - tmpl_ini = generate_template("ini", parsed=parser, role_prefix="role") - assert "[section]" in tmpl_ini - assert "role_section_foo" in tmpl_ini - assert '"{{ role_section_foo }}"' in tmpl_ini # came from quoted INI value - - -def test_generate_ini_template_from_text_edge_cases(): - # Cover CRLF newlines, lines without '=', and lines with no key before '='. - text = "[section]\r\nkey=value\r\nnoequals\r\n = bare\r\n" - tmpl = core._generate_ini_template_from_text("role", text) - # We don't care about exact formatting here, just that it runs and - # produces some reasonable output. - assert "[section]" in tmpl - assert "role_section_key" in tmpl - # The "noequals" line should be preserved as-is. - assert "noequals" in tmpl - # The " = bare" line has no key and should be left untouched. - assert " = bare" in tmpl - - -def test_generate_toml_template_from_text_edge_cases(): - # Cover CRLF newlines, lines without '=', empty keys, and inline tables - # that both parse successfully and fail parsing. - text = ( - "# comment\r\n" - "[table]\r\n" - "noequals\r\n" - " = 42\r\n" - 'inline_good = { name = "abc", value = 1 }\r\n' - "inline_bad = { invalid = }\r\n" - ) - tmpl = core._generate_toml_template_from_text("role", text) - # The good inline table should expand into two separate variables. - assert "role_table_inline_good_name" in tmpl - assert "role_table_inline_good_value" in tmpl - # The bad inline table should fall back to scalar handling. - assert "role_table_inline_bad" in tmpl - # Ensure the lines without '=' / empty key were handled without exploding. 
- assert "[table]" in tmpl - assert "noequals" in tmpl - - -def test_yaml_roundtrip_with_list_and_comment(tmp_path: Path): - yaml_path = SAMPLES_DIR / "bar.yaml" - assert yaml_path.is_file(), f"Missing sample YAML file: {yaml_path}" - - fmt, parsed = parse_config(yaml_path) - - assert fmt == "yaml" - - flat_items = flatten_config(fmt, parsed) - defaults_yaml = generate_defaults_yaml("foobar", flat_items) - defaults = yaml.safe_load(defaults_yaml) - - # Defaults: keys are flattened with indices - assert defaults["foobar_foo"] == "bar" - assert defaults["foobar_blah_0"] == "something" - assert defaults["foobar_blah_1"] == "else" - - # Template generation (preserving comments) - original_text = yaml_path.read_text(encoding="utf-8") - template = generate_template(fmt, parsed, "foobar", original_text=original_text) - - # Comment preserved - assert "# Top comment" in template - - # Scalar replacement - assert "foo:" in template - assert "foobar_foo" in template - - # List items use indexed vars, not "item" - assert "foobar_blah_0" in template - assert "foobar_blah_1" in template - assert "{{ foobar_blah }}" not in template - assert "foobar_blah_item" not in template - - -def test_json_roundtrip(tmp_path: Path): - json_path = SAMPLES_DIR / "foo.json" - assert json_path.is_file(), f"Missing sample JSON file: {json_path}" - - fmt, parsed = parse_config(json_path) - assert fmt == "json" - - flat_items = flatten_config(fmt, parsed) - defaults_yaml = generate_defaults_yaml("foobar", flat_items) - defaults = yaml.safe_load(defaults_yaml) - - # Defaults: nested keys and list indices - assert defaults["foobar_foo"] == "bar" - assert defaults["foobar_nested_a"] == 1 - # Bool normalized to string "true" - assert defaults["foobar_nested_b"] == "true" - assert defaults["foobar_list_0"] == 10 - assert defaults["foobar_list_1"] == 20 - - # Template generation (JSON has no comments, so we just rebuild) - template = generate_template(fmt, parsed, "foobar") - - assert '"foo": "{{ foobar_foo }}"' in template - assert "foobar_nested_a" in template - assert "foobar_nested_b" in template - assert "foobar_list_0" in template - assert "foobar_list_1" in template - - -def test_generate_yaml_template_from_text_edge_cases(): - """ - Exercise YAML text edge cases: - - indentation dedent (stack pop) - - empty key before ':' - - quoted and unquoted list items - """ - text = textwrap.dedent( - """ - root: - child: 1 - other: 2 - : 3 - list: - - "quoted" - - unquoted - """ - ) - - tmpl = core._generate_yaml_template_from_text("role", text) - - # Dedent from "root -> child" back to "other" exercises the stack-pop path. - # Just check the expected variable names appear. - assert "role_root_child" in tmpl - assert "role_other" in tmpl - - # The weird " : 3" line has no key and should be left untouched. - assert " : 3" in tmpl - - # The list should generate indexed variables for each item. - # First item is quoted (use_quotes=True), second is unquoted. - assert "role_list_0" in tmpl - assert "role_list_1" in tmpl - - -def test_generate_template_yaml_structural_fallback(): - """ - When original_text is not provided for YAML, generate_template should use - the structural fallback path (yaml.safe_dump + _generate_yaml_template_from_text). - """ - parsed = {"outer": {"inner": "val"}} - - tmpl = generate_template("yaml", parsed=parsed, role_prefix="role") - - # We don't care about exact formatting, just that the expected variable - # name shows up, proving we went through the structural path. 
- assert "role_outer_inner" in tmpl - - -def test_generate_template_json_type_error(): - """ - Wrong type for JSON in generate_template should raise TypeError. - """ - with pytest.raises(TypeError): - generate_template("json", parsed="not a dict", role_prefix="role") - - -def test_fallback_str_representer_for_unknown_type(): - """ - Ensure that the _fallback_str_representer is used for objects that - PyYAML doesn't know how to represent. - """ - - class Weird: - def __str__(self) -> str: - return "weird-value" - - data = {"foo": Weird()} - - # This will exercise _fallback_str_representer, because Weird has no - # dedicated representer and _TurtleDumper registers our fallback for None. - dumped = yaml.dump( - data, - Dumper=core._TurtleDumper, - sort_keys=False, - default_flow_style=False, - ) - - # It should serialize without error, and the string form should appear. - assert "weird-value" in dumped - - -def test_xml_roundtrip_ossec_web_rules(): - xml_path = SAMPLES_DIR / "ossec.xml" - assert xml_path.is_file(), f"Missing sample XML file: {xml_path}" - - fmt, parsed = parse_config(xml_path) - assert fmt == "xml" - - flat_items = flatten_config(fmt, parsed) - assert flat_items, "Expected at least one flattened item from XML sample" - - defaults_yaml = generate_defaults_yaml("ossec", flat_items) - defaults = yaml.safe_load(defaults_yaml) - - # defaults should be a non-empty dict - assert isinstance(defaults, dict) - assert defaults, "Expected non-empty defaults for XML sample" - - # all keys should be lowercase, start with prefix, and have no spaces - for key in defaults: - assert key.startswith("ossec_") - assert key == key.lower() - assert " " not in key - - # Root attribute should flatten to ossec_name - assert defaults["ossec_name"] == "web,accesslog," - - # There should be at least one default for rule id="31100" - id_keys = [k for k, v in defaults.items() if v == "31100"] - assert id_keys, "Expected to find a default for rule id 31100" - - # At least one of them should be the rule *id* attribute - assert any( - key.startswith("ossec_rule_") and key.endswith("_id") for key in id_keys - ), f"Expected at least one *_id var for value 31100, got: {id_keys}" - - # Template generation (preserving comments) - original_text = xml_path.read_text(encoding="utf-8") - template = generate_template(fmt, parsed, "ossec", original_text=original_text) - assert isinstance(template, str) - assert template.strip(), "Template for XML sample should not be empty" - - # Top-of-file and mid-file comments should be preserved - assert "Official Web access rules for OSSEC." 
in template - assert "Rules to ignore crawlers" in template - - # Each default variable name should appear in the template as a Jinja placeholder - for var_name in defaults: - assert ( - var_name in template - ), f"Variable {var_name} not referenced in XML template" - - -def test_generate_xml_template_from_text_edge_cases(): - """ - Exercise XML text edge cases: - - XML declaration and DOCTYPE in prolog - - top-level and inner comments - - repeated child elements (indexing) - - attributes and text content - """ - text = textwrap.dedent( - """\ - - - - - - text - other - - """ - ) - - tmpl = core._generate_xml_template_from_text("role", text) - - # Prolog and comments preserved - assert " role_attr) - assert "role_attr" in tmpl - - # Repeated elements should be indexed in both attr and text - assert "role_child_0_attr" in tmpl - assert "role_child_0" in tmpl - assert "role_child_1" in tmpl - - -def test_generate_template_xml_type_error(): - """ - Wrong type for XML in generate_template should raise TypeError. - """ - with pytest.raises(TypeError): - generate_template("xml", parsed="not an element", role_prefix="role") - - -def test_flatten_config_xml_type_error(): - """ - Wrong type for XML in flatten_config should raise TypeError. - """ - with pytest.raises(TypeError): - flatten_config("xml", parsed="not-an-element") - - -def test_generate_template_xml_structural_fallback(): - """ - When original_text is not provided for XML, generate_template should use - the structural fallback path (ET.tostring + _generate_xml_template_from_text). - """ - xml_text = textwrap.dedent( - """\ - - 2 - text - - """ - ) - root = ET.fromstring(xml_text) - - tmpl = generate_template("xml", parsed=root, role_prefix="role") - - # Root attribute path ("@attr",) -> role_attr - assert "role_attr" in tmpl - - # Simple child element text ("child",) -> role_child - assert "role_child" in tmpl - - # Element with both attr and text: - # - attr -> ("node", "@attr") -> role_node_attr - # - text -> ("node", "value") -> role_node_value - assert "role_node_attr" in tmpl - assert "role_node_value" in tmpl - - -def test_split_xml_prolog_only_whitespace(): - """ - Whitespace-only input: prolog is the whitespace, body is empty. - Exercises the 'if i >= n: break' path. - """ - text = " \n\t" - prolog, body = core._split_xml_prolog(text) - assert prolog == text - assert body == "" - - -def test_split_xml_prolog_unterminated_declaration(): - """ - Unterminated XML declaration should hit the 'end == -1' branch and - treat the whole string as body. - """ - text = "" - prolog, body = core._split_xml_prolog(text) - assert prolog == "" - assert body == text - - -def test_flatten_xml_text_with_attributes_uses_value_suffix(): - """ - When an element has both attributes and text, _flatten_xml should store - the text at path + ('value',), not just path. 
- """ - xml_text = "text" - root = ET.fromstring(xml_text) - - items = flatten_config("xml", root) - - # Attribute path: ("node", "@attr") -> "x" - assert (("node", "@attr"), "x") in items - - # Text-with-attrs path: ("node", "value") -> "text" - assert (("node", "value"), "text") in items diff --git a/tests/test_core_utils.py b/tests/test_core_utils.py new file mode 100644 index 0000000..3138970 --- /dev/null +++ b/tests/test_core_utils.py @@ -0,0 +1,202 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest +import yaml + +import jinjaturtle.core as core +from jinjaturtle.core import ( + detect_format, + parse_config, + flatten_config, + generate_defaults_yaml, + generate_template, + make_var_name, +) + +SAMPLES_DIR = Path(__file__).parent / "samples" + + +def test_make_var_name_basic(): + # simple sanity checks on the naming rules + assert ( + make_var_name("jinjaturtle", ("somesection", "foo")) + == "jinjaturtle_somesection_foo" + ) + assert ( + make_var_name("JinjaTurtle", ("Other-Section", "some value")) + == "jinjaturtle_other_section_some_value" + ) + # no trailing underscores, all lowercase, no spaces + name = make_var_name("MyRole", (" Section Name ", "Key-Name ")) + assert name == name.lower() + assert " " not in name + assert not name.endswith("_") + + +def test_make_var_name_empty_path_returns_prefix(): + # Cover the branch where there are no path components. + assert make_var_name("MyRole", ()) == "myrole" + + +def test_detect_format_explicit_overrides_suffix(tmp_path: Path): + # Explicit format should win over file suffix. + cfg_path = tmp_path / "config.ini" + cfg_path.write_text("[section]\nkey=value\n", encoding="utf-8") + + fmt = detect_format(cfg_path, explicit="toml") + assert fmt == "toml" + + +def test_detect_format_fallback_ini(tmp_path: Path): + # Unknown suffix should fall back to "ini". + cfg_path = tmp_path / "weird.cnf" + cfg_path.write_text("[section]\nkey=value\n", encoding="utf-8") + + fmt, parsed = parse_config(cfg_path) # no explicit fmt + assert fmt == "ini" + # parsed should be an INI ConfigParser with our section/key + flat = flatten_config(fmt, parsed) + assert any(path == ("section", "key") for path, _ in flat) + + +def test_formats_match_expected_extensions(): + """ + Sanity check that format detection lines up with the filenames + we’re using for the samples. + """ + toml_path = SAMPLES_DIR / "tom.toml" + ini_path = SAMPLES_DIR / "php.ini" + xml_path = SAMPLES_DIR / "ossec.xml" + + fmt_toml, _ = parse_config(toml_path) + fmt_ini, _ = parse_config(ini_path) + fmt_xml, _ = parse_config(xml_path) + + assert fmt_toml == "toml" + assert fmt_ini == "ini" + assert fmt_xml == "xml" + + +def test_parse_config_unsupported_format(tmp_path: Path): + """ + Hit the ValueError in parse_config when fmt is not a supported format. 
+    """
+    cfg_path = tmp_path / "config.whatever"
+    cfg_path.write_text("", encoding="utf-8")
+
+    with pytest.raises(ValueError):
+        parse_config(cfg_path, fmt="bogus")
+
+
+def test_generate_template_type_and_format_errors():
+    """
+    Exercise the error branches in generate_template:
+    - toml with non-dict parsed
+    - ini with non-ConfigParser parsed
+    - yaml with wrong parsed type
+    - json with wrong parsed type
+    - completely unsupported fmt (with and without original_text)
+    """
+    # wrong type for TOML
+    with pytest.raises(TypeError):
+        generate_template("toml", parsed="not a dict", role_prefix="role")
+
+    # wrong type for INI
+    with pytest.raises(TypeError):
+        generate_template("ini", parsed={"not": "a configparser"}, role_prefix="role")
+
+    # wrong type for YAML
+    with pytest.raises(TypeError):
+        generate_template("yaml", parsed=None, role_prefix="role")
+
+    # wrong type for JSON
+    with pytest.raises(TypeError):
+        generate_template("json", parsed=None, role_prefix="role")
+
+    # unsupported format, no original_text
+    with pytest.raises(ValueError):
+        generate_template("bogusfmt", parsed=None, role_prefix="role")
+
+    # unsupported format, with original_text
+    with pytest.raises(ValueError):
+        generate_template(
+            "bogusfmt",
+            parsed=None,
+            role_prefix="role",
+            original_text="foo=bar",
+        )
+
+
+def test_normalize_default_value_true_false_strings():
+    # 'true'/'false' strings should be preserved as strings and double-quoted in YAML.
+    flat_items = [
+        (("section", "foo"), "true"),
+        (("section", "bar"), "FALSE"),
+    ]
+    defaults_yaml = generate_defaults_yaml("role", flat_items)
+    data = yaml.safe_load(defaults_yaml)
+    assert data["role_section_foo"] == "true"
+    assert data["role_section_bar"] == "FALSE"
+
+
+def test_fallback_str_representer_for_unknown_type():
+    """
+    Ensure that the _fallback_str_representer is used for objects that
+    PyYAML doesn't know how to represent.
+    """
+
+    class Weird:
+        def __str__(self) -> str:
+            return "weird-value"
+
+    data = {"foo": Weird()}
+
+    dumped = yaml.dump(
+        data,
+        Dumper=core._TurtleDumper,
+        sort_keys=False,
+        default_flow_style=False,
+    )
+
+    # It should serialize without error, and the string form should appear.
+    assert "weird-value" in dumped
+
+
+def test_normalize_default_value_bool_inputs_are_stringified():
+    """
+    Real boolean values should be turned into quoted 'true'/'false' strings
+    by _normalize_default_value via generate_defaults_yaml.
+    """
+    flat_items = [
+        (("section", "flag_true"), True),
+        (("section", "flag_false"), False),
+    ]
+    defaults_yaml = generate_defaults_yaml("role", flat_items)
+    data = yaml.safe_load(defaults_yaml)
+
+    assert data["role_section_flag_true"] == "true"
+    assert data["role_section_flag_false"] == "false"
+
+
+def test_flatten_config_unsupported_format():
+    """
+    Calling flatten_config with an unknown fmt should raise ValueError.
+ """ + with pytest.raises(ValueError) as exc: + flatten_config("bogusfmt", parsed=None) + + assert "Unsupported format" in str(exc.value) diff --git a/tests/test_ini_handler.py b/tests/test_ini_handler.py new file mode 100644 index 0000000..51ae457 --- /dev/null +++ b/tests/test_ini_handler.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from pathlib import Path +import configparser +import pytest +import yaml + +from jinjaturtle.core import ( + parse_config, + flatten_config, + generate_defaults_yaml, + generate_template, +) +from jinjaturtle.handlers.ini import IniHandler + +SAMPLES_DIR = Path(__file__).parent / "samples" + + +def test_ini_php_sample_roundtrip(): + ini_path = SAMPLES_DIR / "php.ini" + assert ini_path.is_file(), f"Missing sample INI file: {ini_path}" + + fmt, parsed = parse_config(ini_path) + assert fmt == "ini" + + flat_items = flatten_config(fmt, parsed) + assert flat_items, "Expected at least one flattened item from php.ini sample" + + defaults_yaml = generate_defaults_yaml("php", flat_items) + defaults = yaml.safe_load(defaults_yaml) + + # defaults should be a non-empty dict + assert isinstance(defaults, dict) + assert defaults, "Expected non-empty defaults for php.ini sample" + + # all keys should be lowercase, start with prefix, and have no spaces + for key in defaults: + assert key.startswith("php_") + assert key == key.lower() + assert " " not in key + + # template generation + original_text = ini_path.read_text(encoding="utf-8") + template = generate_template(fmt, parsed, "php", original_text=original_text) + assert "; About this file" in template + assert isinstance(template, str) + assert template.strip(), "Template for php.ini sample should not be empty" + + # each default variable name should appear in the template as a Jinja placeholder + for var_name in defaults: + assert ( + var_name in template + ), f"Variable {var_name} not referenced in INI template" + + +def test_generate_template_fallback_ini(): + """ + When original_text is not provided, generate_template should use the + structural fallback path for INI configs. + """ + parser = configparser.ConfigParser() + # foo is quoted in the INI text to hit the "preserve quotes" branch + parser["section"] = {"foo": '"bar"', "num": "42"} + + tmpl_ini = generate_template("ini", parsed=parser, role_prefix="role") + assert "[section]" in tmpl_ini + assert "role_section_foo" in tmpl_ini + assert '"{{ role_section_foo }}"' in tmpl_ini # came from quoted INI value + + +def test_generate_ini_template_from_text_edge_cases(): + # Cover CRLF newlines, lines without '=', and lines with no key before '='. + text = "[section]\r\nkey=value\r\nnoequals\r\n = bare\r\n" + handler = IniHandler() + tmpl = handler._generate_ini_template_from_text("role", text) + + # We don't care about exact formatting here, just that it runs and + # produces some reasonable output. + assert "[section]" in tmpl + assert "role_section_key" in tmpl + # The "noequals" line should be preserved as-is. + assert "noequals" in tmpl + # The " = bare" line has no key and should be left untouched. + assert " = bare" in tmpl + + +def test_ini_handler_flatten_type_error(): + """ + Passing a non-ConfigParser into IniHandler.flatten should raise TypeError. 
+ """ + handler = IniHandler() + with pytest.raises(TypeError): + handler.flatten(parsed={"not": "a configparser"}) diff --git a/tests/test_json_handler.py b/tests/test_json_handler.py new file mode 100644 index 0000000..8e6efe2 --- /dev/null +++ b/tests/test_json_handler.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from pathlib import Path + +import json +import pytest +import yaml + +from jinjaturtle.core import ( + parse_config, + flatten_config, + generate_defaults_yaml, +) +from jinjaturtle.handlers.json import JsonHandler + +SAMPLES_DIR = Path(__file__).parent / "samples" + + +def test_json_roundtrip(): + json_path = SAMPLES_DIR / "foo.json" + assert json_path.is_file(), f"Missing sample JSON file: {json_path}" + + fmt, parsed = parse_config(json_path) + assert fmt == "json" + + flat_items = flatten_config(fmt, parsed) + defaults_yaml = generate_defaults_yaml("foobar", flat_items) + defaults = yaml.safe_load(defaults_yaml) + + # Defaults: nested keys and list indices + assert defaults["foobar_foo"] == "bar" + assert defaults["foobar_nested_a"] == 1 + # Bool normalized to string "true" + assert defaults["foobar_nested_b"] == "true" + assert defaults["foobar_list_0"] == 10 + assert defaults["foobar_list_1"] == 20 + + # Template generation is done via JsonHandler.generate_template; we just + # make sure it produces a structure with the expected placeholders. + handler = JsonHandler() + templated = json.loads(handler.generate_template(parsed, role_prefix="foobar")) + + assert templated["foo"] == "{{ foobar_foo }}" + assert "foobar_nested_a" in str(templated) + assert "foobar_nested_b" in str(templated) + assert "foobar_list_0" in str(templated) + assert "foobar_list_1" in str(templated) + + +def test_generate_template_json_type_error(): + """ + Wrong type for JSON in JsonHandler.generate_template should raise TypeError. 
+ """ + handler = JsonHandler() + with pytest.raises(TypeError): + handler.generate_template(parsed="not a dict", role_prefix="role") diff --git a/tests/test_toml_handler.py b/tests/test_toml_handler.py new file mode 100644 index 0000000..b36830f --- /dev/null +++ b/tests/test_toml_handler.py @@ -0,0 +1,114 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest +import yaml + +from jinjaturtle.core import ( + parse_config, + flatten_config, + generate_defaults_yaml, + generate_template, +) +from jinjaturtle.handlers.toml import TomlHandler +import jinjaturtle.handlers.toml as toml_module + +SAMPLES_DIR = Path(__file__).parent / "samples" + + +def test_toml_sample_roundtrip(): + toml_path = SAMPLES_DIR / "tom.toml" + assert toml_path.is_file(), f"Missing sample TOML file: {toml_path}" + + fmt, parsed = parse_config(toml_path) + assert fmt == "toml" + + flat_items = flatten_config(fmt, parsed) + assert flat_items + + defaults_yaml = generate_defaults_yaml("jinjaturtle", flat_items) + defaults = yaml.safe_load(defaults_yaml) + + # defaults should be a non-empty dict + assert isinstance(defaults, dict) + assert defaults, "Expected non-empty defaults for TOML sample" + + # all keys should be lowercase, start with prefix, and have no spaces + for key in defaults: + assert key.startswith("jinjaturtle_") + assert key == key.lower() + assert " " not in key + + # template generation – **now with original_text** + original_text = toml_path.read_text(encoding="utf-8") + template = generate_template( + fmt, parsed, "jinjaturtle", original_text=original_text + ) + assert isinstance(template, str) + assert template.strip() + + # comments from the original file should now be preserved + assert "# This is a TOML document" in template + + # each default variable name should appear in the template as a Jinja placeholder + for var_name in defaults: + assert ( + var_name in template + ), f"Variable {var_name} not referenced in TOML template" + + +def test_parse_config_toml_missing_tomllib(monkeypatch): + """ + Force tomllib to None to hit the RuntimeError branch when parsing TOML. + """ + toml_path = SAMPLES_DIR / "tom.toml" + + # Simulate an environment without tomllib/tomli + monkeypatch.setattr(toml_module, "tomllib", None) + + with pytest.raises(RuntimeError) as exc: + parse_config(toml_path, fmt="toml") + assert "tomllib/tomli is required" in str(exc.value) + + +def test_generate_template_fallback_toml(): + """ + When original_text is not provided, generate_template should use the + structural fallback path for TOML configs. + """ + parsed_toml = { + "title": "Example", + "server": {"port": 8080, "host": "127.0.0.1"}, + "logging": { + "file": {"path": "/tmp/app.log"} + }, # nested table to hit recursive walk + } + tmpl_toml = generate_template("toml", parsed=parsed_toml, role_prefix="role") + assert "[server]" in tmpl_toml + assert "role_server_port" in tmpl_toml + assert "[logging]" in tmpl_toml or "[logging.file]" in tmpl_toml + + +def test_generate_toml_template_from_text_edge_cases(): + # Cover CRLF newlines, lines without '=', empty keys, and inline tables + # that both parse successfully and fail parsing. + text = ( + "# comment\r\n" + "[table]\r\n" + "noequals\r\n" + " = 42\r\n" + 'inline_good = { name = "abc", value = 1 }\r\n' + "inline_bad = { invalid = }\r\n" + ) + handler = TomlHandler() + tmpl = handler._generate_toml_template_from_text("role", text) + + # The good inline table should expand into two separate variables. 
+ assert "role_table_inline_good_name" in tmpl + assert "role_table_inline_good_value" in tmpl + # The bad inline table should fall back to scalar handling. + assert "role_table_inline_bad" in tmpl + # Ensure the lines without '=' / empty key were handled without exploding. + assert "[table]" in tmpl + assert "noequals" in tmpl diff --git a/tests/test_xml_handler.py b/tests/test_xml_handler.py new file mode 100644 index 0000000..6cf5836 --- /dev/null +++ b/tests/test_xml_handler.py @@ -0,0 +1,230 @@ +from __future__ import annotations + +from pathlib import Path +import textwrap +import xml.etree.ElementTree as ET + +import pytest +import yaml + +from jinjaturtle.core import ( + parse_config, + flatten_config, + generate_defaults_yaml, + generate_template, +) +from jinjaturtle.handlers.xml import XmlHandler + +SAMPLES_DIR = Path(__file__).parent / "samples" + + +def test_xml_roundtrip_ossec_web_rules(): + xml_path = SAMPLES_DIR / "ossec.xml" + assert xml_path.is_file(), f"Missing sample XML file: {xml_path}" + + fmt, parsed = parse_config(xml_path) + assert fmt == "xml" + + flat_items = flatten_config(fmt, parsed) + assert flat_items, "Expected at least one flattened item from XML sample" + + defaults_yaml = generate_defaults_yaml("ossec", flat_items) + defaults = yaml.safe_load(defaults_yaml) + + # defaults should be a non-empty dict + assert isinstance(defaults, dict) + assert defaults, "Expected non-empty defaults for XML sample" + + # all keys should be lowercase, start with prefix, and have no spaces + for key in defaults: + assert key.startswith("ossec_") + assert key == key.lower() + assert " " not in key + + # Root attribute should flatten to ossec_name + assert defaults["ossec_name"] == "web,accesslog," + + # There should be at least one default for rule id="31100" + id_keys = [k for k, v in defaults.items() if v == "31100"] + assert id_keys, "Expected to find a default for rule id 31100" + + # At least one of them should be the rule *id* attribute + assert any( + key.startswith("ossec_rule_") and key.endswith("_id") for key in id_keys + ), f"Expected at least one *_id var for value 31100, got: {id_keys}" + + # Template generation (preserving comments) + original_text = xml_path.read_text(encoding="utf-8") + template = generate_template(fmt, parsed, "ossec", original_text=original_text) + assert isinstance(template, str) + assert template.strip(), "Template for XML sample should not be empty" + + # Top-of-file and mid-file comments should be preserved + assert "Official Web access rules for OSSEC." 
in template + assert "Rules to ignore crawlers" in template + + # Each default variable name should appear in the template as a Jinja placeholder + for var_name in defaults: + assert ( + var_name in template + ), f"Variable {var_name} not referenced in XML template" + + +def test_generate_xml_template_from_text_edge_cases(): + """ + Exercise XML text edge cases: + - XML declaration and DOCTYPE in prolog + - top-level and inner comments + - repeated child elements (indexing) + - attributes and text content + """ + text = textwrap.dedent( + """\ + + + + + + text + other + + """ + ) + + handler = XmlHandler() + tmpl = handler._generate_xml_template_from_text("role", text) + + # Prolog and comments preserved + assert " role_attr) + assert "role_attr" in tmpl + + # Repeated elements should be indexed in both attr and text + assert "role_child_0_attr" in tmpl + assert "role_child_0" in tmpl + assert "role_child_1" in tmpl + + +def test_generate_template_xml_type_error(): + """ + Wrong type for XML in XmlHandler.generate_template should raise TypeError. + """ + handler = XmlHandler() + with pytest.raises(TypeError): + handler.generate_template(parsed="not an element", role_prefix="role") + + +def test_flatten_config_xml_type_error(): + """ + Wrong type for XML in flatten_config should raise TypeError. + """ + with pytest.raises(TypeError): + flatten_config("xml", parsed="not-an-element") + + +def test_generate_template_xml_structural_fallback(): + """ + When original_text is not provided for XML, generate_template should use + the structural fallback path (ET.tostring + handler processing). + """ + xml_text = textwrap.dedent( + """\ + + 2 + text + + """ + ) + root = ET.fromstring(xml_text) + + tmpl = generate_template("xml", parsed=root, role_prefix="role") + + # Root attribute path ("@attr",) -> role_attr + assert "role_attr" in tmpl + + # Simple child element text ("child",) -> role_child + assert "role_child" in tmpl + + # Element with both attr and text: + # - attr -> ("node", "@attr") -> role_node_attr + # - text -> ("node", "value") -> role_node_value + assert "role_node_attr" in tmpl + assert "role_node_value" in tmpl + + +def test_split_xml_prolog_only_whitespace(): + """ + Whitespace-only input: prolog is the whitespace, body is empty. + Exercises the 'if i >= n: break' path. + """ + text = " \n\t" + handler = XmlHandler() + prolog, body = handler._split_xml_prolog(text) + assert prolog == text + assert body == "" + + +def test_split_xml_prolog_unterminated_declaration(): + """ + Unterminated XML declaration should hit the 'end == -1' branch and + treat the whole string as body. + """ + text = "" + handler = XmlHandler() + prolog, body = handler._split_xml_prolog(text) + assert prolog == "" + assert body == text + + +def test_flatten_xml_text_with_attributes_uses_value_suffix(): + """ + When an element has both attributes and text, _flatten_xml should store + the text at path + ('value',), not just path. 
+    """
+    xml_text = '<node attr="x">text</node>'
+    root = ET.fromstring(xml_text)
+
+    items = flatten_config("xml", root)
+
+    # Attribute path: ("node", "@attr") -> "x"
+    assert (("node", "@attr"), "x") in items
+
+    # Text-with-attrs path: ("node", "value") -> "text"
+    assert (("node", "value"), "text") in items
diff --git a/tests/test_yaml_handler.py b/tests/test_yaml_handler.py
new file mode 100644
index 0000000..f2d89f1
--- /dev/null
+++ b/tests/test_yaml_handler.py
@@ -0,0 +1,100 @@
+from __future__ import annotations
+
+from pathlib import Path
+import textwrap
+
+import yaml
+
+from jinjaturtle.core import (
+    parse_config,
+    flatten_config,
+    generate_defaults_yaml,
+    generate_template,
+)
+from jinjaturtle.handlers.yaml import YamlHandler
+
+SAMPLES_DIR = Path(__file__).parent / "samples"
+
+
+def test_yaml_roundtrip_with_list_and_comment():
+    yaml_path = SAMPLES_DIR / "bar.yaml"
+    assert yaml_path.is_file(), f"Missing sample YAML file: {yaml_path}"
+
+    fmt, parsed = parse_config(yaml_path)
+    assert fmt == "yaml"
+
+    flat_items = flatten_config(fmt, parsed)
+    defaults_yaml = generate_defaults_yaml("foobar", flat_items)
+    defaults = yaml.safe_load(defaults_yaml)
+
+    # Defaults: keys are flattened with indices
+    assert defaults["foobar_foo"] == "bar"
+    assert defaults["foobar_blah_0"] == "something"
+    assert defaults["foobar_blah_1"] == "else"
+
+    # Template generation (preserving comments)
+    original_text = yaml_path.read_text(encoding="utf-8")
+    template = generate_template(fmt, parsed, "foobar", original_text=original_text)
+
+    # Comment preserved
+    assert "# Top comment" in template
+
+    # Scalar replacement
+    assert "foo:" in template
+    assert "foobar_foo" in template
+
+    # List items use indexed vars, not "item"
+    assert "foobar_blah_0" in template
+    assert "foobar_blah_1" in template
+    assert "{{ foobar_blah }}" not in template
+    assert "foobar_blah_item" not in template
+
+
+def test_generate_yaml_template_from_text_edge_cases():
+    """
+    Exercise YAML text edge cases:
+    - indentation dedent (stack pop)
+    - empty key before ':'
+    - quoted and unquoted list items
+    """
+    text = textwrap.dedent(
+        """
+        root:
+          child: 1
+        other: 2
+         : 3
+        list:
+          - "quoted"
+          - unquoted
+        """
+    )
+
+    handler = YamlHandler()
+    tmpl = handler._generate_yaml_template_from_text("role", text)
+
+    # Dedent from "root -> child" back to "other" exercises the stack-pop path.
+    # Just check the expected variable names appear.
+    assert "role_root_child" in tmpl
+    assert "role_other" in tmpl
+
+    # The weird " : 3" line has no key and should be left untouched.
+    assert " : 3" in tmpl
+
+    # The list should generate indexed variables for each item.
+    # First item is quoted (use_quotes=True), second is unquoted.
+    assert "role_list_0" in tmpl
+    assert "role_list_1" in tmpl
+
+
+def test_generate_template_yaml_structural_fallback():
+    """
+    When original_text is not provided for YAML, generate_template should use
+    the structural fallback path (yaml.safe_dump + handler processing).
+    """
+    parsed = {"outer": {"inner": "val"}}
+
+    tmpl = generate_template("yaml", parsed=parsed, role_prefix="role")
+
+    # We don't care about exact formatting, just that the expected variable
+    # name shows up, proving we went through the structural path.
+    assert "role_outer_inner" in tmpl