diff --git a/.forgejo/workflows/lint.yml b/.forgejo/workflows/lint.yml index 60768d8..cbfb409 100644 --- a/.forgejo/workflows/lint.yml +++ b/.forgejo/workflows/lint.yml @@ -15,7 +15,7 @@ jobs: run: | apt-get update DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - black pyflakes3 python3-bandit + black pyflakes3 python3-bandit vulture - name: Run linters run: | @@ -24,3 +24,4 @@ jobs: pyflakes3 src/* pyflakes3 tests/* bandit -s B110 -r src/ + vulture . diff --git a/.gitignore b/.gitignore index 7bc15a0..dedc5da 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ dist *.yml *.j2 *.toml +regenerated_* diff --git a/pyproject.toml b/pyproject.toml index 937cb9b..f6310ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "jinjaturtle" -version = "0.2.0" +version = "0.3.0" description = "Convert config files into Ansible defaults and Jinja2 templates." authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/src/jinjaturtle/core.py b/src/jinjaturtle/core.py index c8e6d71..e4f3d13 100644 --- a/src/jinjaturtle/core.py +++ b/src/jinjaturtle/core.py @@ -3,6 +3,7 @@ from __future__ import annotations from pathlib import Path from typing import Any, Iterable +import datetime import yaml from .loop_analyzer import LoopAnalyzer, LoopCandidate @@ -100,6 +101,9 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]: if handler is None: raise ValueError(f"Unsupported config format: {fmt}") parsed = handler.parse(path) + # Make sure datetime objects are treated as strings (TOML, YAML) + parsed = _stringify_timestamps(parsed) + return fmt, parsed @@ -158,17 +162,6 @@ def _path_starts_with(path: tuple[str, ...], prefix: tuple[str, ...]) -> bool: return path[: len(prefix)] == prefix -def _normalize_default_value(value: Any) -> Any: - """ - Ensure that 'true' / 'false' end up as quoted strings in YAML. 
- """ - if isinstance(value, bool): - return QuotedString("true" if value else "false") - if isinstance(value, str) and value.lower() in {"true", "false"}: - return QuotedString(value) - return value - - def generate_ansible_yaml( role_prefix: str, flat_items: list[tuple[tuple[str, ...], Any]], @@ -182,7 +175,7 @@ def generate_ansible_yaml( # Add scalar variables for path, value in flat_items: var_name = make_var_name(role_prefix, path) - defaults[var_name] = _normalize_default_value(value) + defaults[var_name] = value # No normalization - keep original types # Add loop collections if loop_candidates: @@ -226,3 +219,29 @@ def generate_jinja2_template( return handler.generate_jinja2_template( parsed, role_prefix, original_text=original_text ) + + +def _stringify_timestamps(obj: Any) -> Any: + """ + Recursively walk a parsed config and turn any datetime/date/time objects + into plain strings in ISO-8601 form. + + This prevents Python datetime objects from leaking into YAML/Jinja, which + would otherwise reformat the value (e.g. replacing 'T' with a space). + + This commonly occurs otherwise with TOML and YAML files, which sees + Python automatically convert those sorts of strings into datetime objects. 
+ """ + if isinstance(obj, dict): + return {k: _stringify_timestamps(v) for k, v in obj.items()} + if isinstance(obj, list): + return [_stringify_timestamps(v) for v in obj] + + # TOML & YAML both use the standard datetime types + if isinstance(obj, datetime.datetime): + # Use default ISO-8601: 'YYYY-MM-DDTHH:MM:SS±HH:MM' (with 'T') + return obj.isoformat() + if isinstance(obj, (datetime.date, datetime.time)): + return obj.isoformat() + + return obj diff --git a/src/jinjaturtle/handlers/ini.py b/src/jinjaturtle/handlers/ini.py index ce5848e..ad92b72 100644 --- a/src/jinjaturtle/handlers/ini.py +++ b/src/jinjaturtle/handlers/ini.py @@ -12,7 +12,7 @@ class IniHandler(BaseHandler): def parse(self, path: Path) -> configparser.ConfigParser: parser = configparser.ConfigParser() - parser.optionxform = str # preserve key case + parser.optionxform = str # noqa with path.open("r", encoding="utf-8") as f: parser.read_file(f) return parser diff --git a/src/jinjaturtle/handlers/json.py b/src/jinjaturtle/handlers/json.py index dbf7d82..035efdc 100644 --- a/src/jinjaturtle/handlers/json.py +++ b/src/jinjaturtle/handlers/json.py @@ -1,10 +1,12 @@ from __future__ import annotations import json +import re from pathlib import Path from typing import Any from . 
import DictLikeHandler +from ..loop_analyzer import LoopCandidate class JsonHandler(DictLikeHandler): @@ -21,17 +23,38 @@ class JsonHandler(DictLikeHandler): role_prefix: str, original_text: str | None = None, ) -> str: + """Original scalar-only template generation.""" if not isinstance(parsed, (dict, list)): raise TypeError("JSON parser result must be a dict or list") # As before: ignore original_text and rebuild structurally return self._generate_json_template(role_prefix, parsed) + def generate_jinja2_template_with_loops( + self, + parsed: Any, + role_prefix: str, + original_text: str | None, + loop_candidates: list[LoopCandidate], + ) -> str: + """Generate template with Jinja2 for loops where appropriate.""" + if not isinstance(parsed, (dict, list)): + raise TypeError("JSON parser result must be a dict or list") + + # Build loop path set for quick lookup + loop_paths = {candidate.path for candidate in loop_candidates} + + return self._generate_json_template_with_loops( + role_prefix, parsed, loop_paths, loop_candidates + ) + def _generate_json_template(self, role_prefix: str, data: Any) -> str: """ Generate a JSON Jinja2 template from parsed JSON data. All scalar values are replaced with Jinja expressions whose names are derived from the path, similar to TOML/YAML. + + Uses | tojson filter to preserve types (numbers, booleans, null). """ def _walk(obj: Any, path: tuple[str, ...] 
= ()) -> Any: @@ -39,9 +62,130 @@ class JsonHandler(DictLikeHandler): return {k: _walk(v, path + (str(k),)) for k, v in obj.items()} if isinstance(obj, list): return [_walk(v, path + (str(i),)) for i, v in enumerate(obj)] - # scalar + # scalar - use marker that will be replaced with tojson var_name = self.make_var_name(role_prefix, path) - return f"{{{{ {var_name} }}}}" + return f"__SCALAR__{var_name}__" templated = _walk(data) - return json.dumps(templated, indent=2, ensure_ascii=False) + "\n" + json_str = json.dumps(templated, indent=2, ensure_ascii=False) + + # Replace scalar markers with Jinja expressions using tojson filter + # This preserves types (numbers stay numbers, booleans stay booleans) + json_str = re.sub( + r'"__SCALAR__([a-zA-Z_][a-zA-Z0-9_]*)__"', r"{{ \1 | tojson }}", json_str + ) + + return json_str + "\n" + + def _generate_json_template_with_loops( + self, + role_prefix: str, + data: Any, + loop_paths: set[tuple[str, ...]], + loop_candidates: list[LoopCandidate], + path: tuple[str, ...] = (), + ) -> str: + """ + Generate a JSON Jinja2 template with for loops where appropriate. + """ + + def _walk(obj: Any, current_path: tuple[str, ...] 
= ()) -> Any: + # Check if this path is a loop candidate + if current_path in loop_paths: + # Find the matching candidate + candidate = next(c for c in loop_candidates if c.path == current_path) + collection_var = self.make_var_name(role_prefix, candidate.path) + item_var = candidate.loop_var + + if candidate.item_schema == "scalar": + # Simple list of scalars - use special marker that we'll replace + return f"__LOOP_SCALAR__{collection_var}__{item_var}__" + elif candidate.item_schema in ("simple_dict", "nested"): + # List of dicts - use special marker + return f"__LOOP_DICT__{collection_var}__{item_var}__" + + if isinstance(obj, dict): + return {k: _walk(v, current_path + (str(k),)) for k, v in obj.items()} + if isinstance(obj, list): + # Check if this list is a loop candidate + if current_path in loop_paths: + # Already handled above + return _walk(obj, current_path) + return [_walk(v, current_path + (str(i),)) for i, v in enumerate(obj)] + + # scalar - use marker to preserve type + var_name = self.make_var_name(role_prefix, current_path) + return f"__SCALAR__{var_name}__" + + templated = _walk(data, path) + + # Convert to JSON string + json_str = json.dumps(templated, indent=2, ensure_ascii=False) + + # Replace scalar markers with Jinja expressions using tojson filter + json_str = re.sub( + r'"__SCALAR__([a-zA-Z_][a-zA-Z0-9_]*)__"', r"{{ \1 | tojson }}", json_str + ) + + # Post-process to replace loop markers with actual Jinja loops + for candidate in loop_candidates: + collection_var = self.make_var_name(role_prefix, candidate.path) + item_var = candidate.loop_var + + if candidate.item_schema == "scalar": + # Replace scalar loop marker with Jinja for loop + marker = f'"__LOOP_SCALAR__{collection_var}__{item_var}__"' + replacement = self._generate_json_scalar_loop( + collection_var, item_var, candidate + ) + json_str = json_str.replace(marker, replacement) + + elif candidate.item_schema in ("simple_dict", "nested"): + # Replace dict loop marker with Jinja for 
loop + marker = f'"__LOOP_DICT__{collection_var}__{item_var}__"' + replacement = self._generate_json_dict_loop( + collection_var, item_var, candidate + ) + json_str = json_str.replace(marker, replacement) + + return json_str + "\n" + + def _generate_json_scalar_loop( + self, collection_var: str, item_var: str, candidate: LoopCandidate + ) -> str: + """Generate a Jinja for loop for a scalar list in JSON.""" + # Use tojson filter to properly handle strings (quotes them) and other types + # Include array brackets around the loop + return ( + f"[{{% for {item_var} in {collection_var} %}}" + f"{{{{ {item_var} | tojson }}}}" + f"{{% if not loop.last %}}, {{% endif %}}" + f"{{% endfor %}}]" + ) + + def _generate_json_dict_loop( + self, collection_var: str, item_var: str, candidate: LoopCandidate + ) -> str: + """Generate a Jinja for loop for a dict list in JSON.""" + if not candidate.items: + return "[]" + + # Get first item as template + sample_item = candidate.items[0] + + # Build the dict template - use tojson for all values to handle types correctly + fields = [] + for key, value in sample_item.items(): + if key == "_key": + continue + # Use tojson filter to properly serialize all types (strings, numbers, booleans) + fields.append(f'"{key}": {{{{ {item_var}.{key} | tojson }}}}') + + dict_template = "{" + ", ".join(fields) + "}" + + return ( + f"{{% for {item_var} in {collection_var} %}}" + f"{dict_template}" + f"{{% if not loop.last %}}, {{% endif %}}" + f"{{% endfor %}}" + ) diff --git a/src/jinjaturtle/handlers/toml.py b/src/jinjaturtle/handlers/toml.py index 069b319..ccd1e31 100644 --- a/src/jinjaturtle/handlers/toml.py +++ b/src/jinjaturtle/handlers/toml.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Any from . 
import DictLikeHandler +from ..loop_analyzer import LoopCandidate class TomlHandler(DictLikeHandler): @@ -25,12 +26,31 @@ class TomlHandler(DictLikeHandler): role_prefix: str, original_text: str | None = None, ) -> str: + """Original scalar-only template generation.""" if original_text is not None: return self._generate_toml_template_from_text(role_prefix, original_text) if not isinstance(parsed, dict): raise TypeError("TOML parser result must be a dict") return self._generate_toml_template(role_prefix, parsed) + def generate_jinja2_template_with_loops( + self, + parsed: Any, + role_prefix: str, + original_text: str | None, + loop_candidates: list[LoopCandidate], + ) -> str: + """Generate template with Jinja2 for loops where appropriate.""" + if original_text is not None: + return self._generate_toml_template_with_loops_from_text( + role_prefix, original_text, loop_candidates + ) + if not isinstance(parsed, dict): + raise TypeError("TOML parser result must be a dict") + return self._generate_toml_template_with_loops( + role_prefix, parsed, loop_candidates + ) + def _generate_toml_template(self, role_prefix: str, data: dict[str, Any]) -> str: """ Generate a TOML Jinja2 template from parsed TOML dict. @@ -45,6 +65,89 @@ class TomlHandler(DictLikeHandler): var_name = self.make_var_name(role_prefix, path + (key,)) if isinstance(value, str): lines.append(f'{key} = "{{{{ {var_name} }}}}"') + elif isinstance(value, bool): + # Booleans need | lower filter (Python True/False → TOML true/false) + lines.append(f"{key} = {{{{ {var_name} | lower }}}}") + else: + lines.append(f"{key} = {{{{ {var_name} }}}}") + + def walk(obj: dict[str, Any], path: tuple[str, ...] 
= ()) -> None: + scalar_items = {k: v for k, v in obj.items() if not isinstance(v, dict)} + nested_items = {k: v for k, v in obj.items() if isinstance(v, dict)} + + if path: + header = ".".join(path) + lines.append(f"[{header}]") + + for key, val in scalar_items.items(): + emit_kv(path, str(key), val) + + if scalar_items: + lines.append("") + + for key, val in nested_items.items(): + walk(val, path + (str(key),)) + + # Root scalars (no table header) + root_scalars = {k: v for k, v in data.items() if not isinstance(v, dict)} + for key, val in root_scalars.items(): + emit_kv((), str(key), val) + if root_scalars: + lines.append("") + + # Tables + for key, val in data.items(): + if isinstance(val, dict): + walk(val, (str(key),)) + + return "\n".join(lines).rstrip() + "\n" + + def _generate_toml_template_with_loops( + self, + role_prefix: str, + data: dict[str, Any], + loop_candidates: list[LoopCandidate], + ) -> str: + """ + Generate a TOML Jinja2 template with for loops where appropriate. + """ + lines: list[str] = [] + loop_paths = {candidate.path for candidate in loop_candidates} + + def emit_kv(path: tuple[str, ...], key: str, value: Any) -> None: + var_name = self.make_var_name(role_prefix, path + (key,)) + if isinstance(value, str): + lines.append(f'{key} = "{{{{ {var_name} }}}}"') + elif isinstance(value, bool): + # Booleans need | lower filter (Python True/False → TOML true/false) + lines.append(f"{key} = {{{{ {var_name} | lower }}}}") + elif isinstance(value, list): + # Check if this list is a loop candidate + if path + (key,) in loop_paths: + # Find the matching candidate + candidate = next( + c for c in loop_candidates if c.path == path + (key,) + ) + collection_var = self.make_var_name(role_prefix, candidate.path) + item_var = candidate.loop_var + + if candidate.item_schema == "scalar": + # Scalar list loop + lines.append( + f"{key} = [" + f"{{% for {item_var} in {collection_var} %}}" + f"{{{{ {item_var} }}}}" + f"{{% if not loop.last %}}, {{% endif %}}" + 
f"{{% endfor %}}" + f"]" + ) + elif candidate.item_schema in ("simple_dict", "nested"): + # Dict list loop - TOML array of tables + # This is complex for TOML, using simplified approach + lines.append(f"{key} = {{{{ {var_name} | tojson }}}}") + else: + # Not a loop, treat as regular variable + lines.append(f"{key} = {{{{ {var_name} }}}}") else: lines.append(f"{key} = {{{{ {var_name} }}}}") @@ -173,6 +276,236 @@ class TomlHandler(DictLikeHandler): nested_var = self.make_var_name(role_prefix, nested_path) if isinstance(sub_val, str): inner_bits.append(f'{sub_key} = "{{{{ {nested_var} }}}}"') + elif isinstance(sub_val, bool): + inner_bits.append( + f"{sub_key} = {{{{ {nested_var} | lower }}}}" + ) + else: + inner_bits.append(f"{sub_key} = {{{ {nested_var} }}}") + replacement_value = "{ " + ", ".join(inner_bits) + " }" + new_content = ( + before_eq + "=" + leading_ws + replacement_value + comment_part + ) + out_lines.append(new_content + newline) + continue + # If parsing fails, fall through to normal handling + + # Normal scalar value handling (including bools, numbers, strings) + var_name = self.make_var_name(role_prefix, path) + use_quotes = ( + len(raw_value) >= 2 + and raw_value[0] == raw_value[-1] + and raw_value[0] in {'"', "'"} + ) + + # Check if value is a boolean in the text + is_bool = raw_value.strip().lower() in ("true", "false") + + if use_quotes: + quote_char = raw_value[0] + replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}" + elif is_bool: + replacement_value = f"{{{{ {var_name} | lower }}}}" + else: + replacement_value = f"{{{{ {var_name} }}}}" + + new_content = ( + before_eq + "=" + leading_ws + replacement_value + comment_part + ) + out_lines.append(new_content + newline) + + return "".join(out_lines) + + def _generate_toml_template_with_loops_from_text( + self, role_prefix: str, text: str, loop_candidates: list[LoopCandidate] + ) -> str: + """ + Generate a Jinja2 template for a TOML file with loop support. 
+ """ + loop_paths = {candidate.path for candidate in loop_candidates} + lines = text.splitlines(keepends=True) + current_table: tuple[str, ...] = () + out_lines: list[str] = [] + skip_until_next_table = ( + False # Track when we're inside a looped array-of-tables + ) + + for raw_line in lines: + line = raw_line + stripped = line.lstrip() + + # Blank or pure comment + if not stripped or stripped.startswith("#"): + # Only output if we're not skipping + if not skip_until_next_table: + out_lines.append(raw_line) + continue + + # Table header: [server] or [server.tls] or [[array.of.tables]] + if stripped.startswith("[") and "]" in stripped: + header = stripped + # Check if it's array-of-tables ([[name]]) or regular table ([name]) + is_array_table = header.startswith("[[") and "]]" in header + + if is_array_table: + # Extract content between [[ and ]] + start = header.find("[[") + 2 + end = header.find("]]", start) + inner = header[start:end].strip() if end != -1 else "" + else: + # Extract content between [ and ] + start = header.find("[") + 1 + end = header.find("]", start) + inner = header[start:end].strip() if end != -1 else "" + + if inner: + parts = [p.strip() for p in inner.split(".") if p.strip()] + table_path = tuple(parts) + + # Check if this is an array-of-tables that's a loop candidate + if is_array_table and table_path in loop_paths: + # If we're already skipping this table, this is a subsequent occurrence + if skip_until_next_table and current_table == table_path: + # This is a duplicate [[table]] - skip it + continue + + # This is the first occurrence - generate the loop + current_table = table_path + candidate = next( + c for c in loop_candidates if c.path == table_path + ) + + # Generate the loop header + collection_var = self.make_var_name(role_prefix, candidate.path) + item_var = candidate.loop_var + + # Get sample item to build template + if candidate.items: + sample_item = candidate.items[0] + + # Build loop + out_lines.append( + f"{{% for 
{item_var} in {collection_var} %}}\n" + ) + out_lines.append(f"[[{'.'.join(table_path)}]]\n") + + # Add fields from sample item + for key, value in sample_item.items(): + if key == "_key": + continue + if isinstance(value, str): + out_lines.append( + f'{key} = "{{{{ {item_var}.{key} }}}}"\n' + ) + else: + out_lines.append( + f"{key} = {{{{ {item_var}.{key} }}}}\n" + ) + + out_lines.append("{% endfor %}\n") + + # Skip all content until the next different table + skip_until_next_table = True + continue + else: + # Regular table or non-loop array - reset skip flag if it's a different table + if current_table != table_path: + skip_until_next_table = False + current_table = table_path + + out_lines.append(raw_line) + continue + + # If we're inside a skipped array-of-tables section, skip this line + if skip_until_next_table: + continue + + # Try key = value + newline = "" + content = raw_line + if content.endswith("\r\n"): + newline = "\r\n" + content = content[:-2] + elif content.endswith("\n"): + newline = content[-1] + content = content[:-1] + + eq_index = content.find("=") + if eq_index == -1: + out_lines.append(raw_line) + continue + + before_eq = content[:eq_index] + after_eq = content[eq_index + 1 :] + + key = before_eq.strip() + if not key: + out_lines.append(raw_line) + continue + + # Whitespace after '=' + value_ws_len = len(after_eq) - len(after_eq.lstrip(" \t")) + leading_ws = after_eq[:value_ws_len] + value_and_comment = after_eq[value_ws_len:] + + value_part, comment_part = self._split_inline_comment( + value_and_comment, {"#"} + ) + raw_value = value_part.strip() + + # Path for this key (table + key) + path = current_table + (key,) + + # Check if this path is a loop candidate + if path in loop_paths: + candidate = next(c for c in loop_candidates if c.path == path) + collection_var = self.make_var_name(role_prefix, candidate.path) + item_var = candidate.loop_var + + if candidate.item_schema == "scalar": + # Scalar list loop + replacement_value = ( + f"[" + 
f"{{% for {item_var} in {collection_var} %}}" + f"{{{{ {item_var} }}}}" + f"{{% if not loop.last %}}, {{% endif %}}" + f"{{% endfor %}}" + f"]" + ) + else: + # Dict/nested loop - use tojson filter for complex arrays + replacement_value = f"{{{{ {collection_var} | tojson }}}}" + + new_content = ( + before_eq + "=" + leading_ws + replacement_value + comment_part + ) + out_lines.append(new_content + newline) + continue + + # Special case: inline table + if ( + raw_value.startswith("{") + and raw_value.endswith("}") + and tomllib is not None + ): + try: + # Parse the inline table as a tiny TOML document + mini_source = "table = " + raw_value + "\n" + mini_data = tomllib.loads(mini_source)["table"] + except Exception: + mini_data = None + + if isinstance(mini_data, dict): + inner_bits: list[str] = [] + for sub_key, sub_val in mini_data.items(): + nested_path = path + (sub_key,) + nested_var = self.make_var_name(role_prefix, nested_path) + if isinstance(sub_val, str): + inner_bits.append(f'{sub_key} = "{{{{ {nested_var} }}}}"') + elif isinstance(sub_val, bool): + inner_bits.append( + f"{sub_key} = {{{{ {nested_var} | lower }}}}" + ) else: inner_bits.append(f"{sub_key} = {{{{ {nested_var} }}}}") replacement_value = "{ " + ", ".join(inner_bits) + " }" @@ -191,9 +524,14 @@ class TomlHandler(DictLikeHandler): and raw_value[0] in {'"', "'"} ) + # Check if value is a boolean in the text + is_bool = raw_value.strip().lower() in ("true", "false") + if use_quotes: quote_char = raw_value[0] replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}" + elif is_bool: + replacement_value = f"{{{{ {var_name} | lower }}}}" else: replacement_value = f"{{{{ {var_name} }}}}" diff --git a/src/jinjaturtle/handlers/xml.py b/src/jinjaturtle/handlers/xml.py index bc92c26..fed6aba 100644 --- a/src/jinjaturtle/handlers/xml.py +++ b/src/jinjaturtle/handlers/xml.py @@ -418,8 +418,8 @@ class XmlHandler(BaseHandler): # Use simple variable reference - attributes should always exist 
elem.set(attr_name, f"{{{{ {loop_var}.{attr_name} }}}}") elif key == "_text": - # Simple text content - elem.text = f"{{{{ {loop_var} }}}}" + # Simple text content - use ._text accessor for dict-based items + elem.text = f"{{{{ {loop_var}._text }}}}" elif key == "value": # Text with attributes/children elem.text = f"{{{{ {loop_var}.value }}}}" diff --git a/src/jinjaturtle/handlers/yaml.py b/src/jinjaturtle/handlers/yaml.py index 1220f52..f75ef4b 100644 --- a/src/jinjaturtle/handlers/yaml.py +++ b/src/jinjaturtle/handlers/yaml.py @@ -124,7 +124,8 @@ class YamlHandler(DictLikeHandler): replacement = f"{{{{ {var_name} }}}}" leading = rest[: len(rest) - len(rest.lstrip(" \t"))] - new_stripped = f"{key}: {leading}{replacement}{comment_part}" + new_rest = f"{leading}{replacement}{comment_part}" + new_stripped = f"{key}:{new_rest}" out_lines.append( " " * indent + new_stripped @@ -281,7 +282,8 @@ class YamlHandler(DictLikeHandler): replacement = f"{{{{ {var_name} }}}}" leading = rest[: len(rest) - len(rest.lstrip(" \t"))] - new_stripped = f"{key}: {leading}{replacement}{comment_part}" + new_rest = f"{leading}{replacement}{comment_part}" + new_stripped = f"{key}:{new_rest}" out_lines.append( " " * indent + new_stripped @@ -378,10 +380,10 @@ class YamlHandler(DictLikeHandler): # Dict-style: key: {% for ... 
%} key = candidate.path[-1] if candidate.path else "items" lines.append(f"{indent_str}{key}:") - lines.append(f"{indent_str} {{% for {item_var} in {collection_var} %}}") + lines.append(f"{indent_str} {{% for {item_var} in {collection_var} -%}}") else: # List-style: just the loop - lines.append(f"{indent_str}{{% for {item_var} in {collection_var} %}}") + lines.append(f"{indent_str}{{% for {item_var} in {collection_var} -%}}") # Generate template for item structure if candidate.items: diff --git a/src/jinjaturtle/loop_analyzer.py b/src/jinjaturtle/loop_analyzer.py index 492c2c1..23702d8 100644 --- a/src/jinjaturtle/loop_analyzer.py +++ b/src/jinjaturtle/loop_analyzer.py @@ -85,14 +85,20 @@ class LoopAnalyzer: self._analyze_xml(parsed) elif fmt in ("yaml", "json", "toml"): self._analyze_dict_like(parsed, path=()) - # INI files are typically flat key-value, not suitable for loops + elif fmt == "ini": + # INI files are typically flat key-value, not suitable for loops + pass # Sort by path depth (process parent structures before children) self.candidates.sort(key=lambda c: len(c.path)) return self.candidates def _analyze_dict_like( - self, obj: Any, path: tuple[str, ...], depth: int = 0 + self, + obj: Any, + path: tuple[str, ...], + depth: int = 0, + parent_is_list: bool = False, ) -> None: """Recursively analyze dict/list structures.""" @@ -111,9 +117,16 @@ class LoopAnalyzer: # Recurse into dict values for key, value in obj.items(): - self._analyze_dict_like(value, path + (str(key),), depth + 1) + self._analyze_dict_like( + value, path + (str(key),), depth + 1, parent_is_list=False + ) elif isinstance(obj, list): + # Don't create loop candidates for nested lists (lists inside lists) + # These are too complex for clean template generation and should fall back to scalar handling + if parent_is_list: + return + # Check if this list is homogeneous if len(obj) >= self.MIN_ITEMS_FOR_LOOP: candidate = self._check_list_collection(obj, path) @@ -123,8 +136,11 @@ class 
LoopAnalyzer: return # If not a good loop candidate, recurse into items + # Pass parent_is_list=True so nested lists won't create loop candidates for i, item in enumerate(obj): - self._analyze_dict_like(item, path + (str(i),), depth + 1) + self._analyze_dict_like( + item, path + (str(i),), depth + 1, parent_is_list=True + ) def _check_list_collection( self, items: list[Any], path: tuple[str, ...] @@ -185,45 +201,55 @@ class LoopAnalyzer: Example: {"server1": {...}, "server2": {...}} where all values have the same structure. + + NOTE: Currently disabled for TOML compatibility. TOML's dict-of-tables + syntax ([servers.alpha], [servers.beta]) cannot be easily converted to + loops without restructuring the entire TOML format. To maintain consistency + between Ansible YAML and Jinja2 templates, we treat these as scalars. """ - if not obj: - return None - - values = list(obj.values()) - - # Check type homogeneity - value_types = [type(v).__name__ for v in values] - type_counts = Counter(value_types) - - if len(type_counts) != 1: - return None - - value_type = value_types[0] - - # Only interested in dict values for dict collections - # (scalar-valued dicts stay as scalars) - if value_type != "dict": - return None - - # Check structural homogeneity - schema = self._analyze_dict_schema(values) - if schema in ("simple_dict", "homogeneous"): - confidence = 0.9 if schema == "simple_dict" else 0.8 - - # Convert dict to list of items with 'key' added - items_with_keys = [{"_key": k, **v} for k, v in obj.items()] - - return LoopCandidate( - path=path, - loop_var=self._derive_loop_var(path, singular=True), - items=items_with_keys, - item_schema="simple_dict", - confidence=confidence, - ) - + # TODO: Re-enable this if we implement proper dict-of-tables loop generation + # For now, return None to use scalar handling return None + # Original logic preserved below for reference: + # if not obj: + # return None + # + # values = list(obj.values()) + # + # # Check type homogeneity + # 
value_types = [type(v).__name__ for v in values] + # type_counts = Counter(value_types) + # + # if len(type_counts) != 1: + # return None + # + # value_type = value_types[0] + # + # # Only interested in dict values for dict collections + # # (scalar-valued dicts stay as scalars) + # if value_type != "dict": + # return None + # + # # Check structural homogeneity + # schema = self._analyze_dict_schema(values) + # if schema in ("simple_dict", "homogeneous"): + # confidence = 0.9 if schema == "simple_dict" else 0.8 + # + # # Convert dict to list of items with 'key' added + # items_with_keys = [{"_key": k, **v} for k, v in obj.items()] + # + # return LoopCandidate( + # path=path, + # loop_var=self._derive_loop_var(path, singular=True), + # items=items_with_keys, + # item_schema="simple_dict", + # confidence=confidence, + # ) + # + # return None + def _analyze_dict_schema( self, dicts: list[dict[str, Any]] ) -> Literal["simple_dict", "homogeneous", "heterogeneous"]: @@ -316,7 +342,7 @@ class LoopAnalyzer: XML is particularly suited for loops when we have repeated sibling elements. """ - import xml.etree.ElementTree as ET + import xml.etree.ElementTree as ET # nosec B405 if not isinstance(root, ET.Element): return diff --git a/tests/test_cli.py b/tests/test_cli.py index 705250f..a880135 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,10 +1,6 @@ from __future__ import annotations -import sys from pathlib import Path - -import pytest - from jinjaturtle import cli SAMPLES_DIR = Path(__file__).parent / "samples" diff --git a/tests/test_core_utils.py b/tests/test_core_utils.py index b907d5c..c8e41e1 100644 --- a/tests/test_core_utils.py +++ b/tests/test_core_utils.py @@ -168,8 +168,8 @@ def test_fallback_str_representer_for_unknown_type(): def test_normalize_default_value_bool_inputs_are_stringified(): """ - Real boolean values should be turned into quoted 'true'/'false' strings - by _normalize_default_value via generate_ansible_yaml. 
+ Boolean values are now preserved as booleans in YAML (not stringified). + This supports proper type preservation for JSON and other formats. """ flat_items = [ (("section", "flag_true"), True), @@ -178,8 +178,9 @@ def test_normalize_default_value_bool_inputs_are_stringified(): ansible_yaml = generate_ansible_yaml("role", flat_items) data = yaml.safe_load(ansible_yaml) - assert data["role_section_flag_true"] == "true" - assert data["role_section_flag_false"] == "false" + # Booleans are now preserved as booleans + assert data["role_section_flag_true"] is True + assert data["role_section_flag_false"] is False def test_flatten_config_unsupported_format(): diff --git a/tests/test_json_handler.py b/tests/test_json_handler.py index b9a914a..dd502b1 100644 --- a/tests/test_json_handler.py +++ b/tests/test_json_handler.py @@ -2,7 +2,6 @@ from __future__ import annotations from pathlib import Path -import json import pytest import yaml @@ -10,6 +9,8 @@ from jinjaturtle.core import ( parse_config, flatten_config, generate_ansible_yaml, + analyze_loops, + generate_jinja2_template, ) from jinjaturtle.handlers.json import JsonHandler @@ -23,30 +24,34 @@ def test_json_roundtrip(): fmt, parsed = parse_config(json_path) assert fmt == "json" - flat_items = flatten_config(fmt, parsed) - ansible_yaml = generate_ansible_yaml("foobar", flat_items) + # With loop detection + loop_candidates = analyze_loops(fmt, parsed) + flat_items = flatten_config(fmt, parsed, loop_candidates) + ansible_yaml = generate_ansible_yaml("foobar", flat_items, loop_candidates) defaults = yaml.safe_load(ansible_yaml) - # Defaults: nested keys and list indices + # Defaults: nested keys assert defaults["foobar_foo"] == "bar" assert defaults["foobar_nested_a"] == 1 - # Bool normalized to string "true" - assert defaults["foobar_nested_b"] == "true" - assert defaults["foobar_list_0"] == 10 - assert defaults["foobar_list_1"] == 20 + # Booleans are now preserved as booleans (not stringified) + assert 
defaults["foobar_nested_b"] is True + # List should be a list (not flattened to scalars) + assert defaults["foobar_list"] == [10, 20] - # Template generation is done via JsonHandler.generate_jinja2_template; we just - # make sure it produces a structure with the expected placeholders. - handler = JsonHandler() - templated = json.loads( - handler.generate_jinja2_template(parsed, role_prefix="foobar") - ) + # Template generation with loops + template = generate_jinja2_template("json", parsed, "foobar", None, loop_candidates) - assert templated["foo"] == "{{ foobar_foo }}" - assert "foobar_nested_a" in str(templated) - assert "foobar_nested_b" in str(templated) - assert "foobar_list_0" in str(templated) - assert "foobar_list_1" in str(templated) + # Template should use | tojson for type preservation + assert "{{ foobar_foo | tojson }}" in template + assert "{{ foobar_nested_a | tojson }}" in template + assert "{{ foobar_nested_b | tojson }}" in template + + # List should use loop (not scalar indices) + assert "{% for" in template + assert "foobar_list" in template + # Should NOT have scalar indices + assert "foobar_list_0" not in template + assert "foobar_list_1" not in template def test_generate_jinja2_template_json_type_error(): diff --git a/tests/test_roundtrip.py b/tests/test_roundtrip.py new file mode 100644 index 0000000..5182e8c --- /dev/null +++ b/tests/test_roundtrip.py @@ -0,0 +1,566 @@ +""" +Roundtrip tests: Generate config → template/YAML → regenerate config → compare. + +These tests verify that: +1. Generated Jinja2 template + Ansible YAML can reproduce the original config +2. The regenerated config is semantically equivalent (allowing whitespace differences) +3. No data loss occurs during the template generation process + +This is the ultimate validation - if the roundtrip works, the templates are correct. 
def render_template(template: str, variables: dict[str, Any]) -> str:
    """Render a Jinja2 template string with the given variables.

    The environment is created with ``StrictUndefined`` so that the roundtrip
    tests use the strictest undefined-variable handling Jinja2 offers.

    Args:
        template: Jinja2 template source text.
        variables: Mapping of variable names to values passed to ``render``.

    Returns:
        The rendered text.
    """
    strict_env = Environment(undefined=StrictUndefined)
    return strict_env.from_string(template).render(variables)
"value", + "number": 42, + "float": 3.14, + "boolean": true, + "false_val": false, + "null_value": null, + "array": [1, 2, 3], + "object": { + "nested": "data" + } + } + """ + + original_data = json.loads(json_text) + + # Generate template and YAML + loop_candidates = analyze_loops("json", original_data) + flat_items = flatten_config("json", original_data, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + "json", original_data, "test", None, loop_candidates + ) + + # Render template + variables = yaml.safe_load(ansible_yaml) + regenerated_text = render_template(template, variables) + regenerated_data = json.loads(regenerated_text) + + # Should match exactly + assert regenerated_data == original_data + + +class TestRoundtripYAML: + """Roundtrip tests for YAML files.""" + + def test_bar_yaml_roundtrip(self): + """Test bar.yaml can be regenerated from template.""" + samples_dir = Path(__file__).parent / "samples" + yaml_file = samples_dir / "bar.yaml" + + if not yaml_file.exists(): + pytest.skip("bar.yaml not found") + + # Read original + original_text = yaml_file.read_text() + original_data = yaml.safe_load(original_text) + + # Generate template and YAML + fmt, parsed = parse_config(yaml_file) + loop_candidates = analyze_loops(fmt, parsed) + flat_items = flatten_config(fmt, parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + fmt, parsed, "test", original_text, loop_candidates + ) + + # Load variables from YAML + variables = yaml.safe_load(ansible_yaml) + + # Render template + regenerated_text = render_template(template, variables) + regenerated_data = yaml.safe_load(regenerated_text) + + # Compare data structures + assert regenerated_data == original_data, ( + f"Regenerated YAML differs from original\n" + f"Original: {original_data}\n" + f"Regenerated: {regenerated_data}" + ) + + def 
test_yaml_with_lists_roundtrip(self): + """Test YAML with various list structures.""" + yaml_text = """ + name: myapp + simple_list: + - item1 + - item2 + - item3 + list_of_dicts: + - name: first + value: 1 + - name: second + value: 2 + nested: + inner_list: + - a + - b + """ + + original_data = yaml.safe_load(yaml_text) + + # Generate template and YAML + loop_candidates = analyze_loops("yaml", original_data) + flat_items = flatten_config("yaml", original_data, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + "yaml", original_data, "test", yaml_text, loop_candidates + ) + + # Render template + variables = yaml.safe_load(ansible_yaml) + regenerated_text = render_template(template, variables) + regenerated_data = yaml.safe_load(regenerated_text) + + # Compare + assert regenerated_data == original_data + + +class TestRoundtripTOML: + """Roundtrip tests for TOML files.""" + + def test_tom_toml_roundtrip(self): + """Test tom.toml can be regenerated from template.""" + samples_dir = Path(__file__).parent / "samples" + toml_file = samples_dir / "tom.toml" + + if not toml_file.exists(): + pytest.skip("tom.toml not found") + + # Read original + original_text = toml_file.read_text() + import tomllib + + original_data = tomllib.loads(original_text) + + # Generate template and YAML + fmt, parsed = parse_config(toml_file) + loop_candidates = analyze_loops(fmt, parsed) + flat_items = flatten_config(fmt, parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + fmt, parsed, "test", original_text, loop_candidates + ) + + # Load variables from YAML + variables = yaml.safe_load(ansible_yaml) + + # Render template + regenerated_text = render_template(template, variables) + regenerated_data = tomllib.loads(regenerated_text) + + # Compare data structures + # Note: TOML datetime objects need special handling + assert 
_compare_toml_data(regenerated_data, original_data), ( + f"Regenerated TOML differs from original\n" + f"Original: {original_data}\n" + f"Regenerated: {regenerated_data}" + ) + + def test_toml_with_arrays_roundtrip(self): + """Test TOML with inline arrays and array-of-tables.""" + toml_text = """ + name = "test" + ports = [8080, 8081, 8082] + + [[database]] + host = "db1.example.com" + port = 5432 + + [[database]] + host = "db2.example.com" + port = 5433 + """ + + import tomllib + + original_data = tomllib.loads(toml_text) + + # Generate template and YAML + loop_candidates = analyze_loops("toml", original_data) + flat_items = flatten_config("toml", original_data, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + "toml", original_data, "test", toml_text, loop_candidates + ) + + # Render template + variables = yaml.safe_load(ansible_yaml) + regenerated_text = render_template(template, variables) + regenerated_data = tomllib.loads(regenerated_text) + + # Compare + assert regenerated_data == original_data + + +class TestRoundtripXML: + """Roundtrip tests for XML files.""" + + def test_xml_simple_roundtrip(self): + """Test simple XML can be regenerated.""" + xml_text = """ + + test + 8080 + server1 + server2 + server3 + +""" + + import xml.etree.ElementTree as ET + + original_root = ET.fromstring(xml_text) + + # Generate template and YAML + fmt = "xml" + loop_candidates = analyze_loops(fmt, original_root) + flat_items = flatten_config(fmt, original_root, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + fmt, original_root, "test", xml_text, loop_candidates + ) + + # Render template + variables = yaml.safe_load(ansible_yaml) + regenerated_text = render_template(template, variables) + + # Parse regenerated XML + regenerated_root = ET.fromstring(regenerated_text) + + # Compare XML structures (ignore 
insignificant whitespace) + assert _xml_elements_equal( + original_root, regenerated_root, ignore_whitespace=True + ), ( + f"Regenerated XML differs from original\n" + f"Original: {ET.tostring(original_root, encoding='unicode')}\n" + f"Regenerated: {ET.tostring(regenerated_root, encoding='unicode')}" + ) + + def test_ossec_xml_roundtrip(self): + """Test ossec.xml (complex real-world XML) roundtrip.""" + samples_dir = Path(__file__).parent / "samples" + xml_file = samples_dir / "ossec.xml" + + if not xml_file.exists(): + pytest.skip("ossec.xml not found") + + # Read original + original_text = xml_file.read_text() + import xml.etree.ElementTree as ET + + original_root = ET.fromstring(original_text) + + # Generate template and YAML + fmt, parsed = parse_config(xml_file) + loop_candidates = analyze_loops(fmt, parsed) + flat_items = flatten_config(fmt, parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + fmt, parsed, "test", original_text, loop_candidates + ) + + # Load variables and render + variables = yaml.safe_load(ansible_yaml) + regenerated_text = render_template(template, variables) + + # Parse regenerated + regenerated_root = ET.fromstring(regenerated_text) + + # Compare - for complex XML, we compare structure not exact text + assert _xml_elements_equal( + original_root, regenerated_root, ignore_whitespace=True + ) + + +class TestRoundtripINI: + """Roundtrip tests for INI files.""" + + def test_ini_simple_roundtrip(self): + """Test simple INI can be regenerated.""" + ini_text = """[section1] +key1 = value1 +key2 = value2 + +[section2] +key3 = value3 +""" + + from configparser import ConfigParser + + original_config = ConfigParser() + original_config.read_string(ini_text) + + # Generate template and YAML + fmt = "ini" + loop_candidates = analyze_loops(fmt, original_config) + flat_items = flatten_config(fmt, original_config, loop_candidates) + + ansible_yaml = 
generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + fmt, original_config, "test", ini_text, loop_candidates + ) + + # Render template + variables = yaml.safe_load(ansible_yaml) + regenerated_text = render_template(template, variables) + + # Parse regenerated + regenerated_config = ConfigParser() + regenerated_config.read_string(regenerated_text) + + # Compare + assert _ini_configs_equal(original_config, regenerated_config) + + +class TestRoundtripEdgeCases: + """Roundtrip tests for edge cases and special scenarios.""" + + def test_empty_lists_roundtrip(self): + """Test handling of empty lists.""" + json_text = '{"items": []}' + original_data = json.loads(json_text) + + loop_candidates = analyze_loops("json", original_data) + flat_items = flatten_config("json", original_data, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + "json", original_data, "test", None, loop_candidates + ) + + variables = yaml.safe_load(ansible_yaml) + regenerated_text = render_template(template, variables) + regenerated_data = json.loads(regenerated_text) + + assert regenerated_data == original_data + + def test_special_characters_roundtrip(self): + """Test handling of special characters.""" + json_data = { + "quote": 'He said "hello"', + "backslash": "path\\to\\file", + "newline": "line1\nline2", + "unicode": "emoji: 🚀", + } + + loop_candidates = analyze_loops("json", json_data) + flat_items = flatten_config("json", json_data, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + "json", json_data, "test", None, loop_candidates + ) + + variables = yaml.safe_load(ansible_yaml) + regenerated_text = render_template(template, variables) + regenerated_data = json.loads(regenerated_text) + + assert regenerated_data == json_data + + def test_numeric_types_roundtrip(self): + """Test 
preservation of numeric types.""" + json_data = { + "int": 42, + "float": 3.14159, + "negative": -100, + "zero": 0, + "large": 9999999999, + } + + loop_candidates = analyze_loops("json", json_data) + flat_items = flatten_config("json", json_data, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + "json", json_data, "test", None, loop_candidates + ) + + variables = yaml.safe_load(ansible_yaml) + regenerated_text = render_template(template, variables) + regenerated_data = json.loads(regenerated_text) + + assert regenerated_data == json_data + + def test_boolean_preservation_roundtrip(self): + """Test that booleans are preserved correctly.""" + yaml_text = """ + enabled: true + disabled: false + """ + + original_data = yaml.safe_load(yaml_text) + + loop_candidates = analyze_loops("yaml", original_data) + flat_items = flatten_config("yaml", original_data, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + "yaml", original_data, "test", yaml_text, loop_candidates + ) + + variables = yaml.safe_load(ansible_yaml) + regenerated_text = render_template(template, variables) + regenerated_data = yaml.safe_load(regenerated_text) + + # Both should be actual booleans + assert regenerated_data["enabled"] is True + assert regenerated_data["disabled"] is False + + +# Helper functions + + +def _compare_toml_data(data1: Any, data2: Any) -> bool: + """Compare TOML data, handling datetime objects.""" + import datetime + + if type(data1) != type(data2): + return False + + if isinstance(data1, dict): + if set(data1.keys()) != set(data2.keys()): + return False + return all(_compare_toml_data(data1[k], data2[k]) for k in data1.keys()) + + elif isinstance(data1, list): + if len(data1) != len(data2): + return False + return all(_compare_toml_data(v1, v2) for v1, v2 in zip(data1, data2)) + + elif isinstance(data1, 
datetime.datetime): + # Compare datetime objects + return data1 == data2 + + else: + return data1 == data2 + + +def _xml_elements_equal(elem1, elem2, ignore_whitespace: bool = False) -> bool: + """Compare two XML elements for equality.""" + # Compare tags + if elem1.tag != elem2.tag: + return False + + # Compare attributes + if elem1.attrib != elem2.attrib: + return False + + # Compare text + text1 = (elem1.text or "").strip() if ignore_whitespace else (elem1.text or "") + text2 = (elem2.text or "").strip() if ignore_whitespace else (elem2.text or "") + if text1 != text2: + return False + + # Compare tail + tail1 = (elem1.tail or "").strip() if ignore_whitespace else (elem1.tail or "") + tail2 = (elem2.tail or "").strip() if ignore_whitespace else (elem2.tail or "") + if tail1 != tail2: + return False + + # Compare children + children1 = list(elem1) + children2 = list(elem2) + + if len(children1) != len(children2): + return False + + return all( + _xml_elements_equal(c1, c2, ignore_whitespace) + for c1, c2 in zip(children1, children2) + ) + + +def _ini_configs_equal(config1, config2) -> bool: + """Compare two ConfigParser objects for equality.""" + if set(config1.sections()) != set(config2.sections()): + return False + + for section in config1.sections(): + if set(config1.options(section)) != set(config2.options(section)): + return False + + for option in config1.options(section): + if config1.get(section, option) != config2.get(section, option): + return False + + return True + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/test_yaml_template_consistency.py b/tests/test_yaml_template_consistency.py new file mode 100644 index 0000000..69184dd --- /dev/null +++ b/tests/test_yaml_template_consistency.py @@ -0,0 +1,558 @@ +""" +Tests to ensure all Jinja2 template variables exist in the Ansible YAML. 
def extract_jinja_variables(template: str) -> Set[str]:
    """
    Extract all Jinja2 variable names from a template that must exist in YAML.

    Extracts variables from:
    - {{ variable_name }} and {{- variable_name }}
    - {{ variable.field }}
    - {% for item in collection %}, {%- for item in collection %}
    - {% for key, value in collection.items() %}

    Returns only the base variable names that must be defined in YAML.
    Loop targets (the 'item' part of 'for item in collection') are filtered
    out, including when an inner loop iterates over an outer loop target
    (e.g. 'for port in server.ports').
    """
    variables: Set[str] = set()

    # First, find all loop targets (defined by the template, not by YAML).
    # The optional [-+] covers Jinja2 whitespace-control markers, and the
    # target group accepts comma-separated names for tuple unpacking.
    loop_vars: Set[str] = set()
    for_pattern = (
        r"\{%[-+]?\s*for\s+"
        r"(\w+(?:\s*,\s*\w+)*)"
        r"\s+in\s+([a-zA-Z_][a-zA-Z0-9_]*)"
    )
    for match in re.finditer(for_pattern, template):
        loop_vars.update(name.strip() for name in match.group(1).split(","))
        variables.add(match.group(2))  # Base name of the iterated collection

    # {{ variable_name }} or {{ variable.field }}: capture only the first
    # identifier, before any dots or filters.
    var_pattern = r"\{\{-?\s*([a-zA-Z_][a-zA-Z0-9_]*)"
    for match in re.finditer(var_pattern, template):
        variables.add(match.group(1))

    # Drop loop targets last, so a collection that is itself an outer loop
    # target (nested loops) is not reported as a YAML variable.
    return variables - loop_vars
+ """ + data = yaml.safe_load(ansible_yaml) + if not isinstance(data, dict): + return set() + return set(data.keys()) + + +class TestTemplateYamlConsistency: + """Tests that verify template variables exist in YAML.""" + + def test_simple_json_consistency(self): + """Simple JSON with scalars and lists.""" + json_text = """ + { + "name": "test", + "values": [1, 2, 3] + } + """ + + fmt = "json" + import json + + parsed = json.loads(json_text) + + loop_candidates = analyze_loops(fmt, parsed) + flat_items = flatten_config(fmt, parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template(fmt, parsed, "app", None, loop_candidates) + + yaml_vars = extract_yaml_variables(ansible_yaml) + template_vars = extract_jinja_variables(template) + + # Every variable in template must exist in YAML + missing_vars = template_vars - yaml_vars + assert not missing_vars, ( + f"Template references variables not in YAML: {missing_vars}\n" + f"YAML vars: {yaml_vars}\n" + f"Template vars: {template_vars}\n" + f"Template:\n{template}\n" + f"YAML:\n{ansible_yaml}" + ) + + def test_toml_inline_array_consistency(self): + """TOML with inline array should use loops consistently.""" + import tomllib + + toml_text = """ + name = "myapp" + servers = ["server1", "server2", "server3"] + """ + + parsed = tomllib.loads(toml_text) + loop_candidates = analyze_loops("toml", parsed) + flat_items = flatten_config("toml", parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template( + "toml", parsed, "app", toml_text, loop_candidates + ) + + yaml_vars = extract_yaml_variables(ansible_yaml) + template_vars = extract_jinja_variables(template) + + missing_vars = template_vars - yaml_vars + assert not missing_vars, ( + f"Template references variables not in YAML: {missing_vars}\n" + f"Template:\n{template}\n" + f"YAML:\n{ansible_yaml}" + ) + + def 
test_toml_array_of_tables_consistency(self): + """TOML with [[array.of.tables]] should use loops consistently.""" + import tomllib + + toml_text = """ + [[database]] + host = "db1.example.com" + port = 5432 + + [[database]] + host = "db2.example.com" + port = 5433 + """ + + parsed = tomllib.loads(toml_text) + loop_candidates = analyze_loops("toml", parsed) + flat_items = flatten_config("toml", parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template( + "toml", parsed, "app", toml_text, loop_candidates + ) + + yaml_vars = extract_yaml_variables(ansible_yaml) + template_vars = extract_jinja_variables(template) + + missing_vars = template_vars - yaml_vars + assert not missing_vars, ( + f"Template references variables not in YAML: {missing_vars}\n" + f"Template:\n{template}\n" + f"YAML:\n{ansible_yaml}" + ) + + # Additionally verify that if YAML has a list, template uses a loop + defaults = yaml.safe_load(ansible_yaml) + for var_name, value in defaults.items(): + if isinstance(value, list) and len(value) > 1: + # YAML has a list - template should use {% for %} + assert "{% for" in template, ( + f"YAML has list variable '{var_name}' but template doesn't use loops\n" + f"Template:\n{template}" + ) + + def test_yaml_list_consistency(self): + """YAML with lists should use loops consistently.""" + yaml_text = """ + name: myapp + servers: + - server1 + - server2 + - server3 + databases: + - host: db1 + port: 5432 + - host: db2 + port: 5433 + """ + + parsed = yaml.safe_load(yaml_text) + loop_candidates = analyze_loops("yaml", parsed) + flat_items = flatten_config("yaml", parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template( + "yaml", parsed, "app", yaml_text, loop_candidates + ) + + yaml_vars = extract_yaml_variables(ansible_yaml) + template_vars = extract_jinja_variables(template) + + missing_vars = 
template_vars - yaml_vars + assert not missing_vars, ( + f"Template references variables not in YAML: {missing_vars}\n" + f"Template:\n{template}\n" + f"YAML:\n{ansible_yaml}" + ) + + def test_mixed_scalars_and_loops_consistency(self): + """Config with both scalars and loops should be consistent.""" + import tomllib + + toml_text = """ + name = "myapp" + version = "1.0" + ports = [8080, 8081, 8082] + + [database] + host = "localhost" + port = 5432 + + [[servers]] + name = "web1" + ip = "10.0.0.1" + + [[servers]] + name = "web2" + ip = "10.0.0.2" + """ + + parsed = tomllib.loads(toml_text) + loop_candidates = analyze_loops("toml", parsed) + flat_items = flatten_config("toml", parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template( + "toml", parsed, "app", toml_text, loop_candidates + ) + + yaml_vars = extract_yaml_variables(ansible_yaml) + template_vars = extract_jinja_variables(template) + + missing_vars = template_vars - yaml_vars + assert not missing_vars, ( + f"Template references variables not in YAML: {missing_vars}\n" + f"Template:\n{template}\n" + f"YAML:\n{ansible_yaml}" + ) + + def test_no_orphaned_scalar_references(self): + """ + When YAML has a list variable, template must NOT reference scalar indices. 
+ + This catches the bug where: + - YAML has: app_list: [1, 2, 3] + - Template incorrectly uses: {{ app_list_0 }}, {{ app_list_1 }} + """ + import json + + json_text = '{"items": [1, 2, 3, 4, 5]}' + parsed = json.loads(json_text) + + loop_candidates = analyze_loops("json", parsed) + flat_items = flatten_config("json", parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template( + "json", parsed, "app", None, loop_candidates + ) + + defaults = yaml.safe_load(ansible_yaml) + + # Check each list variable in YAML + for var_name, value in defaults.items(): + if isinstance(value, list): + # Template should NOT reference app_items_0, app_items_1, etc. + for i in range(len(value)): + scalar_ref = f"{var_name}_{i}" + assert scalar_ref not in template, ( + f"Template incorrectly uses scalar reference '{scalar_ref}' " + f"when YAML has '{var_name}' as a list\n" + f"Template should use loops, not scalar indices\n" + f"Template:\n{template}" + ) + + def test_all_sample_files_consistency(self): + """Test all sample files for consistency.""" + samples_dir = Path(__file__).parent / "samples" + + sample_files = [ + ("foo.json", "json"), + ("bar.yaml", "yaml"), + ("tom.toml", "toml"), + ] + + for filename, fmt in sample_files: + file_path = samples_dir / filename + if not file_path.exists(): + pytest.skip(f"Sample file {filename} not found") + + original_text = file_path.read_text() + fmt_detected, parsed = parse_config(file_path) + + loop_candidates = analyze_loops(fmt_detected, parsed) + flat_items = flatten_config(fmt_detected, parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates) + template = generate_jinja2_template( + fmt_detected, parsed, "test", original_text, loop_candidates + ) + + yaml_vars = extract_yaml_variables(ansible_yaml) + template_vars = extract_jinja_variables(template) + + missing_vars = template_vars - yaml_vars + assert not 
missing_vars, ( + f"File: {filename}\n" + f"Template references variables not in YAML: {missing_vars}\n" + f"YAML vars: {yaml_vars}\n" + f"Template vars: {template_vars}\n" + f"Template:\n{template}\n" + f"YAML:\n{ansible_yaml}" + ) + + +class TestStructuralConsistency: + """Tests that verify structural consistency between YAML and templates.""" + + def test_list_in_yaml_means_loop_in_template(self): + """When YAML has a list (len > 1), template should use {% for %}.""" + import json + + json_text = """ + { + "scalar": "value", + "list": [1, 2, 3] + } + """ + + parsed = json.loads(json_text) + loop_candidates = analyze_loops("json", parsed) + flat_items = flatten_config("json", parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template( + "json", parsed, "app", None, loop_candidates + ) + + defaults = yaml.safe_load(ansible_yaml) + + # Find list variables in YAML + list_vars = [ + k for k, v in defaults.items() if isinstance(v, list) and len(v) > 1 + ] + + if list_vars: + # Template must contain for loops + assert "{% for" in template, ( + f"YAML has list variables {list_vars} but template has no loops\n" + f"Template:\n{template}" + ) + + # Each list variable should be used in a for loop + for var_name in list_vars: + # Look for "{% for ... 
in var_name %}" + for_pattern = ( + r"\{%\s*for\s+\w+\s+in\s+" + re.escape(var_name) + r"\s*%\}" + ) + assert re.search(for_pattern, template), ( + f"List variable '{var_name}' not used in a for loop\n" + f"Template:\n{template}" + ) + + def test_scalar_in_yaml_means_no_loop_in_template(self): + """When YAML has scalars, template should use {{ var }}, not loops.""" + import json + + json_text = """ + { + "name": "test", + "port": 8080, + "enabled": true + } + """ + + parsed = json.loads(json_text) + loop_candidates = analyze_loops("json", parsed) + flat_items = flatten_config("json", parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template( + "json", parsed, "app", None, loop_candidates + ) + + defaults = yaml.safe_load(ansible_yaml) + + # All variables are scalars - template should NOT have loops + scalar_vars = [ + k for k, v in defaults.items() if not isinstance(v, (list, dict)) + ] + + # Check that scalar vars are used directly, not in loops + for var_name in scalar_vars: + # Should appear in {{ var_name }}, not {% for ... in var_name %} + direct_ref = f"{{{{ {var_name}" + loop_ref = f"for .* in {var_name}" + + assert direct_ref in template, ( + f"Scalar variable '{var_name}' should be directly referenced\n" + f"Template:\n{template}" + ) + + assert not re.search(loop_ref, template), ( + f"Scalar variable '{var_name}' incorrectly used in a loop\n" + f"Template:\n{template}" + ) + + def test_no_undefined_variable_errors(self): + """ + Simulate Ansible template rendering to catch undefined variables. + + This is the ultimate test - actually render the template with the YAML + and verify no undefined variable errors occur. 
+ """ + from jinja2 import Environment, StrictUndefined + import json + + json_text = """ + { + "name": "myapp", + "servers": ["web1", "web2"], + "database": { + "host": "localhost", + "port": 5432 + } + } + """ + + parsed = json.loads(json_text) + loop_candidates = analyze_loops("json", parsed) + flat_items = flatten_config("json", parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template( + "json", parsed, "app", None, loop_candidates + ) + + # Load variables from YAML + variables = yaml.safe_load(ansible_yaml) + + # Try to render the template + env = Environment(undefined=StrictUndefined) + try: + jinja_template = env.from_string(template) + rendered = jinja_template.render(variables) + + # Successfully rendered - this is what we want! + assert rendered, "Template rendered successfully" + + except Exception as e: + pytest.fail( + f"Template rendering failed with variables from YAML\n" + f"Error: {e}\n" + f"Template:\n{template}\n" + f"Variables:\n{ansible_yaml}" + ) + + +class TestRegressionBugs: + """Tests for specific bugs that were found and fixed.""" + + def test_toml_array_of_tables_no_scalar_refs(self): + """ + Regression test: TOML [[array]] should not generate scalar references. + + Bug: Template had {{ app_database_host }} when YAML had app_database as list. 
+ """ + import tomllib + + toml_text = """ + [[database]] + host = "db1" + port = 5432 + + [[database]] + host = "db2" + port = 5433 + """ + + parsed = tomllib.loads(toml_text) + loop_candidates = analyze_loops("toml", parsed) + flat_items = flatten_config("toml", parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template( + "toml", parsed, "app", toml_text, loop_candidates + ) + + # YAML should have app_database as a list + defaults = yaml.safe_load(ansible_yaml) + assert isinstance( + defaults.get("app_database"), list + ), f"Expected app_database to be a list in YAML\n{ansible_yaml}" + + # Template should NOT have app_database_host or app_database_port + assert ( + "app_database_host" not in template + ), f"Template incorrectly uses scalar 'app_database_host'\n{template}" + assert ( + "app_database_port" not in template + ), f"Template incorrectly uses scalar 'app_database_port'\n{template}" + + # Template SHOULD use a loop + assert "{% for" in template, f"Template should use a loop\n{template}" + assert ( + "app_database" in template + ), f"Template should reference app_database\n{template}" + + def test_json_array_no_index_refs(self): + """ + Regression test: JSON arrays should not generate index references. + + Bug: Template had {{ app_list_0 }}, {{ app_list_1 }} when YAML had app_list as list. 
+ """ + import json + + json_text = '{"items": [1, 2, 3]}' + parsed = json.loads(json_text) + + loop_candidates = analyze_loops("json", parsed) + flat_items = flatten_config("json", parsed, loop_candidates) + + ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates) + template = generate_jinja2_template( + "json", parsed, "app", None, loop_candidates + ) + + # YAML should have app_items as a list + defaults = yaml.safe_load(ansible_yaml) + assert isinstance(defaults.get("app_items"), list) + + # Template should NOT have app_items_0, app_items_1, app_items_2 + for i in range(3): + assert ( + f"app_items_{i}" not in template + ), f"Template incorrectly uses scalar 'app_items_{i}'\n{template}" + + # Template SHOULD use a loop + assert "{% for" in template + assert "app_items" in template + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/utils/diff_configs.py b/utils/diff_configs.py new file mode 100644 index 0000000..dbc68c5 --- /dev/null +++ b/utils/diff_configs.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +""" +Side-by-side comparison of original vs regenerated config. 
def colorize(text: str, color: str) -> str:
    """Wrap *text* in the ANSI escape sequence for *color*.

    Known colour names are ``red``, ``green``, ``yellow``, ``blue`` and
    ``cyan``.  An unknown name contributes no start sequence; the reset
    sequence is still appended, as it always has been.
    """
    colors = {
        "red": "\033[91m",
        "green": "\033[92m",
        "yellow": "\033[93m",
        "blue": "\033[94m",
        # unified_diff() and main() ask for "cyan"; without this entry those
        # calls silently produced uncoloured output.
        "cyan": "\033[96m",
        "reset": "\033[0m",
    }
    return f"{colors.get(color, '')}{text}{colors['reset']}"


def _fit_column(line: str, col_width: int) -> str:
    """Shorten *line* with a trailing ellipsis so it fits in *col_width*."""
    if len(line) > col_width - 2:
        return line[: col_width - 5] + "..."
    return line


def side_by_side_diff(original: str, regenerated: str, width: int = 80):
    """Print *original* and *regenerated* next to each other, line by line.

    Lines are paired purely by position (no alignment is attempted).  A pair
    is coloured red/green when the two *full* lines differ, even if the
    difference lies in the part that is cut off for display.

    Args:
        original: Text shown in the left column.
        regenerated: Text shown in the right column.
        width: Total terminal width to fill; each column gets roughly half.
    """
    from itertools import zip_longest

    # Keep the columns usable for very small widths; a negative width would
    # otherwise produce nonsensical slices below.
    col_width = max(width // 2 - 3, 8)

    print(
        colorize("ORIGINAL".center(col_width), "blue")
        + " | "
        + colorize("REGENERATED".center(col_width), "green")
    )
    print("-" * col_width + "-+-" + "-" * col_width)

    pairs = zip_longest(original.splitlines(), regenerated.splitlines(), fillvalue="")
    for orig_line, regen_line in pairs:
        # Decide on the untruncated text: comparing after truncation would
        # report long lines that differ near the end as identical.
        differs = orig_line != regen_line

        orig_cell = _fit_column(orig_line, col_width).ljust(col_width)
        regen_cell = _fit_column(regen_line, col_width).ljust(col_width)

        if differs:
            orig_cell = colorize(orig_cell, "red")
            regen_cell = colorize(regen_cell, "green")

        print(f"{orig_cell} | {regen_cell}")


def unified_diff(original: str, regenerated: str, filename: str, context: int = 3):
    """Print a coloured unified diff between *original* and *regenerated*.

    Args:
        original: The original file content.
        regenerated: The content rendered from the generated template.
        filename: Name used in the ``---`` / ``+++`` header lines.
        context: Number of unchanged context lines around each hunk.
    """
    diff = difflib.unified_diff(
        original.splitlines(keepends=True),
        regenerated.splitlines(keepends=True),
        fromfile=f"{filename} (original)",
        tofile=f"{filename} (regenerated)",
        n=context,
    )

    for index, line in enumerate(diff):
        # Only drop the line terminator: trailing spaces/tabs may be exactly
        # the difference the user is looking for.
        text = line.rstrip("\r\n")

        if index < 2:
            # difflib emits the two file header lines first.  Testing the
            # position (not the "---"/"+++" prefix) keeps removed content
            # such as "-- comment" from being mistaken for a header.
            print(colorize(text, "blue"))
        elif line.startswith("@@"):
            print(colorize(text, "cyan"))
        elif line.startswith("+"):
            print(colorize(text, "green"))
        elif line.startswith("-"):
            print(colorize(text, "red"))
        else:
            print(text)


def _render_round_trip(path: Path, original_text: str):
    """Run the config through the generator and render it back to text.

    Returns a ``(fmt, regenerated_text)`` tuple, where ``fmt`` is the format
    name reported by ``parse_config``.
    """
    fmt, parsed = parse_config(path)
    loop_candidates = analyze_loops(fmt, parsed)
    flat_items = flatten_config(fmt, parsed, loop_candidates)

    ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
    template = generate_jinja2_template(
        fmt, parsed, "app", original_text, loop_candidates
    )

    # safe_load() returns None for an empty document; render() needs a mapping.
    variables = yaml.safe_load(ansible_yaml) or {}
    env = Environment(undefined=StrictUndefined)
    return fmt, env.from_string(template).render(variables)


def _semantic_loader(fmt: str):
    """Return ``(label, loader)`` for formats that can be compared as data.

    Returns ``None`` when no structural comparison is available for *fmt*.
    """
    if fmt == "json":
        import json

        return "JSON", json.loads
    if fmt == "yaml":
        return "YAML", yaml.safe_load
    if fmt == "toml":
        import tomllib

        return "TOML", tomllib.loads
    return None


def _report_semantic_comparison(fmt: str, original_text: str, regenerated_text: str):
    """Print whether both texts parse to the same data structure."""
    print(colorize(f"\n{'=' * 80}", "cyan"))
    print(colorize(" Semantic Comparison", "cyan"))
    print(colorize(f"{'=' * 80}", "cyan"))

    try:
        found = _semantic_loader(fmt)
        if found is None:
            # Say so explicitly instead of leaving an empty section.
            print(
                colorize(
                    f"ℹ️ No semantic comparison available for format: {fmt}",
                    "yellow",
                )
            )
            return

        label, load = found
        if load(original_text) == load(regenerated_text):
            print(colorize(f"✅ {label} data structures are IDENTICAL", "green"))
        else:
            print(colorize(f"⚠️ {label} data structures DIFFER", "yellow"))
    except Exception as e:
        # Best effort only: a parse failure here must not abort the report.
        print(colorize(f"ℹ️ Could not compare semantically: {e}", "yellow"))


def main():
    """Command-line entry point.

    Regenerates the given config file through the template pipeline and
    prints a textual diff (unified and/or side-by-side) followed by a
    semantic comparison.

    Returns:
        0 when the comparison ran (whether or not the files differ),
        1 when the file is missing or regeneration failed.
    """
    parser = argparse.ArgumentParser(
        description="Compare original config with regenerated version",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument("file", type=Path, help="Config file to check")

    parser.add_argument(
        "--mode",
        choices=["side-by-side", "unified", "both"],
        default="both",
        help="Comparison mode (default: both)",
    )

    parser.add_argument(
        "--context",
        type=int,
        default=3,
        help="Number of context lines for unified diff (default: 3)",
    )

    parser.add_argument(
        "--width",
        type=int,
        default=160,
        help="Terminal width for side-by-side (default: 160)",
    )

    args = parser.parse_args()

    if not args.file.exists():
        print(colorize(f"❌ File not found: {args.file}", "red"))
        return 1

    print(colorize(f"\n{'=' * 80}", "blue"))
    print(colorize(f" Comparing: {args.file}", "blue"))
    print(colorize(f"{'=' * 80}\n", "blue"))

    # Read and regenerate
    try:
        original_text = args.file.read_text()
        fmt, regenerated_text = _render_round_trip(args.file, original_text)

        # Leading/trailing whitespace is deliberately ignored here.
        if original_text.strip() == regenerated_text.strip():
            print(colorize("✅ Files are IDENTICAL (text comparison)\n", "green"))
        else:
            if args.mode in ("unified", "both"):
                print(colorize("\n--- UNIFIED DIFF ---\n", "yellow"))
                unified_diff(
                    original_text, regenerated_text, args.file.name, args.context
                )

            if args.mode in ("side-by-side", "both"):
                print(colorize("\n--- SIDE-BY-SIDE COMPARISON ---\n", "yellow"))
                side_by_side_diff(original_text, regenerated_text, args.width)

        _report_semantic_comparison(fmt, original_text, regenerated_text)

    except Exception as e:
        # Top-level boundary of a developer tool: show the full traceback.
        print(colorize(f"❌ ERROR: {e}", "red"))
        import traceback

        traceback.print_exc()
        return 1

    return 0
def _save(path: Path, text: str) -> None:
    """Write *text* to *path* and report where it was saved."""
    path.write_text(text)
    print(f"📄 Saved: {path}")


def regenerate_and_save(config_file: Path, output_dir: Path, role_prefix: str = "app"):
    """
    Regenerate config and save all intermediate files.

    Writes, below *output_dir*: a copy of the original, ``defaults/main.yml``,
    ``templates/config.j2`` and the re-rendered config, then prints a short
    summary.

    Args:
        config_file: Config file to process.
        output_dir: Directory that receives all files (created if missing).
        role_prefix: Prefix used for the generated Ansible variable names.

    Returns:
        *output_dir*, unchanged.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Read original
    original_text = config_file.read_text()
    fmt, parsed = parse_config(config_file)

    # Keep the input's extension for the original/regenerated copies.
    ext = config_file.suffix

    original_out = output_dir / f"original{ext}"
    _save(original_out, original_text)

    # Generate Ansible files
    loop_candidates = analyze_loops(fmt, parsed)
    flat_items = flatten_config(fmt, parsed, loop_candidates)

    ansible_yaml = generate_ansible_yaml(role_prefix, flat_items, loop_candidates)
    template = generate_jinja2_template(
        fmt, parsed, role_prefix, original_text, loop_candidates
    )

    defaults_dir = output_dir / "defaults"
    defaults_dir.mkdir(exist_ok=True)
    _save(defaults_dir / "main.yml", ansible_yaml)

    templates_dir = output_dir / "templates"
    templates_dir.mkdir(exist_ok=True)
    _save(templates_dir / "config.j2", template)

    # Render template.  safe_load() returns None for an empty document, but
    # render() needs a mapping.
    variables = yaml.safe_load(ansible_yaml) or {}
    env = Environment(undefined=StrictUndefined)
    regenerated_text = env.from_string(template).render(variables)

    regenerated_out = output_dir / f"regenerated{ext}"
    _save(regenerated_out, regenerated_text)

    # Summary
    print(f"\n✅ All files saved to: {output_dir}")
    print("\n📊 Statistics:")
    print(f" Format: {fmt}")
    print(f" Loop candidates: {len(loop_candidates)}")
    if loop_candidates:
        print(" Loops detected:")
        for c in loop_candidates:
            print(f" - {'.'.join(c.path)}: {len(c.items)} items")

    # Check if identical (ignoring leading/trailing whitespace only)
    if original_text.strip() == regenerated_text.strip():
        print("\n✅ Original and regenerated are IDENTICAL (text comparison)")
    else:
        # The kind of difference is not analysed here, so do not claim it is
        # merely whitespace/formatting.
        print("\n⚠️ Original and regenerated DIFFER (text comparison)")
        print(f" Run: diff {original_out} {regenerated_out}")

    return output_dir


def main():
    """Command-line entry point.

    Returns:
        0 on success, 1 when the input file is missing or regeneration fails.
    """
    parser = argparse.ArgumentParser(
        description="Regenerate config and save all intermediate files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s tests/samples/foo.json
  %(prog)s tests/samples/tom.toml -o tmp/toml_output
  %(prog)s tests/samples/bar.yaml --role-prefix myapp
        """,
    )

    parser.add_argument("file", type=Path, help="Config file to process")

    parser.add_argument(
        "-o",
        "--output-dir",
        type=Path,
        help="Output directory (default: regenerated_<input file stem>)",
    )

    parser.add_argument(
        "-r",
        "--role-prefix",
        default="app",
        help="Ansible role prefix for variables (default: app)",
    )

    args = parser.parse_args()

    if not args.file.exists():
        print(f"❌ File not found: {args.file}")
        return 1

    # Determine output directory
    if args.output_dir is not None:
        output_dir = args.output_dir
    else:
        output_dir = Path(f"regenerated_{args.file.stem}")

    print(f"🔄 Regenerating: {args.file}")
    print(f"📁 Output directory: {output_dir}")
    print(f"🏷️ Role prefix: {args.role_prefix}\n")

    try:
        regenerate_and_save(args.file, output_dir, args.role_prefix)
        return 0
    except Exception as e:
        # Top-level boundary of a developer tool: show the full traceback.
        print(f"\n❌ ERROR: {e}")
        import traceback

        traceback.print_exc()
        return 1