Refactor and add much more robust tests (both automated and manual) to ensure loop handling and template generation work correctly
Some checks failed
CI / test (push) Failing after 45s
Lint / test (push) Successful in 26s
Trivy / test (push) Successful in 24s

This commit is contained in:
Miguel Jacq 2025-11-30 18:27:01 +11:00
parent 3af628e22e
commit d7c71f6349
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
17 changed files with 2126 additions and 91 deletions

View file

@ -15,7 +15,7 @@ jobs:
run: |
apt-get update
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
black pyflakes3 python3-bandit
black pyflakes3 python3-bandit vulture
- name: Run linters
run: |
@ -24,3 +24,4 @@ jobs:
pyflakes3 src/*
pyflakes3 tests/*
bandit -s B110 -r src/
vulture .

1
.gitignore vendored
View file

@ -7,3 +7,4 @@ dist
*.yml
*.j2
*.toml
regenerated_*

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "jinjaturtle"
version = "0.2.0"
version = "0.3.0"
description = "Convert config files into Ansible defaults and Jinja2 templates."
authors = ["Miguel Jacq <mig@mig5.net>"]
license = "GPL-3.0-or-later"

View file

@ -3,6 +3,7 @@ from __future__ import annotations
from pathlib import Path
from typing import Any, Iterable
import datetime
import yaml
from .loop_analyzer import LoopAnalyzer, LoopCandidate
@ -100,6 +101,9 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]:
if handler is None:
raise ValueError(f"Unsupported config format: {fmt}")
parsed = handler.parse(path)
# Make sure datetime objects are treated as strings (TOML, YAML)
parsed = _stringify_timestamps(parsed)
return fmt, parsed
@ -158,17 +162,6 @@ def _path_starts_with(path: tuple[str, ...], prefix: tuple[str, ...]) -> bool:
return path[: len(prefix)] == prefix
def _normalize_default_value(value: Any) -> Any:
"""
Ensure that 'true' / 'false' end up as quoted strings in YAML.
"""
if isinstance(value, bool):
return QuotedString("true" if value else "false")
if isinstance(value, str) and value.lower() in {"true", "false"}:
return QuotedString(value)
return value
def generate_ansible_yaml(
role_prefix: str,
flat_items: list[tuple[tuple[str, ...], Any]],
@ -182,7 +175,7 @@ def generate_ansible_yaml(
# Add scalar variables
for path, value in flat_items:
var_name = make_var_name(role_prefix, path)
defaults[var_name] = _normalize_default_value(value)
defaults[var_name] = value # No normalization - keep original types
# Add loop collections
if loop_candidates:
@ -226,3 +219,29 @@ def generate_jinja2_template(
return handler.generate_jinja2_template(
parsed, role_prefix, original_text=original_text
)
def _stringify_timestamps(obj: Any) -> Any:
"""
Recursively walk a parsed config and turn any datetime/date/time objects
into plain strings in ISO-8601 form.
This prevents Python datetime objects from leaking into YAML/Jinja, which
would otherwise reformat the value (e.g. replacing 'T' with a space).
This commonly occurs with TOML and YAML files, whose parsers
automatically convert such strings into Python datetime objects.
"""
if isinstance(obj, dict):
return {k: _stringify_timestamps(v) for k, v in obj.items()}
if isinstance(obj, list):
return [_stringify_timestamps(v) for v in obj]
# TOML & YAML both use the standard datetime types
if isinstance(obj, datetime.datetime):
# Use default ISO-8601: 'YYYY-MM-DDTHH:MM:SS±HH:MM' (with 'T')
return obj.isoformat()
if isinstance(obj, (datetime.date, datetime.time)):
return obj.isoformat()
return obj
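
An illustrative sketch (not from the commit) of the behaviour _stringify_timestamps aims for, assuming the helper lives in jinjaturtle.core as added above; it is a private function, so importing it here is purely for demonstration:

# Sketch only: assumes _stringify_timestamps is importable from jinjaturtle.core.
import datetime
from jinjaturtle.core import _stringify_timestamps

parsed = {
    "created": datetime.datetime(2025, 11, 30, 18, 27, 1),
    "nested": {"released": datetime.date(2025, 1, 1)},
    "tags": ["a", "b"],
}
result = _stringify_timestamps(parsed)
assert result["created"] == "2025-11-30T18:27:01"   # 'T' separator preserved
assert result["nested"]["released"] == "2025-01-01"
assert result["tags"] == ["a", "b"]                  # non-datetime values untouched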

View file

@ -12,7 +12,7 @@ class IniHandler(BaseHandler):
def parse(self, path: Path) -> configparser.ConfigParser:
parser = configparser.ConfigParser()
parser.optionxform = str # preserve key case
parser.optionxform = str # noqa
with path.open("r", encoding="utf-8") as f:
parser.read_file(f)
return parser

View file

@ -1,10 +1,12 @@
from __future__ import annotations
import json
import re
from pathlib import Path
from typing import Any
from . import DictLikeHandler
from ..loop_analyzer import LoopCandidate
class JsonHandler(DictLikeHandler):
@ -21,17 +23,38 @@ class JsonHandler(DictLikeHandler):
role_prefix: str,
original_text: str | None = None,
) -> str:
"""Original scalar-only template generation."""
if not isinstance(parsed, (dict, list)):
raise TypeError("JSON parser result must be a dict or list")
# As before: ignore original_text and rebuild structurally
return self._generate_json_template(role_prefix, parsed)
def generate_jinja2_template_with_loops(
self,
parsed: Any,
role_prefix: str,
original_text: str | None,
loop_candidates: list[LoopCandidate],
) -> str:
"""Generate template with Jinja2 for loops where appropriate."""
if not isinstance(parsed, (dict, list)):
raise TypeError("JSON parser result must be a dict or list")
# Build loop path set for quick lookup
loop_paths = {candidate.path for candidate in loop_candidates}
return self._generate_json_template_with_loops(
role_prefix, parsed, loop_paths, loop_candidates
)
def _generate_json_template(self, role_prefix: str, data: Any) -> str:
"""
Generate a JSON Jinja2 template from parsed JSON data.
All scalar values are replaced with Jinja expressions whose names are
derived from the path, similar to TOML/YAML.
Uses | tojson filter to preserve types (numbers, booleans, null).
"""
def _walk(obj: Any, path: tuple[str, ...] = ()) -> Any:
@ -39,9 +62,130 @@ class JsonHandler(DictLikeHandler):
return {k: _walk(v, path + (str(k),)) for k, v in obj.items()}
if isinstance(obj, list):
return [_walk(v, path + (str(i),)) for i, v in enumerate(obj)]
# scalar
# scalar - use marker that will be replaced with tojson
var_name = self.make_var_name(role_prefix, path)
return f"{{{{ {var_name} }}}}"
return f"__SCALAR__{var_name}__"
templated = _walk(data)
return json.dumps(templated, indent=2, ensure_ascii=False) + "\n"
json_str = json.dumps(templated, indent=2, ensure_ascii=False)
# Replace scalar markers with Jinja expressions using tojson filter
# This preserves types (numbers stay numbers, booleans stay booleans)
json_str = re.sub(
r'"__SCALAR__([a-zA-Z_][a-zA-Z0-9_]*)__"', r"{{ \1 | tojson }}", json_str
)
return json_str + "\n"
def _generate_json_template_with_loops(
self,
role_prefix: str,
data: Any,
loop_paths: set[tuple[str, ...]],
loop_candidates: list[LoopCandidate],
path: tuple[str, ...] = (),
) -> str:
"""
Generate a JSON Jinja2 template with for loops where appropriate.
"""
def _walk(obj: Any, current_path: tuple[str, ...] = ()) -> Any:
# Check if this path is a loop candidate
if current_path in loop_paths:
# Find the matching candidate
candidate = next(c for c in loop_candidates if c.path == current_path)
collection_var = self.make_var_name(role_prefix, candidate.path)
item_var = candidate.loop_var
if candidate.item_schema == "scalar":
# Simple list of scalars - use special marker that we'll replace
return f"__LOOP_SCALAR__{collection_var}__{item_var}__"
elif candidate.item_schema in ("simple_dict", "nested"):
# List of dicts - use special marker
return f"__LOOP_DICT__{collection_var}__{item_var}__"
if isinstance(obj, dict):
return {k: _walk(v, current_path + (str(k),)) for k, v in obj.items()}
if isinstance(obj, list):
# Check if this list is a loop candidate
if current_path in loop_paths:
# Already handled above
return _walk(obj, current_path)
return [_walk(v, current_path + (str(i),)) for i, v in enumerate(obj)]
# scalar - use marker to preserve type
var_name = self.make_var_name(role_prefix, current_path)
return f"__SCALAR__{var_name}__"
templated = _walk(data, path)
# Convert to JSON string
json_str = json.dumps(templated, indent=2, ensure_ascii=False)
# Replace scalar markers with Jinja expressions using tojson filter
json_str = re.sub(
r'"__SCALAR__([a-zA-Z_][a-zA-Z0-9_]*)__"', r"{{ \1 | tojson }}", json_str
)
# Post-process to replace loop markers with actual Jinja loops
for candidate in loop_candidates:
collection_var = self.make_var_name(role_prefix, candidate.path)
item_var = candidate.loop_var
if candidate.item_schema == "scalar":
# Replace scalar loop marker with Jinja for loop
marker = f'"__LOOP_SCALAR__{collection_var}__{item_var}__"'
replacement = self._generate_json_scalar_loop(
collection_var, item_var, candidate
)
json_str = json_str.replace(marker, replacement)
elif candidate.item_schema in ("simple_dict", "nested"):
# Replace dict loop marker with Jinja for loop
marker = f'"__LOOP_DICT__{collection_var}__{item_var}__"'
replacement = self._generate_json_dict_loop(
collection_var, item_var, candidate
)
json_str = json_str.replace(marker, replacement)
return json_str + "\n"
def _generate_json_scalar_loop(
self, collection_var: str, item_var: str, candidate: LoopCandidate
) -> str:
"""Generate a Jinja for loop for a scalar list in JSON."""
# Use tojson filter to properly handle strings (quotes them) and other types
# Include array brackets around the loop
return (
f"[{{% for {item_var} in {collection_var} %}}"
f"{{{{ {item_var} | tojson }}}}"
f"{{% if not loop.last %}}, {{% endif %}}"
f"{{% endfor %}}]"
)
def _generate_json_dict_loop(
self, collection_var: str, item_var: str, candidate: LoopCandidate
) -> str:
"""Generate a Jinja for loop for a dict list in JSON."""
if not candidate.items:
return "[]"
# Get first item as template
sample_item = candidate.items[0]
# Build the dict template - use tojson for all values to handle types correctly
fields = []
for key, value in sample_item.items():
if key == "_key":
continue
# Use tojson filter to properly serialize all types (strings, numbers, booleans)
fields.append(f'"{key}": {{{{ {item_var}.{key} | tojson }}}}')
dict_template = "{" + ", ".join(fields) + "}"
return (
f"{{% for {item_var} in {collection_var} %}}"
f"{dict_template}"
f"{{% if not loop.last %}}, {{% endif %}}"
f"{{% endfor %}}"
)
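
For context (not from the commit itself), a minimal standalone sketch of the marker-and-regex approach used above, with made-up variable names; json.dumps quotes the markers, and the substitution strips those quotes so the rendered value keeps its JSON type:

import json
import re

data = {"port": "__SCALAR__app_port__", "name": "__SCALAR__app_name__"}
json_str = json.dumps(data, indent=2)
# Swap the quoted markers for Jinja expressions; | tojson re-serialises the
# rendered value with correct JSON typing (numbers unquoted, strings quoted).
json_str = re.sub(
    r'"__SCALAR__([a-zA-Z_][a-zA-Z0-9_]*)__"', r"{{ \1 | tojson }}", json_str
)
print(json_str)
# {
#   "port": {{ app_port | tojson }},
#   "name": {{ app_name | tojson }}
# }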

View file

@ -5,6 +5,7 @@ from pathlib import Path
from typing import Any
from . import DictLikeHandler
from ..loop_analyzer import LoopCandidate
class TomlHandler(DictLikeHandler):
@ -25,12 +26,31 @@ class TomlHandler(DictLikeHandler):
role_prefix: str,
original_text: str | None = None,
) -> str:
"""Original scalar-only template generation."""
if original_text is not None:
return self._generate_toml_template_from_text(role_prefix, original_text)
if not isinstance(parsed, dict):
raise TypeError("TOML parser result must be a dict")
return self._generate_toml_template(role_prefix, parsed)
def generate_jinja2_template_with_loops(
self,
parsed: Any,
role_prefix: str,
original_text: str | None,
loop_candidates: list[LoopCandidate],
) -> str:
"""Generate template with Jinja2 for loops where appropriate."""
if original_text is not None:
return self._generate_toml_template_with_loops_from_text(
role_prefix, original_text, loop_candidates
)
if not isinstance(parsed, dict):
raise TypeError("TOML parser result must be a dict")
return self._generate_toml_template_with_loops(
role_prefix, parsed, loop_candidates
)
def _generate_toml_template(self, role_prefix: str, data: dict[str, Any]) -> str:
"""
Generate a TOML Jinja2 template from parsed TOML dict.
@ -45,6 +65,89 @@ class TomlHandler(DictLikeHandler):
var_name = self.make_var_name(role_prefix, path + (key,))
if isinstance(value, str):
lines.append(f'{key} = "{{{{ {var_name} }}}}"')
elif isinstance(value, bool):
# Booleans need | lower filter (Python True/False → TOML true/false)
lines.append(f"{key} = {{{{ {var_name} | lower }}}}")
else:
lines.append(f"{key} = {{{{ {var_name} }}}}")
def walk(obj: dict[str, Any], path: tuple[str, ...] = ()) -> None:
scalar_items = {k: v for k, v in obj.items() if not isinstance(v, dict)}
nested_items = {k: v for k, v in obj.items() if isinstance(v, dict)}
if path:
header = ".".join(path)
lines.append(f"[{header}]")
for key, val in scalar_items.items():
emit_kv(path, str(key), val)
if scalar_items:
lines.append("")
for key, val in nested_items.items():
walk(val, path + (str(key),))
# Root scalars (no table header)
root_scalars = {k: v for k, v in data.items() if not isinstance(v, dict)}
for key, val in root_scalars.items():
emit_kv((), str(key), val)
if root_scalars:
lines.append("")
# Tables
for key, val in data.items():
if isinstance(val, dict):
walk(val, (str(key),))
return "\n".join(lines).rstrip() + "\n"
def _generate_toml_template_with_loops(
self,
role_prefix: str,
data: dict[str, Any],
loop_candidates: list[LoopCandidate],
) -> str:
"""
Generate a TOML Jinja2 template with for loops where appropriate.
"""
lines: list[str] = []
loop_paths = {candidate.path for candidate in loop_candidates}
def emit_kv(path: tuple[str, ...], key: str, value: Any) -> None:
var_name = self.make_var_name(role_prefix, path + (key,))
if isinstance(value, str):
lines.append(f'{key} = "{{{{ {var_name} }}}}"')
elif isinstance(value, bool):
# Booleans need | lower filter (Python True/False → TOML true/false)
lines.append(f"{key} = {{{{ {var_name} | lower }}}}")
elif isinstance(value, list):
# Check if this list is a loop candidate
if path + (key,) in loop_paths:
# Find the matching candidate
candidate = next(
c for c in loop_candidates if c.path == path + (key,)
)
collection_var = self.make_var_name(role_prefix, candidate.path)
item_var = candidate.loop_var
if candidate.item_schema == "scalar":
# Scalar list loop
lines.append(
f"{key} = ["
f"{{% for {item_var} in {collection_var} %}}"
f"{{{{ {item_var} }}}}"
f"{{% if not loop.last %}}, {{% endif %}}"
f"{{% endfor %}}"
f"]"
)
elif candidate.item_schema in ("simple_dict", "nested"):
# Dict list loop - TOML array of tables
# This is complex for TOML, using simplified approach
lines.append(f"{key} = {{{{ {var_name} | tojson }}}}")
else:
# Not a loop, treat as regular variable
lines.append(f"{key} = {{{{ {var_name} }}}}")
else:
lines.append(f"{key} = {{{{ {var_name} }}}}")
@ -173,6 +276,236 @@ class TomlHandler(DictLikeHandler):
nested_var = self.make_var_name(role_prefix, nested_path)
if isinstance(sub_val, str):
inner_bits.append(f'{sub_key} = "{{{{ {nested_var} }}}}"')
elif isinstance(sub_val, bool):
inner_bits.append(
f"{sub_key} = {{{{ {nested_var} | lower }}}}"
)
else:
inner_bits.append(f"{sub_key} = {{{ {nested_var} }}}")
replacement_value = "{ " + ", ".join(inner_bits) + " }"
new_content = (
before_eq + "=" + leading_ws + replacement_value + comment_part
)
out_lines.append(new_content + newline)
continue
# If parsing fails, fall through to normal handling
# Normal scalar value handling (including bools, numbers, strings)
var_name = self.make_var_name(role_prefix, path)
use_quotes = (
len(raw_value) >= 2
and raw_value[0] == raw_value[-1]
and raw_value[0] in {'"', "'"}
)
# Check if value is a boolean in the text
is_bool = raw_value.strip().lower() in ("true", "false")
if use_quotes:
quote_char = raw_value[0]
replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}"
elif is_bool:
replacement_value = f"{{{{ {var_name} | lower }}}}"
else:
replacement_value = f"{{{{ {var_name} }}}}"
new_content = (
before_eq + "=" + leading_ws + replacement_value + comment_part
)
out_lines.append(new_content + newline)
return "".join(out_lines)
def _generate_toml_template_with_loops_from_text(
self, role_prefix: str, text: str, loop_candidates: list[LoopCandidate]
) -> str:
"""
Generate a Jinja2 template for a TOML file with loop support.
"""
loop_paths = {candidate.path for candidate in loop_candidates}
lines = text.splitlines(keepends=True)
current_table: tuple[str, ...] = ()
out_lines: list[str] = []
skip_until_next_table = (
False # Track when we're inside a looped array-of-tables
)
for raw_line in lines:
line = raw_line
stripped = line.lstrip()
# Blank or pure comment
if not stripped or stripped.startswith("#"):
# Only output if we're not skipping
if not skip_until_next_table:
out_lines.append(raw_line)
continue
# Table header: [server] or [server.tls] or [[array.of.tables]]
if stripped.startswith("[") and "]" in stripped:
header = stripped
# Check if it's array-of-tables ([[name]]) or regular table ([name])
is_array_table = header.startswith("[[") and "]]" in header
if is_array_table:
# Extract content between [[ and ]]
start = header.find("[[") + 2
end = header.find("]]", start)
inner = header[start:end].strip() if end != -1 else ""
else:
# Extract content between [ and ]
start = header.find("[") + 1
end = header.find("]", start)
inner = header[start:end].strip() if end != -1 else ""
if inner:
parts = [p.strip() for p in inner.split(".") if p.strip()]
table_path = tuple(parts)
# Check if this is an array-of-tables that's a loop candidate
if is_array_table and table_path in loop_paths:
# If we're already skipping this table, this is a subsequent occurrence
if skip_until_next_table and current_table == table_path:
# This is a duplicate [[table]] - skip it
continue
# This is the first occurrence - generate the loop
current_table = table_path
candidate = next(
c for c in loop_candidates if c.path == table_path
)
# Generate the loop header
collection_var = self.make_var_name(role_prefix, candidate.path)
item_var = candidate.loop_var
# Get sample item to build template
if candidate.items:
sample_item = candidate.items[0]
# Build loop
out_lines.append(
f"{{% for {item_var} in {collection_var} %}}\n"
)
out_lines.append(f"[[{'.'.join(table_path)}]]\n")
# Add fields from sample item
for key, value in sample_item.items():
if key == "_key":
continue
if isinstance(value, str):
out_lines.append(
f'{key} = "{{{{ {item_var}.{key} }}}}"\n'
)
else:
out_lines.append(
f"{key} = {{{{ {item_var}.{key} }}}}\n"
)
out_lines.append("{% endfor %}\n")
# Skip all content until the next different table
skip_until_next_table = True
continue
else:
# Regular table or non-loop array - reset skip flag if it's a different table
if current_table != table_path:
skip_until_next_table = False
current_table = table_path
out_lines.append(raw_line)
continue
# If we're inside a skipped array-of-tables section, skip this line
if skip_until_next_table:
continue
# Try key = value
newline = ""
content = raw_line
if content.endswith("\r\n"):
newline = "\r\n"
content = content[:-2]
elif content.endswith("\n"):
newline = content[-1]
content = content[:-1]
eq_index = content.find("=")
if eq_index == -1:
out_lines.append(raw_line)
continue
before_eq = content[:eq_index]
after_eq = content[eq_index + 1 :]
key = before_eq.strip()
if not key:
out_lines.append(raw_line)
continue
# Whitespace after '='
value_ws_len = len(after_eq) - len(after_eq.lstrip(" \t"))
leading_ws = after_eq[:value_ws_len]
value_and_comment = after_eq[value_ws_len:]
value_part, comment_part = self._split_inline_comment(
value_and_comment, {"#"}
)
raw_value = value_part.strip()
# Path for this key (table + key)
path = current_table + (key,)
# Check if this path is a loop candidate
if path in loop_paths:
candidate = next(c for c in loop_candidates if c.path == path)
collection_var = self.make_var_name(role_prefix, candidate.path)
item_var = candidate.loop_var
if candidate.item_schema == "scalar":
# Scalar list loop
replacement_value = (
f"["
f"{{% for {item_var} in {collection_var} %}}"
f"{{{{ {item_var} }}}}"
f"{{% if not loop.last %}}, {{% endif %}}"
f"{{% endfor %}}"
f"]"
)
else:
# Dict/nested loop - use tojson filter for complex arrays
replacement_value = f"{{{{ {collection_var} | tojson }}}}"
new_content = (
before_eq + "=" + leading_ws + replacement_value + comment_part
)
out_lines.append(new_content + newline)
continue
# Special case: inline table
if (
raw_value.startswith("{")
and raw_value.endswith("}")
and tomllib is not None
):
try:
# Parse the inline table as a tiny TOML document
mini_source = "table = " + raw_value + "\n"
mini_data = tomllib.loads(mini_source)["table"]
except Exception:
mini_data = None
if isinstance(mini_data, dict):
inner_bits: list[str] = []
for sub_key, sub_val in mini_data.items():
nested_path = path + (sub_key,)
nested_var = self.make_var_name(role_prefix, nested_path)
if isinstance(sub_val, str):
inner_bits.append(f'{sub_key} = "{{{{ {nested_var} }}}}"')
elif isinstance(sub_val, bool):
inner_bits.append(
f"{sub_key} = {{{{ {nested_var} | lower }}}}"
)
else:
inner_bits.append(f"{sub_key} = {{{{ {nested_var} }}}}")
replacement_value = "{ " + ", ".join(inner_bits) + " }"
@ -191,9 +524,14 @@ class TomlHandler(DictLikeHandler):
and raw_value[0] in {'"', "'"}
)
# Check if value is a boolean in the text
is_bool = raw_value.strip().lower() in ("true", "false")
if use_quotes:
quote_char = raw_value[0]
replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}"
elif is_bool:
replacement_value = f"{{{{ {var_name} | lower }}}}"
else:
replacement_value = f"{{{{ {var_name} }}}}"
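
As a rough illustration (not part of the commit, variable names invented), this is how fragments generated by the TOML handler above are expected to render with Jinja2: booleans lowered to valid TOML, and scalar lists expanded by a loop:

from jinja2 import Environment, StrictUndefined

env = Environment(undefined=StrictUndefined)
tmpl = (
    "enabled = {{ app_enabled | lower }}\n"
    "ports = [{% for port in app_ports %}{{ port }}"
    "{% if not loop.last %}, {% endif %}{% endfor %}]\n"
)
print(env.from_string(tmpl).render(app_enabled=True, app_ports=[8080, 8081]))
# enabled = true
# ports = [8080, 8081]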

View file

@ -418,8 +418,8 @@ class XmlHandler(BaseHandler):
# Use simple variable reference - attributes should always exist
elem.set(attr_name, f"{{{{ {loop_var}.{attr_name} }}}}")
elif key == "_text":
# Simple text content
elem.text = f"{{{{ {loop_var} }}}}"
# Simple text content - use ._text accessor for dict-based items
elem.text = f"{{{{ {loop_var}._text }}}}"
elif key == "value":
# Text with attributes/children
elem.text = f"{{{{ {loop_var}.value }}}}"
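
A hedged sketch (not from the commit) of why the ._text accessor matters when looped XML items are dicts: plain Jinja2 falls back from attribute access to item lookup, so server._text resolves the dict key:

from jinja2 import Environment, StrictUndefined

env = Environment(undefined=StrictUndefined)
tmpl = (
    "{% for server in app_servers %}"
    "<server>{{ server._text }}</server>"
    "{% endfor %}"
)
print(env.from_string(tmpl).render(
    app_servers=[{"_text": "server1"}, {"_text": "server2"}]
))
# <server>server1</server><server>server2</server>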

View file

@ -124,7 +124,8 @@ class YamlHandler(DictLikeHandler):
replacement = f"{{{{ {var_name} }}}}"
leading = rest[: len(rest) - len(rest.lstrip(" \t"))]
new_stripped = f"{key}: {leading}{replacement}{comment_part}"
new_rest = f"{leading}{replacement}{comment_part}"
new_stripped = f"{key}:{new_rest}"
out_lines.append(
" " * indent
+ new_stripped
@ -281,7 +282,8 @@ class YamlHandler(DictLikeHandler):
replacement = f"{{{{ {var_name} }}}}"
leading = rest[: len(rest) - len(rest.lstrip(" \t"))]
new_stripped = f"{key}: {leading}{replacement}{comment_part}"
new_rest = f"{leading}{replacement}{comment_part}"
new_stripped = f"{key}:{new_rest}"
out_lines.append(
" " * indent
+ new_stripped
@ -378,10 +380,10 @@ class YamlHandler(DictLikeHandler):
# Dict-style: key: {% for ... %}
key = candidate.path[-1] if candidate.path else "items"
lines.append(f"{indent_str}{key}:")
lines.append(f"{indent_str} {{% for {item_var} in {collection_var} %}}")
lines.append(f"{indent_str} {{% for {item_var} in {collection_var} -%}}")
else:
# List-style: just the loop
lines.append(f"{indent_str}{{% for {item_var} in {collection_var} %}}")
lines.append(f"{indent_str}{{% for {item_var} in {collection_var} -%}}")
# Generate template for item structure
if candidate.items:
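
For illustration (not part of the commit), the effect of the whitespace-trimming change above: with a plain %} the newline after the for tag belongs to the loop body and is emitted on every iteration, leaving a blank line before each item, whereas -%} trims it and the rendered YAML stays clean:

import yaml
from jinja2 import Environment, StrictUndefined

env = Environment(undefined=StrictUndefined)
tmpl = "servers:\n  {% for server in app_servers -%}\n  - {{ server }}\n  {% endfor %}"
rendered = env.from_string(tmpl).render(app_servers=["web1", "web2"])
assert yaml.safe_load(rendered) == {"servers": ["web1", "web2"]}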

View file

@ -85,14 +85,20 @@ class LoopAnalyzer:
self._analyze_xml(parsed)
elif fmt in ("yaml", "json", "toml"):
self._analyze_dict_like(parsed, path=())
# INI files are typically flat key-value, not suitable for loops
elif fmt == "ini":
# INI files are typically flat key-value, not suitable for loops
pass
# Sort by path depth (process parent structures before children)
self.candidates.sort(key=lambda c: len(c.path))
return self.candidates
def _analyze_dict_like(
self, obj: Any, path: tuple[str, ...], depth: int = 0
self,
obj: Any,
path: tuple[str, ...],
depth: int = 0,
parent_is_list: bool = False,
) -> None:
"""Recursively analyze dict/list structures."""
@ -111,9 +117,16 @@ class LoopAnalyzer:
# Recurse into dict values
for key, value in obj.items():
self._analyze_dict_like(value, path + (str(key),), depth + 1)
self._analyze_dict_like(
value, path + (str(key),), depth + 1, parent_is_list=False
)
elif isinstance(obj, list):
# Don't create loop candidates for nested lists (lists inside lists)
# These are too complex for clean template generation and should fall back to scalar handling
if parent_is_list:
return
# Check if this list is homogeneous
if len(obj) >= self.MIN_ITEMS_FOR_LOOP:
candidate = self._check_list_collection(obj, path)
@ -123,8 +136,11 @@ class LoopAnalyzer:
return
# If not a good loop candidate, recurse into items
# Pass parent_is_list=True so nested lists won't create loop candidates
for i, item in enumerate(obj):
self._analyze_dict_like(item, path + (str(i),), depth + 1)
self._analyze_dict_like(
item, path + (str(i),), depth + 1, parent_is_list=True
)
def _check_list_collection(
self, items: list[Any], path: tuple[str, ...]
@ -185,45 +201,55 @@ class LoopAnalyzer:
Example: {"server1": {...}, "server2": {...}} where all values
have the same structure.
NOTE: Currently disabled for TOML compatibility. TOML's dict-of-tables
syntax ([servers.alpha], [servers.beta]) cannot be easily converted to
loops without restructuring the entire TOML format. To maintain consistency
between Ansible YAML and Jinja2 templates, we treat these as scalars.
"""
if not obj:
return None
values = list(obj.values())
# Check type homogeneity
value_types = [type(v).__name__ for v in values]
type_counts = Counter(value_types)
if len(type_counts) != 1:
return None
value_type = value_types[0]
# Only interested in dict values for dict collections
# (scalar-valued dicts stay as scalars)
if value_type != "dict":
return None
# Check structural homogeneity
schema = self._analyze_dict_schema(values)
if schema in ("simple_dict", "homogeneous"):
confidence = 0.9 if schema == "simple_dict" else 0.8
# Convert dict to list of items with 'key' added
items_with_keys = [{"_key": k, **v} for k, v in obj.items()]
return LoopCandidate(
path=path,
loop_var=self._derive_loop_var(path, singular=True),
items=items_with_keys,
item_schema="simple_dict",
confidence=confidence,
)
# TODO: Re-enable this if we implement proper dict-of-tables loop generation
# For now, return None to use scalar handling
return None
# Original logic preserved below for reference:
# if not obj:
# return None
#
# values = list(obj.values())
#
# # Check type homogeneity
# value_types = [type(v).__name__ for v in values]
# type_counts = Counter(value_types)
#
# if len(type_counts) != 1:
# return None
#
# value_type = value_types[0]
#
# # Only interested in dict values for dict collections
# # (scalar-valued dicts stay as scalars)
# if value_type != "dict":
# return None
#
# # Check structural homogeneity
# schema = self._analyze_dict_schema(values)
# if schema in ("simple_dict", "homogeneous"):
# confidence = 0.9 if schema == "simple_dict" else 0.8
#
# # Convert dict to list of items with 'key' added
# items_with_keys = [{"_key": k, **v} for k, v in obj.items()]
#
# return LoopCandidate(
# path=path,
# loop_var=self._derive_loop_var(path, singular=True),
# items=items_with_keys,
# item_schema="simple_dict",
# confidence=confidence,
# )
#
# return None
def _analyze_dict_schema(
self, dicts: list[dict[str, Any]]
) -> Literal["simple_dict", "homogeneous", "heterogeneous"]:
@ -316,7 +342,7 @@ class LoopAnalyzer:
XML is particularly suited for loops when we have repeated sibling elements.
"""
import xml.etree.ElementTree as ET
import xml.etree.ElementTree as ET # nosec B405
if not isinstance(root, ET.Element):
return
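
For orientation (illustrative, not from the commit), the rough shape of what the analyzer produces, based on how analyze_loops and LoopCandidate are used elsewhere in this change; the exact loop_var name is derived by the analyzer, so treat it as an assumption:

import json
from jinjaturtle.core import analyze_loops

parsed = json.loads('{"servers": ["web1", "web2", "web3"]}')
for candidate in analyze_loops("json", parsed):
    # Fields used throughout this commit: path, loop_var, items,
    # item_schema ('scalar', 'simple_dict' or 'nested') and confidence.
    print(candidate.path, candidate.loop_var, candidate.item_schema)
# Expected something like: ('servers',) server scalar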

View file

@ -1,10 +1,6 @@
from __future__ import annotations
import sys
from pathlib import Path
import pytest
from jinjaturtle import cli
SAMPLES_DIR = Path(__file__).parent / "samples"

View file

@ -168,8 +168,8 @@ def test_fallback_str_representer_for_unknown_type():
def test_normalize_default_value_bool_inputs_are_stringified():
"""
Real boolean values should be turned into quoted 'true'/'false' strings
by _normalize_default_value via generate_ansible_yaml.
Boolean values are now preserved as booleans in YAML (not stringified).
This supports proper type preservation for JSON and other formats.
"""
flat_items = [
(("section", "flag_true"), True),
@ -178,8 +178,9 @@ def test_normalize_default_value_bool_inputs_are_stringified():
ansible_yaml = generate_ansible_yaml("role", flat_items)
data = yaml.safe_load(ansible_yaml)
assert data["role_section_flag_true"] == "true"
assert data["role_section_flag_false"] == "false"
# Booleans are now preserved as booleans
assert data["role_section_flag_true"] is True
assert data["role_section_flag_false"] is False
def test_flatten_config_unsupported_format():

View file

@ -2,7 +2,6 @@ from __future__ import annotations
from pathlib import Path
import json
import pytest
import yaml
@ -10,6 +9,8 @@ from jinjaturtle.core import (
parse_config,
flatten_config,
generate_ansible_yaml,
analyze_loops,
generate_jinja2_template,
)
from jinjaturtle.handlers.json import JsonHandler
@ -23,30 +24,34 @@ def test_json_roundtrip():
fmt, parsed = parse_config(json_path)
assert fmt == "json"
flat_items = flatten_config(fmt, parsed)
ansible_yaml = generate_ansible_yaml("foobar", flat_items)
# With loop detection
loop_candidates = analyze_loops(fmt, parsed)
flat_items = flatten_config(fmt, parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("foobar", flat_items, loop_candidates)
defaults = yaml.safe_load(ansible_yaml)
# Defaults: nested keys and list indices
# Defaults: nested keys
assert defaults["foobar_foo"] == "bar"
assert defaults["foobar_nested_a"] == 1
# Bool normalized to string "true"
assert defaults["foobar_nested_b"] == "true"
assert defaults["foobar_list_0"] == 10
assert defaults["foobar_list_1"] == 20
# Booleans are now preserved as booleans (not stringified)
assert defaults["foobar_nested_b"] is True
# List should be a list (not flattened to scalars)
assert defaults["foobar_list"] == [10, 20]
# Template generation is done via JsonHandler.generate_jinja2_template; we just
# make sure it produces a structure with the expected placeholders.
handler = JsonHandler()
templated = json.loads(
handler.generate_jinja2_template(parsed, role_prefix="foobar")
)
# Template generation with loops
template = generate_jinja2_template("json", parsed, "foobar", None, loop_candidates)
assert templated["foo"] == "{{ foobar_foo }}"
assert "foobar_nested_a" in str(templated)
assert "foobar_nested_b" in str(templated)
assert "foobar_list_0" in str(templated)
assert "foobar_list_1" in str(templated)
# Template should use | tojson for type preservation
assert "{{ foobar_foo | tojson }}" in template
assert "{{ foobar_nested_a | tojson }}" in template
assert "{{ foobar_nested_b | tojson }}" in template
# List should use loop (not scalar indices)
assert "{% for" in template
assert "foobar_list" in template
# Should NOT have scalar indices
assert "foobar_list_0" not in template
assert "foobar_list_1" not in template
def test_generate_jinja2_template_json_type_error():

566
tests/test_roundtrip.py Normal file
View file

@ -0,0 +1,566 @@
"""
Roundtrip tests: original config → generated template/YAML → regenerated config → compare.
These tests verify that:
1. Generated Jinja2 template + Ansible YAML can reproduce the original config
2. The regenerated config is semantically equivalent (allowing whitespace differences)
3. No data loss occurs during the template generation process
This is the ultimate validation - if the roundtrip works, the templates are correct.
"""
from __future__ import annotations
import json
import yaml
from pathlib import Path
from typing import Any
from jinja2 import Environment, StrictUndefined
import pytest
from jinjaturtle.core import (
parse_config,
analyze_loops,
flatten_config,
generate_ansible_yaml,
generate_jinja2_template,
)
def render_template(template: str, variables: dict[str, Any]) -> str:
"""Render a Jinja2 template with variables."""
env = Environment(undefined=StrictUndefined)
jinja_template = env.from_string(template)
return jinja_template.render(variables)
class TestRoundtripJSON:
"""Roundtrip tests for JSON files."""
def test_foo_json_roundtrip(self):
"""Test foo.json can be perfectly regenerated from template."""
samples_dir = Path(__file__).parent / "samples"
json_file = samples_dir / "foo.json"
if not json_file.exists():
pytest.skip("foo.json not found")
# Read original
original_text = json_file.read_text()
original_data = json.loads(original_text)
# Generate template and YAML
fmt, parsed = parse_config(json_file)
loop_candidates = analyze_loops(fmt, parsed)
flat_items = flatten_config(fmt, parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(fmt, parsed, "test", None, loop_candidates)
# Load variables from YAML
variables = yaml.safe_load(ansible_yaml)
# Render template
regenerated_text = render_template(template, variables)
regenerated_data = json.loads(regenerated_text)
# Compare data structures (should match exactly)
assert regenerated_data == original_data, (
f"Regenerated JSON differs from original\n"
f"Original: {json.dumps(original_data, indent=2, sort_keys=True)}\n"
f"Regenerated: {json.dumps(regenerated_data, indent=2, sort_keys=True)}"
)
def test_json_all_types_roundtrip(self):
"""Test JSON with all data types roundtrips perfectly."""
json_text = """
{
"string": "value",
"number": 42,
"float": 3.14,
"boolean": true,
"false_val": false,
"null_value": null,
"array": [1, 2, 3],
"object": {
"nested": "data"
}
}
"""
original_data = json.loads(json_text)
# Generate template and YAML
loop_candidates = analyze_loops("json", original_data)
flat_items = flatten_config("json", original_data, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
"json", original_data, "test", None, loop_candidates
)
# Render template
variables = yaml.safe_load(ansible_yaml)
regenerated_text = render_template(template, variables)
regenerated_data = json.loads(regenerated_text)
# Should match exactly
assert regenerated_data == original_data
class TestRoundtripYAML:
"""Roundtrip tests for YAML files."""
def test_bar_yaml_roundtrip(self):
"""Test bar.yaml can be regenerated from template."""
samples_dir = Path(__file__).parent / "samples"
yaml_file = samples_dir / "bar.yaml"
if not yaml_file.exists():
pytest.skip("bar.yaml not found")
# Read original
original_text = yaml_file.read_text()
original_data = yaml.safe_load(original_text)
# Generate template and YAML
fmt, parsed = parse_config(yaml_file)
loop_candidates = analyze_loops(fmt, parsed)
flat_items = flatten_config(fmt, parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
fmt, parsed, "test", original_text, loop_candidates
)
# Load variables from YAML
variables = yaml.safe_load(ansible_yaml)
# Render template
regenerated_text = render_template(template, variables)
regenerated_data = yaml.safe_load(regenerated_text)
# Compare data structures
assert regenerated_data == original_data, (
f"Regenerated YAML differs from original\n"
f"Original: {original_data}\n"
f"Regenerated: {regenerated_data}"
)
def test_yaml_with_lists_roundtrip(self):
"""Test YAML with various list structures."""
yaml_text = """
name: myapp
simple_list:
- item1
- item2
- item3
list_of_dicts:
- name: first
value: 1
- name: second
value: 2
nested:
inner_list:
- a
- b
"""
original_data = yaml.safe_load(yaml_text)
# Generate template and YAML
loop_candidates = analyze_loops("yaml", original_data)
flat_items = flatten_config("yaml", original_data, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
"yaml", original_data, "test", yaml_text, loop_candidates
)
# Render template
variables = yaml.safe_load(ansible_yaml)
regenerated_text = render_template(template, variables)
regenerated_data = yaml.safe_load(regenerated_text)
# Compare
assert regenerated_data == original_data
class TestRoundtripTOML:
"""Roundtrip tests for TOML files."""
def test_tom_toml_roundtrip(self):
"""Test tom.toml can be regenerated from template."""
samples_dir = Path(__file__).parent / "samples"
toml_file = samples_dir / "tom.toml"
if not toml_file.exists():
pytest.skip("tom.toml not found")
# Read original
original_text = toml_file.read_text()
import tomllib
original_data = tomllib.loads(original_text)
# Generate template and YAML
fmt, parsed = parse_config(toml_file)
loop_candidates = analyze_loops(fmt, parsed)
flat_items = flatten_config(fmt, parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
fmt, parsed, "test", original_text, loop_candidates
)
# Load variables from YAML
variables = yaml.safe_load(ansible_yaml)
# Render template
regenerated_text = render_template(template, variables)
regenerated_data = tomllib.loads(regenerated_text)
# Compare data structures
# Note: TOML datetime objects need special handling
assert _compare_toml_data(regenerated_data, original_data), (
f"Regenerated TOML differs from original\n"
f"Original: {original_data}\n"
f"Regenerated: {regenerated_data}"
)
def test_toml_with_arrays_roundtrip(self):
"""Test TOML with inline arrays and array-of-tables."""
toml_text = """
name = "test"
ports = [8080, 8081, 8082]
[[database]]
host = "db1.example.com"
port = 5432
[[database]]
host = "db2.example.com"
port = 5433
"""
import tomllib
original_data = tomllib.loads(toml_text)
# Generate template and YAML
loop_candidates = analyze_loops("toml", original_data)
flat_items = flatten_config("toml", original_data, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
"toml", original_data, "test", toml_text, loop_candidates
)
# Render template
variables = yaml.safe_load(ansible_yaml)
regenerated_text = render_template(template, variables)
regenerated_data = tomllib.loads(regenerated_text)
# Compare
assert regenerated_data == original_data
class TestRoundtripXML:
"""Roundtrip tests for XML files."""
def test_xml_simple_roundtrip(self):
"""Test simple XML can be regenerated."""
xml_text = """<?xml version="1.0"?>
<config>
<name>test</name>
<port>8080</port>
<server>server1</server>
<server>server2</server>
<server>server3</server>
</config>
"""
import xml.etree.ElementTree as ET
original_root = ET.fromstring(xml_text)
# Generate template and YAML
fmt = "xml"
loop_candidates = analyze_loops(fmt, original_root)
flat_items = flatten_config(fmt, original_root, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
fmt, original_root, "test", xml_text, loop_candidates
)
# Render template
variables = yaml.safe_load(ansible_yaml)
regenerated_text = render_template(template, variables)
# Parse regenerated XML
regenerated_root = ET.fromstring(regenerated_text)
# Compare XML structures (ignore insignificant whitespace)
assert _xml_elements_equal(
original_root, regenerated_root, ignore_whitespace=True
), (
f"Regenerated XML differs from original\n"
f"Original: {ET.tostring(original_root, encoding='unicode')}\n"
f"Regenerated: {ET.tostring(regenerated_root, encoding='unicode')}"
)
def test_ossec_xml_roundtrip(self):
"""Test ossec.xml (complex real-world XML) roundtrip."""
samples_dir = Path(__file__).parent / "samples"
xml_file = samples_dir / "ossec.xml"
if not xml_file.exists():
pytest.skip("ossec.xml not found")
# Read original
original_text = xml_file.read_text()
import xml.etree.ElementTree as ET
original_root = ET.fromstring(original_text)
# Generate template and YAML
fmt, parsed = parse_config(xml_file)
loop_candidates = analyze_loops(fmt, parsed)
flat_items = flatten_config(fmt, parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
fmt, parsed, "test", original_text, loop_candidates
)
# Load variables and render
variables = yaml.safe_load(ansible_yaml)
regenerated_text = render_template(template, variables)
# Parse regenerated
regenerated_root = ET.fromstring(regenerated_text)
# Compare - for complex XML, we compare structure not exact text
assert _xml_elements_equal(
original_root, regenerated_root, ignore_whitespace=True
)
class TestRoundtripINI:
"""Roundtrip tests for INI files."""
def test_ini_simple_roundtrip(self):
"""Test simple INI can be regenerated."""
ini_text = """[section1]
key1 = value1
key2 = value2
[section2]
key3 = value3
"""
from configparser import ConfigParser
original_config = ConfigParser()
original_config.read_string(ini_text)
# Generate template and YAML
fmt = "ini"
loop_candidates = analyze_loops(fmt, original_config)
flat_items = flatten_config(fmt, original_config, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
fmt, original_config, "test", ini_text, loop_candidates
)
# Render template
variables = yaml.safe_load(ansible_yaml)
regenerated_text = render_template(template, variables)
# Parse regenerated
regenerated_config = ConfigParser()
regenerated_config.read_string(regenerated_text)
# Compare
assert _ini_configs_equal(original_config, regenerated_config)
class TestRoundtripEdgeCases:
"""Roundtrip tests for edge cases and special scenarios."""
def test_empty_lists_roundtrip(self):
"""Test handling of empty lists."""
json_text = '{"items": []}'
original_data = json.loads(json_text)
loop_candidates = analyze_loops("json", original_data)
flat_items = flatten_config("json", original_data, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
"json", original_data, "test", None, loop_candidates
)
variables = yaml.safe_load(ansible_yaml)
regenerated_text = render_template(template, variables)
regenerated_data = json.loads(regenerated_text)
assert regenerated_data == original_data
def test_special_characters_roundtrip(self):
"""Test handling of special characters."""
json_data = {
"quote": 'He said "hello"',
"backslash": "path\\to\\file",
"newline": "line1\nline2",
"unicode": "emoji: 🚀",
}
loop_candidates = analyze_loops("json", json_data)
flat_items = flatten_config("json", json_data, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
"json", json_data, "test", None, loop_candidates
)
variables = yaml.safe_load(ansible_yaml)
regenerated_text = render_template(template, variables)
regenerated_data = json.loads(regenerated_text)
assert regenerated_data == json_data
def test_numeric_types_roundtrip(self):
"""Test preservation of numeric types."""
json_data = {
"int": 42,
"float": 3.14159,
"negative": -100,
"zero": 0,
"large": 9999999999,
}
loop_candidates = analyze_loops("json", json_data)
flat_items = flatten_config("json", json_data, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
"json", json_data, "test", None, loop_candidates
)
variables = yaml.safe_load(ansible_yaml)
regenerated_text = render_template(template, variables)
regenerated_data = json.loads(regenerated_text)
assert regenerated_data == json_data
def test_boolean_preservation_roundtrip(self):
"""Test that booleans are preserved correctly."""
yaml_text = """
enabled: true
disabled: false
"""
original_data = yaml.safe_load(yaml_text)
loop_candidates = analyze_loops("yaml", original_data)
flat_items = flatten_config("yaml", original_data, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
"yaml", original_data, "test", yaml_text, loop_candidates
)
variables = yaml.safe_load(ansible_yaml)
regenerated_text = render_template(template, variables)
regenerated_data = yaml.safe_load(regenerated_text)
# Both should be actual booleans
assert regenerated_data["enabled"] is True
assert regenerated_data["disabled"] is False
# Helper functions
def _compare_toml_data(data1: Any, data2: Any) -> bool:
"""Compare TOML data, handling datetime objects."""
import datetime
if type(data1) != type(data2):
return False
if isinstance(data1, dict):
if set(data1.keys()) != set(data2.keys()):
return False
return all(_compare_toml_data(data1[k], data2[k]) for k in data1.keys())
elif isinstance(data1, list):
if len(data1) != len(data2):
return False
return all(_compare_toml_data(v1, v2) for v1, v2 in zip(data1, data2))
elif isinstance(data1, datetime.datetime):
# Compare datetime objects
return data1 == data2
else:
return data1 == data2
def _xml_elements_equal(elem1, elem2, ignore_whitespace: bool = False) -> bool:
"""Compare two XML elements for equality."""
# Compare tags
if elem1.tag != elem2.tag:
return False
# Compare attributes
if elem1.attrib != elem2.attrib:
return False
# Compare text
text1 = (elem1.text or "").strip() if ignore_whitespace else (elem1.text or "")
text2 = (elem2.text or "").strip() if ignore_whitespace else (elem2.text or "")
if text1 != text2:
return False
# Compare tail
tail1 = (elem1.tail or "").strip() if ignore_whitespace else (elem1.tail or "")
tail2 = (elem2.tail or "").strip() if ignore_whitespace else (elem2.tail or "")
if tail1 != tail2:
return False
# Compare children
children1 = list(elem1)
children2 = list(elem2)
if len(children1) != len(children2):
return False
return all(
_xml_elements_equal(c1, c2, ignore_whitespace)
for c1, c2 in zip(children1, children2)
)
def _ini_configs_equal(config1, config2) -> bool:
"""Compare two ConfigParser objects for equality."""
if set(config1.sections()) != set(config2.sections()):
return False
for section in config1.sections():
if set(config1.options(section)) != set(config2.options(section)):
return False
for option in config1.options(section):
if config1.get(section, option) != config2.get(section, option):
return False
return True
if __name__ == "__main__":
pytest.main([__file__, "-v"])

View file

@ -0,0 +1,558 @@
"""
Tests to ensure all Jinja2 template variables exist in the Ansible YAML.
These tests catch the bug where templates reference variables that don't exist
because the YAML has a list but the template uses scalar references (or vice versa).
"""
from __future__ import annotations
import re
from pathlib import Path
from typing import Set
import yaml
import pytest
from jinjaturtle.core import (
parse_config,
analyze_loops,
flatten_config,
generate_ansible_yaml,
generate_jinja2_template,
)
def extract_jinja_variables(template: str) -> Set[str]:
"""
Extract all Jinja2 variable names from a template that must exist in YAML.
Extracts variables from:
- {{ variable_name }}
- {{ variable.field }}
- {% for item in collection %}
Returns only the base variable names that must be defined in YAML.
Filters out loop variables (the 'item' part of 'for item in collection').
"""
variables = set()
# First, find all loop variables (these are defined by the template, not YAML)
loop_vars = set()
for_pattern = r"\{%\s*for\s+(\w+)\s+in\s+([a-zA-Z_][a-zA-Z0-9_]*)"
for match in re.finditer(for_pattern, template):
loop_var = match.group(1) # The item
collection = match.group(2) # The collection
loop_vars.add(loop_var)
variables.add(collection) # Collection must exist in YAML
# Pattern 1: {{ variable_name }} or {{ variable.field }}
# Captures the first part before any dots or filters
var_pattern = r"\{\{\s*([a-zA-Z_][a-zA-Z0-9_]*)"
for match in re.finditer(var_pattern, template):
var_name = match.group(1)
# Only add if it's not a loop variable
if var_name not in loop_vars:
variables.add(var_name)
return variables
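
A quick illustration (not part of the commit) of what the helper above is meant to extract, assuming extract_jinja_variables as defined here is in scope:

template_text = (
    '{% for server in app_servers %}{{ server }}{% endfor %}\n'
    'name = "{{ app_name }}"\n'
)
# 'server' is introduced by the for tag itself, so only the collection and
# the scalar variable need to exist in the generated Ansible YAML.
assert extract_jinja_variables(template_text) == {"app_servers", "app_name"}
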
def extract_yaml_variables(ansible_yaml: str) -> Set[str]:
"""
Extract all variable names from Ansible YAML.
Returns the top-level keys from the YAML document.
"""
data = yaml.safe_load(ansible_yaml)
if not isinstance(data, dict):
return set()
return set(data.keys())
class TestTemplateYamlConsistency:
"""Tests that verify template variables exist in YAML."""
def test_simple_json_consistency(self):
"""Simple JSON with scalars and lists."""
json_text = """
{
"name": "test",
"values": [1, 2, 3]
}
"""
fmt = "json"
import json
parsed = json.loads(json_text)
loop_candidates = analyze_loops(fmt, parsed)
flat_items = flatten_config(fmt, parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(fmt, parsed, "app", None, loop_candidates)
yaml_vars = extract_yaml_variables(ansible_yaml)
template_vars = extract_jinja_variables(template)
# Every variable in template must exist in YAML
missing_vars = template_vars - yaml_vars
assert not missing_vars, (
f"Template references variables not in YAML: {missing_vars}\n"
f"YAML vars: {yaml_vars}\n"
f"Template vars: {template_vars}\n"
f"Template:\n{template}\n"
f"YAML:\n{ansible_yaml}"
)
def test_toml_inline_array_consistency(self):
"""TOML with inline array should use loops consistently."""
import tomllib
toml_text = """
name = "myapp"
servers = ["server1", "server2", "server3"]
"""
parsed = tomllib.loads(toml_text)
loop_candidates = analyze_loops("toml", parsed)
flat_items = flatten_config("toml", parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
"toml", parsed, "app", toml_text, loop_candidates
)
yaml_vars = extract_yaml_variables(ansible_yaml)
template_vars = extract_jinja_variables(template)
missing_vars = template_vars - yaml_vars
assert not missing_vars, (
f"Template references variables not in YAML: {missing_vars}\n"
f"Template:\n{template}\n"
f"YAML:\n{ansible_yaml}"
)
def test_toml_array_of_tables_consistency(self):
"""TOML with [[array.of.tables]] should use loops consistently."""
import tomllib
toml_text = """
[[database]]
host = "db1.example.com"
port = 5432
[[database]]
host = "db2.example.com"
port = 5433
"""
parsed = tomllib.loads(toml_text)
loop_candidates = analyze_loops("toml", parsed)
flat_items = flatten_config("toml", parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
"toml", parsed, "app", toml_text, loop_candidates
)
yaml_vars = extract_yaml_variables(ansible_yaml)
template_vars = extract_jinja_variables(template)
missing_vars = template_vars - yaml_vars
assert not missing_vars, (
f"Template references variables not in YAML: {missing_vars}\n"
f"Template:\n{template}\n"
f"YAML:\n{ansible_yaml}"
)
# Additionally verify that if YAML has a list, template uses a loop
defaults = yaml.safe_load(ansible_yaml)
for var_name, value in defaults.items():
if isinstance(value, list) and len(value) > 1:
# YAML has a list - template should use {% for %}
assert "{% for" in template, (
f"YAML has list variable '{var_name}' but template doesn't use loops\n"
f"Template:\n{template}"
)
def test_yaml_list_consistency(self):
"""YAML with lists should use loops consistently."""
yaml_text = """
name: myapp
servers:
- server1
- server2
- server3
databases:
- host: db1
port: 5432
- host: db2
port: 5433
"""
parsed = yaml.safe_load(yaml_text)
loop_candidates = analyze_loops("yaml", parsed)
flat_items = flatten_config("yaml", parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
"yaml", parsed, "app", yaml_text, loop_candidates
)
yaml_vars = extract_yaml_variables(ansible_yaml)
template_vars = extract_jinja_variables(template)
missing_vars = template_vars - yaml_vars
assert not missing_vars, (
f"Template references variables not in YAML: {missing_vars}\n"
f"Template:\n{template}\n"
f"YAML:\n{ansible_yaml}"
)
def test_mixed_scalars_and_loops_consistency(self):
"""Config with both scalars and loops should be consistent."""
import tomllib
toml_text = """
name = "myapp"
version = "1.0"
ports = [8080, 8081, 8082]
[database]
host = "localhost"
port = 5432
[[servers]]
name = "web1"
ip = "10.0.0.1"
[[servers]]
name = "web2"
ip = "10.0.0.2"
"""
parsed = tomllib.loads(toml_text)
loop_candidates = analyze_loops("toml", parsed)
flat_items = flatten_config("toml", parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
"toml", parsed, "app", toml_text, loop_candidates
)
yaml_vars = extract_yaml_variables(ansible_yaml)
template_vars = extract_jinja_variables(template)
missing_vars = template_vars - yaml_vars
assert not missing_vars, (
f"Template references variables not in YAML: {missing_vars}\n"
f"Template:\n{template}\n"
f"YAML:\n{ansible_yaml}"
)
def test_no_orphaned_scalar_references(self):
"""
When YAML has a list variable, template must NOT reference scalar indices.
This catches the bug where:
- YAML has: app_list: [1, 2, 3]
- Template incorrectly uses: {{ app_list_0 }}, {{ app_list_1 }}
"""
import json
json_text = '{"items": [1, 2, 3, 4, 5]}'
parsed = json.loads(json_text)
loop_candidates = analyze_loops("json", parsed)
flat_items = flatten_config("json", parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
"json", parsed, "app", None, loop_candidates
)
defaults = yaml.safe_load(ansible_yaml)
# Check each list variable in YAML
for var_name, value in defaults.items():
if isinstance(value, list):
# Template should NOT reference app_items_0, app_items_1, etc.
for i in range(len(value)):
scalar_ref = f"{var_name}_{i}"
assert scalar_ref not in template, (
f"Template incorrectly uses scalar reference '{scalar_ref}' "
f"when YAML has '{var_name}' as a list\n"
f"Template should use loops, not scalar indices\n"
f"Template:\n{template}"
)
def test_all_sample_files_consistency(self):
"""Test all sample files for consistency."""
samples_dir = Path(__file__).parent / "samples"
sample_files = [
("foo.json", "json"),
("bar.yaml", "yaml"),
("tom.toml", "toml"),
]
for filename, fmt in sample_files:
file_path = samples_dir / filename
if not file_path.exists():
pytest.skip(f"Sample file {filename} not found")
original_text = file_path.read_text()
fmt_detected, parsed = parse_config(file_path)
loop_candidates = analyze_loops(fmt_detected, parsed)
flat_items = flatten_config(fmt_detected, parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("test", flat_items, loop_candidates)
template = generate_jinja2_template(
fmt_detected, parsed, "test", original_text, loop_candidates
)
yaml_vars = extract_yaml_variables(ansible_yaml)
template_vars = extract_jinja_variables(template)
missing_vars = template_vars - yaml_vars
assert not missing_vars, (
f"File: {filename}\n"
f"Template references variables not in YAML: {missing_vars}\n"
f"YAML vars: {yaml_vars}\n"
f"Template vars: {template_vars}\n"
f"Template:\n{template}\n"
f"YAML:\n{ansible_yaml}"
)
class TestStructuralConsistency:
"""Tests that verify structural consistency between YAML and templates."""
def test_list_in_yaml_means_loop_in_template(self):
"""When YAML has a list (len > 1), template should use {% for %}."""
import json
json_text = """
{
"scalar": "value",
"list": [1, 2, 3]
}
"""
parsed = json.loads(json_text)
loop_candidates = analyze_loops("json", parsed)
flat_items = flatten_config("json", parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
"json", parsed, "app", None, loop_candidates
)
defaults = yaml.safe_load(ansible_yaml)
# Find list variables in YAML
list_vars = [
k for k, v in defaults.items() if isinstance(v, list) and len(v) > 1
]
if list_vars:
# Template must contain for loops
assert "{% for" in template, (
f"YAML has list variables {list_vars} but template has no loops\n"
f"Template:\n{template}"
)
# Each list variable should be used in a for loop
for var_name in list_vars:
# Look for "{% for ... in var_name %}"
for_pattern = (
r"\{%\s*for\s+\w+\s+in\s+" + re.escape(var_name) + r"\s*%\}"
)
assert re.search(for_pattern, template), (
f"List variable '{var_name}' not used in a for loop\n"
f"Template:\n{template}"
)
def test_scalar_in_yaml_means_no_loop_in_template(self):
"""When YAML has scalars, template should use {{ var }}, not loops."""
import json
json_text = """
{
"name": "test",
"port": 8080,
"enabled": true
}
"""
parsed = json.loads(json_text)
loop_candidates = analyze_loops("json", parsed)
flat_items = flatten_config("json", parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
"json", parsed, "app", None, loop_candidates
)
defaults = yaml.safe_load(ansible_yaml)
# All variables are scalars - template should NOT have loops
scalar_vars = [
k for k, v in defaults.items() if not isinstance(v, (list, dict))
]
# Check that scalar vars are used directly, not in loops
for var_name in scalar_vars:
# Should appear in {{ var_name }}, not {% for ... in var_name %}
direct_ref = f"{{{{ {var_name}"
loop_ref = f"for .* in {var_name}"
assert direct_ref in template, (
f"Scalar variable '{var_name}' should be directly referenced\n"
f"Template:\n{template}"
)
assert not re.search(loop_ref, template), (
f"Scalar variable '{var_name}' incorrectly used in a loop\n"
f"Template:\n{template}"
)
def test_no_undefined_variable_errors(self):
"""
Simulate Ansible template rendering to catch undefined variables.
This is the ultimate test - actually render the template with the YAML
and verify no undefined variable errors occur.
"""
from jinja2 import Environment, StrictUndefined
import json
json_text = """
{
"name": "myapp",
"servers": ["web1", "web2"],
"database": {
"host": "localhost",
"port": 5432
}
}
"""
parsed = json.loads(json_text)
loop_candidates = analyze_loops("json", parsed)
flat_items = flatten_config("json", parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
"json", parsed, "app", None, loop_candidates
)
# Load variables from YAML
variables = yaml.safe_load(ansible_yaml)
# Try to render the template
env = Environment(undefined=StrictUndefined)
try:
jinja_template = env.from_string(template)
rendered = jinja_template.render(variables)
            # Successfully rendered - this is what we want!
            assert rendered, "Rendered output should not be empty"
except Exception as e:
pytest.fail(
f"Template rendering failed with variables from YAML\n"
f"Error: {e}\n"
f"Template:\n{template}\n"
f"Variables:\n{ansible_yaml}"
)
class TestRegressionBugs:
"""Tests for specific bugs that were found and fixed."""
def test_toml_array_of_tables_no_scalar_refs(self):
"""
Regression test: TOML [[array]] should not generate scalar references.
Bug: Template had {{ app_database_host }} when YAML had app_database as list.
"""
import tomllib
toml_text = """
[[database]]
host = "db1"
port = 5432
[[database]]
host = "db2"
port = 5433
"""
parsed = tomllib.loads(toml_text)
loop_candidates = analyze_loops("toml", parsed)
flat_items = flatten_config("toml", parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
"toml", parsed, "app", toml_text, loop_candidates
)
# YAML should have app_database as a list
defaults = yaml.safe_load(ansible_yaml)
assert isinstance(
defaults.get("app_database"), list
), f"Expected app_database to be a list in YAML\n{ansible_yaml}"
# Template should NOT have app_database_host or app_database_port
assert (
"app_database_host" not in template
), f"Template incorrectly uses scalar 'app_database_host'\n{template}"
assert (
"app_database_port" not in template
), f"Template incorrectly uses scalar 'app_database_port'\n{template}"
# Template SHOULD use a loop
assert "{% for" in template, f"Template should use a loop\n{template}"
assert (
"app_database" in template
), f"Template should reference app_database\n{template}"
def test_json_array_no_index_refs(self):
"""
Regression test: JSON arrays should not generate index references.
        Bug: Template had per-index scalars like {{ app_items_0 }}, {{ app_items_1 }} when YAML had app_items as a single list.
"""
import json
json_text = '{"items": [1, 2, 3]}'
parsed = json.loads(json_text)
loop_candidates = analyze_loops("json", parsed)
flat_items = flatten_config("json", parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
"json", parsed, "app", None, loop_candidates
)
# YAML should have app_items as a list
defaults = yaml.safe_load(ansible_yaml)
assert isinstance(defaults.get("app_items"), list)
# Template should NOT have app_items_0, app_items_1, app_items_2
for i in range(3):
assert (
f"app_items_{i}" not in template
), f"Template incorrectly uses scalar 'app_items_{i}'\n{template}"
# Template SHOULD use a loop
assert "{% for" in template
assert "app_items" in template
if __name__ == "__main__":
pytest.main([__file__, "-v"])

216
utils/diff_configs.py Normal file
View file

@@ -0,0 +1,216 @@
#!/usr/bin/env python3
"""
Side-by-side comparison of original vs regenerated config.
Usage:
./diff_configs.py tests/samples/foo.json
./diff_configs.py tests/samples/tom.toml --context 5
"""
import argparse
import sys
from pathlib import Path
import difflib
import yaml
from jinja2 import Environment, StrictUndefined
# Make the in-repo package importable (assumes the src/ layout) when running from utils/
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src"))
from jinjaturtle.core import (
parse_config,
analyze_loops,
flatten_config,
generate_ansible_yaml,
generate_jinja2_template,
)
def colorize(text: str, color: str) -> str:
"""Add ANSI color codes."""
    colors = {
        "red": "\033[91m",
        "green": "\033[92m",
        "yellow": "\033[93m",
        "blue": "\033[94m",
        "cyan": "\033[96m",  # used for hunk headers and the semantic-comparison banner
        "reset": "\033[0m",
    }
return f"{colors.get(color, '')}{text}{colors['reset']}"
def side_by_side_diff(original: str, regenerated: str, width: int = 80):
"""Print side-by-side diff."""
orig_lines = original.splitlines()
regen_lines = regenerated.splitlines()
# Calculate column width
col_width = width // 2 - 3
print(
colorize("ORIGINAL".center(col_width), "blue")
+ " | "
+ colorize("REGENERATED".center(col_width), "green")
)
print("-" * col_width + "-+-" + "-" * col_width)
max_lines = max(len(orig_lines), len(regen_lines))
for i in range(max_lines):
orig_line = orig_lines[i] if i < len(orig_lines) else ""
regen_line = regen_lines[i] if i < len(regen_lines) else ""
# Truncate if too long
if len(orig_line) > col_width - 2:
orig_line = orig_line[: col_width - 5] + "..."
if len(regen_line) > col_width - 2:
regen_line = regen_line[: col_width - 5] + "..."
# Color lines if different
if orig_line != regen_line:
orig_display = colorize(orig_line.ljust(col_width), "red")
regen_display = colorize(regen_line.ljust(col_width), "green")
else:
orig_display = orig_line.ljust(col_width)
regen_display = regen_line.ljust(col_width)
print(f"{orig_display} | {regen_display}")
def unified_diff(original: str, regenerated: str, filename: str, context: int = 3):
"""Print unified diff."""
orig_lines = original.splitlines(keepends=True)
regen_lines = regenerated.splitlines(keepends=True)
diff = difflib.unified_diff(
orig_lines,
regen_lines,
fromfile=f"{filename} (original)",
tofile=f"{filename} (regenerated)",
n=context,
)
for line in diff:
if line.startswith("+++") or line.startswith("---"):
print(colorize(line.rstrip(), "blue"))
elif line.startswith("@@"):
print(colorize(line.rstrip(), "cyan"))
elif line.startswith("+"):
print(colorize(line.rstrip(), "green"))
elif line.startswith("-"):
print(colorize(line.rstrip(), "red"))
else:
print(line.rstrip())
def main():
parser = argparse.ArgumentParser(
description="Compare original config with regenerated version",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("file", type=Path, help="Config file to check")
parser.add_argument(
"--mode",
choices=["side-by-side", "unified", "both"],
default="both",
help="Comparison mode (default: both)",
)
parser.add_argument(
"--context",
type=int,
default=3,
help="Number of context lines for unified diff (default: 3)",
)
parser.add_argument(
"--width",
type=int,
default=160,
help="Terminal width for side-by-side (default: 160)",
)
args = parser.parse_args()
if not args.file.exists():
print(colorize(f"❌ File not found: {args.file}", "red"))
return 1
print(colorize(f"\n{'=' * 80}", "blue"))
print(colorize(f" Comparing: {args.file}", "blue"))
print(colorize(f"{'=' * 80}\n", "blue"))
# Read and regenerate
try:
original_text = args.file.read_text()
fmt, parsed = parse_config(args.file)
loop_candidates = analyze_loops(fmt, parsed)
flat_items = flatten_config(fmt, parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml("app", flat_items, loop_candidates)
template = generate_jinja2_template(
fmt, parsed, "app", original_text, loop_candidates
)
variables = yaml.safe_load(ansible_yaml)
env = Environment(undefined=StrictUndefined)
jinja_template = env.from_string(template)
regenerated_text = jinja_template.render(variables)
# Check if identical
if original_text.strip() == regenerated_text.strip():
print(colorize("✅ Files are IDENTICAL (text comparison)\n", "green"))
else:
# Show diff
if args.mode in ("unified", "both"):
print(colorize("\n--- UNIFIED DIFF ---\n", "yellow"))
unified_diff(
original_text, regenerated_text, args.file.name, args.context
)
if args.mode in ("side-by-side", "both"):
print(colorize("\n--- SIDE-BY-SIDE COMPARISON ---\n", "yellow"))
side_by_side_diff(original_text, regenerated_text, args.width)
# Try semantic comparison
print(colorize(f"\n{'=' * 80}", "cyan"))
print(colorize(" Semantic Comparison", "cyan"))
print(colorize(f"{'=' * 80}", "cyan"))
try:
if fmt == "json":
import json
if json.loads(original_text) == json.loads(regenerated_text):
print(colorize("✅ JSON data structures are IDENTICAL", "green"))
else:
print(colorize("⚠️ JSON data structures DIFFER", "yellow"))
elif fmt == "yaml":
if yaml.safe_load(original_text) == yaml.safe_load(regenerated_text):
print(colorize("✅ YAML data structures are IDENTICAL", "green"))
else:
print(colorize("⚠️ YAML data structures DIFFER", "yellow"))
elif fmt == "toml":
import tomllib
if tomllib.loads(original_text) == tomllib.loads(regenerated_text):
print(colorize("✅ TOML data structures are IDENTICAL", "green"))
else:
print(colorize("⚠️ TOML data structures DIFFER", "yellow"))
except Exception as e:
print(colorize(f" Could not compare semantically: {e}", "yellow"))
except Exception as e:
print(colorize(f"❌ ERROR: {e}", "red"))
import traceback
traceback.print_exc()
return 1
return 0
if __name__ == "__main__":
sys.exit(main())

162
utils/regenerate.py Normal file
View file

@@ -0,0 +1,162 @@
#!/usr/bin/env python3
"""
Regenerate config files and save all intermediate files.
Creates:
- original.{ext}
- defaults/main.yml
- templates/config.j2
- regenerated.{ext}
Usage:
./regenerate.py tests/samples/foo.json
./regenerate.py tests/samples/tom.toml --output-dir tmp/toml_test
"""
import argparse
import sys
from pathlib import Path
import yaml
from jinja2 import Environment, StrictUndefined
# Make the in-repo package importable (assumes the src/ layout) when running from utils/
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src"))
from jinjaturtle.core import (
parse_config,
analyze_loops,
flatten_config,
generate_ansible_yaml,
generate_jinja2_template,
)
def regenerate_and_save(config_file: Path, output_dir: Path, role_prefix: str = "app"):
"""
Regenerate config and save all intermediate files.
"""
output_dir.mkdir(parents=True, exist_ok=True)
# Read original
original_text = config_file.read_text()
fmt, parsed = parse_config(config_file)
# Determine extension
ext = config_file.suffix
# Save original
original_out = output_dir / f"original{ext}"
original_out.write_text(original_text)
print(f"📄 Saved: {original_out}")
# Generate Ansible files
loop_candidates = analyze_loops(fmt, parsed)
flat_items = flatten_config(fmt, parsed, loop_candidates)
ansible_yaml = generate_ansible_yaml(role_prefix, flat_items, loop_candidates)
template = generate_jinja2_template(
fmt, parsed, role_prefix, original_text, loop_candidates
)
# Save Ansible YAML
defaults_dir = output_dir / "defaults"
defaults_dir.mkdir(exist_ok=True)
defaults_file = defaults_dir / "main.yml"
defaults_file.write_text(ansible_yaml)
print(f"📄 Saved: {defaults_file}")
# Save template
templates_dir = output_dir / "templates"
templates_dir.mkdir(exist_ok=True)
template_file = templates_dir / "config.j2"
template_file.write_text(template)
print(f"📄 Saved: {template_file}")
# Render template
variables = yaml.safe_load(ansible_yaml)
env = Environment(undefined=StrictUndefined)
jinja_template = env.from_string(template)
regenerated_text = jinja_template.render(variables)
# Save regenerated
regenerated_out = output_dir / f"regenerated{ext}"
regenerated_out.write_text(regenerated_text)
print(f"📄 Saved: {regenerated_out}")
# Summary
print(f"\n✅ All files saved to: {output_dir}")
print("\n📊 Statistics:")
print(f" Format: {fmt}")
print(f" Loop candidates: {len(loop_candidates)}")
if loop_candidates:
print(" Loops detected:")
for c in loop_candidates:
print(f" - {'.'.join(c.path)}: {len(c.items)} items")
# Check if identical
if original_text.strip() == regenerated_text.strip():
print("\n✅ Original and regenerated are IDENTICAL (text comparison)")
else:
print("\n⚠️ Original and regenerated differ in whitespace/formatting")
print(f" Run: diff {original_out} {regenerated_out}")
return output_dir
def main():
parser = argparse.ArgumentParser(
description="Regenerate config and save all intermediate files",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s tests/samples/foo.json
%(prog)s tests/samples/tom.toml -o tmp/toml_output
%(prog)s tests/samples/bar.yaml --role-prefix myapp
""",
)
parser.add_argument("file", type=Path, help="Config file to process")
parser.add_argument(
"-o",
"--output-dir",
type=Path,
help="Output directory (default: regenerated_<filename>)",
)
parser.add_argument(
"-r",
"--role-prefix",
default="app",
help="Ansible role prefix for variables (default: app)",
)
args = parser.parse_args()
if not args.file.exists():
print(f"❌ File not found: {args.file}")
return 1
# Determine output directory
if args.output_dir:
output_dir = args.output_dir
else:
output_dir = Path(f"regenerated_{args.file.stem}")
print(f"🔄 Regenerating: {args.file}")
print(f"📁 Output directory: {output_dir}")
print(f"🏷️ Role prefix: {args.role_prefix}\n")
try:
regenerate_and_save(args.file, output_dir, args.role_prefix)
return 0
except Exception as e:
print(f"\n❌ ERROR: {e}")
import traceback
traceback.print_exc()
return 1
if __name__ == "__main__":
sys.exit(main())