diff --git a/README.md b/README.md
index 27fc7c5..c5702f3 100644
--- a/README.md
+++ b/README.md
@@ -29,12 +29,9 @@ TOML, YAML, INI, JSON and XML-style config files should be okay. There are alway
going to be some edge cases in very complex files that are difficult to work
with, though, so you may still find that you need to tweak the results.
-For XML and YAML files, JinjaTurtle will attempt to generate 'for' loops
-and lists in the Ansible yaml if the config file looks homogenous enough to
-support it. However, if it lacks the confidence in this, it will fall back to
-using scalar-style flattened attributes.
-
-You may need or wish to tidy up the config to suit your needs.
+The tool does not do anything intelligent like detect common sections that
+could practically be turned into 'for' loops in Jinja. You'd have to do those
+sorts of optimisations yourself.
The goal here is really to *speed up* converting files into Ansible/Jinja2,
but not necessarily to make it perfect.
diff --git a/jinjaturtle.svg b/jinjaturtle.svg
index 2e6fcf2..4a0edb7 100644
--- a/jinjaturtle.svg
+++ b/jinjaturtle.svg
@@ -9,6 +9,8 @@
stroke-width="4"/>
+
"]
license = "GPL-3.0-or-later"
diff --git a/src/jinjaturtle/cli.py b/src/jinjaturtle/cli.py
index 40a9aba..ce096c4 100644
--- a/src/jinjaturtle/cli.py
+++ b/src/jinjaturtle/cli.py
@@ -7,10 +7,9 @@ from pathlib import Path
from .core import (
parse_config,
- analyze_loops,
flatten_config,
- generate_ansible_yaml,
- generate_jinja2_template,
+ generate_defaults_yaml,
+ generate_template,
)
@@ -54,34 +53,19 @@ def _main(argv: list[str] | None = None) -> int:
args = parser.parse_args(argv)
config_path = Path(args.config)
- config_text = config_path.read_text(encoding="utf-8")
-
- # Parse the config
fmt, parsed = parse_config(config_path, args.format)
-
- # Analyze for loops
- loop_candidates = analyze_loops(fmt, parsed)
-
- # Flatten config (excluding loop paths if loops are detected)
- flat_items = flatten_config(fmt, parsed, loop_candidates)
-
- # Generate defaults YAML (with loop collections if detected)
- ansible_yaml = generate_ansible_yaml(args.role_name, flat_items, loop_candidates)
-
- # Generate template (with loops if detected)
- template_str = generate_jinja2_template(
- fmt,
- parsed,
- args.role_name,
- original_text=config_text,
- loop_candidates=loop_candidates,
+ flat_items = flatten_config(fmt, parsed)
+ defaults_yaml = generate_defaults_yaml(args.role_name, flat_items)
+ config_text = config_path.read_text(encoding="utf-8")
+ template_str = generate_template(
+ fmt, parsed, args.role_name, original_text=config_text
)
if args.defaults_output:
- Path(args.defaults_output).write_text(ansible_yaml, encoding="utf-8")
+ Path(args.defaults_output).write_text(defaults_yaml, encoding="utf-8")
else:
print("# defaults/main.yml")
- print(ansible_yaml, end="")
+ print(defaults_yaml, end="")
if args.template_output:
Path(args.template_output).write_text(template_str, encoding="utf-8")
diff --git a/src/jinjaturtle/core.py b/src/jinjaturtle/core.py
index c8e6d71..3fc46c5 100644
--- a/src/jinjaturtle/core.py
+++ b/src/jinjaturtle/core.py
@@ -5,7 +5,6 @@ from typing import Any, Iterable
import yaml
-from .loop_analyzer import LoopAnalyzer, LoopCandidate
from .handlers import (
BaseHandler,
IniHandler,
@@ -17,24 +16,21 @@ from .handlers import (
class QuotedString(str):
- """
- Marker type for strings that must be double-quoted in YAML output.
- """
+ """Marker type for strings that must be double-quoted in YAML output."""
pass
def _fallback_str_representer(dumper: yaml.SafeDumper, data: Any):
"""
- Fallback for objects the dumper doesn't know about.
+ Fallback for objects the dumper doesn't know about. Represent them as
+ plain strings.
"""
return dumper.represent_scalar("tag:yaml.org,2002:str", str(data))
class _TurtleDumper(yaml.SafeDumper):
- """
- Custom YAML dumper that always double-quotes QuotedString values.
- """
+ """Custom YAML dumper that always double-quotes QuotedString values."""
pass
@@ -46,7 +42,6 @@ def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString):
_TurtleDumper.add_representer(QuotedString, _quoted_str_representer)
# Use our fallback for any unknown object types
_TurtleDumper.add_representer(None, _fallback_str_representer)
-
_HANDLERS: dict[str, BaseHandler] = {}
_INI_HANDLER = IniHandler()
@@ -54,7 +49,6 @@ _JSON_HANDLER = JsonHandler()
_TOML_HANDLER = TomlHandler()
_YAML_HANDLER = YamlHandler()
_XML_HANDLER = XmlHandler()
-
_HANDLERS["ini"] = _INI_HANDLER
_HANDLERS["json"] = _JSON_HANDLER
_HANDLERS["toml"] = _TOML_HANDLER
@@ -63,15 +57,17 @@ _HANDLERS["xml"] = _XML_HANDLER
def make_var_name(role_prefix: str, path: Iterable[str]) -> str:
- """
- Wrapper for :meth:`BaseHandler.make_var_name`.
+ """Wrapper for :meth:`BaseHandler.make_var_name`.
+
+ This keeps the public API (and tests) working while the implementation
+ lives on the BaseHandler class.
"""
return BaseHandler.make_var_name(role_prefix, path)
def detect_format(path: Path, explicit: str | None = None) -> str:
"""
- Determine config format from argument or filename.
+ Determine config format (toml, yaml, json, ini-ish, xml) from argument or filename.
"""
if explicit:
return explicit
@@ -103,93 +99,49 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]:
return fmt, parsed
-def analyze_loops(fmt: str, parsed: Any) -> list[LoopCandidate]:
+def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
"""
- Analyze parsed config to find loop opportunities.
- """
- analyzer = LoopAnalyzer()
- candidates = analyzer.analyze(parsed, fmt)
-
- # Filter by confidence threshold
- return [c for c in candidates if c.confidence >= LoopAnalyzer.MIN_CONFIDENCE]
-
-
-def flatten_config(
- fmt: str, parsed: Any, loop_candidates: list[LoopCandidate] | None = None
-) -> list[tuple[tuple[str, ...], Any]]:
- """
- Flatten parsed config into (path, value) pairs.
-
- If loop_candidates is provided, paths within those loops are excluded
- from flattening (they'll be handled via loops in the template).
+ Flatten parsed config into a list of (path_tuple, value).
"""
handler = _HANDLERS.get(fmt)
if handler is None:
+ # preserve previous ValueError for unsupported formats
raise ValueError(f"Unsupported format: {fmt}")
-
- all_items = handler.flatten(parsed)
-
- if not loop_candidates:
- return all_items
-
- # Build set of paths to exclude (anything under a loop path)
- excluded_prefixes = {candidate.path for candidate in loop_candidates}
-
- # Filter out items that fall under loop paths
- filtered_items = []
- for item_path, value in all_items:
- # Check if this path starts with any loop path
- is_excluded = False
- for loop_path in excluded_prefixes:
- if _path_starts_with(item_path, loop_path):
- is_excluded = True
- break
-
- if not is_excluded:
- filtered_items.append((item_path, value))
-
- return filtered_items
-
-
-def _path_starts_with(path: tuple[str, ...], prefix: tuple[str, ...]) -> bool:
- """Check if path starts with prefix."""
- if len(path) < len(prefix):
- return False
- return path[: len(prefix)] == prefix
+ return handler.flatten(parsed)
def _normalize_default_value(value: Any) -> Any:
"""
- Ensure that 'true' / 'false' end up as quoted strings in YAML.
+ Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans.
+
+ - bool -> QuotedString("true"/"false")
+ - "true"/"false" (any case) -> QuotedString(original_text)
+ - everything else -> unchanged
"""
if isinstance(value, bool):
+ # YAML booleans are lower-case; we keep them as strings.
return QuotedString("true" if value else "false")
if isinstance(value, str) and value.lower() in {"true", "false"}:
return QuotedString(value)
return value
-def generate_ansible_yaml(
+def generate_defaults_yaml(
role_prefix: str,
flat_items: list[tuple[tuple[str, ...], Any]],
- loop_candidates: list[LoopCandidate] | None = None,
) -> str:
"""
- Create Ansible YAML for defaults/main.yml.
+ Create YAML for defaults/main.yml from flattened items.
+
+ Boolean/boolean-like values ("true"/"false") are forced to be *strings*
+ and double-quoted in the resulting YAML so that Ansible does not coerce
+ them back into Python booleans.
"""
defaults: dict[str, Any] = {}
-
- # Add scalar variables
for path, value in flat_items:
var_name = make_var_name(role_prefix, path)
defaults[var_name] = _normalize_default_value(value)
- # Add loop collections
- if loop_candidates:
- for candidate in loop_candidates:
- var_name = make_var_name(role_prefix, candidate.path)
- defaults[var_name] = candidate.items
-
return yaml.dump(
defaults,
Dumper=_TurtleDumper,
@@ -201,28 +153,21 @@ def generate_ansible_yaml(
)
-def generate_jinja2_template(
+def generate_template(
fmt: str,
parsed: Any,
role_prefix: str,
original_text: str | None = None,
- loop_candidates: list[LoopCandidate] | None = None,
) -> str:
"""
Generate a Jinja2 template for the config.
+
+ If original_text is provided, comments and blank lines are preserved by
+ patching values in-place. Otherwise we fall back to reconstructing from
+ the parsed structure (no comments). JSON of course does not support
+ comments.
"""
handler = _HANDLERS.get(fmt)
-
if handler is None:
raise ValueError(f"Unsupported format: {fmt}")
-
- # Check if handler supports loop-aware generation
- if hasattr(handler, "generate_jinja2_template_with_loops") and loop_candidates:
- return handler.generate_jinja2_template_with_loops(
- parsed, role_prefix, original_text, loop_candidates
- )
-
- # Fallback to original scalar-only generation
- return handler.generate_jinja2_template(
- parsed, role_prefix, original_text=original_text
- )
+ return handler.generate_template(parsed, role_prefix, original_text=original_text)
diff --git a/src/jinjaturtle/handlers/base.py b/src/jinjaturtle/handlers/base.py
index 14aaec7..f427b76 100644
--- a/src/jinjaturtle/handlers/base.py
+++ b/src/jinjaturtle/handlers/base.py
@@ -11,7 +11,7 @@ class BaseHandler:
Each handler is responsible for:
- parse(path) -> parsed object
- flatten(parsed) -> list[(path_tuple, value)]
- - generate_jinja2_template(parsed, role_prefix, original_text=None) -> str
+ - generate_template(parsed, role_prefix, original_text=None) -> str
"""
fmt: str # e.g. "ini", "yaml", ...
@@ -22,7 +22,7 @@ class BaseHandler:
def flatten(self, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
raise NotImplementedError
- def generate_jinja2_template(
+ def generate_template(
self,
parsed: Any,
role_prefix: str,
diff --git a/src/jinjaturtle/handlers/ini.py b/src/jinjaturtle/handlers/ini.py
index ce5848e..24bf44f 100644
--- a/src/jinjaturtle/handlers/ini.py
+++ b/src/jinjaturtle/handlers/ini.py
@@ -32,7 +32,7 @@ class IniHandler(BaseHandler):
items.append(((section, key), processed))
return items
- def generate_jinja2_template(
+ def generate_template(
self,
parsed: Any,
role_prefix: str,
diff --git a/src/jinjaturtle/handlers/json.py b/src/jinjaturtle/handlers/json.py
index dbf7d82..5149238 100644
--- a/src/jinjaturtle/handlers/json.py
+++ b/src/jinjaturtle/handlers/json.py
@@ -15,7 +15,7 @@ class JsonHandler(DictLikeHandler):
with path.open("r", encoding="utf-8") as f:
return json.load(f)
- def generate_jinja2_template(
+ def generate_template(
self,
parsed: Any,
role_prefix: str,
diff --git a/src/jinjaturtle/handlers/toml.py b/src/jinjaturtle/handlers/toml.py
index 069b319..b70a9c8 100644
--- a/src/jinjaturtle/handlers/toml.py
+++ b/src/jinjaturtle/handlers/toml.py
@@ -19,7 +19,7 @@ class TomlHandler(DictLikeHandler):
with path.open("rb") as f:
return tomllib.load(f)
- def generate_jinja2_template(
+ def generate_template(
self,
parsed: Any,
role_prefix: str,
diff --git a/src/jinjaturtle/handlers/xml.py b/src/jinjaturtle/handlers/xml.py
index bc92c26..4d99a7d 100644
--- a/src/jinjaturtle/handlers/xml.py
+++ b/src/jinjaturtle/handlers/xml.py
@@ -5,19 +5,19 @@ from pathlib import Path
from typing import Any
import xml.etree.ElementTree as ET # nosec
-from .base import BaseHandler
-from ..loop_analyzer import LoopCandidate
+from . import BaseHandler
class XmlHandler(BaseHandler):
- """
- XML handler that can generate both scalar templates and loop-based templates.
- """
-
fmt = "xml"
def parse(self, path: Path) -> ET.Element:
text = path.read_text(encoding="utf-8")
+ # Parse with an explicit XMLParser instance so this stays compatible
+ # with Python versions where xml.etree.ElementTree.fromstring() may
+ # not accept a ``parser=`` keyword argument.
+ # defusedxml.defuse_stdlib() is called in the CLI entrypoint, so using
+ # the stdlib XMLParser here is safe.
parser = ET.XMLParser(
target=ET.TreeBuilder(insert_comments=False)
) # nosec B314
@@ -30,13 +30,12 @@ class XmlHandler(BaseHandler):
raise TypeError("XML parser result must be an Element")
return self._flatten_xml(parsed)
- def generate_jinja2_template(
+ def generate_template(
self,
parsed: Any,
role_prefix: str,
original_text: str | None = None,
) -> str:
- """Original scalar-only template generation."""
if original_text is not None:
return self._generate_xml_template_from_text(role_prefix, original_text)
if not isinstance(parsed, ET.Element):
@@ -44,30 +43,25 @@ class XmlHandler(BaseHandler):
xml_str = ET.tostring(parsed, encoding="unicode")
return self._generate_xml_template_from_text(role_prefix, xml_str)
- def generate_jinja2_template_with_loops(
- self,
- parsed: Any,
- role_prefix: str,
- original_text: str | None,
- loop_candidates: list[LoopCandidate],
- ) -> str:
- """Generate template with Jinja2 for loops where appropriate."""
-
- if original_text is not None:
- return self._generate_xml_template_with_loops_from_text(
- role_prefix, original_text, loop_candidates
- )
-
- if not isinstance(parsed, ET.Element):
- raise TypeError("XML parser result must be an Element")
-
- xml_str = ET.tostring(parsed, encoding="unicode")
- return self._generate_xml_template_with_loops_from_text(
- role_prefix, xml_str, loop_candidates
- )
-
def _flatten_xml(self, root: ET.Element) -> list[tuple[tuple[str, ...], Any]]:
- """Flatten an XML tree into (path, value) pairs."""
+ """
+ Flatten an XML tree into (path, value) pairs.
+
+ Path conventions:
+ - Root element's children are treated as top-level (root tag is *not* included).
+ - Element text:
<foo>bar</foo> -> path ("foo",) value "bar"
<foo attr="x">bar</foo> -> path ("foo", "value") value "bar"
<foo><bar>baz</bar></foo> -> ("foo", "bar") / etc.
+ - Attributes:
+ <server host="localhost">
+ -> path ("server", "@host") value "localhost"
+ - Repeated sibling elements:
<endpoint>/a</endpoint>
<endpoint>/b</endpoint>
+ -> ("endpoint", "0") "/a"
+ ("endpoint", "1") "/b"
+ """
items: list[tuple[tuple[str, ...], Any]] = []
def walk(elem: ET.Element, path: tuple[str, ...]) -> None:
@@ -83,8 +77,10 @@ class XmlHandler(BaseHandler):
text = (elem.text or "").strip()
if text:
if not elem.attrib and not children:
+ # Simple <foo>bar</foo>
items.append((path, text))
else:
+ # Text alongside attrs/children
items.append((path + ("value",), text))
# Repeated siblings get an index; singletons just use the tag
@@ -101,16 +97,24 @@ class XmlHandler(BaseHandler):
child_path = path + (tag,)
walk(child, child_path)
+ # Treat root as a container: its children are top-level
walk(root, ())
return items
def _split_xml_prolog(self, text: str) -> tuple[str, str]:
- """Split XML into (prolog, body)."""
+ """
+ Split an XML document into (prolog, body), where prolog includes:
- XML declaration (<?xml ... ?>)
+ - top-level comments
+ - DOCTYPE
+ The body starts at the root element.
+ """
i = 0
n = len(text)
prolog_parts: list[str] = []
while i < n:
+ # Preserve leading whitespace
while i < n and text[i].isspace():
prolog_parts.append(text[i])
i += 1
@@ -142,33 +146,22 @@ class XmlHandler(BaseHandler):
continue
if text[i] == "<":
+ # Assume root element starts here
break
+ # Unexpected content: stop treating as prolog
break
return "".join(prolog_parts), text[i:]
- def _apply_jinja_to_xml_tree(
- self,
- role_prefix: str,
- root: ET.Element,
- loop_candidates: list[LoopCandidate] | None = None,
- ) -> None:
+ def _apply_jinja_to_xml_tree(self, role_prefix: str, root: ET.Element) -> None:
"""
- Mutate XML tree in-place, replacing values with Jinja expressions.
-
- If loop_candidates is provided, repeated elements matching a candidate
- will be replaced with a {% for %} loop.
+ Mutate the XML tree in-place, replacing scalar values with Jinja
+ expressions based on the same paths used in _flatten_xml.
"""
- # Build a map of loop paths for quick lookup
- loop_paths = {}
- if loop_candidates:
- for candidate in loop_candidates:
- loop_paths[candidate.path] = candidate
-
def walk(elem: ET.Element, path: tuple[str, ...]) -> None:
- # Attributes (unless this element is in a loop)
+ # Attributes
for attr_name in list(elem.attrib.keys()):
attr_path = path + (f"@{attr_name}",)
var_name = self.make_var_name(role_prefix, attr_path)
@@ -187,273 +180,51 @@ class XmlHandler(BaseHandler):
var_name = self.make_var_name(role_prefix, text_path)
elem.text = f"{{{{ {var_name} }}}}"
- # Handle children - check for loops first
+ # Repeated children get indexes just like in _flatten_xml
counts = Counter(child.tag for child in children)
index_counters: dict[str, int] = defaultdict(int)
- # Check each tag to see if it's a loop candidate
- processed_tags = set()
-
for child in children:
tag = child.tag
-
- # Skip if we've already processed this tag as a loop
- if tag in processed_tags:
- continue
-
- child_path = path + (tag,)
-
- # Check if this is a loop candidate
- if child_path in loop_paths:
- # Mark this tag as processed
- processed_tags.add(tag)
-
- # Remove all children with this tag
- for child_to_remove in [c for c in children if c.tag == tag]:
- elem.remove(child_to_remove)
-
- # Create a loop comment/marker
- # We'll handle the actual loop generation in text processing
- loop_marker = ET.Comment(f"LOOP:{tag}")
- elem.append(loop_marker)
-
- elif counts[tag] > 1:
- # Multiple children but not a loop candidate - use indexed paths
+ if counts[tag] > 1:
idx = index_counters[tag]
index_counters[tag] += 1
- indexed_path = path + (tag, str(idx))
- walk(child, indexed_path)
+ child_path = path + (tag, str(idx))
else:
- # Single child
- walk(child, child_path)
+ child_path = path + (tag,)
+ walk(child, child_path)
walk(root, ())
def _generate_xml_template_from_text(self, role_prefix: str, text: str) -> str:
- """Generate scalar-only Jinja2 template."""
+ """
+ Generate a Jinja2 template for an XML file, preserving comments and prolog.
+
+ - Attributes become Jinja placeholders:
+ <server host="localhost" />
+ -> <server host="{{ prefix_server_host }}" />
+
+ - Text nodes become placeholders:
+ <port>8080</port>
+ -> <port>{{ prefix_port }}</port>
+
+ but if the element also has attributes/children, the value path
+ gets a trailing "value" component, matching flattening.
+ """
prolog, body = self._split_xml_prolog(text)
+ # Parse with comments included so that <!-- ... --> comments are preserved
+ # defusedxml.defuse_stdlib() is called in CLI entrypoint
parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True)) # nosec B314
parser.feed(body)
root = parser.close()
self._apply_jinja_to_xml_tree(role_prefix, root)
+ # Pretty indentation if available (Python 3.9+)
indent = getattr(ET, "indent", None)
if indent is not None:
indent(root, space=" ") # type: ignore[arg-type]
xml_body = ET.tostring(root, encoding="unicode")
return prolog + xml_body
-
- def _generate_xml_template_with_loops_from_text(
- self,
- role_prefix: str,
- text: str,
- loop_candidates: list[LoopCandidate],
- ) -> str:
- """Generate Jinja2 template with for loops."""
-
- prolog, body = self._split_xml_prolog(text)
-
- # Parse with comments preserved
- parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True)) # nosec B314
- parser.feed(body)
- root = parser.close()
-
- # Apply Jinja transformations (including loop markers)
- self._apply_jinja_to_xml_tree(role_prefix, root, loop_candidates)
-
- # Convert to string
- indent = getattr(ET, "indent", None)
- if indent is not None:
- indent(root, space=" ") # type: ignore[arg-type]
-
- xml_body = ET.tostring(root, encoding="unicode")
-
- # Post-process to replace loop markers with actual Jinja loops
- xml_body = self._insert_xml_loops(xml_body, role_prefix, loop_candidates, root)
-
- return prolog + xml_body
-
- def _insert_xml_loops(
- self,
- xml_str: str,
- role_prefix: str,
- loop_candidates: list[LoopCandidate],
- root: ET.Element,
- ) -> str:
- """
- Post-process XML string to insert Jinja2 for loops.
-
- This replaces markers with actual loop constructs.
- """
-
- # Build a sample element for each loop to use as template
- lines = xml_str.split("\n")
- result_lines = []
-
- for line in lines:
- # Check if this line contains a loop marker
- if "", start)
- tag_name = line[start:end].strip()
-
- # Find matching loop candidate
- candidate = None
- for cand in loop_candidates:
- if cand.path and cand.path[-1] == tag_name:
- candidate = cand
- break
-
- if candidate:
- # Get indentation from current line
- indent_level = len(line) - len(line.lstrip())
- indent_str = " " * indent_level
-
- # Generate loop variable name
- collection_var = self.make_var_name(role_prefix, candidate.path)
- item_var = candidate.loop_var
-
- # Create sample element with ALL possible fields from ALL items
- if candidate.items:
- # Merge all items to get the union of all fields
- merged_dict = self._merge_dicts_for_template(candidate.items)
-
- sample_elem = self._dict_to_xml_element(
- tag_name, merged_dict, item_var
- )
-
- # Apply indentation to the sample element
- ET.indent(sample_elem, space=" ")
-
- # Convert sample to string
- sample_str = ET.tostring(
- sample_elem, encoding="unicode"
- ).strip()
-
- # Add proper indentation to each line of the sample
- sample_lines = sample_str.split("\n")
-
- # Build loop
- result_lines.append(
- f"{indent_str}{{% for {item_var} in {collection_var} %}}"
- )
- # Add each line of the sample with proper indentation
- for sample_line in sample_lines:
- result_lines.append(f"{indent_str} {sample_line}")
- result_lines.append(f"{indent_str}{{% endfor %}}")
- else:
- # Keep the marker if we can't find the candidate
- result_lines.append(line)
- else:
- result_lines.append(line)
-
- # Post-process to replace and with Jinja2 conditionals
- final_lines = []
- for line in result_lines:
- # Replace with {% if var.field is defined %}
- if "", start)
- condition = line[start:end]
- indent = len(line) - len(line.lstrip())
- final_lines.append(f"{' ' * indent}{{% if {condition} is defined %}}")
- # Replace with {% endif %}
- elif "