diff --git a/README.md b/README.md index c5702f3..27fc7c5 100644 --- a/README.md +++ b/README.md @@ -29,9 +29,12 @@ TOML, YAML, INI, JSON and XML-style config files should be okay. There are alway going to be some edge cases in very complex files that are difficult to work with, though, so you may still find that you need to tweak the results. -The tool does not do anything intelligent like detect common sections that -could practically be turned into 'for' loops in Jinja. You'd have to do those -sorts of optimisations yourself. +For XML and YAML files, JinjaTurtle will attempt to generate 'for' loops +and lists in the Ansible yaml if the config file looks homogenous enough to +support it. However, if it lacks the confidence in this, it will fall back to +using scalar-style flattened attributes. + +You may need or wish to tidy up the config to suit your needs. The goal here is really to *speed up* converting files into Ansible/Jinja2, but not necessarily to make it perfect. diff --git a/jinjaturtle.svg b/jinjaturtle.svg index 4a0edb7..2e6fcf2 100644 --- a/jinjaturtle.svg +++ b/jinjaturtle.svg @@ -9,8 +9,6 @@ stroke-width="4"/> - "] license = "GPL-3.0-or-later" diff --git a/src/jinjaturtle/cli.py b/src/jinjaturtle/cli.py index ce096c4..40a9aba 100644 --- a/src/jinjaturtle/cli.py +++ b/src/jinjaturtle/cli.py @@ -7,9 +7,10 @@ from pathlib import Path from .core import ( parse_config, + analyze_loops, flatten_config, - generate_defaults_yaml, - generate_template, + generate_ansible_yaml, + generate_jinja2_template, ) @@ -53,19 +54,34 @@ def _main(argv: list[str] | None = None) -> int: args = parser.parse_args(argv) config_path = Path(args.config) - fmt, parsed = parse_config(config_path, args.format) - flat_items = flatten_config(fmt, parsed) - defaults_yaml = generate_defaults_yaml(args.role_name, flat_items) config_text = config_path.read_text(encoding="utf-8") - template_str = generate_template( - fmt, parsed, args.role_name, original_text=config_text + + # Parse the config + fmt, parsed = parse_config(config_path, args.format) + + # Analyze for loops + loop_candidates = analyze_loops(fmt, parsed) + + # Flatten config (excluding loop paths if loops are detected) + flat_items = flatten_config(fmt, parsed, loop_candidates) + + # Generate defaults YAML (with loop collections if detected) + ansible_yaml = generate_ansible_yaml(args.role_name, flat_items, loop_candidates) + + # Generate template (with loops if detected) + template_str = generate_jinja2_template( + fmt, + parsed, + args.role_name, + original_text=config_text, + loop_candidates=loop_candidates, ) if args.defaults_output: - Path(args.defaults_output).write_text(defaults_yaml, encoding="utf-8") + Path(args.defaults_output).write_text(ansible_yaml, encoding="utf-8") else: print("# defaults/main.yml") - print(defaults_yaml, end="") + print(ansible_yaml, end="") if args.template_output: Path(args.template_output).write_text(template_str, encoding="utf-8") diff --git a/src/jinjaturtle/core.py b/src/jinjaturtle/core.py index 3fc46c5..c8e6d71 100644 --- a/src/jinjaturtle/core.py +++ b/src/jinjaturtle/core.py @@ -5,6 +5,7 @@ from typing import Any, Iterable import yaml +from .loop_analyzer import LoopAnalyzer, LoopCandidate from .handlers import ( BaseHandler, IniHandler, @@ -16,21 +17,24 @@ from .handlers import ( class QuotedString(str): - """Marker type for strings that must be double-quoted in YAML output.""" + """ + Marker type for strings that must be double-quoted in YAML output. + """ pass def _fallback_str_representer(dumper: yaml.SafeDumper, data: Any): """ - Fallback for objects the dumper doesn't know about. Represent them as - plain strings. + Fallback for objects the dumper doesn't know about. """ return dumper.represent_scalar("tag:yaml.org,2002:str", str(data)) class _TurtleDumper(yaml.SafeDumper): - """Custom YAML dumper that always double-quotes QuotedString values.""" + """ + Custom YAML dumper that always double-quotes QuotedString values. + """ pass @@ -42,6 +46,7 @@ def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString): _TurtleDumper.add_representer(QuotedString, _quoted_str_representer) # Use our fallback for any unknown object types _TurtleDumper.add_representer(None, _fallback_str_representer) + _HANDLERS: dict[str, BaseHandler] = {} _INI_HANDLER = IniHandler() @@ -49,6 +54,7 @@ _JSON_HANDLER = JsonHandler() _TOML_HANDLER = TomlHandler() _YAML_HANDLER = YamlHandler() _XML_HANDLER = XmlHandler() + _HANDLERS["ini"] = _INI_HANDLER _HANDLERS["json"] = _JSON_HANDLER _HANDLERS["toml"] = _TOML_HANDLER @@ -57,17 +63,15 @@ _HANDLERS["xml"] = _XML_HANDLER def make_var_name(role_prefix: str, path: Iterable[str]) -> str: - """Wrapper for :meth:`BaseHandler.make_var_name`. - - This keeps the public API (and tests) working while the implementation - lives on the BaseHandler class. + """ + Wrapper for :meth:`BaseHandler.make_var_name`. """ return BaseHandler.make_var_name(role_prefix, path) def detect_format(path: Path, explicit: str | None = None) -> str: """ - Determine config format (toml, yaml, json, ini-ish, xml) from argument or filename. + Determine config format from argument or filename. """ if explicit: return explicit @@ -99,49 +103,93 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]: return fmt, parsed -def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]: +def analyze_loops(fmt: str, parsed: Any) -> list[LoopCandidate]: """ - Flatten parsed config into a list of (path_tuple, value). + Analyze parsed config to find loop opportunities. + """ + analyzer = LoopAnalyzer() + candidates = analyzer.analyze(parsed, fmt) + + # Filter by confidence threshold + return [c for c in candidates if c.confidence >= LoopAnalyzer.MIN_CONFIDENCE] + + +def flatten_config( + fmt: str, parsed: Any, loop_candidates: list[LoopCandidate] | None = None +) -> list[tuple[tuple[str, ...], Any]]: + """ + Flatten parsed config into (path, value) pairs. + + If loop_candidates is provided, paths within those loops are excluded + from flattening (they'll be handled via loops in the template). """ handler = _HANDLERS.get(fmt) if handler is None: - # preserve previous ValueError for unsupported formats raise ValueError(f"Unsupported format: {fmt}") - return handler.flatten(parsed) + + all_items = handler.flatten(parsed) + + if not loop_candidates: + return all_items + + # Build set of paths to exclude (anything under a loop path) + excluded_prefixes = {candidate.path for candidate in loop_candidates} + + # Filter out items that fall under loop paths + filtered_items = [] + for item_path, value in all_items: + # Check if this path starts with any loop path + is_excluded = False + for loop_path in excluded_prefixes: + if _path_starts_with(item_path, loop_path): + is_excluded = True + break + + if not is_excluded: + filtered_items.append((item_path, value)) + + return filtered_items + + +def _path_starts_with(path: tuple[str, ...], prefix: tuple[str, ...]) -> bool: + """Check if path starts with prefix.""" + if len(path) < len(prefix): + return False + return path[: len(prefix)] == prefix def _normalize_default_value(value: Any) -> Any: """ - Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans. - - - bool -> QuotedString("true"/"false") - - "true"/"false" (any case) -> QuotedString(original_text) - - everything else -> unchanged + Ensure that 'true' / 'false' end up as quoted strings in YAML. """ if isinstance(value, bool): - # YAML booleans are lower-case; we keep them as strings. return QuotedString("true" if value else "false") if isinstance(value, str) and value.lower() in {"true", "false"}: return QuotedString(value) return value -def generate_defaults_yaml( +def generate_ansible_yaml( role_prefix: str, flat_items: list[tuple[tuple[str, ...], Any]], + loop_candidates: list[LoopCandidate] | None = None, ) -> str: """ - Create YAML for defaults/main.yml from flattened items. - - Boolean/boolean-like values ("true"/"false") are forced to be *strings* - and double-quoted in the resulting YAML so that Ansible does not coerce - them back into Python booleans. + Create Ansible YAML for defaults/main.yml. """ defaults: dict[str, Any] = {} + + # Add scalar variables for path, value in flat_items: var_name = make_var_name(role_prefix, path) defaults[var_name] = _normalize_default_value(value) + # Add loop collections + if loop_candidates: + for candidate in loop_candidates: + var_name = make_var_name(role_prefix, candidate.path) + defaults[var_name] = candidate.items + return yaml.dump( defaults, Dumper=_TurtleDumper, @@ -153,21 +201,28 @@ def generate_defaults_yaml( ) -def generate_template( +def generate_jinja2_template( fmt: str, parsed: Any, role_prefix: str, original_text: str | None = None, + loop_candidates: list[LoopCandidate] | None = None, ) -> str: """ Generate a Jinja2 template for the config. - - If original_text is provided, comments and blank lines are preserved by - patching values in-place. Otherwise we fall back to reconstructing from - the parsed structure (no comments). JSON of course does not support - comments. """ handler = _HANDLERS.get(fmt) + if handler is None: raise ValueError(f"Unsupported format: {fmt}") - return handler.generate_template(parsed, role_prefix, original_text=original_text) + + # Check if handler supports loop-aware generation + if hasattr(handler, "generate_jinja2_template_with_loops") and loop_candidates: + return handler.generate_jinja2_template_with_loops( + parsed, role_prefix, original_text, loop_candidates + ) + + # Fallback to original scalar-only generation + return handler.generate_jinja2_template( + parsed, role_prefix, original_text=original_text + ) diff --git a/src/jinjaturtle/handlers/base.py b/src/jinjaturtle/handlers/base.py index f427b76..14aaec7 100644 --- a/src/jinjaturtle/handlers/base.py +++ b/src/jinjaturtle/handlers/base.py @@ -11,7 +11,7 @@ class BaseHandler: Each handler is responsible for: - parse(path) -> parsed object - flatten(parsed) -> list[(path_tuple, value)] - - generate_template(parsed, role_prefix, original_text=None) -> str + - generate_jinja2_template(parsed, role_prefix, original_text=None) -> str """ fmt: str # e.g. "ini", "yaml", ... @@ -22,7 +22,7 @@ class BaseHandler: def flatten(self, parsed: Any) -> list[tuple[tuple[str, ...], Any]]: raise NotImplementedError - def generate_template( + def generate_jinja2_template( self, parsed: Any, role_prefix: str, diff --git a/src/jinjaturtle/handlers/ini.py b/src/jinjaturtle/handlers/ini.py index 24bf44f..ce5848e 100644 --- a/src/jinjaturtle/handlers/ini.py +++ b/src/jinjaturtle/handlers/ini.py @@ -32,7 +32,7 @@ class IniHandler(BaseHandler): items.append(((section, key), processed)) return items - def generate_template( + def generate_jinja2_template( self, parsed: Any, role_prefix: str, diff --git a/src/jinjaturtle/handlers/json.py b/src/jinjaturtle/handlers/json.py index 5149238..dbf7d82 100644 --- a/src/jinjaturtle/handlers/json.py +++ b/src/jinjaturtle/handlers/json.py @@ -15,7 +15,7 @@ class JsonHandler(DictLikeHandler): with path.open("r", encoding="utf-8") as f: return json.load(f) - def generate_template( + def generate_jinja2_template( self, parsed: Any, role_prefix: str, diff --git a/src/jinjaturtle/handlers/toml.py b/src/jinjaturtle/handlers/toml.py index b70a9c8..069b319 100644 --- a/src/jinjaturtle/handlers/toml.py +++ b/src/jinjaturtle/handlers/toml.py @@ -19,7 +19,7 @@ class TomlHandler(DictLikeHandler): with path.open("rb") as f: return tomllib.load(f) - def generate_template( + def generate_jinja2_template( self, parsed: Any, role_prefix: str, diff --git a/src/jinjaturtle/handlers/xml.py b/src/jinjaturtle/handlers/xml.py index 4d99a7d..bc92c26 100644 --- a/src/jinjaturtle/handlers/xml.py +++ b/src/jinjaturtle/handlers/xml.py @@ -5,19 +5,19 @@ from pathlib import Path from typing import Any import xml.etree.ElementTree as ET # nosec -from . import BaseHandler +from .base import BaseHandler +from ..loop_analyzer import LoopCandidate class XmlHandler(BaseHandler): + """ + XML handler that can generate both scalar templates and loop-based templates. + """ + fmt = "xml" def parse(self, path: Path) -> ET.Element: text = path.read_text(encoding="utf-8") - # Parse with an explicit XMLParser instance so this stays compatible - # with Python versions where xml.etree.ElementTree.fromstring() may - # not accept a ``parser=`` keyword argument. - # defusedxml.defuse_stdlib() is called in the CLI entrypoint, so using - # the stdlib XMLParser here is safe. parser = ET.XMLParser( target=ET.TreeBuilder(insert_comments=False) ) # nosec B314 @@ -30,12 +30,13 @@ class XmlHandler(BaseHandler): raise TypeError("XML parser result must be an Element") return self._flatten_xml(parsed) - def generate_template( + def generate_jinja2_template( self, parsed: Any, role_prefix: str, original_text: str | None = None, ) -> str: + """Original scalar-only template generation.""" if original_text is not None: return self._generate_xml_template_from_text(role_prefix, original_text) if not isinstance(parsed, ET.Element): @@ -43,25 +44,30 @@ class XmlHandler(BaseHandler): xml_str = ET.tostring(parsed, encoding="unicode") return self._generate_xml_template_from_text(role_prefix, xml_str) - def _flatten_xml(self, root: ET.Element) -> list[tuple[tuple[str, ...], Any]]: - """ - Flatten an XML tree into (path, value) pairs. + def generate_jinja2_template_with_loops( + self, + parsed: Any, + role_prefix: str, + original_text: str | None, + loop_candidates: list[LoopCandidate], + ) -> str: + """Generate template with Jinja2 for loops where appropriate.""" - Path conventions: - - Root element's children are treated as top-level (root tag is *not* included). - - Element text: - bar -> path ("foo",) value "bar" - bar -> path ("foo", "value") value "bar" - baz -> ("foo", "bar") / etc. - - Attributes: - - -> path ("server", "@host") value "localhost" - - Repeated sibling elements: - /a - /b - -> ("endpoint", "0") "/a" - ("endpoint", "1") "/b" - """ + if original_text is not None: + return self._generate_xml_template_with_loops_from_text( + role_prefix, original_text, loop_candidates + ) + + if not isinstance(parsed, ET.Element): + raise TypeError("XML parser result must be an Element") + + xml_str = ET.tostring(parsed, encoding="unicode") + return self._generate_xml_template_with_loops_from_text( + role_prefix, xml_str, loop_candidates + ) + + def _flatten_xml(self, root: ET.Element) -> list[tuple[tuple[str, ...], Any]]: + """Flatten an XML tree into (path, value) pairs.""" items: list[tuple[tuple[str, ...], Any]] = [] def walk(elem: ET.Element, path: tuple[str, ...]) -> None: @@ -77,10 +83,8 @@ class XmlHandler(BaseHandler): text = (elem.text or "").strip() if text: if not elem.attrib and not children: - # Simple bar items.append((path, text)) else: - # Text alongside attrs/children items.append((path + ("value",), text)) # Repeated siblings get an index; singletons just use the tag @@ -97,24 +101,16 @@ class XmlHandler(BaseHandler): child_path = path + (tag,) walk(child, child_path) - # Treat root as a container: its children are top-level walk(root, ()) return items def _split_xml_prolog(self, text: str) -> tuple[str, str]: - """ - Split an XML document into (prolog, body), where prolog includes: - - XML declaration () - - top-level comments - - DOCTYPE - The body starts at the root element. - """ + """Split XML into (prolog, body).""" i = 0 n = len(text) prolog_parts: list[str] = [] while i < n: - # Preserve leading whitespace while i < n and text[i].isspace(): prolog_parts.append(text[i]) i += 1 @@ -146,22 +142,33 @@ class XmlHandler(BaseHandler): continue if text[i] == "<": - # Assume root element starts here break - # Unexpected content: stop treating as prolog break return "".join(prolog_parts), text[i:] - def _apply_jinja_to_xml_tree(self, role_prefix: str, root: ET.Element) -> None: + def _apply_jinja_to_xml_tree( + self, + role_prefix: str, + root: ET.Element, + loop_candidates: list[LoopCandidate] | None = None, + ) -> None: """ - Mutate the XML tree in-place, replacing scalar values with Jinja - expressions based on the same paths used in _flatten_xml. + Mutate XML tree in-place, replacing values with Jinja expressions. + + If loop_candidates is provided, repeated elements matching a candidate + will be replaced with a {% for %} loop. """ + # Build a map of loop paths for quick lookup + loop_paths = {} + if loop_candidates: + for candidate in loop_candidates: + loop_paths[candidate.path] = candidate + def walk(elem: ET.Element, path: tuple[str, ...]) -> None: - # Attributes + # Attributes (unless this element is in a loop) for attr_name in list(elem.attrib.keys()): attr_path = path + (f"@{attr_name}",) var_name = self.make_var_name(role_prefix, attr_path) @@ -180,51 +187,273 @@ class XmlHandler(BaseHandler): var_name = self.make_var_name(role_prefix, text_path) elem.text = f"{{{{ {var_name} }}}}" - # Repeated children get indexes just like in _flatten_xml + # Handle children - check for loops first counts = Counter(child.tag for child in children) index_counters: dict[str, int] = defaultdict(int) + # Check each tag to see if it's a loop candidate + processed_tags = set() + for child in children: tag = child.tag - if counts[tag] > 1: + + # Skip if we've already processed this tag as a loop + if tag in processed_tags: + continue + + child_path = path + (tag,) + + # Check if this is a loop candidate + if child_path in loop_paths: + # Mark this tag as processed + processed_tags.add(tag) + + # Remove all children with this tag + for child_to_remove in [c for c in children if c.tag == tag]: + elem.remove(child_to_remove) + + # Create a loop comment/marker + # We'll handle the actual loop generation in text processing + loop_marker = ET.Comment(f"LOOP:{tag}") + elem.append(loop_marker) + + elif counts[tag] > 1: + # Multiple children but not a loop candidate - use indexed paths idx = index_counters[tag] index_counters[tag] += 1 - child_path = path + (tag, str(idx)) + indexed_path = path + (tag, str(idx)) + walk(child, indexed_path) else: - child_path = path + (tag,) - walk(child, child_path) + # Single child + walk(child, child_path) walk(root, ()) def _generate_xml_template_from_text(self, role_prefix: str, text: str) -> str: - """ - Generate a Jinja2 template for an XML file, preserving comments and prolog. - - - Attributes become Jinja placeholders: - - -> - - - Text nodes become placeholders: - 8080 - -> {{ prefix_port }} - - but if the element also has attributes/children, the value path - gets a trailing "value" component, matching flattening. - """ + """Generate scalar-only Jinja2 template.""" prolog, body = self._split_xml_prolog(text) - # Parse with comments included so are preserved - # defusedxml.defuse_stdlib() is called in CLI entrypoint parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True)) # nosec B314 parser.feed(body) root = parser.close() self._apply_jinja_to_xml_tree(role_prefix, root) - # Pretty indentation if available (Python 3.9+) indent = getattr(ET, "indent", None) if indent is not None: indent(root, space=" ") # type: ignore[arg-type] xml_body = ET.tostring(root, encoding="unicode") return prolog + xml_body + + def _generate_xml_template_with_loops_from_text( + self, + role_prefix: str, + text: str, + loop_candidates: list[LoopCandidate], + ) -> str: + """Generate Jinja2 template with for loops.""" + + prolog, body = self._split_xml_prolog(text) + + # Parse with comments preserved + parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True)) # nosec B314 + parser.feed(body) + root = parser.close() + + # Apply Jinja transformations (including loop markers) + self._apply_jinja_to_xml_tree(role_prefix, root, loop_candidates) + + # Convert to string + indent = getattr(ET, "indent", None) + if indent is not None: + indent(root, space=" ") # type: ignore[arg-type] + + xml_body = ET.tostring(root, encoding="unicode") + + # Post-process to replace loop markers with actual Jinja loops + xml_body = self._insert_xml_loops(xml_body, role_prefix, loop_candidates, root) + + return prolog + xml_body + + def _insert_xml_loops( + self, + xml_str: str, + role_prefix: str, + loop_candidates: list[LoopCandidate], + root: ET.Element, + ) -> str: + """ + Post-process XML string to insert Jinja2 for loops. + + This replaces markers with actual loop constructs. + """ + + # Build a sample element for each loop to use as template + lines = xml_str.split("\n") + result_lines = [] + + for line in lines: + # Check if this line contains a loop marker + if "", start) + tag_name = line[start:end].strip() + + # Find matching loop candidate + candidate = None + for cand in loop_candidates: + if cand.path and cand.path[-1] == tag_name: + candidate = cand + break + + if candidate: + # Get indentation from current line + indent_level = len(line) - len(line.lstrip()) + indent_str = " " * indent_level + + # Generate loop variable name + collection_var = self.make_var_name(role_prefix, candidate.path) + item_var = candidate.loop_var + + # Create sample element with ALL possible fields from ALL items + if candidate.items: + # Merge all items to get the union of all fields + merged_dict = self._merge_dicts_for_template(candidate.items) + + sample_elem = self._dict_to_xml_element( + tag_name, merged_dict, item_var + ) + + # Apply indentation to the sample element + ET.indent(sample_elem, space=" ") + + # Convert sample to string + sample_str = ET.tostring( + sample_elem, encoding="unicode" + ).strip() + + # Add proper indentation to each line of the sample + sample_lines = sample_str.split("\n") + + # Build loop + result_lines.append( + f"{indent_str}{{% for {item_var} in {collection_var} %}}" + ) + # Add each line of the sample with proper indentation + for sample_line in sample_lines: + result_lines.append(f"{indent_str} {sample_line}") + result_lines.append(f"{indent_str}{{% endfor %}}") + else: + # Keep the marker if we can't find the candidate + result_lines.append(line) + else: + result_lines.append(line) + + # Post-process to replace and with Jinja2 conditionals + final_lines = [] + for line in result_lines: + # Replace with {% if var.field is defined %} + if "", start) + condition = line[start:end] + indent = len(line) - len(line.lstrip()) + final_lines.append(f"{' ' * indent}{{% if {condition} is defined %}}") + # Replace with {% endif %} + elif "