From 559389a35cbb10fbbf09ac6c6a6aba599a1b1742 Mon Sep 17 00:00:00 2001 From: Miguel Jacq Date: Tue, 25 Nov 2025 17:38:30 +1100 Subject: [PATCH] Add support for YAML and JSON --- pyproject.toml | 2 +- src/jinjaturtle/core.py | 254 ++++++++++++++++++++++++++++++++++++---- tests/test_core.py | 101 ++++++++++++++-- 3 files changed, 328 insertions(+), 29 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8e5fd67..bd3db91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "jinjaturtle" -version = "0.1.1" +version = "0.1.2" description = "Convert config files into Ansible defaults and Jinja2 templates." authors = ["Miguel Jacq "] license = "GPL-3.0-or-later" diff --git a/src/jinjaturtle/core.py b/src/jinjaturtle/core.py index 849990b..03b159b 100644 --- a/src/jinjaturtle/core.py +++ b/src/jinjaturtle/core.py @@ -1,11 +1,16 @@ from __future__ import annotations import configparser +import json from pathlib import Path from typing import Any, Iterable - import yaml +try: + from ruamel.yaml import YAML as RuamelYAML # for comment-preserving YAML +except ImportError: # pragma: no cover + RuamelYAML = None + try: import tomllib # Python 3.11+ except ModuleNotFoundError: # pragma: no cover @@ -34,25 +39,9 @@ def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString): _TurtleDumper.add_representer(QuotedString, _quoted_str_representer) -def _normalize_default_value(value: Any) -> Any: - """ - Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans. - - - bool -> QuotedString("true"/"false") - - "true"/"false" (any case) -> QuotedString(original_text) - - everything else -> unchanged - """ - if isinstance(value, bool): - # YAML booleans are lower-case; we keep them as strings. - return QuotedString("true" if value else "false") - if isinstance(value, str) and value.lower() in {"true", "false"}: - return QuotedString(value) - return value - - def detect_format(path: Path, explicit: str | None = None) -> str: """ - Determine config format (toml vs ini-ish) from argument or filename. + Determine config format (toml, yaml, ini-ish) from argument or filename. """ if explicit: return explicit @@ -60,6 +49,10 @@ def detect_format(path: Path, explicit: str | None = None) -> str: name = path.name.lower() if suffix == ".toml": return "toml" + if suffix in {".yaml", ".yml"}: + return "yaml" + if suffix == ".json": + return "json" if suffix in {".ini", ".cfg", ".conf"} or name.endswith(".ini"): return "ini" # Fallback: treat as INI-ish @@ -84,6 +77,24 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]: data = tomllib.load(f) return fmt, data + if fmt == "yaml": + text = path.read_text(encoding="utf-8") + if RuamelYAML is not None: + # ruamel.yaml preserves comments; we'll reuse them in template gen + y = RuamelYAML() + y.preserve_quotes = True + data = y.load(text) or {} + else: + # Fallback: PyYAML (drops comments in parsed structure, but we still + # have the original text for comment-preserving template generation). + data = yaml.safe_load(text) or {} + return fmt, data + + if fmt == "json": + with path.open("r", encoding="utf-8") as f: + data = json.load(f) + return fmt, data + if fmt == "ini": parser = configparser.ConfigParser() parser.optionxform = str # preserve key case @@ -109,12 +120,17 @@ def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]: """ items: list[tuple[tuple[str, ...], Any]] = [] - if fmt == "toml": + if fmt in {"toml", "yaml", "json"}: def _walk(obj: Any, path: tuple[str, ...] = ()) -> None: if isinstance(obj, dict): for k, v in obj.items(): _walk(v, path + (str(k),)) + elif isinstance(obj, list) and fmt in {"yaml", "json"}: + # for YAML/JSON, flatten lists so each element can be templated; + # TOML still treats list as a single scalar (ports = [..]) which is fine. + for i, v in enumerate(obj): + _walk(v, path + (str(i),)) else: items.append((path, obj)) @@ -184,6 +200,22 @@ def _split_inline_comment(text: str, comment_chars: set[str]) -> tuple[str, str] return text, "" +def _normalize_default_value(value: Any) -> Any: + """ + Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans. + + - bool -> QuotedString("true"/"false") + - "true"/"false" (any case) -> QuotedString(original_text) + - everything else -> unchanged + """ + if isinstance(value, bool): + # YAML booleans are lower-case; we keep them as strings. + return QuotedString("true" if value else "false") + if isinstance(value, str) and value.lower() in {"true", "false"}: + return QuotedString(value) + return value + + def generate_defaults_yaml( role_prefix: str, flat_items: list[tuple[tuple[str, ...], Any]], @@ -486,6 +518,171 @@ def _generate_toml_template_from_text(role_prefix: str, text: str) -> str: return "".join(out_lines) +def _generate_yaml_template_from_text( + role_prefix: str, + text: str, +) -> str: + """ + Generate a Jinja2 template for a YAML file, preserving comments and + blank lines by patching scalar values in-place. + + This handles common "config-ish" YAML: + - top-level and nested mappings + - lists of scalars + - lists of small mapping objects + It does *not* aim to support all YAML edge cases (anchors, tags, etc.). + """ + lines = text.splitlines(keepends=True) + out_lines: list[str] = [] + + # Simple indentation-based context stack: (indent, path, kind) + # kind is "map" or "seq". + stack: list[tuple[int, tuple[str, ...], str]] = [] + + # Track index per parent path for sequences + seq_counters: dict[tuple[str, ...], int] = {} + + def current_path() -> tuple[str, ...]: + return stack[-1][1] if stack else () + + for raw_line in lines: + stripped = raw_line.lstrip() + indent = len(raw_line) - len(stripped) + + # Blank or pure comment lines unchanged + if not stripped or stripped.startswith("#"): + out_lines.append(raw_line) + continue + + # Adjust stack based on indent + while stack and indent < stack[-1][0]: + stack.pop() + + # --- Handle mapping key lines: "key:" or "key: value" + if ":" in stripped and not stripped.lstrip().startswith("- "): + # separate key and rest + key_part, rest = stripped.split(":", 1) + key = key_part.strip() + if not key: + out_lines.append(raw_line) + continue + + # Is this just "key:" or "key: value"? + rest_stripped = rest.lstrip(" \t") + + # Use the same inline-comment splitter to see if there's any real value + value_candidate, _ = _split_inline_comment(rest_stripped, {"#"}) + has_value = bool(value_candidate.strip()) + + # Update stack/context: current mapping at this indent + # Replace any existing mapping at same indent + if stack and stack[-1][0] == indent and stack[-1][2] == "map": + stack.pop() + path = current_path() + (key,) + stack.append((indent, path, "map")) + + if not has_value: + # Just "key:" -> collection or nested structure begins on following lines. + out_lines.append(raw_line) + continue + + # We have an inline scalar value on this same line. + + # Separate value from inline comment + value_part, comment_part = _split_inline_comment(rest_stripped, {"#"}) + raw_value = value_part.strip() + var_name = make_var_name(role_prefix, path) + + # Keep quote-style if original was quoted + use_quotes = ( + len(raw_value) >= 2 + and raw_value[0] == raw_value[-1] + and raw_value[0] in {'"', "'"} + ) + + if use_quotes: + q = raw_value[0] + replacement = f"{q}{{{{ {var_name} }}}}{q}" + else: + replacement = f"{{{{ {var_name} }}}}" + + leading = rest[: len(rest) - len(rest.lstrip(" \t"))] + new_stripped = f"{key}: {leading}{replacement}{comment_part}" + out_lines.append( + " " * indent + new_stripped + ("\n" if raw_line.endswith("\n") else "") + ) + continue + + # --- Handle list items: "- value" or "- key: value" + if stripped.startswith("- "): + # Determine parent path + # If top of stack isn't sequence at this indent, push one using current path + if not stack or stack[-1][0] != indent or stack[-1][2] != "seq": + parent_path = current_path() + stack.append((indent, parent_path, "seq")) + + parent_path = stack[-1][1] + content = stripped[2:] # after "- " + parent_path = stack[-1][1] + content = stripped[2:] # after "- " + + # Determine index for this parent path + index = seq_counters.get(parent_path, 0) + seq_counters[parent_path] = index + 1 + + path = parent_path + (str(index),) + + value_part, comment_part = _split_inline_comment(content, {"#"}) + raw_value = value_part.strip() + var_name = make_var_name(role_prefix, path) + + # If it's of the form "key: value" inside the list, we could try to + # support that, but a simple scalar is the common case: + use_quotes = ( + len(raw_value) >= 2 + and raw_value[0] == raw_value[-1] + and raw_value[0] in {'"', "'"} + ) + + if use_quotes: + q = raw_value[0] + replacement = f"{q}{{{{ {var_name} }}}}{q}" + else: + replacement = f"{{{{ {var_name} }}}}" + + new_stripped = f"- {replacement}{comment_part}" + out_lines.append( + " " * indent + new_stripped + ("\n" if raw_line.endswith("\n") else "") + ) + continue + + # Anything else (multi-line scalars, weird YAML): leave untouched + out_lines.append(raw_line) + + return "".join(out_lines) + + +def _generate_json_template(role_prefix: str, data: Any) -> str: + """ + Generate a JSON Jinja2 template from parsed JSON data. + + All scalar values are replaced with Jinja expressions whose names are + derived from the path, similar to TOML/YAML. + """ + + def _walk(obj: Any, path: tuple[str, ...] = ()) -> Any: + if isinstance(obj, dict): + return {k: _walk(v, path + (str(k),)) for k, v in obj.items()} + if isinstance(obj, list): + return [_walk(v, path + (str(i),)) for i, v in enumerate(obj)] + # scalar + var_name = make_var_name(role_prefix, path) + return f"{{{{ {var_name} }}}}" + + templated = _walk(data) + return json.dumps(templated, indent=2, ensure_ascii=False) + "\n" + + def generate_template( fmt: str, parsed: Any, @@ -497,14 +694,19 @@ def generate_template( If original_text is provided, comments and blank lines are preserved by patching values in-place. Otherwise we fall back to reconstructing from - the parsed structure (no comments). + the parsed structure (no comments). JSON of course does not support + comments. """ if original_text is not None: if fmt == "toml": return _generate_toml_template_from_text(role_prefix, original_text) if fmt == "ini": return _generate_ini_template_from_text(role_prefix, original_text) - raise ValueError(f"Unsupported format: {fmt}") + if fmt == "yaml": + return _generate_yaml_template_from_text(role_prefix, original_text) + # For JSON we ignore original_text and reconstruct from parsed structure below + if fmt != "json": + raise ValueError(f"Unsupported format: {fmt}") # Fallback: previous behaviour (no comments preserved) if fmt == "toml": @@ -515,4 +717,14 @@ def generate_template( if not isinstance(parsed, configparser.ConfigParser): raise TypeError("INI parser result must be a ConfigParser") return _generate_ini_template(role_prefix, parsed) + if fmt == "yaml": + if not isinstance(parsed, (dict, list)): + raise TypeError("YAML parser result must be a dict or list") + return _generate_yaml_template_from_text( + role_prefix, yaml.safe_dump(parsed, sort_keys=False) + ) + if fmt == "json": + if not isinstance(parsed, (dict, list)): + raise TypeError("JSON parser result must be a dict or list") + return _generate_json_template(role_prefix, parsed) raise ValueError(f"Unsupported format: {fmt}") diff --git a/tests/test_core.py b/tests/test_core.py index 374c4e9..7056518 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -3,6 +3,7 @@ from __future__ import annotations from pathlib import Path import configparser import pytest +import textwrap import yaml import jinjaturtle.core as core @@ -170,13 +171,13 @@ def test_parse_config_toml_missing_tomllib(monkeypatch): def test_parse_config_unsupported_format(tmp_path: Path): """ - Hit the ValueError in parse_config when fmt is neither 'toml' nor 'ini'. + Hit the ValueError in parse_config when fmt is not a supported format. """ cfg_path = tmp_path / "config.whatever" cfg_path.write_text("", encoding="utf-8") with pytest.raises(ValueError): - parse_config(cfg_path, fmt="yaml") + parse_config(cfg_path, fmt="bogus") def test_generate_template_type_and_format_errors(): @@ -184,7 +185,8 @@ def test_generate_template_type_and_format_errors(): Exercise the error branches in generate_template: - toml with non-dict parsed - ini with non-ConfigParser parsed - - completely unsupported fmt + - yaml with wrong parsed type + - completely unsupported fmt (with and without original_text) """ # wrong type for TOML with pytest.raises(TypeError): @@ -194,14 +196,18 @@ def test_generate_template_type_and_format_errors(): with pytest.raises(TypeError): generate_template("ini", parsed={"not": "a configparser"}, role_prefix="role") - # unsupported format - with pytest.raises(ValueError): + # wrong type for YAML + with pytest.raises(TypeError): generate_template("yaml", parsed=None, role_prefix="role") - # unsupported format even when original_text is provided + # unsupported format, no original_text + with pytest.raises(ValueError): + generate_template("bogusfmt", parsed=None, role_prefix="role") + + # unsupported format, with original_text with pytest.raises(ValueError): generate_template( - "yaml", + "bogusfmt", parsed=None, role_prefix="role", original_text="foo=bar", @@ -286,3 +292,84 @@ def test_generate_toml_template_from_text_edge_cases(): # Ensure the lines without '=' / empty key were handled without exploding. assert "[table]" in tmpl assert "noequals" in tmpl + + +def test_yaml_roundtrip_with_list_and_comment(tmp_path: Path): + yaml_text = """ + # Top comment + foo: "bar" + + blah: + - something + - else + """ + cfg_path = tmp_path / "config.yaml" + cfg_path.write_text(textwrap.dedent(yaml_text), encoding="utf-8") + + fmt, parsed = parse_config(cfg_path) + assert fmt == "yaml" + + flat_items = flatten_config(fmt, parsed) + defaults_yaml = generate_defaults_yaml("foobar", flat_items) + defaults = yaml.safe_load(defaults_yaml) + + # Defaults: keys are flattened with indices + assert defaults["foobar_foo"] == "bar" + assert defaults["foobar_blah_0"] == "something" + assert defaults["foobar_blah_1"] == "else" + + # Template generation (preserving comments) + original_text = cfg_path.read_text(encoding="utf-8") + template = generate_template(fmt, parsed, "foobar", original_text=original_text) + + # Comment preserved + assert "# Top comment" in template + + # Scalar replacement + assert "foo:" in template + assert "foobar_foo" in template + + # List items use indexed vars, not "item" + assert "foobar_blah_0" in template + assert "foobar_blah_1" in template + assert "{{ foobar_blah }}" not in template + assert "foobar_blah_item" not in template + + +def test_json_roundtrip(tmp_path: Path): + json_text = """ + { + "foo": "bar", + "nested": { + "a": 1, + "b": true + }, + "list": [10, 20] + } + """ + cfg_path = tmp_path / "config.json" + cfg_path.write_text(textwrap.dedent(json_text), encoding="utf-8") + + fmt, parsed = parse_config(cfg_path) + assert fmt == "json" + + flat_items = flatten_config(fmt, parsed) + defaults_yaml = generate_defaults_yaml("foobar", flat_items) + defaults = yaml.safe_load(defaults_yaml) + + # Defaults: nested keys and list indices + assert defaults["foobar_foo"] == "bar" + assert defaults["foobar_nested_a"] == 1 + # Bool normalized to string "true" + assert defaults["foobar_nested_b"] == "true" + assert defaults["foobar_list_0"] == 10 + assert defaults["foobar_list_1"] == 20 + + # Template generation (JSON has no comments, so we just rebuild) + template = generate_template(fmt, parsed, "foobar") + + assert '"foo": "{{ foobar_foo }}"' in template + assert "foobar_nested_a" in template + assert "foobar_nested_b" in template + assert "foobar_list_0" in template + assert "foobar_list_1" in template