From 559389a35cbb10fbbf09ac6c6a6aba599a1b1742 Mon Sep 17 00:00:00 2001
From: Miguel Jacq <mig@mig5.net>
Date: Tue, 25 Nov 2025 17:38:30 +1100
Subject: [PATCH] Add support for YAML and JSON

---
 pyproject.toml          |   2 +-
 src/jinjaturtle/core.py | 254 ++++++++++++++++++++++++++++++++++++----
 tests/test_core.py      | 101 ++++++++++++++--
 3 files changed, 328 insertions(+), 29 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8e5fd67..bd3db91 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "jinjaturtle"
-version = "0.1.1"
+version = "0.1.2"
 description = "Convert config files into Ansible defaults and Jinja2 templates."
 authors = ["Miguel Jacq <mig@mig5.net>"]
 license = "GPL-3.0-or-later"
diff --git a/src/jinjaturtle/core.py b/src/jinjaturtle/core.py
index 849990b..03b159b 100644
--- a/src/jinjaturtle/core.py
+++ b/src/jinjaturtle/core.py
@@ -1,11 +1,16 @@
 from __future__ import annotations
 
 import configparser
+import json
 from pathlib import Path
 from typing import Any, Iterable
-
 import yaml
 
+try:
+    from ruamel.yaml import YAML as RuamelYAML  # for comment-preserving YAML
+except ImportError:  # pragma: no cover
+    RuamelYAML = None
+
 try:
     import tomllib  # Python 3.11+
 except ModuleNotFoundError:  # pragma: no cover
@@ -34,25 +39,9 @@ def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString):
 _TurtleDumper.add_representer(QuotedString, _quoted_str_representer)
 
 
-def _normalize_default_value(value: Any) -> Any:
-    """
-    Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans.
-
-    - bool -> QuotedString("true"/"false")
-    - "true"/"false" (any case) -> QuotedString(original_text)
-    - everything else -> unchanged
-    """
-    if isinstance(value, bool):
-        # YAML booleans are lower-case; we keep them as strings.
-        return QuotedString("true" if value else "false")
-    if isinstance(value, str) and value.lower() in {"true", "false"}:
-        return QuotedString(value)
-    return value
-
-
 def detect_format(path: Path, explicit: str | None = None) -> str:
     """
-    Determine config format (toml vs ini-ish) from argument or filename.
+    Determine config format (toml, yaml, ini-ish) from argument or filename.
     """
     if explicit:
         return explicit
@@ -60,6 +49,10 @@ def detect_format(path: Path, explicit: str | None = None) -> str:
     name = path.name.lower()
     if suffix == ".toml":
         return "toml"
+    if suffix in {".yaml", ".yml"}:
+        return "yaml"
+    if suffix == ".json":
+        return "json"
     if suffix in {".ini", ".cfg", ".conf"} or name.endswith(".ini"):
         return "ini"
     # Fallback: treat as INI-ish
@@ -84,6 +77,24 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]:
             data = tomllib.load(f)
         return fmt, data
 
+    if fmt == "yaml":
+        text = path.read_text(encoding="utf-8")
+        if RuamelYAML is not None:
+            # ruamel.yaml preserves comments; we'll reuse them in template gen
+            y = RuamelYAML()
+            y.preserve_quotes = True
+            data = y.load(text) or {}
+        else:
+            # Fallback: PyYAML (drops comments in parsed structure, but we still
+            # have the original text for comment-preserving template generation).
+            data = yaml.safe_load(text) or {}
+        return fmt, data
+
+    if fmt == "json":
+        with path.open("r", encoding="utf-8") as f:
+            data = json.load(f)
+        return fmt, data
+
     if fmt == "ini":
         parser = configparser.ConfigParser()
         parser.optionxform = str  # preserve key case
@@ -109,12 +120,17 @@ def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
     """
     items: list[tuple[tuple[str, ...], Any]] = []
 
-    if fmt == "toml":
+    if fmt in {"toml", "yaml", "json"}:
 
         def _walk(obj: Any, path: tuple[str, ...] = ()) -> None:
             if isinstance(obj, dict):
                 for k, v in obj.items():
                     _walk(v, path + (str(k),))
+            elif isinstance(obj, list) and fmt in {"yaml", "json"}:
+                # for YAML/JSON, flatten lists so each element can be templated;
+                # TOML still treats list as a single scalar (ports = [..]) which is fine.
+                for i, v in enumerate(obj):
+                    _walk(v, path + (str(i),))
             else:
                 items.append((path, obj))
 
@@ -184,6 +200,22 @@ def _split_inline_comment(text: str, comment_chars: set[str]) -> tuple[str, str]
     return text, ""
 
 
+def _normalize_default_value(value: Any) -> Any:
+    """
+    Keep boolean-looking defaults as quoted strings when dumped to YAML.
+
+    - True / False -> QuotedString("true") / QuotedString("false")
+    - str equal to "true"/"false" in any case -> QuotedString(value)
+    - anything else is returned as-is
+    """
+    if isinstance(value, str):
+        looks_boolean = value.lower() in {"true", "false"}
+        return QuotedString(value) if looks_boolean else value
+    if isinstance(value, bool):
+        return QuotedString("true" if value else "false")
+    return value
+
+
 def generate_defaults_yaml(
     role_prefix: str,
     flat_items: list[tuple[tuple[str, ...], Any]],
@@ -486,6 +518,171 @@ def _generate_toml_template_from_text(role_prefix: str, text: str) -> str:
     return "".join(out_lines)
 
 
+def _generate_yaml_template_from_text(
+    role_prefix: str,
+    text: str,
+) -> str:
+    """
+    Generate a Jinja2 template for a YAML file, preserving comments and
+    blank lines by patching scalar values in-place.
+
+    The text is scanned line by line with an indentation-based context
+    stack; it is never parsed as YAML. This handles common "config-ish"
+    YAML:
+      - top-level and nested mappings
+      - lists of scalars (one indexed variable per item)
+
+    It does *not* aim to support all YAML edge cases:
+      - a "- key: value" list item is replaced as one single scalar
+      - anchors, tags, flow collections and block scalars are not
+        interpreted
+
+    role_prefix is passed to make_var_name() together with each value's
+    key path. The return value is the text with every recognised scalar
+    replaced by a "{{ var }}" expression; quote style, indentation,
+    inline comments and line endings are kept, and lines that are not
+    recognised are copied through unchanged.
+
+    For example, with role_prefix "foobar" the line
+        foo: "bar"
+    is emitted as
+        foo: "{{ foobar_foo }}"
+    """
+    lines = text.splitlines(keepends=True)
+    out_lines: list[str] = []
+
+    # Simple indentation-based context stack: (indent, path, kind)
+    # kind is "map" or "seq". A "map" entry is the most recent key seen at
+    # that indent; a "seq" entry carries the path that its item indexes
+    # are appended to.
+    stack: list[tuple[int, tuple[str, ...], str]] = []
+
+    # Next item index of each sequence, keyed by the sequence's path
+    seq_counters: dict[tuple[str, ...], int] = {}
+
+    def current_path() -> tuple[str, ...]:
+        return stack[-1][1] if stack else ()
+
+    def patched(
+        head: str,
+        value_part: str,
+        comment_part: str,
+        path: tuple[str, ...],
+        eol: str,
+    ) -> str:
+        """Return head + Jinja2 expression for path + comment + eol."""
+        raw_value = value_part.strip()
+        expr = f"{{{{ {make_var_name(role_prefix, path)} }}}}"
+        # Keep quote-style if original was quoted
+        if (
+            len(raw_value) >= 2
+            and raw_value[0] == raw_value[-1]
+            and raw_value[0] in {'"', "'"}
+        ):
+            expr = f"{raw_value[0]}{expr}{raw_value[0]}"
+        # "#" only starts a YAML comment after whitespace, so keep the gap
+        # that separated the value from its inline comment.
+        gap = value_part[len(value_part.rstrip()) :] if comment_part else ""
+        return f"{head}{expr}{gap}{comment_part}{eol}"
+
+    for raw_line in lines:
+        # Split the line ending off first so it is emitted exactly once,
+        # whether or not an inline comment follows the value.
+        body = raw_line.rstrip("\r\n")
+        eol = raw_line[len(body) :]
+        stripped = body.lstrip()
+        indent = len(body) - len(stripped)
+
+        # Blank or pure comment lines unchanged
+        if not stripped or stripped.startswith("#"):
+            out_lines.append(raw_line)
+            continue
+
+        # Adjust stack based on indent: a dedent closes every deeper
+        # context.
+        while stack and indent < stack[-1][0]:
+            stack.pop()
+
+        # --- Handle mapping key lines: "key:" or "key: value"
+        if ":" in stripped and not stripped.startswith("- "):
+            # separate key and rest
+            key_part, rest = stripped.split(":", 1)
+            key = key_part.strip()
+            if not key:
+                out_lines.append(raw_line)
+                continue
+
+            # A sibling key closes every context opened at this indent,
+            # including a sequence written in its parent key's own column
+            # ("key:" directly followed by "- item" at the same indent).
+            while stack and stack[-1][0] == indent:
+                stack.pop()
+            path = current_path() + (key,)
+            stack.append((indent, path, "map"))
+
+            # Is this just "key:" or "key: value"?
+            value_text = rest.lstrip(" \t")
+            value_part, comment_part = _split_inline_comment(value_text, {"#"})
+            if not value_part.strip():
+                # Just "key:" -> collection or nested structure begins on
+                # following lines.
+                out_lines.append(raw_line)
+                continue
+
+            # Reuse the original whitespace after the colon (one space if
+            # there was none) rather than adding a second space to it.
+            leading = rest[: len(rest) - len(value_text)] or " "
+            head = f"{body[:indent]}{key}:{leading}"
+            out_lines.append(patched(head, value_part, comment_part, path, eol))
+            continue
+
+        # --- Handle list items: "- value" or "- key: value"
+        if stripped.startswith("- "):
+            # If top of stack isn't a sequence at this indent, push one
+            # using the current path as the sequence's path.
+            if not stack or stack[-1][0] != indent or stack[-1][2] != "seq":
+                stack.append((indent, current_path(), "seq"))
+            parent_path = stack[-1][1]
+
+            # Determine index for this parent path
+            index = seq_counters.get(parent_path, 0)
+            seq_counters[parent_path] = index + 1
+            path = parent_path + (str(index),)
+
+            # "- key: value" is not split further: the whole item text is
+            # treated as one scalar, a simple scalar being the common case.
+            value_part, comment_part = _split_inline_comment(stripped[2:], {"#"})
+            head = f"{body[:indent]}- "
+            out_lines.append(patched(head, value_part, comment_part, path, eol))
+            continue
+
+        # Anything else (multi-line scalars, weird YAML): leave untouched
+        out_lines.append(raw_line)
+
+    return "".join(out_lines)
+
+
+def _generate_json_template(role_prefix: str, data: Any) -> str:
+    """
+    Build a JSON Jinja2 template that mirrors the shape of parsed JSON data.
+
+    Dicts and lists keep their structure; every other value becomes the
+    string "{{ <var> }}", where <var> is make_var_name() of its key path.
+    """
+
+    def _templatise(node: Any, trail: tuple[str, ...]) -> Any:
+        if isinstance(node, dict):
+            return {
+                name: _templatise(child, (*trail, str(name)))
+                for name, child in node.items()
+            }
+        if isinstance(node, list):
+            return [_templatise(c, (*trail, str(n))) for n, c in enumerate(node)]
+        return f"{{{{ {make_var_name(role_prefix, trail)} }}}}"
+
+    return json.dumps(_templatise(data, ()), indent=2, ensure_ascii=False) + "\n"
+
+
 def generate_template(
     fmt: str,
     parsed: Any,
@@ -497,14 +694,19 @@ def generate_template(
 
     If original_text is provided, comments and blank lines are preserved by
     patching values in-place. Otherwise we fall back to reconstructing from
-    the parsed structure (no comments).
+    the parsed structure (no comments). JSON of course does not support
+    comments.
     """
     if original_text is not None:
         if fmt == "toml":
             return _generate_toml_template_from_text(role_prefix, original_text)
         if fmt == "ini":
             return _generate_ini_template_from_text(role_prefix, original_text)
-        raise ValueError(f"Unsupported format: {fmt}")
+        if fmt == "yaml":
+            return _generate_yaml_template_from_text(role_prefix, original_text)
+        # For JSON we ignore original_text and reconstruct from parsed structure below
+        if fmt != "json":
+            raise ValueError(f"Unsupported format: {fmt}")
 
     # Fallback: previous behaviour (no comments preserved)
     if fmt == "toml":
@@ -515,4 +717,14 @@ def generate_template(
         if not isinstance(parsed, configparser.ConfigParser):
             raise TypeError("INI parser result must be a ConfigParser")
         return _generate_ini_template(role_prefix, parsed)
+    if fmt == "yaml":
+        if not isinstance(parsed, (dict, list)):
+            raise TypeError("YAML parser result must be a dict or list")
+        return _generate_yaml_template_from_text(
+            role_prefix, yaml.safe_dump(parsed, sort_keys=False)
+        )
+    if fmt == "json":
+        if not isinstance(parsed, (dict, list)):
+            raise TypeError("JSON parser result must be a dict or list")
+        return _generate_json_template(role_prefix, parsed)
     raise ValueError(f"Unsupported format: {fmt}")
diff --git a/tests/test_core.py b/tests/test_core.py
index 374c4e9..7056518 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 from pathlib import Path
 import configparser
 import pytest
+import textwrap
 import yaml
 
 import jinjaturtle.core as core
@@ -170,13 +171,13 @@ def test_parse_config_toml_missing_tomllib(monkeypatch):
 
 def test_parse_config_unsupported_format(tmp_path: Path):
     """
-    Hit the ValueError in parse_config when fmt is neither 'toml' nor 'ini'.
+    Hit the ValueError in parse_config when fmt is not a supported format.
     """
     cfg_path = tmp_path / "config.whatever"
     cfg_path.write_text("", encoding="utf-8")
 
     with pytest.raises(ValueError):
-        parse_config(cfg_path, fmt="yaml")
+        parse_config(cfg_path, fmt="bogus")
 
 
 def test_generate_template_type_and_format_errors():
@@ -184,7 +185,8 @@ def test_generate_template_type_and_format_errors():
     Exercise the error branches in generate_template:
       - toml with non-dict parsed
       - ini with non-ConfigParser parsed
-      - completely unsupported fmt
+      - yaml with wrong parsed type
+      - completely unsupported fmt (with and without original_text)
     """
     # wrong type for TOML
     with pytest.raises(TypeError):
@@ -194,14 +196,18 @@ def test_generate_template_type_and_format_errors():
     with pytest.raises(TypeError):
         generate_template("ini", parsed={"not": "a configparser"}, role_prefix="role")
 
-    # unsupported format
-    with pytest.raises(ValueError):
+    # wrong type for YAML
+    with pytest.raises(TypeError):
         generate_template("yaml", parsed=None, role_prefix="role")
 
-    # unsupported format even when original_text is provided
+    # unsupported format, no original_text
+    with pytest.raises(ValueError):
+        generate_template("bogusfmt", parsed=None, role_prefix="role")
+
+    # unsupported format, with original_text
     with pytest.raises(ValueError):
         generate_template(
-            "yaml",
+            "bogusfmt",
             parsed=None,
             role_prefix="role",
             original_text="foo=bar",
@@ -286,3 +292,84 @@ def test_generate_toml_template_from_text_edge_cases():
     # Ensure the lines without '=' / empty key were handled without exploding.
     assert "[table]" in tmpl
     assert "noequals" in tmpl
+
+
+def test_yaml_roundtrip_with_list_and_comment(tmp_path: Path):
+    """Round-trip a YAML file holding a comment, a scalar and a list."""
+    yaml_text = """
+    # Top comment
+    foo: "bar"
+
+    blah:
+      - something
+      - else
+    """
+    # Write the fixture and keep its exact text for template generation.
+    config_file = tmp_path / "config.yaml"
+    config_file.write_text(textwrap.dedent(yaml_text), encoding="utf-8")
+    source_text = config_file.read_text(encoding="utf-8")
+
+    detected, tree = parse_config(config_file)
+    assert detected == "yaml"
+
+    # Defaults hold one variable per scalar; list items get an index suffix.
+    dumped = generate_defaults_yaml("foobar", flatten_config(detected, tree))
+    loaded = yaml.safe_load(dumped)
+    expected = {
+        "foobar_foo": "bar",
+        "foobar_blah_0": "something",
+        "foobar_blah_1": "else",
+    }
+    for var_name, value in expected.items():
+        assert loaded[var_name] == value
+
+    # The template is patched from the original text, so the comment stays.
+    rendered = generate_template(detected, tree, "foobar", original_text=source_text)
+    assert "# Top comment" in rendered
+
+    # The scalar and each list item are replaced by their own variable.
+    for needle in ("foo:", "foobar_foo", "foobar_blah_0", "foobar_blah_1"):
+        assert needle in rendered
+
+    # Neither a whole-list variable nor a generic "item" variable is used.
+    for unwanted in ("{{ foobar_blah }}", "foobar_blah_item"):
+        assert unwanted not in rendered
+
+
+def test_json_roundtrip(tmp_path: Path):
+    """Round-trip a JSON file with nested keys, a boolean and a list."""
+    json_text = """
+    {
+      "foo": "bar",
+      "nested": {
+        "a": 1,
+        "b": true
+      },
+      "list": [10, 20]
+    }
+    """
+    config_file = tmp_path / "config.json"
+    config_file.write_text(textwrap.dedent(json_text), encoding="utf-8")
+
+    detected, tree = parse_config(config_file)
+    assert detected == "json"
+
+    # Defaults use nested key names and list indices; the JSON boolean is
+    # normalized to the string "true".
+    dumped = generate_defaults_yaml("foobar", flatten_config(detected, tree))
+    loaded = yaml.safe_load(dumped)
+    expected = {
+        "foobar_foo": "bar",
+        "foobar_nested_a": 1,
+        "foobar_nested_b": "true",
+        "foobar_list_0": 10,
+        "foobar_list_1": 20,
+    }
+    for var_name, value in expected.items():
+        assert loaded[var_name] == value
+
+    # Without original_text the template is rebuilt from the parsed data.
+    rendered = generate_template(detected, tree, "foobar")
+    assert '"foo": "{{ foobar_foo }}"' in rendered
+    for var_name in expected:
+        assert var_name in rendered