Add support for YAML and JSON

2025-11-25 17:38:30 +11:00 · 2025-11-25 17:38:30 +11:00 · 559389a35c
commit 559389a35c
parent 4acc82e35b
3 changed files with 328 additions and 29 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "jinjaturtle"
-version = "0.1.1"
+version = "0.1.2"
 description = "Convert config files into Ansible defaults and Jinja2 templates."
 authors = ["Miguel Jacq <mig@mig5.net>"]
 license = "GPL-3.0-or-later"
--- a/src/jinjaturtle/core.py
+++ b/src/jinjaturtle/core.py
@ -1,11 +1,16 @@
 from __future__ import annotations

 import configparser
+import json
 from pathlib import Path
 from typing import Any, Iterable
-
 import yaml

+try:
+    from ruamel.yaml import YAML as RuamelYAML  # for comment-preserving YAML
+except ImportError:  # pragma: no cover
+    RuamelYAML = None
+
 try:
    import tomllib  # Python 3.11+
 except ModuleNotFoundError:  # pragma: no cover
@ -34,25 +39,9 @@ def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString):
 _TurtleDumper.add_representer(QuotedString, _quoted_str_representer)


-def _normalize_default_value(value: Any) -> Any:
-    """
-    Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans.
-
-    - bool -> QuotedString("true"/"false")
-    - "true"/"false" (any case) -> QuotedString(original_text)
-    - everything else -> unchanged
-    """
-    if isinstance(value, bool):
-        # YAML booleans are lower-case; we keep them as strings.
-        return QuotedString("true" if value else "false")
-    if isinstance(value, str) and value.lower() in {"true", "false"}:
-        return QuotedString(value)
-    return value
-
-
 def detect_format(path: Path, explicit: str | None = None) -> str:
    """
-    Determine config format (toml vs ini-ish) from argument or filename.
+    Determine config format (toml, yaml, json, ini-ish) from argument or filename.
    """
    if explicit:
        return explicit
@ -60,6 +49,10 @@ def detect_format(path: Path, explicit: str | None = None) -> str:
    name = path.name.lower()
    if suffix == ".toml":
        return "toml"
+    if suffix in {".yaml", ".yml"}:
+        return "yaml"
+    if suffix == ".json":
+        return "json"
    if suffix in {".ini", ".cfg", ".conf"} or name.endswith(".ini"):
        return "ini"
    # Fallback: treat as INI-ish
@ -84,6 +77,24 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]:
            data = tomllib.load(f)
        return fmt, data

+    if fmt == "yaml":
+        text = path.read_text(encoding="utf-8")
+        if RuamelYAML is not None:
+            # ruamel.yaml preserves comments; we'll reuse them in template gen
+            y = RuamelYAML()
+            y.preserve_quotes = True
+            data = y.load(text) or {}
+        else:
+            # Fallback: PyYAML (drops comments in parsed structure, but we still
+            # have the original text for comment-preserving template generation).
+            data = yaml.safe_load(text) or {}
+        return fmt, data
+
+    if fmt == "json":
+        with path.open("r", encoding="utf-8") as f:
+            data = json.load(f)
+        return fmt, data
+
    if fmt == "ini":
        parser = configparser.ConfigParser()
        parser.optionxform = str  # preserve key case
@ -109,12 +120,17 @@ def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
    """
    items: list[tuple[tuple[str, ...], Any]] = []

-    if fmt == "toml":
+    if fmt in {"toml", "yaml", "json"}:

        def _walk(obj: Any, path: tuple[str, ...] = ()) -> None:
            if isinstance(obj, dict):
                for k, v in obj.items():
                    _walk(v, path + (str(k),))
+            elif isinstance(obj, list) and fmt in {"yaml", "json"}:
+                # for YAML/JSON, flatten lists so each element can be templated;
+                # TOML still treats list as a single scalar (ports = [..]) which is fine.
+                for i, v in enumerate(obj):
+                    _walk(v, path + (str(i),))
            else:
                items.append((path, obj))

@ -184,6 +200,22 @@ def _split_inline_comment(text: str, comment_chars: set[str]) -> tuple[str, str]
    return text, ""


+def _normalize_default_value(value: Any) -> Any:
+    """
+    Wrap boolean-looking defaults in QuotedString so YAML keeps them as text.
+
+    A bool becomes QuotedString("true") / QuotedString("false"); a str that
+    spells true/false in any letter case is wrapped with its text unchanged;
+    every other value is returned as-is.
+    """
+    if isinstance(value, bool):
+        text = "true" if value else "false"
+    elif isinstance(value, str) and value.lower() in {"true", "false"}:
+        text = value
+    else:
+        return value
+    return QuotedString(text)
+
+
 def generate_defaults_yaml(
    role_prefix: str,
    flat_items: list[tuple[tuple[str, ...], Any]],
@ -486,6 +518,171 @@ def _generate_toml_template_from_text(role_prefix: str, text: str) -> str:
    return "".join(out_lines)


+# Leading characters of values the line-based YAML patcher leaves untouched:
+# flow collections, block scalars, anchors, aliases and tags.
+_YAML_VERBATIM_STARTS = ("[", "{", "|", ">", "&", "*", "!")
+
+
+def _yaml_split_key(text: str) -> tuple[str, str] | None:
+    """
+    Split ``key: rest`` at the colon that ends a YAML mapping key.
+
+    The colon only counts when it is followed by a space, a tab or the end of
+    the text, so plain scalars such as ``http://host`` or ``10:30`` are not
+    mistaken for keys. When the text starts with a quote, the search begins
+    after the next matching quote so a colon inside a quoted key is skipped.
+
+    Returns (key_as_written, text_after_colon), or None when the text is not
+    a mapping entry or the key is empty.
+    """
+    search_from = 0
+    if text[:1] in {'"', "'"}:
+        closing = text.find(text[0], 1)
+        if closing == -1:
+            return None
+        search_from = closing + 1
+
+    colon = text.find(":", search_from)
+    while colon != -1 and text[colon + 1 : colon + 2] not in {"", " ", "\t"}:
+        colon = text.find(":", colon + 1)
+    if colon == -1:
+        return None
+
+    key_text = text[:colon]
+    if not key_text.strip():
+        return None
+    unquoted_part = text[search_from:colon]
+    if " #" in unquoted_part or "\t#" in unquoted_part:
+        # An inline comment starts before the colon, so the colon is comment text.
+        return None
+    return key_text, text[colon + 1 :]
+
+
+def _yaml_patch_scalar(text: str, var_name: str) -> str | None:
+    """
+    Return *text* with its scalar replaced by ``{{ var_name }}``.
+
+    Whitespace around the scalar, its quote style and any inline comment are
+    kept exactly as written. Returns None when there is nothing to replace
+    (empty or comment-only text) or when the value starts with one of
+    _YAML_VERBATIM_STARTS, which this patcher does not attempt to rewrite.
+    """
+    if not text.strip():
+        return None
+    # NOTE(review): relies on _split_inline_comment returning
+    # (value_text, comment_text), as the other template generators use it.
+    value_part, comment_part = _split_inline_comment(text, {"#"})
+    raw_value = value_part.strip()
+    if not raw_value or raw_value.startswith(_YAML_VERBATIM_STARTS):
+        return None
+
+    # Keep whatever whitespace the splitter left on the value side, so the gap
+    # before an inline comment survives and "#" stays a comment marker.
+    lead = value_part[: len(value_part) - len(value_part.lstrip())]
+    trail = value_part[len(value_part.rstrip()) :]
+    quoted = (
+        len(raw_value) >= 2
+        and raw_value[0] == raw_value[-1]
+        and raw_value[0] in {'"', "'"}
+    )
+    quote = raw_value[0] if quoted else ""
+    return f"{lead}{quote}{{{{ {var_name} }}}}{quote}{trail}{comment_part}"
+
+
+def _generate_yaml_template_from_text(
+    role_prefix: str,
+    text: str,
+) -> str:
+    """
+    Generate a Jinja2 template for a YAML file, preserving comments, blank
+    lines, indentation and line endings by patching scalar values in-place.
+
+    This handles common "config-ish" YAML:
+      - top-level and nested mappings
+      - lists of scalars
+      - lists of mapping objects ("- key: value" plus following keys)
+      - lists written at the same indent as their parent key
+    Variable names are built with make_var_name from the key path, using the
+    list index as a path element, so they line up with the paths that
+    flatten_config produces for YAML.
+
+    Left untouched on purpose: flow collections, block scalars (including
+    their indented body), anchors, aliases, tags, nested "- - x" sequences
+    and multi-line plain or quoted scalars.
+
+    Args:
+        role_prefix: prefix passed to make_var_name for every variable.
+        text: the original YAML document.
+
+    Returns:
+        The template text, line for line parallel to *text*.
+    """
+    out_lines: list[str] = []
+
+    # Indentation-based context stack: (indent, path, kind).
+    # kind is "map" (a key), "seq" (a list) or "item" (one element of a list).
+    stack: list[tuple[int, tuple[str, ...], str]] = []
+
+    # Next index to hand out, per list path.
+    seq_counters: dict[tuple[str, ...], int] = {}
+
+    # While set, lines indented deeper than this belong to a block scalar.
+    block_scalar_indent: int | None = None
+
+    def current_path() -> tuple[str, ...]:
+        return stack[-1][1] if stack else ()
+
+    for raw_line in text.splitlines(keepends=True):
+        # Split off the line ending once so it is re-attached unchanged.
+        body = raw_line.rstrip("\r\n")
+        newline = raw_line[len(body) :]
+        stripped = body.lstrip()
+        indent = len(body) - len(stripped)
+
+        if not stripped:
+            out_lines.append(raw_line)
+            continue
+        if block_scalar_indent is not None:
+            if indent > block_scalar_indent:
+                out_lines.append(raw_line)
+                continue
+            block_scalar_indent = None
+        if stripped.startswith("#"):
+            out_lines.append(raw_line)
+            continue
+
+        # Leave every context that is indented deeper than this line.
+        while stack and indent < stack[-1][0]:
+            stack.pop()
+
+        prefix = body[:indent]  # original indentation, reused verbatim
+        entry = stripped  # the "key: value" or scalar text to inspect
+        entry_indent = indent  # column where that text starts
+        item_path: tuple[str, ...] = ()
+        is_item = stripped == "-" or stripped.startswith("- ")
+
+        if is_item:
+            # Close the previous element of this same list, if any.
+            while stack and stack[-1][0] == indent and stack[-1][2] == "item":
+                stack.pop()
+            if not stack or stack[-1][0] != indent or stack[-1][2] != "seq":
+                stack.append((indent, current_path(), "seq"))
+
+            parent_path = stack[-1][1]
+            index = seq_counters.get(parent_path, 0)
+            seq_counters[parent_path] = index + 1
+            item_path = parent_path + (str(index),)
+            # Keys on following, deeper lines belong to this element.
+            stack.append((indent, item_path, "item"))
+
+            after_dash = stripped[1:]
+            entry = after_dash.lstrip(" \t")
+            gap = len(after_dash) - len(entry)
+            prefix += "-" + after_dash[:gap]
+            entry_indent = indent + 1 + gap
+
+            if not entry or entry == "-" or entry.startswith("- "):
+                # Bare dash (content follows below) or a nested one-line
+                # sequence: nothing on this line is rewritten.
+                out_lines.append(raw_line)
+                continue
+
+        keyed = _yaml_split_key(entry)
+        if keyed is None:
+            patched = None
+            if is_item:
+                patched = _yaml_patch_scalar(
+                    entry, make_var_name(role_prefix, item_path)
+                )
+                if entry.startswith(("|", ">")):
+                    block_scalar_indent = indent
+            # Non-list lines without a key (multi-line scalars, document
+            # markers, other YAML) pass through unchanged.
+            out_lines.append(
+                raw_line if patched is None else f"{prefix}{patched}{newline}"
+            )
+            continue
+
+        key_text, rest = keyed
+
+        # A key replaces every context at its own column or deeper. This also
+        # drops a finished list that sat at the same indent as its parent key.
+        while stack and stack[-1][0] >= entry_indent:
+            stack.pop()
+
+        key = key_text.strip()
+        if len(key) >= 2 and key[0] == key[-1] and key[0] in {'"', "'"}:
+            # Use the bare key in the path so it matches the parsed key.
+            key = key[1:-1]
+        path = current_path() + (key,)
+        stack.append((entry_indent, path, "map"))
+
+        patched = _yaml_patch_scalar(rest, make_var_name(role_prefix, path))
+        if patched is None:
+            # "key:" opening a nested structure, or a value left verbatim.
+            if rest.lstrip().startswith(("|", ">")):
+                block_scalar_indent = entry_indent
+            out_lines.append(raw_line)
+        else:
+            out_lines.append(f"{prefix}{key_text}:{patched}{newline}")
+
+    return "".join(out_lines)
+
+
+def _generate_json_template(role_prefix: str, data: Any) -> str:
+    """
+    Build a JSON Jinja2 template from already-parsed JSON data.
+
+    Dicts and lists keep their structure. Every other value, whatever its
+    original JSON type, is swapped for a string holding the Jinja expression
+    for its path (keys and list indices, joined by make_var_name), so it is
+    serialised as a quoted JSON string. The result is dumped with a two-space
+    indent, non-ASCII characters left as-is, and a trailing newline.
+    """
+
+    def _templated(node: Any, trail: tuple[str, ...]) -> Any:
+        if isinstance(node, dict):
+            converted = {}
+            for key, child in node.items():
+                converted[key] = _templated(child, trail + (str(key),))
+            return converted
+        if isinstance(node, list):
+            return [
+                _templated(child, trail + (str(position),))
+                for position, child in enumerate(node)
+            ]
+        # Anything that is not a container is a leaf to be templated.
+        return "{{{{ {} }}}}".format(make_var_name(role_prefix, trail))
+
+    rendered = json.dumps(_templated(data, ()), indent=2, ensure_ascii=False)
+    return f"{rendered}\n"
+
+
 def generate_template(
    fmt: str,
    parsed: Any,
@ -497,13 +694,18 @@ def generate_template(

    If original_text is provided, comments and blank lines are preserved by
    patching values in-place. Otherwise we fall back to reconstructing from
-    the parsed structure (no comments).
+    the parsed structure (no comments). JSON of course does not support
+    comments.
    """
    if original_text is not None:
        if fmt == "toml":
            return _generate_toml_template_from_text(role_prefix, original_text)
        if fmt == "ini":
            return _generate_ini_template_from_text(role_prefix, original_text)
+        if fmt == "yaml":
+            return _generate_yaml_template_from_text(role_prefix, original_text)
+        # For JSON we ignore original_text and reconstruct from parsed structure below
+        if fmt != "json":
            raise ValueError(f"Unsupported format: {fmt}")

    # Fallback: previous behaviour (no comments preserved)
@ -515,4 +717,14 @@ def generate_template(
        if not isinstance(parsed, configparser.ConfigParser):
            raise TypeError("INI parser result must be a ConfigParser")
        return _generate_ini_template(role_prefix, parsed)
+    if fmt == "yaml":
+        if not isinstance(parsed, (dict, list)):
+            raise TypeError("YAML parser result must be a dict or list")
+        return _generate_yaml_template_from_text(
+            role_prefix, yaml.safe_dump(parsed, sort_keys=False)
+        )
+    if fmt == "json":
+        if not isinstance(parsed, (dict, list)):
+            raise TypeError("JSON parser result must be a dict or list")
+        return _generate_json_template(role_prefix, parsed)
    raise ValueError(f"Unsupported format: {fmt}")
--- a/tests/test_core.py
+++ b/tests/test_core.py
@ -3,6 +3,7 @@ from __future__ import annotations
 from pathlib import Path
 import configparser
 import pytest
+import textwrap
 import yaml

 import jinjaturtle.core as core
@ -170,13 +171,13 @@ def test_parse_config_toml_missing_tomllib(monkeypatch):

 def test_parse_config_unsupported_format(tmp_path: Path):
    """
-    Hit the ValueError in parse_config when fmt is neither 'toml' nor 'ini'.
+    Hit the ValueError in parse_config when fmt is not a supported format.
    """
    cfg_path = tmp_path / "config.whatever"
    cfg_path.write_text("", encoding="utf-8")

    with pytest.raises(ValueError):
-        parse_config(cfg_path, fmt="yaml")
+        parse_config(cfg_path, fmt="bogus")


 def test_generate_template_type_and_format_errors():
@ -184,7 +185,8 @@ def test_generate_template_type_and_format_errors():
    Exercise the error branches in generate_template:
      - toml with non-dict parsed
      - ini with non-ConfigParser parsed
-      - completely unsupported fmt
+      - yaml with wrong parsed type
+      - completely unsupported fmt (with and without original_text)
    """
    # wrong type for TOML
    with pytest.raises(TypeError):
@ -194,14 +196,18 @@ def test_generate_template_type_and_format_errors():
    with pytest.raises(TypeError):
        generate_template("ini", parsed={"not": "a configparser"}, role_prefix="role")

-    # unsupported format
-    with pytest.raises(ValueError):
+    # wrong type for YAML
+    with pytest.raises(TypeError):
        generate_template("yaml", parsed=None, role_prefix="role")

-    # unsupported format even when original_text is provided
+    # unsupported format, no original_text
+    with pytest.raises(ValueError):
+        generate_template("bogusfmt", parsed=None, role_prefix="role")
+
+    # unsupported format, with original_text
    with pytest.raises(ValueError):
        generate_template(
-            "yaml",
+            "bogusfmt",
            parsed=None,
            role_prefix="role",
            original_text="foo=bar",
@ -286,3 +292,84 @@ def test_generate_toml_template_from_text_edge_cases():
    # Ensure the lines without '=' / empty key were handled without exploding.
    assert "[table]" in tmpl
    assert "noequals" in tmpl
+
+
+def test_yaml_roundtrip_with_list_and_comment(tmp_path: Path):
+    """
+    Parse a YAML file holding a comment, a quoted scalar and a list, then
+    check both the flattened defaults and the comment-preserving template.
+    """
+    yaml_text = """
+    # Top comment
+    foo: "bar"
+
+    blah:
+      - something
+      - else
+    """
+    config_file = tmp_path / "config.yaml"
+    config_file.write_text(textwrap.dedent(yaml_text), encoding="utf-8")
+
+    detected, tree = parse_config(config_file)
+    assert detected == "yaml"
+
+    defaults = yaml.safe_load(
+        generate_defaults_yaml("foobar", flatten_config(detected, tree))
+    )
+
+    # Defaults: list elements are flattened into index-suffixed keys.
+    expected_defaults = {
+        "foobar_foo": "bar",
+        "foobar_blah_0": "something",
+        "foobar_blah_1": "else",
+    }
+    for name, value in expected_defaults.items():
+        assert defaults[name] == value
+
+    # Template generation from the file's own text, so comments survive.
+    template = generate_template(
+        detected,
+        tree,
+        "foobar",
+        original_text=config_file.read_text(encoding="utf-8"),
+    )
+
+    # Comment kept, scalar replaced, list items given indexed variables.
+    must_contain = (
+        "# Top comment",
+        "foo:",
+        "foobar_foo",
+        "foobar_blah_0",
+        "foobar_blah_1",
+    )
+    for fragment in must_contain:
+        assert fragment in template
+
+    # Neither a whole-list variable nor a generic "item" name may appear.
+    for fragment in ("{{ foobar_blah }}", "foobar_blah_item"):
+        assert fragment not in template
+
+
+def test_json_roundtrip(tmp_path: Path):
+    """
+    Parse a JSON file with a nested object and a list, then check the
+    flattened defaults and the template rebuilt from the parsed data.
+    """
+    json_text = """
+    {
+      "foo": "bar",
+      "nested": {
+        "a": 1,
+        "b": true
+      },
+      "list": [10, 20]
+    }
+    """
+    config_file = tmp_path / "config.json"
+    config_file.write_text(textwrap.dedent(json_text), encoding="utf-8")
+
+    detected, tree = parse_config(config_file)
+    assert detected == "json"
+
+    defaults = yaml.safe_load(
+        generate_defaults_yaml("foobar", flatten_config(detected, tree))
+    )
+
+    # Nested keys and list indices are flattened; the JSON boolean is
+    # expected back as the string "true".
+    expected_defaults = {
+        "foobar_foo": "bar",
+        "foobar_nested_a": 1,
+        "foobar_nested_b": "true",
+        "foobar_list_0": 10,
+        "foobar_list_1": 20,
+    }
+    for name, value in expected_defaults.items():
+        assert defaults[name] == value
+
+    # No original_text is passed: the template is rebuilt from parsed data.
+    template = generate_template(detected, tree, "foobar")
+
+    must_contain = (
+        '"foo": "{{ foobar_foo }}"',
+        "foobar_nested_a",
+        "foobar_nested_b",
+        "foobar_list_0",
+        "foobar_list_1",
+    )
+    for fragment in must_contain:
+        assert fragment in template