Add support for YAML and JSON

2025-11-25 17:38:30 +11:00 · 2025-11-25 17:38:30 +11:00 · 559389a35c
commit 559389a35c
parent 4acc82e35b
3 changed files with 328 additions and 29 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "jinjaturtle"
-version = "0.1.1"
+version = "0.1.2"
 description = "Convert config files into Ansible defaults and Jinja2 templates."
 authors = ["Miguel Jacq <mig@mig5.net>"]
 license = "GPL-3.0-or-later"
--- a/src/jinjaturtle/core.py
+++ b/src/jinjaturtle/core.py
@ -1,11 +1,16 @@
 from __future__ import annotations
 import configparser
 import json
 from pathlib import Path
 from typing import Any, Iterable
 import yaml
 try:
    from ruamel.yaml import YAML as RuamelYAML  # for comment-preserving YAML
 except ImportError:  # pragma: no cover
    RuamelYAML = None
 try:
    import tomllib  # Python 3.11+
 except ModuleNotFoundError:  # pragma: no cover
@ -34,25 +39,9 @@ def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString):
 _TurtleDumper.add_representer(QuotedString, _quoted_str_representer)
 def _normalize_default_value(value: Any) -> Any:
    """
    Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans.

    - bool -> QuotedString("true"/"false")
    - "true"/"false" (any case) -> QuotedString(original_text)
    - everything else -> unchanged

    Returns the value to emit: a QuotedString in the two cases listed above,
    otherwise the very object that was passed in.
    """
    if isinstance(value, bool):
        # YAML booleans are lower-case; we keep them as strings.
        return QuotedString("true" if value else "false")
    if isinstance(value, str) and value.lower() in {"true", "false"}:
        # Keep the caller's spelling (e.g. "True", "FALSE") verbatim.
        return QuotedString(value)
    return value
 def detect_format(path: Path, explicit: str | None = None) -> str:
    """
-    Determine config format (toml vs ini-ish) from argument or filename.
+    Determine config format (toml, yaml, json, ini-ish) from argument or filename.
    """
    if explicit:
        return explicit
@ -60,6 +49,10 @@ def detect_format(path: Path, explicit: str | None = None) -> str:
    name = path.name.lower()
    if suffix == ".toml":
        return "toml"
    if suffix in {".yaml", ".yml"}:
        return "yaml"
    if suffix == ".json":
        return "json"
    if suffix in {".ini", ".cfg", ".conf"} or name.endswith(".ini"):
        return "ini"
    # Fallback: treat as INI-ish
@ -84,6 +77,24 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]:
            data = tomllib.load(f)
        return fmt, data
    if fmt == "yaml":
        text = path.read_text(encoding="utf-8")
        if RuamelYAML is not None:
            # ruamel.yaml preserves comments; we'll reuse them in template gen
            y = RuamelYAML()
            y.preserve_quotes = True
            data = y.load(text) or {}
        else:
            # Fallback: PyYAML (drops comments in parsed structure, but we still
            # have the original text for comment-preserving template generation).
            data = yaml.safe_load(text) or {}
        return fmt, data
    if fmt == "json":
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
        return fmt, data
    if fmt == "ini":
        parser = configparser.ConfigParser()
        parser.optionxform = str  # preserve key case
@ -109,12 +120,17 @@ def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
    """
    items: list[tuple[tuple[str, ...], Any]] = []
-    if fmt == "toml":
+    if fmt in {"toml", "yaml", "json"}:
        def _walk(obj: Any, path: tuple[str, ...] = ()) -> None:
            if isinstance(obj, dict):
                for k, v in obj.items():
                    _walk(v, path + (str(k),))
            elif isinstance(obj, list) and fmt in {"yaml", "json"}:
                # for YAML/JSON, flatten lists so each element can be templated;
                # TOML still treats list as a single scalar (ports = [..]) which is fine.
                for i, v in enumerate(obj):
                    _walk(v, path + (str(i),))
            else:
                items.append((path, obj))
@ -184,6 +200,22 @@ def _split_inline_comment(text: str, comment_chars: set[str]) -> tuple[str, str]
    return text, ""
 def _normalize_default_value(value: Any) -> Any:
    """
    Keep boolean-looking defaults as quoted strings rather than YAML booleans.

    - a str spelling "true" or "false" in any letter case is wrapped in
      QuotedString with its original text
    - a bool is turned into QuotedString("true") or QuotedString("false")
    - any other value is handed back untouched
    """
    if isinstance(value, str):
        # Only the two boolean words need quoting; other strings pass through.
        looks_boolean = value.lower() in {"true", "false"}
        return QuotedString(value) if looks_boolean else value
    if isinstance(value, bool):
        # YAML spells booleans in lower case, so the string form does too.
        word = "true" if value else "false"
        return QuotedString(word)
    return value
 def generate_defaults_yaml(
    role_prefix: str,
    flat_items: list[tuple[tuple[str, ...], Any]],
@ -486,6 +518,171 @@ def _generate_toml_template_from_text(role_prefix: str, text: str) -> str:
    return "".join(out_lines)
 def _yaml_scalar_placeholder(var_name: str, raw_value: str) -> str:
    """
    Build the Jinja2 expression that replaces *raw_value*.

    When the original scalar was wrapped in a matching pair of single or
    double quotes, the expression is wrapped in the same quotes.
    """
    expression = f"{{{{ {var_name} }}}}"
    is_quoted = (
        len(raw_value) >= 2
        and raw_value[0] == raw_value[-1]
        and raw_value[0] in {'"', "'"}
    )
    if is_quoted:
        quote = raw_value[0]
        return f"{quote}{expression}{quote}"
    return expression


 def _find_yaml_key_colon(fragment: str) -> int:
    """
    Return the index of the colon that ends a mapping key at the start of
    *fragment*, or -1 when the fragment is not a "key: value" entry.

    Only a colon followed by a space, a tab or the end of the fragment counts,
    so a plain scalar such as "http://host:80" is not mistaken for a key.
    Fragments that open a flow collection ("[" or "{") never match.
    """
    if not fragment or fragment[0] in "[{":
        return -1
    if fragment[0] in "\"'":
        # Quoted key: the colon has to follow the closing quote directly
        # (optionally separated by blanks). Escaped quotes are not handled.
        closing = fragment.find(fragment[0], 1)
        if closing == -1:
            return -1
        pos = closing + 1
        while pos < len(fragment) and fragment[pos] in " \t":
            pos += 1
        if (
            pos < len(fragment)
            and fragment[pos] == ":"
            and fragment[pos + 1 : pos + 2] in ("", " ", "\t")
        ):
            return pos
        return -1
    for pos, char in enumerate(fragment):
        if char == "#" and pos and fragment[pos - 1] in " \t":
            return -1  # everything from here on is a comment
        if char == ":" and fragment[pos + 1 : pos + 2] in ("", " ", "\t"):
            return pos
    return -1


 def _generate_yaml_template_from_text(
    role_prefix: str,
    text: str,
 ) -> str:
    """
    Generate a Jinja2 template for a YAML file, preserving comments and
    blank lines by patching scalar values in-place.

    This handles common "config-ish" YAML:
      - top-level and nested mappings
      - lists of scalars
      - lists of small mapping objects ("- key: value" plus following keys)
      - sequences written at the same indent as their parent key

    Variable names are built with make_var_name() from the path of keys and
    list indices leading to each scalar, the same path scheme flatten_config
    uses for YAML.

    Lines whose value is not a plain or quoted scalar (block scalars "|" / ">"
    and their bodies, flow collections, anchors, aliases, tags) are copied
    through unchanged, as is anything else that is not recognised.
    It does *not* aim to support all YAML edge cases.

    Args:
        role_prefix: prefix handed to make_var_name() for every variable.
        text: the original YAML document.

    Returns:
        The template text; every line keeps its original indentation and
        line ending.
    """
    out_lines: list[str] = []

    # Indentation-based context stack: (indent, path, kind) where kind is
    # "map" (a key), "seq" (a sequence) or "item" (the current sequence element).
    stack: list[tuple[int, tuple[str, ...], str]] = []

    # Next element index per sequence path
    seq_counters: dict[tuple[str, ...], int] = {}

    # Indent of the node that owns the block scalar currently being skipped
    block_owner_indent: int | None = None

    def current_path() -> tuple[str, ...]:
        return stack[-1][1] if stack else ()

    def render_value(
        path: tuple[str, ...], value_text: str, owner_indent: int
    ) -> str | None:
        """Template "value  # comment"; None means "leave the line alone"."""
        nonlocal block_owner_indent
        # NOTE(review): _split_inline_comment is assumed to return the text
        # before and from the first unquoted '#' - confirm against the helper.
        value_part, comment_part = _split_inline_comment(value_text, {"#"})
        raw_value = value_part.strip()
        if not raw_value:
            return None
        if raw_value[0] in "|>":
            # Block scalar: its body follows on deeper-indented lines.
            block_owner_indent = owner_indent
            return None
        if raw_value[0] in "[{&*!":
            # Flow collections, anchors, aliases and tags are not templated.
            return None
        # Blanks between the value and its comment are kept so that the '#'
        # still starts a comment in the rendered file.
        gap = value_part[len(value_part.rstrip()) :] if comment_part else ""
        var_name = make_var_name(role_prefix, path)
        return _yaml_scalar_placeholder(var_name, raw_value) + gap + comment_part

    def render_entry(indent: int, fragment: str, colon: int) -> str | None:
        """Template a "key: value" entry whose key starts at column *indent*."""
        key_text = fragment[:colon]
        key = key_text.strip()
        if len(key) >= 2 and key[0] == key[-1] and key[0] in {'"', "'"}:
            key = key[1:-1]  # the parsed key does not include its quotes
        if not key:
            return None

        # A key closes every context at its own indent or deeper: earlier
        # sibling keys and any sequence written at this same indent.
        while stack and stack[-1][0] >= indent:
            stack.pop()
        path = current_path() + (key,)
        stack.append((indent, path, "map"))

        rest = fragment[colon + 1 :]
        value_text = rest.lstrip(" \t")
        leading = rest[: len(rest) - len(value_text)]
        rendered = render_value(path, value_text, indent)
        if rendered is None:
            # Just "key:" (or an unsupported value): nothing to patch here.
            return None
        return f"{key_text}:{leading}{rendered}"

    def render_item(indent: int, fragment: str) -> str | None:
        """Template a "- ..." sequence element whose dash is at *indent*."""
        while stack and stack[-1][0] > indent:
            stack.pop()
        if stack and stack[-1][0] == indent and stack[-1][2] == "item":
            stack.pop()  # previous element of this same sequence
        if not stack or stack[-1][0] != indent or stack[-1][2] != "seq":
            stack.append((indent, current_path(), "seq"))
        parent_path = stack[-1][1]

        index = seq_counters.get(parent_path, 0)
        seq_counters[parent_path] = index + 1
        item_path = parent_path + (str(index),)
        # Keys on this and the following lines belong to this element.
        stack.append((indent, item_path, "item"))

        content = fragment[1:].lstrip(" \t")
        if not content or content.startswith("#"):
            # Bare "-": the element's content starts on the next line.
            return None
        dash = fragment[: len(fragment) - len(content)]
        rendered = render_fragment(indent + len(dash), content, item_path, indent)
        return None if rendered is None else dash + rendered

    def render_fragment(
        indent: int,
        fragment: str,
        scalar_path: tuple[str, ...] | None,
        owner_indent: int,
    ) -> str | None:
        """Dispatch on what *fragment* is: list item, mapping entry or scalar."""
        if fragment == "-" or fragment.startswith(("- ", "-\t")):
            return render_item(indent, fragment)
        colon = _find_yaml_key_colon(fragment)
        if colon != -1:
            return render_entry(indent, fragment, colon)
        if scalar_path is None:
            # Bare text outside a sequence (multi-line scalars, weird YAML)
            return None
        return render_value(scalar_path, fragment, owner_indent)

    for raw_line in text.splitlines(keepends=True):
        # Split the line terminator off first so it is written back exactly once.
        body = raw_line.rstrip("\r\n")
        newline = raw_line[len(body) :]
        stripped = body.lstrip()
        indent = len(body) - len(stripped)

        # Blank or pure comment lines unchanged
        if not stripped or stripped.startswith("#"):
            out_lines.append(raw_line)
            continue

        # Body of a block scalar: copy until the indentation drops back
        if block_owner_indent is not None:
            if indent > block_owner_indent:
                out_lines.append(raw_line)
                continue
            block_owner_indent = None

        rendered = render_fragment(indent, stripped, None, indent)
        if rendered is None:
            out_lines.append(raw_line)
        else:
            out_lines.append(body[:indent] + rendered + newline)

    return "".join(out_lines)
 def _generate_json_template(role_prefix: str, data: Any) -> str:
    """
    Build a JSON Jinja2 template from already-parsed JSON data.

    The structure of dicts and lists is kept. Every scalar leaf, whatever its
    type, becomes a JSON string holding a Jinja expression; the variable name
    comes from make_var_name() applied to the path of keys and list indices
    leading to the leaf. The result is indented by two spaces, keeps
    non-ASCII characters as-is and ends with a newline.
    """

    def _templatize(node: Any, trail: tuple[str, ...]) -> Any:
        if isinstance(node, dict):
            rendered: dict[Any, Any] = {}
            for key, child in node.items():
                rendered[key] = _templatize(child, trail + (str(key),))
            return rendered
        if isinstance(node, list):
            return [
                _templatize(child, trail + (str(position),))
                for position, child in enumerate(node)
            ]
        # scalar leaf
        var_name = make_var_name(role_prefix, trail)
        return f"{{{{ {var_name} }}}}"

    document = json.dumps(_templatize(data, ()), indent=2, ensure_ascii=False)
    return document + "\n"
 def generate_template(
    fmt: str,
    parsed: Any,
@ -497,14 +694,19 @@ def generate_template(
    If original_text is provided, comments and blank lines are preserved by
    patching values in-place. Otherwise we fall back to reconstructing from
-    the parsed structure (no comments).
+    the parsed structure (no comments). JSON of course does not support
    comments.
    """
    if original_text is not None:
        if fmt == "toml":
            return _generate_toml_template_from_text(role_prefix, original_text)
        if fmt == "ini":
            return _generate_ini_template_from_text(role_prefix, original_text)
-        raise ValueError(f"Unsupported format: {fmt}")
+        if fmt == "yaml":
            return _generate_yaml_template_from_text(role_prefix, original_text)
        # For JSON we ignore original_text and reconstruct from parsed structure below
        if fmt != "json":
            raise ValueError(f"Unsupported format: {fmt}")
    # Fallback: previous behaviour (no comments preserved)
    if fmt == "toml":
@ -515,4 +717,14 @@ def generate_template(
        if not isinstance(parsed, configparser.ConfigParser):
            raise TypeError("INI parser result must be a ConfigParser")
        return _generate_ini_template(role_prefix, parsed)
    if fmt == "yaml":
        if not isinstance(parsed, (dict, list)):
            raise TypeError("YAML parser result must be a dict or list")
        return _generate_yaml_template_from_text(
            role_prefix, yaml.safe_dump(parsed, sort_keys=False)
        )
    if fmt == "json":
        if not isinstance(parsed, (dict, list)):
            raise TypeError("JSON parser result must be a dict or list")
        return _generate_json_template(role_prefix, parsed)
    raise ValueError(f"Unsupported format: {fmt}")
--- a/tests/test_core.py
+++ b/tests/test_core.py
@ -3,6 +3,7 @@ from __future__ import annotations
 from pathlib import Path
 import configparser
 import pytest
 import textwrap
 import yaml
 import jinjaturtle.core as core
@ -170,13 +171,13 @@ def test_parse_config_toml_missing_tomllib(monkeypatch):
 def test_parse_config_unsupported_format(tmp_path: Path):
    """
-    Hit the ValueError in parse_config when fmt is neither 'toml' nor 'ini'.
+    Hit the ValueError in parse_config when fmt is not a supported format.
    """
    cfg_path = tmp_path / "config.whatever"
    cfg_path.write_text("", encoding="utf-8")
    with pytest.raises(ValueError):
-        parse_config(cfg_path, fmt="yaml")
+        parse_config(cfg_path, fmt="bogus")
 def test_generate_template_type_and_format_errors():
@ -184,7 +185,8 @@ def test_generate_template_type_and_format_errors():
    Exercise the error branches in generate_template:
      - toml with non-dict parsed
      - ini with non-ConfigParser parsed
-      - completely unsupported fmt
+      - yaml with wrong parsed type
      - completely unsupported fmt (with and without original_text)
    """
    # wrong type for TOML
    with pytest.raises(TypeError):
@ -194,14 +196,18 @@ def test_generate_template_type_and_format_errors():
    with pytest.raises(TypeError):
        generate_template("ini", parsed={"not": "a configparser"}, role_prefix="role")
-    # unsupported format
+    # wrong type for YAML
-    with pytest.raises(ValueError):
+    with pytest.raises(TypeError):
        generate_template("yaml", parsed=None, role_prefix="role")
-    # unsupported format even when original_text is provided
+    # unsupported format, no original_text
    with pytest.raises(ValueError):
        generate_template("bogusfmt", parsed=None, role_prefix="role")
    # unsupported format, with original_text
    with pytest.raises(ValueError):
        generate_template(
-            "yaml",
+            "bogusfmt",
            parsed=None,
            role_prefix="role",
            original_text="foo=bar",
@ -286,3 +292,84 @@ def test_generate_toml_template_from_text_edge_cases():
    # Ensure the lines without '=' / empty key were handled without exploding.
    assert "[table]" in tmpl
    assert "noequals" in tmpl
 def test_yaml_roundtrip_with_list_and_comment(tmp_path: Path):
    """
    Round-trip a small YAML file: defaults are flattened with list indices
    and the template generated from the original text keeps its comment.
    """
    yaml_text = """
    # Top comment
    foo: "bar"
    blah:
      - something
      - else
    """
    source_file = tmp_path / "config.yaml"
    source_file.write_text(textwrap.dedent(yaml_text), encoding="utf-8")

    fmt, parsed = parse_config(source_file)
    assert fmt == "yaml"

    # Defaults: keys are flattened with indices
    defaults = yaml.safe_load(
        generate_defaults_yaml("foobar", flatten_config(fmt, parsed))
    )
    expected_defaults = {
        "foobar_foo": "bar",
        "foobar_blah_0": "something",
        "foobar_blah_1": "else",
    }
    for name, expected in expected_defaults.items():
        assert defaults[name] == expected

    # Template generation (preserving comments)
    template = generate_template(
        fmt,
        parsed,
        "foobar",
        original_text=source_file.read_text(encoding="utf-8"),
    )

    # Comment kept, scalar replaced, list items use indexed variables
    for needle in (
        "# Top comment",
        "foo:",
        "foobar_foo",
        "foobar_blah_0",
        "foobar_blah_1",
    ):
        assert needle in template

    # ...and never a whole-list or generic "item" variable
    for unwanted in ("{{ foobar_blah }}", "foobar_blah_item"):
        assert unwanted not in template
 def test_json_roundtrip(tmp_path: Path):
    """
    Round-trip a small JSON file: nested keys and list indices show up in the
    flattened defaults and in the template rebuilt from the parsed data.
    """
    json_text = """
    {
      "foo": "bar",
      "nested": {
        "a": 1,
        "b": true
      },
      "list": [10, 20]
    }
    """
    source_file = tmp_path / "config.json"
    source_file.write_text(textwrap.dedent(json_text), encoding="utf-8")

    fmt, parsed = parse_config(source_file)
    assert fmt == "json"

    defaults = yaml.safe_load(
        generate_defaults_yaml("foobar", flatten_config(fmt, parsed))
    )

    # Defaults: nested keys and list indices; the bool is normalized to the
    # string "true"
    expected_defaults = {
        "foobar_foo": "bar",
        "foobar_nested_a": 1,
        "foobar_nested_b": "true",
        "foobar_list_0": 10,
        "foobar_list_1": 20,
    }
    for name, expected in expected_defaults.items():
        assert defaults[name] == expected

    # Template generation (JSON has no comments, so we just rebuild)
    template = generate_template(fmt, parsed, "foobar")
    assert '"foo": "{{ foobar_foo }}"' in template
    for variable in (
        "foobar_nested_a",
        "foobar_nested_b",
        "foobar_list_0",
        "foobar_list_1",
    ):
        assert variable in template