Add support for YAML and JSON
All checks were successful
CI / test (push) Successful in 41s
Lint / test (push) Successful in 23s
Trivy / test (push) Successful in 21s

This commit is contained in:
Miguel Jacq 2025-11-25 17:38:30 +11:00
parent 4acc82e35b
commit 559389a35c
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
3 changed files with 328 additions and 29 deletions

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "jinjaturtle" name = "jinjaturtle"
version = "0.1.1" version = "0.1.2"
description = "Convert config files into Ansible defaults and Jinja2 templates." description = "Convert config files into Ansible defaults and Jinja2 templates."
authors = ["Miguel Jacq <mig@mig5.net>"] authors = ["Miguel Jacq <mig@mig5.net>"]
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"

View file

@ -1,11 +1,16 @@
from __future__ import annotations from __future__ import annotations
import configparser import configparser
import json
from pathlib import Path from pathlib import Path
from typing import Any, Iterable from typing import Any, Iterable
import yaml import yaml
try:
from ruamel.yaml import YAML as RuamelYAML # for comment-preserving YAML
except ImportError: # pragma: no cover
RuamelYAML = None
try: try:
import tomllib # Python 3.11+ import tomllib # Python 3.11+
except ModuleNotFoundError: # pragma: no cover except ModuleNotFoundError: # pragma: no cover
@ -34,25 +39,9 @@ def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString):
_TurtleDumper.add_representer(QuotedString, _quoted_str_representer) _TurtleDumper.add_representer(QuotedString, _quoted_str_representer)
def _normalize_default_value(value: Any) -> Any:
"""
Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans.
- bool -> QuotedString("true"/"false")
- "true"/"false" (any case) -> QuotedString(original_text)
- everything else -> unchanged
"""
if isinstance(value, bool):
# YAML booleans are lower-case; we keep them as strings.
return QuotedString("true" if value else "false")
if isinstance(value, str) and value.lower() in {"true", "false"}:
return QuotedString(value)
return value
def detect_format(path: Path, explicit: str | None = None) -> str: def detect_format(path: Path, explicit: str | None = None) -> str:
""" """
Determine config format (toml vs ini-ish) from argument or filename. Determine config format (toml, yaml, ini-ish) from argument or filename.
""" """
if explicit: if explicit:
return explicit return explicit
@ -60,6 +49,10 @@ def detect_format(path: Path, explicit: str | None = None) -> str:
name = path.name.lower() name = path.name.lower()
if suffix == ".toml": if suffix == ".toml":
return "toml" return "toml"
if suffix in {".yaml", ".yml"}:
return "yaml"
if suffix == ".json":
return "json"
if suffix in {".ini", ".cfg", ".conf"} or name.endswith(".ini"): if suffix in {".ini", ".cfg", ".conf"} or name.endswith(".ini"):
return "ini" return "ini"
# Fallback: treat as INI-ish # Fallback: treat as INI-ish
@ -84,6 +77,24 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]:
data = tomllib.load(f) data = tomllib.load(f)
return fmt, data return fmt, data
if fmt == "yaml":
text = path.read_text(encoding="utf-8")
if RuamelYAML is not None:
# ruamel.yaml preserves comments; we'll reuse them in template gen
y = RuamelYAML()
y.preserve_quotes = True
data = y.load(text) or {}
else:
# Fallback: PyYAML (drops comments in parsed structure, but we still
# have the original text for comment-preserving template generation).
data = yaml.safe_load(text) or {}
return fmt, data
if fmt == "json":
with path.open("r", encoding="utf-8") as f:
data = json.load(f)
return fmt, data
if fmt == "ini": if fmt == "ini":
parser = configparser.ConfigParser() parser = configparser.ConfigParser()
parser.optionxform = str # preserve key case parser.optionxform = str # preserve key case
@ -109,12 +120,17 @@ def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
""" """
items: list[tuple[tuple[str, ...], Any]] = [] items: list[tuple[tuple[str, ...], Any]] = []
if fmt == "toml": if fmt in {"toml", "yaml", "json"}:
def _walk(obj: Any, path: tuple[str, ...] = ()) -> None: def _walk(obj: Any, path: tuple[str, ...] = ()) -> None:
if isinstance(obj, dict): if isinstance(obj, dict):
for k, v in obj.items(): for k, v in obj.items():
_walk(v, path + (str(k),)) _walk(v, path + (str(k),))
elif isinstance(obj, list) and fmt in {"yaml", "json"}:
# for YAML/JSON, flatten lists so each element can be templated;
# TOML still treats list as a single scalar (ports = [..]) which is fine.
for i, v in enumerate(obj):
_walk(v, path + (str(i),))
else: else:
items.append((path, obj)) items.append((path, obj))
@ -184,6 +200,22 @@ def _split_inline_comment(text: str, comment_chars: set[str]) -> tuple[str, str]
return text, "" return text, ""
def _normalize_default_value(value: Any) -> Any:
"""
Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans.
- bool -> QuotedString("true"/"false")
- "true"/"false" (any case) -> QuotedString(original_text)
- everything else -> unchanged
"""
if isinstance(value, bool):
# YAML booleans are lower-case; we keep them as strings.
return QuotedString("true" if value else "false")
if isinstance(value, str) and value.lower() in {"true", "false"}:
return QuotedString(value)
return value
def generate_defaults_yaml( def generate_defaults_yaml(
role_prefix: str, role_prefix: str,
flat_items: list[tuple[tuple[str, ...], Any]], flat_items: list[tuple[tuple[str, ...], Any]],
@ -486,6 +518,171 @@ def _generate_toml_template_from_text(role_prefix: str, text: str) -> str:
return "".join(out_lines) return "".join(out_lines)
def _generate_yaml_template_from_text(
role_prefix: str,
text: str,
) -> str:
"""
Generate a Jinja2 template for a YAML file, preserving comments and
blank lines by patching scalar values in-place.
This handles common "config-ish" YAML:
- top-level and nested mappings
- lists of scalars
- lists of small mapping objects
It does *not* aim to support all YAML edge cases (anchors, tags, etc.).
"""
lines = text.splitlines(keepends=True)
out_lines: list[str] = []
# Simple indentation-based context stack: (indent, path, kind)
# kind is "map" or "seq".
stack: list[tuple[int, tuple[str, ...], str]] = []
# Track index per parent path for sequences
seq_counters: dict[tuple[str, ...], int] = {}
def current_path() -> tuple[str, ...]:
return stack[-1][1] if stack else ()
for raw_line in lines:
stripped = raw_line.lstrip()
indent = len(raw_line) - len(stripped)
# Blank or pure comment lines unchanged
if not stripped or stripped.startswith("#"):
out_lines.append(raw_line)
continue
# Adjust stack based on indent
while stack and indent < stack[-1][0]:
stack.pop()
# --- Handle mapping key lines: "key:" or "key: value"
if ":" in stripped and not stripped.lstrip().startswith("- "):
# separate key and rest
key_part, rest = stripped.split(":", 1)
key = key_part.strip()
if not key:
out_lines.append(raw_line)
continue
# Is this just "key:" or "key: value"?
rest_stripped = rest.lstrip(" \t")
# Use the same inline-comment splitter to see if there's any real value
value_candidate, _ = _split_inline_comment(rest_stripped, {"#"})
has_value = bool(value_candidate.strip())
# Update stack/context: current mapping at this indent
# Replace any existing mapping at same indent
if stack and stack[-1][0] == indent and stack[-1][2] == "map":
stack.pop()
path = current_path() + (key,)
stack.append((indent, path, "map"))
if not has_value:
# Just "key:" -> collection or nested structure begins on following lines.
out_lines.append(raw_line)
continue
# We have an inline scalar value on this same line.
# Separate value from inline comment
value_part, comment_part = _split_inline_comment(rest_stripped, {"#"})
raw_value = value_part.strip()
var_name = make_var_name(role_prefix, path)
# Keep quote-style if original was quoted
use_quotes = (
len(raw_value) >= 2
and raw_value[0] == raw_value[-1]
and raw_value[0] in {'"', "'"}
)
if use_quotes:
q = raw_value[0]
replacement = f"{q}{{{{ {var_name} }}}}{q}"
else:
replacement = f"{{{{ {var_name} }}}}"
leading = rest[: len(rest) - len(rest.lstrip(" \t"))]
new_stripped = f"{key}: {leading}{replacement}{comment_part}"
out_lines.append(
" " * indent + new_stripped + ("\n" if raw_line.endswith("\n") else "")
)
continue
# --- Handle list items: "- value" or "- key: value"
if stripped.startswith("- "):
# Determine parent path
# If top of stack isn't sequence at this indent, push one using current path
if not stack or stack[-1][0] != indent or stack[-1][2] != "seq":
parent_path = current_path()
stack.append((indent, parent_path, "seq"))
parent_path = stack[-1][1]
content = stripped[2:] # after "- "
parent_path = stack[-1][1]
content = stripped[2:] # after "- "
# Determine index for this parent path
index = seq_counters.get(parent_path, 0)
seq_counters[parent_path] = index + 1
path = parent_path + (str(index),)
value_part, comment_part = _split_inline_comment(content, {"#"})
raw_value = value_part.strip()
var_name = make_var_name(role_prefix, path)
# If it's of the form "key: value" inside the list, we could try to
# support that, but a simple scalar is the common case:
use_quotes = (
len(raw_value) >= 2
and raw_value[0] == raw_value[-1]
and raw_value[0] in {'"', "'"}
)
if use_quotes:
q = raw_value[0]
replacement = f"{q}{{{{ {var_name} }}}}{q}"
else:
replacement = f"{{{{ {var_name} }}}}"
new_stripped = f"- {replacement}{comment_part}"
out_lines.append(
" " * indent + new_stripped + ("\n" if raw_line.endswith("\n") else "")
)
continue
# Anything else (multi-line scalars, weird YAML): leave untouched
out_lines.append(raw_line)
return "".join(out_lines)
def _generate_json_template(role_prefix: str, data: Any) -> str:
"""
Generate a JSON Jinja2 template from parsed JSON data.
All scalar values are replaced with Jinja expressions whose names are
derived from the path, similar to TOML/YAML.
"""
def _walk(obj: Any, path: tuple[str, ...] = ()) -> Any:
if isinstance(obj, dict):
return {k: _walk(v, path + (str(k),)) for k, v in obj.items()}
if isinstance(obj, list):
return [_walk(v, path + (str(i),)) for i, v in enumerate(obj)]
# scalar
var_name = make_var_name(role_prefix, path)
return f"{{{{ {var_name} }}}}"
templated = _walk(data)
return json.dumps(templated, indent=2, ensure_ascii=False) + "\n"
def generate_template( def generate_template(
fmt: str, fmt: str,
parsed: Any, parsed: Any,
@ -497,14 +694,19 @@ def generate_template(
If original_text is provided, comments and blank lines are preserved by If original_text is provided, comments and blank lines are preserved by
patching values in-place. Otherwise we fall back to reconstructing from patching values in-place. Otherwise we fall back to reconstructing from
the parsed structure (no comments). the parsed structure (no comments). JSON of course does not support
comments.
""" """
if original_text is not None: if original_text is not None:
if fmt == "toml": if fmt == "toml":
return _generate_toml_template_from_text(role_prefix, original_text) return _generate_toml_template_from_text(role_prefix, original_text)
if fmt == "ini": if fmt == "ini":
return _generate_ini_template_from_text(role_prefix, original_text) return _generate_ini_template_from_text(role_prefix, original_text)
raise ValueError(f"Unsupported format: {fmt}") if fmt == "yaml":
return _generate_yaml_template_from_text(role_prefix, original_text)
# For JSON we ignore original_text and reconstruct from parsed structure below
if fmt != "json":
raise ValueError(f"Unsupported format: {fmt}")
# Fallback: previous behaviour (no comments preserved) # Fallback: previous behaviour (no comments preserved)
if fmt == "toml": if fmt == "toml":
@ -515,4 +717,14 @@ def generate_template(
if not isinstance(parsed, configparser.ConfigParser): if not isinstance(parsed, configparser.ConfigParser):
raise TypeError("INI parser result must be a ConfigParser") raise TypeError("INI parser result must be a ConfigParser")
return _generate_ini_template(role_prefix, parsed) return _generate_ini_template(role_prefix, parsed)
if fmt == "yaml":
if not isinstance(parsed, (dict, list)):
raise TypeError("YAML parser result must be a dict or list")
return _generate_yaml_template_from_text(
role_prefix, yaml.safe_dump(parsed, sort_keys=False)
)
if fmt == "json":
if not isinstance(parsed, (dict, list)):
raise TypeError("JSON parser result must be a dict or list")
return _generate_json_template(role_prefix, parsed)
raise ValueError(f"Unsupported format: {fmt}") raise ValueError(f"Unsupported format: {fmt}")

View file

@ -3,6 +3,7 @@ from __future__ import annotations
from pathlib import Path from pathlib import Path
import configparser import configparser
import pytest import pytest
import textwrap
import yaml import yaml
import jinjaturtle.core as core import jinjaturtle.core as core
@ -170,13 +171,13 @@ def test_parse_config_toml_missing_tomllib(monkeypatch):
def test_parse_config_unsupported_format(tmp_path: Path): def test_parse_config_unsupported_format(tmp_path: Path):
""" """
Hit the ValueError in parse_config when fmt is neither 'toml' nor 'ini'. Hit the ValueError in parse_config when fmt is not a supported format.
""" """
cfg_path = tmp_path / "config.whatever" cfg_path = tmp_path / "config.whatever"
cfg_path.write_text("", encoding="utf-8") cfg_path.write_text("", encoding="utf-8")
with pytest.raises(ValueError): with pytest.raises(ValueError):
parse_config(cfg_path, fmt="yaml") parse_config(cfg_path, fmt="bogus")
def test_generate_template_type_and_format_errors(): def test_generate_template_type_and_format_errors():
@ -184,7 +185,8 @@ def test_generate_template_type_and_format_errors():
Exercise the error branches in generate_template: Exercise the error branches in generate_template:
- toml with non-dict parsed - toml with non-dict parsed
- ini with non-ConfigParser parsed - ini with non-ConfigParser parsed
- completely unsupported fmt - yaml with wrong parsed type
- completely unsupported fmt (with and without original_text)
""" """
# wrong type for TOML # wrong type for TOML
with pytest.raises(TypeError): with pytest.raises(TypeError):
@ -194,14 +196,18 @@ def test_generate_template_type_and_format_errors():
with pytest.raises(TypeError): with pytest.raises(TypeError):
generate_template("ini", parsed={"not": "a configparser"}, role_prefix="role") generate_template("ini", parsed={"not": "a configparser"}, role_prefix="role")
# unsupported format # wrong type for YAML
with pytest.raises(ValueError): with pytest.raises(TypeError):
generate_template("yaml", parsed=None, role_prefix="role") generate_template("yaml", parsed=None, role_prefix="role")
# unsupported format even when original_text is provided # unsupported format, no original_text
with pytest.raises(ValueError):
generate_template("bogusfmt", parsed=None, role_prefix="role")
# unsupported format, with original_text
with pytest.raises(ValueError): with pytest.raises(ValueError):
generate_template( generate_template(
"yaml", "bogusfmt",
parsed=None, parsed=None,
role_prefix="role", role_prefix="role",
original_text="foo=bar", original_text="foo=bar",
@ -286,3 +292,84 @@ def test_generate_toml_template_from_text_edge_cases():
# Ensure the lines without '=' / empty key were handled without exploding. # Ensure the lines without '=' / empty key were handled without exploding.
assert "[table]" in tmpl assert "[table]" in tmpl
assert "noequals" in tmpl assert "noequals" in tmpl
def test_yaml_roundtrip_with_list_and_comment(tmp_path: Path):
    """
    Round-trip a YAML file holding a comment, a quoted scalar and a list
    through parsing, defaults generation and template generation.
    """
    source_text = """
# Top comment
foo: "bar"
blah:
- something
- else
"""
    config_file = tmp_path / "config.yaml"
    config_file.write_text(textwrap.dedent(source_text), encoding="utf-8")

    detected_fmt, tree = parse_config(config_file)
    assert detected_fmt == "yaml"

    flattened = flatten_config(detected_fmt, tree)
    defaults = yaml.safe_load(generate_defaults_yaml("foobar", flattened))

    # Defaults: keys are flattened with indices
    expected_defaults = {
        "foobar_foo": "bar",
        "foobar_blah_0": "something",
        "foobar_blah_1": "else",
    }
    for var_name, expected in expected_defaults.items():
        assert defaults[var_name] == expected

    # Template generation (preserving comments)
    template = generate_template(
        detected_fmt,
        tree,
        "foobar",
        original_text=config_file.read_text(encoding="utf-8"),
    )

    # The comment survives, the scalar is replaced, and list items use
    # indexed variables.
    for fragment in (
        "# Top comment",
        "foo:",
        "foobar_foo",
        "foobar_blah_0",
        "foobar_blah_1",
    ):
        assert fragment in template

    # Neither a whole-list variable nor a generic "item" variable is used.
    for fragment in ("{{ foobar_blah }}", "foobar_blah_item"):
        assert fragment not in template
def test_json_roundtrip(tmp_path: Path):
    """
    Round-trip a JSON file with a nested object, a boolean and a list
    through parsing, defaults generation and template generation.
    """
    source_text = """
{
"foo": "bar",
"nested": {
"a": 1,
"b": true
},
"list": [10, 20]
}
"""
    config_file = tmp_path / "config.json"
    config_file.write_text(textwrap.dedent(source_text), encoding="utf-8")

    detected_fmt, tree = parse_config(config_file)
    assert detected_fmt == "json"

    flattened = flatten_config(detected_fmt, tree)
    defaults = yaml.safe_load(generate_defaults_yaml("foobar", flattened))

    # Defaults: nested keys and list indices; the bool is normalized to the
    # string "true".
    expected_defaults = {
        "foobar_foo": "bar",
        "foobar_nested_a": 1,
        "foobar_nested_b": "true",
        "foobar_list_0": 10,
        "foobar_list_1": 20,
    }
    for var_name, expected in expected_defaults.items():
        assert defaults[var_name] == expected

    # Template generation (JSON has no comments, so we just rebuild)
    template = generate_template(detected_fmt, tree, "foobar")
    assert '"foo": "{{ foobar_foo }}"' in template
    for var_name in (
        "foobar_nested_a",
        "foobar_nested_b",
        "foobar_list_0",
        "foobar_list_1",
    ):
        assert var_name in template