Add support for YAML and JSON
All checks were successful
CI / test (push) Successful in 41s
Lint / test (push) Successful in 23s
Trivy / test (push) Successful in 21s

This commit is contained in:
Miguel Jacq 2025-11-25 17:38:30 +11:00
parent 4acc82e35b
commit 559389a35c
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
3 changed files with 328 additions and 29 deletions

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "jinjaturtle"
version = "0.1.1"
version = "0.1.2"
description = "Convert config files into Ansible defaults and Jinja2 templates."
authors = ["Miguel Jacq <mig@mig5.net>"]
license = "GPL-3.0-or-later"

View file

@ -1,11 +1,16 @@
from __future__ import annotations
import configparser
import json
from pathlib import Path
from typing import Any, Iterable
import yaml
try:
from ruamel.yaml import YAML as RuamelYAML # for comment-preserving YAML
except ImportError: # pragma: no cover
RuamelYAML = None
try:
import tomllib # Python 3.11+
except ModuleNotFoundError: # pragma: no cover
@ -34,25 +39,9 @@ def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString):
_TurtleDumper.add_representer(QuotedString, _quoted_str_representer)
def _normalize_default_value(value: Any) -> Any:
"""
Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans.
- bool -> QuotedString("true"/"false")
- "true"/"false" (any case) -> QuotedString(original_text)
- everything else -> unchanged
"""
if isinstance(value, bool):
# YAML booleans are lower-case; we keep them as strings.
return QuotedString("true" if value else "false")
if isinstance(value, str) and value.lower() in {"true", "false"}:
return QuotedString(value)
return value
def detect_format(path: Path, explicit: str | None = None) -> str:
"""
Determine config format (toml vs ini-ish) from argument or filename.
Determine config format (toml, yaml, json, ini-ish) from argument or filename.
"""
if explicit:
return explicit
@ -60,6 +49,10 @@ def detect_format(path: Path, explicit: str | None = None) -> str:
name = path.name.lower()
if suffix == ".toml":
return "toml"
if suffix in {".yaml", ".yml"}:
return "yaml"
if suffix == ".json":
return "json"
if suffix in {".ini", ".cfg", ".conf"} or name.endswith(".ini"):
return "ini"
# Fallback: treat as INI-ish
@ -84,6 +77,24 @@ def parse_config(path: Path, fmt: str | None = None) -> tuple[str, Any]:
data = tomllib.load(f)
return fmt, data
if fmt == "yaml":
text = path.read_text(encoding="utf-8")
if RuamelYAML is not None:
# ruamel.yaml preserves comments; we'll reuse them in template gen
y = RuamelYAML()
y.preserve_quotes = True
data = y.load(text) or {}
else:
# Fallback: PyYAML (drops comments in parsed structure, but we still
# have the original text for comment-preserving template generation).
data = yaml.safe_load(text) or {}
return fmt, data
if fmt == "json":
with path.open("r", encoding="utf-8") as f:
data = json.load(f)
return fmt, data
if fmt == "ini":
parser = configparser.ConfigParser()
parser.optionxform = str # preserve key case
@ -109,12 +120,17 @@ def flatten_config(fmt: str, parsed: Any) -> list[tuple[tuple[str, ...], Any]]:
"""
items: list[tuple[tuple[str, ...], Any]] = []
if fmt == "toml":
if fmt in {"toml", "yaml", "json"}:
def _walk(obj: Any, path: tuple[str, ...] = ()) -> None:
if isinstance(obj, dict):
for k, v in obj.items():
_walk(v, path + (str(k),))
elif isinstance(obj, list) and fmt in {"yaml", "json"}:
# for YAML/JSON, flatten lists so each element can be templated;
# TOML still treats list as a single scalar (ports = [..]) which is fine.
for i, v in enumerate(obj):
_walk(v, path + (str(i),))
else:
items.append((path, obj))
@ -184,6 +200,22 @@ def _split_inline_comment(text: str, comment_chars: set[str]) -> tuple[str, str]
return text, ""
def _normalize_default_value(value: Any) -> Any:
"""
Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans.
- bool -> QuotedString("true"/"false")
- "true"/"false" (any case) -> QuotedString(original_text)
- everything else -> unchanged
"""
if isinstance(value, bool):
# YAML booleans are lower-case; we keep them as strings.
return QuotedString("true" if value else "false")
if isinstance(value, str) and value.lower() in {"true", "false"}:
return QuotedString(value)
return value
def generate_defaults_yaml(
role_prefix: str,
flat_items: list[tuple[tuple[str, ...], Any]],
@ -486,6 +518,171 @@ def _generate_toml_template_from_text(role_prefix: str, text: str) -> str:
return "".join(out_lines)
def _generate_yaml_template_from_text(
role_prefix: str,
text: str,
) -> str:
"""
Generate a Jinja2 template for a YAML file, preserving comments and
blank lines by patching scalar values in-place.
This handles common "config-ish" YAML:
- top-level and nested mappings
- lists of scalars
- lists of small mapping objects
It does *not* aim to support all YAML edge cases (anchors, tags, etc.).
"""
lines = text.splitlines(keepends=True)
out_lines: list[str] = []
# Simple indentation-based context stack: (indent, path, kind)
# kind is "map" or "seq".
stack: list[tuple[int, tuple[str, ...], str]] = []
# Track index per parent path for sequences
seq_counters: dict[tuple[str, ...], int] = {}
def current_path() -> tuple[str, ...]:
return stack[-1][1] if stack else ()
for raw_line in lines:
stripped = raw_line.lstrip()
indent = len(raw_line) - len(stripped)
# Blank or pure comment lines unchanged
if not stripped or stripped.startswith("#"):
out_lines.append(raw_line)
continue
# Adjust stack based on indent
while stack and indent < stack[-1][0]:
stack.pop()
# --- Handle mapping key lines: "key:" or "key: value"
if ":" in stripped and not stripped.lstrip().startswith("- "):
# separate key and rest
key_part, rest = stripped.split(":", 1)
key = key_part.strip()
if not key:
out_lines.append(raw_line)
continue
# Is this just "key:" or "key: value"?
rest_stripped = rest.lstrip(" \t")
# Use the same inline-comment splitter to see if there's any real value
value_candidate, _ = _split_inline_comment(rest_stripped, {"#"})
has_value = bool(value_candidate.strip())
# Update stack/context: current mapping at this indent
# Replace any existing mapping at same indent
if stack and stack[-1][0] == indent and stack[-1][2] == "map":
stack.pop()
path = current_path() + (key,)
stack.append((indent, path, "map"))
if not has_value:
# Just "key:" -> collection or nested structure begins on following lines.
out_lines.append(raw_line)
continue
# We have an inline scalar value on this same line.
# Separate value from inline comment
value_part, comment_part = _split_inline_comment(rest_stripped, {"#"})
raw_value = value_part.strip()
var_name = make_var_name(role_prefix, path)
# Keep quote-style if original was quoted
use_quotes = (
len(raw_value) >= 2
and raw_value[0] == raw_value[-1]
and raw_value[0] in {'"', "'"}
)
if use_quotes:
q = raw_value[0]
replacement = f"{q}{{{{ {var_name} }}}}{q}"
else:
replacement = f"{{{{ {var_name} }}}}"
leading = rest[: len(rest) - len(rest.lstrip(" \t"))]
new_stripped = f"{key}: {leading}{replacement}{comment_part}"
out_lines.append(
" " * indent + new_stripped + ("\n" if raw_line.endswith("\n") else "")
)
continue
# --- Handle list items: "- value" or "- key: value"
if stripped.startswith("- "):
# Determine parent path
# If top of stack isn't sequence at this indent, push one using current path
if not stack or stack[-1][0] != indent or stack[-1][2] != "seq":
parent_path = current_path()
stack.append((indent, parent_path, "seq"))
parent_path = stack[-1][1]
content = stripped[2:] # after "- "
parent_path = stack[-1][1]
content = stripped[2:] # after "- "
# Determine index for this parent path
index = seq_counters.get(parent_path, 0)
seq_counters[parent_path] = index + 1
path = parent_path + (str(index),)
value_part, comment_part = _split_inline_comment(content, {"#"})
raw_value = value_part.strip()
var_name = make_var_name(role_prefix, path)
# If it's of the form "key: value" inside the list, we could try to
# support that, but a simple scalar is the common case:
use_quotes = (
len(raw_value) >= 2
and raw_value[0] == raw_value[-1]
and raw_value[0] in {'"', "'"}
)
if use_quotes:
q = raw_value[0]
replacement = f"{q}{{{{ {var_name} }}}}{q}"
else:
replacement = f"{{{{ {var_name} }}}}"
new_stripped = f"- {replacement}{comment_part}"
out_lines.append(
" " * indent + new_stripped + ("\n" if raw_line.endswith("\n") else "")
)
continue
# Anything else (multi-line scalars, weird YAML): leave untouched
out_lines.append(raw_line)
return "".join(out_lines)
def _generate_json_template(role_prefix: str, data: Any) -> str:
"""
Generate a JSON Jinja2 template from parsed JSON data.
All scalar values are replaced with Jinja expressions whose names are
derived from the path, similar to TOML/YAML.
"""
def _walk(obj: Any, path: tuple[str, ...] = ()) -> Any:
if isinstance(obj, dict):
return {k: _walk(v, path + (str(k),)) for k, v in obj.items()}
if isinstance(obj, list):
return [_walk(v, path + (str(i),)) for i, v in enumerate(obj)]
# scalar
var_name = make_var_name(role_prefix, path)
return f"{{{{ {var_name} }}}}"
templated = _walk(data)
return json.dumps(templated, indent=2, ensure_ascii=False) + "\n"
def generate_template(
fmt: str,
parsed: Any,
@ -497,13 +694,18 @@ def generate_template(
If original_text is provided, comments and blank lines are preserved by
patching values in-place. Otherwise we fall back to reconstructing from
the parsed structure (no comments).
the parsed structure (no comments). JSON of course does not support
comments.
"""
if original_text is not None:
if fmt == "toml":
return _generate_toml_template_from_text(role_prefix, original_text)
if fmt == "ini":
return _generate_ini_template_from_text(role_prefix, original_text)
if fmt == "yaml":
return _generate_yaml_template_from_text(role_prefix, original_text)
# For JSON we ignore original_text and reconstruct from parsed structure below
if fmt != "json":
raise ValueError(f"Unsupported format: {fmt}")
# Fallback: previous behaviour (no comments preserved)
@ -515,4 +717,14 @@ def generate_template(
if not isinstance(parsed, configparser.ConfigParser):
raise TypeError("INI parser result must be a ConfigParser")
return _generate_ini_template(role_prefix, parsed)
if fmt == "yaml":
if not isinstance(parsed, (dict, list)):
raise TypeError("YAML parser result must be a dict or list")
return _generate_yaml_template_from_text(
role_prefix, yaml.safe_dump(parsed, sort_keys=False)
)
if fmt == "json":
if not isinstance(parsed, (dict, list)):
raise TypeError("JSON parser result must be a dict or list")
return _generate_json_template(role_prefix, parsed)
raise ValueError(f"Unsupported format: {fmt}")

View file

@ -3,6 +3,7 @@ from __future__ import annotations
from pathlib import Path
import configparser
import pytest
import textwrap
import yaml
import jinjaturtle.core as core
@ -170,13 +171,13 @@ def test_parse_config_toml_missing_tomllib(monkeypatch):
def test_parse_config_unsupported_format(tmp_path: Path):
    """
    Hit the ValueError in parse_config when fmt is not a supported format.
    """
    cfg_path = tmp_path / "config.whatever"
    cfg_path.write_text("", encoding="utf-8")

    # "yaml" is a format parse_config handles, so it can no longer serve as
    # the unsupported example; use a name that no parser claims.
    with pytest.raises(ValueError):
        parse_config(cfg_path, fmt="bogus")
def test_generate_template_type_and_format_errors():
@ -184,7 +185,8 @@ def test_generate_template_type_and_format_errors():
Exercise the error branches in generate_template:
- toml with non-dict parsed
- ini with non-ConfigParser parsed
- completely unsupported fmt
- yaml with wrong parsed type
- completely unsupported fmt (with and without original_text)
"""
# wrong type for TOML
with pytest.raises(TypeError):
@ -194,14 +196,18 @@ def test_generate_template_type_and_format_errors():
with pytest.raises(TypeError):
generate_template("ini", parsed={"not": "a configparser"}, role_prefix="role")
# unsupported format
with pytest.raises(ValueError):
# wrong type for YAML
with pytest.raises(TypeError):
generate_template("yaml", parsed=None, role_prefix="role")
# unsupported format even when original_text is provided
# unsupported format, no original_text
with pytest.raises(ValueError):
generate_template("bogusfmt", parsed=None, role_prefix="role")
# unsupported format, with original_text
with pytest.raises(ValueError):
generate_template(
"yaml",
"bogusfmt",
parsed=None,
role_prefix="role",
original_text="foo=bar",
@ -286,3 +292,84 @@ def test_generate_toml_template_from_text_edge_cases():
# Ensure the lines without '=' / empty key were handled without exploding.
assert "[table]" in tmpl
assert "noequals" in tmpl
def test_yaml_roundtrip_with_list_and_comment(tmp_path: Path):
    """
    YAML end to end: parse the file, flatten it, dump the defaults, then build
    a template from the original text and check the comment survives.
    """
    source = textwrap.dedent(
        """
        # Top comment
        foo: "bar"
        blah:
        - something
        - else
        """
    )
    cfg_path = tmp_path / "config.yaml"
    cfg_path.write_text(source, encoding="utf-8")

    fmt, parsed = parse_config(cfg_path)
    assert fmt == "yaml"

    # Defaults: keys are flattened, list elements carry their index.
    defaults = yaml.safe_load(
        generate_defaults_yaml("foobar", flatten_config(fmt, parsed))
    )
    expected_defaults = {
        "foobar_foo": "bar",
        "foobar_blah_0": "something",
        "foobar_blah_1": "else",
    }
    for name, expected in expected_defaults.items():
        assert defaults[name] == expected

    # Template generation from the file's own text (comments preserved).
    template = generate_template(
        fmt,
        parsed,
        "foobar",
        original_text=cfg_path.read_text(encoding="utf-8"),
    )

    # The comment, the key, and the indexed list variables must be present.
    for fragment in (
        "# Top comment",
        "foo:",
        "foobar_foo",
        "foobar_blah_0",
        "foobar_blah_1",
    ):
        assert fragment in template

    # List items must not collapse into one variable or a generic "item".
    for fragment in ("{{ foobar_blah }}", "foobar_blah_item"):
        assert fragment not in template
def test_json_roundtrip(tmp_path: Path):
    """
    JSON end to end: parse the file, flatten it, dump the defaults, then
    rebuild a template from the parsed structure.
    """
    document = textwrap.dedent(
        """
        {
        "foo": "bar",
        "nested": {
        "a": 1,
        "b": true
        },
        "list": [10, 20]
        }
        """
    )
    cfg_path = tmp_path / "config.json"
    cfg_path.write_text(document, encoding="utf-8")

    fmt, parsed = parse_config(cfg_path)
    assert fmt == "json"

    defaults = yaml.safe_load(
        generate_defaults_yaml("foobar", flatten_config(fmt, parsed))
    )

    # Defaults: nested keys and list indices; the bool becomes the string "true".
    expected_defaults = {
        "foobar_foo": "bar",
        "foobar_nested_a": 1,
        "foobar_nested_b": "true",
        "foobar_list_0": 10,
        "foobar_list_1": 20,
    }
    for name, expected in expected_defaults.items():
        assert defaults[name] == expected

    # Template generation (JSON has no comments, so it is simply rebuilt).
    template = generate_template(fmt, parsed, "foobar")
    for fragment in (
        '"foo": "{{ foobar_foo }}"',
        "foobar_nested_a",
        "foobar_nested_b",
        "foobar_list_0",
        "foobar_list_1",
    ):
        assert fragment in template