Improvements

* Preserve comments in Jinja2 templates
* Handle truthy/falsy statements better
* Handle params that have an empty value (php.ini is notorious for these)
* Add indentation to the YAML output and a leading `---` document-start marker so that yamllint passes
2025-11-25 16:35:18 +11:00 · 2025-11-25 16:35:18 +11:00 · f992da47ee
commit f992da47ee
parent 2be1e9e895
5 changed files with 396 additions and 13 deletions
--- a/.gitignore
+++ b/.gitignore
@ -4,3 +4,6 @@ __pycache__
 .pytest_cache
 dist
 .coverage
 *.yml
 *.j2
 *.toml
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "jinjaturtle"
-version = "0.1.0"
+version = "0.1.1"
 description = "Convert config files into Ansible defaults and Jinja2 templates."
 authors = ["Miguel Jacq <mig@mig5.net>"]
 license = "GPL-3.0-or-later"
--- a/src/jinjaturtle/cli.py
+++ b/src/jinjaturtle/cli.py
@ -54,7 +54,10 @@ def _main(argv: list[str] | None = None) -> int:
    fmt, parsed = parse_config(config_path, args.format)
    flat_items = flatten_config(fmt, parsed)
    defaults_yaml = generate_defaults_yaml(args.role_name, flat_items)
-    template_str = generate_template(fmt, parsed, args.role_name)
+    config_text = config_path.read_text(encoding="utf-8")
    template_str = generate_template(
        fmt, parsed, args.role_name, original_text=config_text
    )
    if args.defaults_output:
        Path(args.defaults_output).write_text(defaults_yaml, encoding="utf-8")
--- a/src/jinjaturtle/core.py
+++ b/src/jinjaturtle/core.py
@ -15,6 +15,41 @@ except ModuleNotFoundError:  # pragma: no cover
        tomllib = None  # type: ignore
 class QuotedString(str):
    """
    Tag type for strings that have to appear double-quoted in YAML output.
    It adds no behaviour of its own; it only lets a YAML representer
    recognise such values by their type.
    """
 class _TurtleDumper(yaml.SafeDumper):
    """
    SafeDumper subclass used for this module's YAML output.
    Having a dedicated subclass means custom representers (such as the one
    that double-quotes QuotedString values) can be attached to it rather
    than to yaml.SafeDumper itself.
    """
 def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString):
    """
    Represent a QuotedString as a YAML string scalar in double-quoted style.
    The value is converted to a plain str first; the return value is
    whatever dumper.represent_scalar() produces.
    """
    plain_text = str(data)
    str_tag = "tag:yaml.org,2002:str"
    return dumper.represent_scalar(str_tag, plain_text, style='"')
 # Register the representer so QuotedString values dumped through
 # _TurtleDumper are handled by _quoted_str_representer.
 _TurtleDumper.add_representer(QuotedString, _quoted_str_representer)
 def _normalize_default_value(value: Any) -> Any:
    """
    Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans.
    - bool -> QuotedString("true"/"false")
    - "true"/"false" (any case) -> QuotedString(original_text)
    - everything else -> unchanged
    """
    if isinstance(value, bool):
        # YAML booleans are lower-case; we keep them as strings.
        return QuotedString("true" if value else "false")
    if isinstance(value, str) and value.lower() in {"true", "false"}:
        return QuotedString(value)
    return value
 def detect_format(path: Path, explicit: str | None = None) -> str:
    """
    Determine config format (toml vs ini-ish) from argument or filename.
@ -130,22 +165,49 @@ def make_var_name(role_prefix: str, path: Iterable[str]) -> str:
    return role_prefix
 def _split_inline_comment(text: str, comment_chars: set[str]) -> tuple[str, str]:
    """
    Split 'value   # comment' into (value_part, comment_part), where
    comment_part starts at the first unquoted comment character.
    comment_chars is e.g. {'#'} for TOML, {'#', ';'} for INI.
    """
    in_single = False
    in_double = False
    for i, ch in enumerate(text):
        if ch == "'" and not in_double:
            in_single = not in_single
        elif ch == '"' and not in_single:
            in_double = not in_double
        elif ch in comment_chars and not in_single and not in_double:
            return text[:i], text[i:]
    return text, ""
 def generate_defaults_yaml(
-    role_prefix: str, flat_items: list[tuple[tuple[str, ...], Any]]
+    role_prefix: str,
    flat_items: list[tuple[tuple[str, ...], Any]],
 ) -> str:
    """
    Create YAML for defaults/main.yml from flattened items.
    Boolean/boolean-like values ("true"/"false") are forced to be *strings*
    and double-quoted in the resulting YAML so that Ansible does not coerce
    them back into Python booleans.
    """
    defaults: dict[str, Any] = {}
    for path, value in flat_items:
        var_name = make_var_name(role_prefix, path)
-        defaults[var_name] = value
+        defaults[var_name] = _normalize_default_value(value)
-    return yaml.safe_dump(
+    return yaml.dump(
        defaults,
        Dumper=_TurtleDumper,
        sort_keys=True,
        default_flow_style=False,
        allow_unicode=True,
        explicit_start=True,
        indent=2,
    )
@ -223,10 +285,228 @@ def _generate_ini_template(role_prefix: str, parser: configparser.ConfigParser)
    return "\n".join(lines).rstrip() + "\n"
-def generate_template(fmt: str, parsed: Any, role_prefix: str) -> str:
+def _generate_ini_template_from_text(role_prefix: str, text: str) -> str:
    """
-    Dispatch to the appropriate template generator.
+    Generate a Jinja2 template for an INI/php.ini-style file, preserving
    comments, blank lines, and section headers by patching values in-place.
    """
    lines = text.splitlines(keepends=True)
    current_section: str | None = None
    out_lines: list[str] = []
    for raw_line in lines:
        line = raw_line
        stripped = line.lstrip()
        # Blank or pure comment: keep as-is
        if not stripped or stripped[0] in {"#", ";"}:
            out_lines.append(raw_line)
            continue
        # Section header
        if stripped.startswith("[") and "]" in stripped:
            header_inner = stripped[1 : stripped.index("]")]
            current_section = header_inner.strip()
            out_lines.append(raw_line)
            continue
        # Work without newline so we can re-attach it exactly
        newline = ""
        content = raw_line
        if content.endswith("\r\n"):
            newline = "\r\n"
            content = content[:-2]
        elif content.endswith("\n"):
            newline = content[-1]
            content = content[:-1]
        eq_index = content.find("=")
        if eq_index == -1:
            # Not a simple key=value line: leave untouched
            out_lines.append(raw_line)
            continue
        before_eq = content[:eq_index]
        after_eq = content[eq_index + 1 :]
        key = before_eq.strip()
        if not key:
            out_lines.append(raw_line)
            continue
        # Whitespace after '='
        value_ws_len = len(after_eq) - len(after_eq.lstrip(" \t"))
        leading_ws = after_eq[:value_ws_len]
        value_and_comment = after_eq[value_ws_len:]
        value_part, comment_part = _split_inline_comment(value_and_comment, {"#", ";"})
        raw_value = value_part.strip()
        path = (key,) if current_section is None else (current_section, key)
        var_name = make_var_name(role_prefix, path)
        # Was the original value quoted?
        use_quotes = (
            len(raw_value) >= 2
            and raw_value[0] == raw_value[-1]
            and raw_value[0] in {'"', "'"}
        )
        if use_quotes:
            quote_char = raw_value[0]
            replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}"
        else:
            replacement_value = f"{{{{ {var_name} }}}}"
        new_content = before_eq + "=" + leading_ws + replacement_value + comment_part
        out_lines.append(new_content + newline)
    return "".join(out_lines)
 def _generate_toml_template_from_text(role_prefix: str, text: str) -> str:
    """
    Build a Jinja2 template from TOML source text by rewriting values in
    place, so that comments, blank lines and table headers survive.
    Each 'key = value' line has its value swapped for a '{{ var }}'
    placeholder whose name comes from make_var_name(role_prefix, path),
    where path is the current table's parts followed by the key. Values
    that were wrapped in matching quotes keep those quotes around the
    placeholder, and any trailing inline comment is re-attached.
    An inline table such as
      temp_targets = { cpu = 79.5, case = 72.0 }
    is expanded member by member, one placeholder per sub-key, with string
    members wrapped in double quotes. If the inline table cannot be parsed
    (or tomllib is unavailable) the value is handled like a plain scalar.
    Lines without '=' or without a key before '=' are copied unchanged.
    """
    table_path: tuple[str, ...] = ()
    rendered: list[str] = []
    for raw_line in text.splitlines(keepends=True):
        stripped = raw_line.lstrip()
        # Blank lines and whole-line comments pass through verbatim.
        if stripped == "" or stripped[0] == "#":
            rendered.append(raw_line)
            continue
        # Table header: [server], [server.tls] or [[array.of.tables]].
        if stripped[0] == "[" and "]" in stripped:
            # Text between the opening '[' and the first ']'; the extra
            # strip("[]") removes the inner bracket of a [[table]] header.
            inner = stripped[1 : stripped.index("]")].strip().strip("[]")
            table_path = tuple(
                piece.strip() for piece in inner.split(".") if piece.strip()
            )
            rendered.append(raw_line)
            continue
        # Peel off the line terminator so it can be re-attached exactly.
        if raw_line.endswith("\r\n"):
            content, newline = raw_line[:-2], "\r\n"
        elif raw_line.endswith("\n"):
            content, newline = raw_line[:-1], "\n"
        else:
            content, newline = raw_line, ""
        before_eq, eq_sign, after_eq = content.partition("=")
        key = before_eq.strip()
        if not eq_sign or not key:
            # No '=' at all, or nothing before it: not a key/value line.
            rendered.append(raw_line)
            continue
        # Keep the whitespace that followed '=' so the layout is unchanged.
        rest = after_eq.lstrip(" \t")
        leading_ws = after_eq[: len(after_eq) - len(rest)]
        value_part, comment_part = _split_inline_comment(rest, {"#"})
        raw_value = value_part.strip()
        path = table_path + (key,)
        line_head = before_eq + "=" + leading_ws
        # Inline table: emit one placeholder per member.
        if (
            raw_value.startswith("{")
            and raw_value.endswith("}")
            and tomllib is not None
        ):
            try:
                # Parse the inline table on its own as a one-key document.
                inline = tomllib.loads("table = " + raw_value + "\n")["table"]
            except Exception:
                inline = None
            if isinstance(inline, dict):
                members: list[str] = []
                for sub_key, sub_val in inline.items():
                    nested_var = make_var_name(role_prefix, path + (sub_key,))
                    placeholder = f"{{{{ {nested_var} }}}}"
                    if isinstance(sub_val, str):
                        placeholder = f'"{placeholder}"'
                    members.append(f"{sub_key} = {placeholder}")
                table_text = "{ " + ", ".join(members) + " }"
                rendered.append(line_head + table_text + comment_part + newline)
                continue
            # Parsing failed: treat the value as an ordinary scalar below.
        # Scalar value (bool, number, string, ...).
        var_name = make_var_name(role_prefix, path)
        placeholder = f"{{{{ {var_name} }}}}"
        was_quoted = (
            len(raw_value) >= 2
            and raw_value[0] in ('"', "'")
            and raw_value[-1] == raw_value[0]
        )
        if was_quoted:
            quote_char = raw_value[0]
            placeholder = quote_char + placeholder + quote_char
        rendered.append(line_head + placeholder + comment_part + newline)
    return "".join(rendered)
 def generate_template(
    fmt: str,
    parsed: Any,
    role_prefix: str,
    original_text: str | None = None,
 ) -> str:
    """
    Generate a Jinja2 template for the config.
    If original_text is provided, comments and blank lines are preserved by
    patching values in-place. Otherwise we fall back to reconstructing from
    the parsed structure (no comments).
    """
    if original_text is not None:
        if fmt == "toml":
            return _generate_toml_template_from_text(role_prefix, original_text)
        if fmt == "ini":
            return _generate_ini_template_from_text(role_prefix, original_text)
        raise ValueError(f"Unsupported format: {fmt}")
    # Fallback: previous behaviour (no comments preserved)
    if fmt == "toml":
        if not isinstance(parsed, dict):
            raise TypeError("TOML parser result must be a dict")
--- a/tests/test_core.py
+++ b/tests/test_core.py
@ -1,7 +1,7 @@
 from __future__ import annotations
 from pathlib import Path
-
+import configparser
 import pytest
 import yaml
@ -69,7 +69,7 @@ def test_toml_sample_roundtrip():
    assert fmt == "toml"
    flat_items = flatten_config(fmt, parsed)
-    assert flat_items, "Expected at least one flattened item from TOML sample"
+    assert flat_items
    defaults_yaml = generate_defaults_yaml("jinjaturtle", flat_items)
    defaults = yaml.safe_load(defaults_yaml)
@ -84,10 +84,16 @@ def test_toml_sample_roundtrip():
        assert key == key.lower()
        assert " " not in key
-    # template generation
+    # template generation – **now with original_text**
-    template = generate_template(fmt, parsed, "jinjaturtle")
+    original_text = toml_path.read_text(encoding="utf-8")
    template = generate_template(
        fmt, parsed, "jinjaturtle", original_text=original_text
    )
    assert isinstance(template, str)
-    assert template.strip(), "Template for TOML sample should not be empty"
+    assert template.strip()
    # comments from the original file should now be preserved
    assert "# This is a TOML document" in template
    # each default variable name should appear in the template as a Jinja placeholder
    for var_name in defaults:
@ -120,7 +126,9 @@ def test_ini_php_sample_roundtrip():
        assert " " not in key
    # template generation
-    template = generate_template(fmt, parsed, "php")
+    original_text = ini_path.read_text(encoding="utf-8")
    template = generate_template(fmt, parsed, "php", original_text=original_text)
    assert "; About this file" in template
    assert isinstance(template, str)
    assert template.strip(), "Template for php.ini sample should not be empty"
@ -189,3 +197,92 @@ def test_generate_template_type_and_format_errors():
    # unsupported format
    with pytest.raises(ValueError):
        generate_template("yaml", parsed=None, role_prefix="role")
    # unsupported format even when original_text is provided
    with pytest.raises(ValueError):
        generate_template(
            "yaml",
            parsed=None,
            role_prefix="role",
            original_text="foo=bar",
        )
 def test_normalize_default_value_true_false_strings():
    """'true'/'false' strings must load back from the defaults YAML as strings."""
    items = [
        (("section", "foo"), "true"),
        (("section", "bar"), "FALSE"),
    ]
    loaded = yaml.safe_load(generate_defaults_yaml("role", items))
    # Original spelling (including case) must be kept, and the type must
    # still be str after a YAML round trip.
    expected = {"role_section_foo": "true", "role_section_bar": "FALSE"}
    for var_name, text in expected.items():
        assert loaded[var_name] == text
 def test_split_inline_comment_handles_quoted_hash():
    """A '#' inside quotes is part of the value; only the outer one starts a comment."""
    value_part, comment_part = core._split_inline_comment(
        " 'foo # not comment' # real", {"#"}
    )
    assert "not comment" in value_part
    assert comment_part.strip() == "# real"
 def test_generate_template_fallback_toml_and_ini():
    """Without original_text, generate_template builds from the parsed structures."""
    toml_doc = {
        "title": "Example",
        "server": {"port": 8080, "host": "127.0.0.1"},
        # Nested table so the recursive walk is exercised.
        "logging": {"file": {"path": "/tmp/app.log"}},
    }
    toml_template = generate_template("toml", parsed=toml_doc, role_prefix="role")
    for fragment in ("[server]", "role_server_port"):
        assert fragment in toml_template
    assert any(
        header in toml_template for header in ("[logging]", "[logging.file]")
    )
    ini_parser = configparser.ConfigParser()
    # "foo" carries literal quotes so the quote-preserving branch is hit.
    ini_parser["section"] = {"foo": '"bar"', "num": "42"}
    ini_template = generate_template("ini", parsed=ini_parser, role_prefix="role")
    for fragment in (
        "[section]",
        "role_section_foo",
        '"{{ role_section_foo }}"',  # quotes carried over from the INI value
    ):
        assert fragment in ini_template
 def test_generate_ini_template_from_text_edge_cases():
    """Exercise CRLF line endings, a line without '=', and a line with no key."""
    source = "[section]\r\nkey=value\r\nnoequals\r\n   = bare\r\n"
    rendered = core._generate_ini_template_from_text("role", source)
    # Exact formatting is not checked here; the output only has to contain
    # the pieces below.
    must_contain = (
        "[section]",
        "role_section_key",
        "noequals",  # line without '=' is kept
        "   = bare",  # line with an empty key is left untouched
    )
    for fragment in must_contain:
        assert fragment in rendered
 def test_generate_toml_template_from_text_edge_cases():
    """
    Exercise CRLF line endings, a line without '=', an empty key, and
    inline tables that do and do not parse.
    """
    source = "".join(
        [
            "# comment\r\n",
            "[table]\r\n",
            "noequals\r\n",
            "   = 42\r\n",
            'inline_good = { name = "abc", value = 1 }\r\n',
            "inline_bad = { invalid = }\r\n",
        ]
    )
    rendered = core._generate_toml_template_from_text("role", source)
    must_contain = (
        # A parseable inline table yields one variable per member.
        "role_table_inline_good_name",
        "role_table_inline_good_value",
        # An unparseable inline table is handled as a single scalar.
        "role_table_inline_bad",
        # Header and odd lines survive without raising.
        "[table]",
        "noequals",
    )
    for fragment in must_contain:
        assert fragment in rendered