From f992da47ee4043ec3e088ff5197fa9bf3c834bf5 Mon Sep 17 00:00:00 2001
From: Miguel Jacq <mig@mig5.net>
Date: Tue, 25 Nov 2025 16:35:18 +1100
Subject: [PATCH] Preserve comments in templates and quote booleans in defaults

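 * Preserve comments, blank lines and section/table headers in the
   generated Jinja2 templates by patching values in place
 * Emit booleans and "true"/"false" strings as double-quoted strings in
   the defaults YAML so Ansible does not coerce them back into booleans
 * Handle params that have an empty value (php.ini is notorious for this)
 * Emit the defaults YAML with 2-space indentation and a leading "---"
   document start marker so that yamllint passes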
---
 .gitignore              |   3 +
 pyproject.toml          |   2 +-
 src/jinjaturtle/cli.py  |   5 +-
 src/jinjaturtle/core.py | 290 +++++++++++++++++++++++++++++++++++++++-
 tests/test_core.py      | 109 ++++++++++++++-
 5 files changed, 396 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2352872..7bc15a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,6 @@ __pycache__
 .pytest_cache
 dist
 .coverage
+*.yml
+*.j2
+*.toml
diff --git a/pyproject.toml b/pyproject.toml
index e8609af..8e5fd67 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "jinjaturtle"
-version = "0.1.0"
+version = "0.1.1"
 description = "Convert config files into Ansible defaults and Jinja2 templates."
 authors = ["Miguel Jacq <mig@mig5.net>"]
 license = "GPL-3.0-or-later"
diff --git a/src/jinjaturtle/cli.py b/src/jinjaturtle/cli.py
index 83a4d67..5c59a87 100644
--- a/src/jinjaturtle/cli.py
+++ b/src/jinjaturtle/cli.py
@@ -54,7 +54,10 @@ def _main(argv: list[str] | None = None) -> int:
     fmt, parsed = parse_config(config_path, args.format)
     flat_items = flatten_config(fmt, parsed)
     defaults_yaml = generate_defaults_yaml(args.role_name, flat_items)
-    template_str = generate_template(fmt, parsed, args.role_name)
+    config_text = config_path.read_text(encoding="utf-8")
+    template_str = generate_template(
+        fmt, parsed, args.role_name, original_text=config_text
+    )
 
     if args.defaults_output:
         Path(args.defaults_output).write_text(defaults_yaml, encoding="utf-8")
diff --git a/src/jinjaturtle/core.py b/src/jinjaturtle/core.py
index 8e27bc1..849990b 100644
--- a/src/jinjaturtle/core.py
+++ b/src/jinjaturtle/core.py
@@ -15,6 +15,41 @@ except ModuleNotFoundError:  # pragma: no cover
         tomllib = None  # type: ignore
 
 
+class QuotedString(str):
+    """Marker type for strings that must be double-quoted in YAML output."""
+
+    pass
+
+
+class _TurtleDumper(yaml.SafeDumper):
+    """Custom YAML dumper that always double-quotes QuotedString values."""
+
+    pass
+
+
+def _quoted_str_representer(dumper: yaml.SafeDumper, data: QuotedString):
+    return dumper.represent_scalar("tag:yaml.org,2002:str", str(data), style='"')
+
+
+_TurtleDumper.add_representer(QuotedString, _quoted_str_representer)
+
+
+def _normalize_default_value(value: Any) -> Any:
+    """
+    Ensure that 'true' / 'false' end up as quoted strings in YAML, not booleans.
+
+    - bool -> QuotedString("true"/"false")
+    - "true"/"false" (any case) -> QuotedString(original_text)
+    - everything else -> unchanged
+    """
+    if isinstance(value, bool):
+        # YAML booleans are lower-case; we keep them as strings.
+        return QuotedString("true" if value else "false")
+    if isinstance(value, str) and value.lower() in {"true", "false"}:
+        return QuotedString(value)
+    return value
+
+
 def detect_format(path: Path, explicit: str | None = None) -> str:
     """
     Determine config format (toml vs ini-ish) from argument or filename.
@@ -130,22 +165,49 @@ def make_var_name(role_prefix: str, path: Iterable[str]) -> str:
     return role_prefix
 
 
+def _split_inline_comment(text: str, comment_chars: set[str]) -> tuple[str, str]:
+    """
+    Split 'value   # comment' into (value_part, comment_part), where
+    comment_part starts at the first unquoted comment character.
+
+    comment_chars is e.g. {'#'} for TOML, {'#', ';'} for INI.
+    """
+    in_single = False
+    in_double = False
+    for i, ch in enumerate(text):
+        if ch == "'" and not in_double:
+            in_single = not in_single
+        elif ch == '"' and not in_single:
+            in_double = not in_double
+        elif ch in comment_chars and not in_single and not in_double:
+            return text[:i], text[i:]
+    return text, ""
+
+
 def generate_defaults_yaml(
-    role_prefix: str, flat_items: list[tuple[tuple[str, ...], Any]]
+    role_prefix: str,
+    flat_items: list[tuple[tuple[str, ...], Any]],
 ) -> str:
     """
     Create YAML for defaults/main.yml from flattened items.
+
+    Boolean/boolean-like values ("true"/"false") are forced to be *strings*
+    and double-quoted in the resulting YAML so that Ansible does not coerce
+    them back into Python booleans.
     """
     defaults: dict[str, Any] = {}
     for path, value in flat_items:
         var_name = make_var_name(role_prefix, path)
-        defaults[var_name] = value
+        defaults[var_name] = _normalize_default_value(value)
 
-    return yaml.safe_dump(
+    return yaml.dump(
         defaults,
+        Dumper=_TurtleDumper,
         sort_keys=True,
         default_flow_style=False,
         allow_unicode=True,
+        explicit_start=True,
+        indent=2,
     )
 
 
@@ -223,10 +285,228 @@ def _generate_ini_template(role_prefix: str, parser: configparser.ConfigParser)
     return "\n".join(lines).rstrip() + "\n"
 
 
-def generate_template(fmt: str, parsed: Any, role_prefix: str) -> str:
+def _generate_ini_template_from_text(role_prefix: str, text: str) -> str:
     """
-    Dispatch to the appropriate template generator.
+    Generate a Jinja2 template for an INI/php.ini-style file, preserving
+    comments, blank lines, and section headers by patching values in-place.
     """
+    lines = text.splitlines(keepends=True)
+    current_section: str | None = None
+    out_lines: list[str] = []
+
+    for raw_line in lines:
+        line = raw_line
+        stripped = line.lstrip()
+
+        # Blank or pure comment: keep as-is
+        if not stripped or stripped[0] in {"#", ";"}:
+            out_lines.append(raw_line)
+            continue
+
+        # Section header
+        if stripped.startswith("[") and "]" in stripped:
+            header_inner = stripped[1 : stripped.index("]")]
+            current_section = header_inner.strip()
+            out_lines.append(raw_line)
+            continue
+
+        # Work without newline so we can re-attach it exactly
+        newline = ""
+        content = raw_line
+        if content.endswith("\r\n"):
+            newline = "\r\n"
+            content = content[:-2]
+        elif content.endswith("\n"):
+            newline = content[-1]
+            content = content[:-1]
+
+        eq_index = content.find("=")
+        if eq_index == -1:
+            # Not a simple key=value line: leave untouched
+            out_lines.append(raw_line)
+            continue
+
+        before_eq = content[:eq_index]
+        after_eq = content[eq_index + 1 :]
+
+        key = before_eq.strip()
+        if not key:
+            out_lines.append(raw_line)
+            continue
+
+        # Whitespace after '='
+        value_ws_len = len(after_eq) - len(after_eq.lstrip(" \t"))
+        leading_ws = after_eq[:value_ws_len]
+        value_and_comment = after_eq[value_ws_len:]
+
+        value_part, comment_part = _split_inline_comment(value_and_comment, {"#", ";"})
+        raw_value = value_part.strip()
+
+        path = (key,) if current_section is None else (current_section, key)
+        var_name = make_var_name(role_prefix, path)
+
+        # Was the original value quoted?
+        use_quotes = (
+            len(raw_value) >= 2
+            and raw_value[0] == raw_value[-1]
+            and raw_value[0] in {'"', "'"}
+        )
+
+        if use_quotes:
+            quote_char = raw_value[0]
+            replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}"
+        else:
+            replacement_value = f"{{{{ {var_name} }}}}"
+
+        new_content = before_eq + "=" + leading_ws + replacement_value + comment_part
+        out_lines.append(new_content + newline)
+
+    return "".join(out_lines)
+
+
+def _generate_toml_template_from_text(role_prefix: str, text: str) -> str:
+    """
+    Generate a Jinja2 template for a TOML file, preserving comments,
+    blank lines, and table headers by patching values in-place.
+
+    Handles inline tables like:
+      temp_targets = { cpu = 79.5, case = 72.0 }
+
+    by giving each entry its own variable, all on one line (here inside a
+    [database] table; the second entry is elided for brevity):
+      temp_targets = { cpu = {{ prefix_database_temp_targets_cpu }}, ... }
+    """
+    lines = text.splitlines(keepends=True)
+    current_table: tuple[str, ...] = ()
+    out_lines: list[str] = []
+
+    for raw_line in lines:
+        line = raw_line
+        stripped = line.lstrip()
+
+        # Blank or pure comment
+        if not stripped or stripped.startswith("#"):
+            out_lines.append(raw_line)
+            continue
+
+        # Table header: [server] or [server.tls] or [[array.of.tables]]
+        if stripped.startswith("[") and "]" in stripped:
+            header = stripped
+            first_bracket = header.find("[")
+            closing_bracket = header.find("]", first_bracket + 1)
+            if first_bracket != -1 and closing_bracket != -1:
+                inner = header[first_bracket + 1 : closing_bracket].strip()
+                inner = inner.strip("[]")  # handle [[table]] as well
+                parts = [p.strip() for p in inner.split(".") if p.strip()]
+                current_table = tuple(parts)
+            out_lines.append(raw_line)
+            continue
+
+        # Try key = value
+        newline = ""
+        content = raw_line
+        if content.endswith("\r\n"):
+            newline = "\r\n"
+            content = content[:-2]
+        elif content.endswith("\n"):
+            newline = content[-1]
+            content = content[:-1]
+
+        eq_index = content.find("=")
+        if eq_index == -1:
+            out_lines.append(raw_line)
+            continue
+
+        before_eq = content[:eq_index]
+        after_eq = content[eq_index + 1 :]
+
+        key = before_eq.strip()
+        if not key:
+            out_lines.append(raw_line)
+            continue
+
+        # Whitespace after '='
+        value_ws_len = len(after_eq) - len(after_eq.lstrip(" \t"))
+        leading_ws = after_eq[:value_ws_len]
+        value_and_comment = after_eq[value_ws_len:]
+
+        value_part, comment_part = _split_inline_comment(value_and_comment, {"#"})
+        raw_value = value_part.strip()
+
+        # Path for this key (table + key)
+        path = current_table + (key,)
+
+        # Special case: inline table
+        if (
+            raw_value.startswith("{")
+            and raw_value.endswith("}")
+            and tomllib is not None
+        ):
+            try:
+                # Parse the inline table as a tiny TOML document
+                mini_source = "table = " + raw_value + "\n"
+                mini_data = tomllib.loads(mini_source)["table"]
+            except Exception:
+                mini_data = None
+
+            if isinstance(mini_data, dict):
+                inner_bits: list[str] = []
+                for sub_key, sub_val in mini_data.items():
+                    nested_path = path + (sub_key,)
+                    nested_var = make_var_name(role_prefix, nested_path)
+                    if isinstance(sub_val, str):
+                        inner_bits.append(f'{sub_key} = "{{{{ {nested_var} }}}}"')
+                    else:
+                        inner_bits.append(f"{sub_key} = {{{{ {nested_var} }}}}")
+                replacement_value = "{ " + ", ".join(inner_bits) + " }"
+                new_content = (
+                    before_eq + "=" + leading_ws + replacement_value + comment_part
+                )
+                out_lines.append(new_content + newline)
+                continue
+            # If parsing fails, fall through to normal handling
+
+        # Normal scalar value handling (including bools, numbers, strings)
+        var_name = make_var_name(role_prefix, path)
+        use_quotes = (
+            len(raw_value) >= 2
+            and raw_value[0] == raw_value[-1]
+            and raw_value[0] in {'"', "'"}
+        )
+
+        if use_quotes:
+            quote_char = raw_value[0]
+            replacement_value = f"{quote_char}{{{{ {var_name} }}}}{quote_char}"
+        else:
+            replacement_value = f"{{{{ {var_name} }}}}"
+
+        new_content = before_eq + "=" + leading_ws + replacement_value + comment_part
+        out_lines.append(new_content + newline)
+
+    return "".join(out_lines)
+
+
+def generate_template(
+    fmt: str,
+    parsed: Any,
+    role_prefix: str,
+    original_text: str | None = None,
+) -> str:
+    """
+    Generate a Jinja2 template for the config.
+
+    If original_text is provided, comments and blank lines are preserved by
+    patching values in-place. Otherwise we fall back to reconstructing from
+    the parsed structure (no comments).
+    """
+    if original_text is not None:
+        if fmt == "toml":
+            return _generate_toml_template_from_text(role_prefix, original_text)
+        if fmt == "ini":
+            return _generate_ini_template_from_text(role_prefix, original_text)
+        raise ValueError(f"Unsupported format: {fmt}")
+
+    # Fallback: previous behaviour (no comments preserved)
     if fmt == "toml":
         if not isinstance(parsed, dict):
             raise TypeError("TOML parser result must be a dict")
diff --git a/tests/test_core.py b/tests/test_core.py
index bcdd2f7..374c4e9 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from pathlib import Path
-
+import configparser
 import pytest
 import yaml
 
@@ -69,7 +69,7 @@ def test_toml_sample_roundtrip():
     assert fmt == "toml"
 
     flat_items = flatten_config(fmt, parsed)
-    assert flat_items, "Expected at least one flattened item from TOML sample"
+    assert flat_items
 
     defaults_yaml = generate_defaults_yaml("jinjaturtle", flat_items)
     defaults = yaml.safe_load(defaults_yaml)
@@ -84,10 +84,16 @@ def test_toml_sample_roundtrip():
         assert key == key.lower()
         assert " " not in key
 
-    # template generation
-    template = generate_template(fmt, parsed, "jinjaturtle")
+    # template generation – **now with original_text**
+    original_text = toml_path.read_text(encoding="utf-8")
+    template = generate_template(
+        fmt, parsed, "jinjaturtle", original_text=original_text
+    )
     assert isinstance(template, str)
-    assert template.strip(), "Template for TOML sample should not be empty"
+    assert template.strip()
+
+    # comments from the original file should now be preserved
+    assert "# This is a TOML document" in template
 
     # each default variable name should appear in the template as a Jinja placeholder
     for var_name in defaults:
@@ -120,7 +126,9 @@ def test_ini_php_sample_roundtrip():
         assert " " not in key
 
     # template generation
-    template = generate_template(fmt, parsed, "php")
+    original_text = ini_path.read_text(encoding="utf-8")
+    template = generate_template(fmt, parsed, "php", original_text=original_text)
+    assert "; About this file" in template
     assert isinstance(template, str)
     assert template.strip(), "Template for php.ini sample should not be empty"
 
@@ -189,3 +197,92 @@ def test_generate_template_type_and_format_errors():
     # unsupported format
     with pytest.raises(ValueError):
         generate_template("yaml", parsed=None, role_prefix="role")
+
+    # unsupported format even when original_text is provided
+    with pytest.raises(ValueError):
+        generate_template(
+            "yaml",
+            parsed=None,
+            role_prefix="role",
+            original_text="foo=bar",
+        )
+
+
+def test_normalize_default_value_true_false_strings():
+    # 'true'/'false' strings should be preserved as strings and double-quoted in YAML.
+    flat_items = [
+        (("section", "foo"), "true"),
+        (("section", "bar"), "FALSE"),
+    ]
+    defaults_yaml = generate_defaults_yaml("role", flat_items)
+    data = yaml.safe_load(defaults_yaml)
+    assert data["role_section_foo"] == "true"
+    assert data["role_section_bar"] == "FALSE"
+
+
+def test_split_inline_comment_handles_quoted_hash():
+    # The '#' inside quotes should not start a comment; the one outside should.
+    text = " 'foo # not comment' # real"
+    value, comment = core._split_inline_comment(text, {"#"})
+    assert "not comment" in value
+    assert comment.strip() == "# real"
+
+
+def test_generate_template_fallback_toml_and_ini():
+    # When original_text is not provided, generate_template should use the
+    # older fallback generators based on the parsed structures.
+    parsed_toml = {
+        "title": "Example",
+        "server": {"port": 8080, "host": "127.0.0.1"},
+        "logging": {
+            "file": {"path": "/tmp/app.log"}
+        },  # nested table to hit recursive walk
+    }
+    tmpl_toml = generate_template("toml", parsed=parsed_toml, role_prefix="role")
+    assert "[server]" in tmpl_toml
+    assert "role_server_port" in tmpl_toml
+    assert "[logging]" in tmpl_toml or "[logging.file]" in tmpl_toml
+
+    parser = configparser.ConfigParser()
+    # foo's value includes literal double quotes to hit the "preserve quotes" branch
+    parser["section"] = {"foo": '"bar"', "num": "42"}
+    tmpl_ini = generate_template("ini", parsed=parser, role_prefix="role")
+    assert "[section]" in tmpl_ini
+    assert "role_section_foo" in tmpl_ini
+    assert '"{{ role_section_foo }}"' in tmpl_ini  # came from quoted INI value
+
+
+def test_generate_ini_template_from_text_edge_cases():
+    # Cover CRLF newlines, lines without '=', and lines with no key before '='.
+    text = "[section]\r\nkey=value\r\nnoequals\r\n   = bare\r\n"
+    tmpl = core._generate_ini_template_from_text("role", text)
+    # We don't care about exact formatting here, just that it runs and
+    # produces some reasonable output.
+    assert "[section]" in tmpl
+    assert "role_section_key" in tmpl
+    # The "noequals" line should be preserved as-is.
+    assert "noequals" in tmpl
+    # The "   = bare" line has no key and should be left untouched.
+    assert "   = bare" in tmpl
+
+
+def test_generate_toml_template_from_text_edge_cases():
+    # Cover CRLF newlines, lines without '=', empty keys, and inline tables
+    # that both parse successfully and fail parsing.
+    text = (
+        "# comment\r\n"
+        "[table]\r\n"
+        "noequals\r\n"
+        "   = 42\r\n"
+        'inline_good = { name = "abc", value = 1 }\r\n'
+        "inline_bad = { invalid = }\r\n"
+    )
+    tmpl = core._generate_toml_template_from_text("role", text)
+    # The good inline table should expand into two separate variables.
+    assert "role_table_inline_good_name" in tmpl
+    assert "role_table_inline_good_value" in tmpl
+    # The bad inline table should fall back to scalar handling.
+    assert "role_table_inline_bad" in tmpl
+    # Ensure the lines without '=' / empty key were handled without exploding.
+    assert "[table]" in tmpl
+    assert "noequals" in tmpl