Add export options

This commit is contained in:
Miguel Jacq 2025-11-02 12:49:19 +11:00
parent 6cae652643
commit fb4a9e5e27
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
4 changed files with 171 additions and 13 deletions

View file

@ -1,9 +1,16 @@
from __future__ import annotations
import csv
import html
import json
import os
from dataclasses import dataclass
from pathlib import Path
from sqlcipher3 import dbapi2 as sqlite
from typing import List, Sequence, Tuple
Entry = Tuple[str, str]
@dataclass
@ -21,6 +28,7 @@ class DBManager:
# Ensure parent dir exists
self.cfg.path.parent.mkdir(parents=True, exist_ok=True)
self.conn = sqlite.connect(str(self.cfg.path))
self.conn.row_factory = sqlite.Row
cur = self.conn.cursor()
cur.execute(f"PRAGMA key = '{self.cfg.key}';")
cur.execute("PRAGMA journal_mode = WAL;")
@ -102,14 +110,116 @@ class DBManager:
def search_entries(self, text: str) -> list[str]:
    """Return every row of ``entries`` whose trimmed content contains *text*.

    The match is a SQL ``LIKE`` against ``%text%``. The pattern is passed as
    a bound parameter, never interpolated into the statement.

    NOTE(review): the query is ``SELECT *``, so each element is a whole row
    rather than the ``str`` the annotation advertises - confirm with callers
    before tightening the annotation.
    """
    pattern = f"%{text}%"
    cur = self.conn.cursor()
    # One query, one return: execute() hands back the cursor, so the result
    # list can be fetched directly from the call.
    return cur.execute(
        "SELECT * FROM entries WHERE TRIM(content) LIKE ?", (pattern,)
    ).fetchall()
def dates_with_content(self) -> list[str]:
    """List the ``date`` of each entry whose content is non-empty after TRIM."""
    rows = (
        self.conn.cursor()
        .execute("SELECT date FROM entries WHERE TRIM(content) <> '';")
        .fetchall()
    )
    dates = []
    for row in rows:
        # Only one column is selected, so the date is always at index 0.
        dates.append(row[0])
    return dates
def get_all_entries(self) -> List[Entry]:
    """Fetch all entries as ``(date, content)`` tuples, ordered by date.

    Columns are read by name, so the connection's rows must support key
    lookup (for example via a ``Row`` row factory).
    """
    query = "SELECT date, content FROM entries ORDER BY date"
    collected = []
    for record in self.conn.cursor().execute(query).fetchall():
        collected.append((record["date"], record["content"]))
    return collected
def export_json(
    self, entries: Sequence[Entry], file_path: str, pretty: bool = True
) -> None:
    """Write *entries* to *file_path* as a JSON array of date/content objects.

    With ``pretty`` the output is indented by two spaces; otherwise it is
    emitted in compact form with no whitespace after separators. Non-ASCII
    characters are written as-is (UTF-8), not as ``\\u`` escapes.
    """
    if pretty:
        layout = {"indent": 2}
    else:
        layout = {"separators": (",", ":")}

    records = []
    for date, content in entries:
        records.append({"date": date, "content": content})

    with open(file_path, "w", encoding="utf-8") as out:
        json.dump(records, out, ensure_ascii=False, **layout)
def export_csv(self, entries: Sequence[Entry], file_path: str) -> None:
    """Write *entries* to *file_path* as CSV with a ``date,content`` header row."""
    # The utf-8-sig codec prepends a BOM so that Excel opens the file as UTF-8.
    # newline="" leaves line-ending handling to the csv module.
    with open(file_path, "w", encoding="utf-8-sig", newline="") as out:
        sheet = csv.writer(out)
        sheet.writerow(["date", "content"])
        for record in entries:
            sheet.writerow(record)
def export_txt(
    self,
    entries: Sequence[Entry],
    file_path: str,
    separator: str = "\n\n— — — — —\n\n",
    strip_html: bool = True,
) -> None:
    """Write *entries* to *file_path* as plain text.

    Each entry is written as its date on one line followed by its body and a
    trailing newline; *separator* is written between consecutive entries
    (never after the last one).

    Args:
        entries: ``(date, content)`` pairs to export.
        file_path: Destination file, written as UTF-8.
        separator: Text inserted between two consecutive entries.
        strip_html: When true, the content is converted from HTML to text:
            ``<script>``/``<style>`` blocks and comments are dropped, ``<br>``
            and closing block tags become newlines, remaining tags are
            removed, entities are unescaped and whitespace is tidied. When
            false, the content is written unchanged.
    """
    import html as _html
    import re

    # Raw strings need a *single* backslash for regex escapes: r"\s" is the
    # whitespace class, whereas r"\\s" would match a literal backslash + "s".
    style_script_re = re.compile(r"(?is)<(script|style)[^>]*>.*?</\1>")
    comment_re = re.compile(r"<!--.*?-->", re.S)
    br_re = re.compile(r"(?i)<br\s*/?>")
    block_end_re = re.compile(r"(?i)</(p|div|section|article|li|h[1-6])\s*>")
    tag_re = re.compile(r"<[^>]+>")
    ws_ends_re = re.compile(r"[ \t]+\n")
    multinewline_re = re.compile(r"\n{3,}")

    def _strip(s: str) -> str:
        # 1) Remove <style> and <script> blocks *including their contents*.
        s = style_script_re.sub("", s)
        # 2) Remove HTML comments.
        s = comment_re.sub("", s)
        # 3) Turn line breaks and block endings into newlines before the
        #    generic tag removal would swallow them without a trace.
        s = br_re.sub("\n", s)
        s = block_end_re.sub("\n", s)
        # 4) Drop remaining tags.
        s = tag_re.sub("", s)
        # 5) Unescape entities (&nbsp; etc.).
        s = _html.unescape(s)
        # 6) Tidy whitespace: trim trailing blanks on each line and collapse
        #    runs of three or more newlines into a single blank line.
        s = ws_ends_re.sub("\n", s)
        s = multinewline_re.sub("\n\n", s)
        return s.strip()

    last_index = len(entries) - 1
    with open(file_path, "w", encoding="utf-8") as f:
        for i, (d, c) in enumerate(entries):
            body = _strip(c) if strip_html else c
            f.write(f"{d}\n{body}\n")
            if i < last_index:
                f.write(separator)
def export_html(
    self, entries: Sequence[Entry], file_path: str, title: str = "Entries export"
) -> None:
    """Write *entries* to *file_path* as a single standalone HTML page.

    The title and each entry's date are HTML-escaped. Entry content is
    inserted verbatim inside its ``<section>``, i.e. it is treated as markup.
    """
    safe_title = html.escape(title)
    head = [
        "<!doctype html>",
        '<html lang="en">',
        '<meta charset="utf-8">',
        f"<title>{safe_title}</title>",
        "<style>body{font:16px/1.5 system-ui,Segoe UI,Roboto,Helvetica,Arial,sans-serif;padding:24px;max-width:900px;margin:auto;}",
        "article{padding:16px 0;border-bottom:1px solid #ddd;} time{font-weight:600;color:#333;} section{margin-top:8px;}</style>",
        "<body>",
        f"<h1>{safe_title}</h1>",
    ]
    articles = [
        f"<article><header><time>{html.escape(day)}</time></header><section>{markup}</section></article>"
        for day, markup in entries
    ]
    document = "\n".join(head + articles + ["</body></html>"])
    with open(file_path, "w", encoding="utf-8") as out:
        out.write(document)
def export_by_extension(self, file_path: str) -> None:
    """Export all entries to *file_path*, choosing the format from its suffix.

    Recognised suffixes (case-insensitive): ``.json``, ``.csv``, ``.txt``,
    ``.html`` and ``.htm``.

    Raises:
        ValueError: If the suffix is not one of the recognised ones.
    """
    # Suffix -> name of the exporter method; resolved lazily so only the
    # selected exporter is ever looked up.
    exporter_names = {
        ".json": "export_json",
        ".csv": "export_csv",
        ".txt": "export_txt",
        ".html": "export_html",
        ".htm": "export_html",
    }
    entries = self.get_all_entries()
    _, suffix = os.path.splitext(file_path)
    ext = suffix.lower()
    if ext not in exporter_names:
        raise ValueError(f"Unsupported extension: {ext}")
    getattr(self, exporter_names[ext])(entries, file_path)
def close(self) -> None:
    """Close the database connection if one is set; do nothing otherwise."""
    connection = self.conn
    if connection is not None:
        connection.close()