# ---------- helpers for word counting ----------
def _strip_markdown(self, text: str) -> str:
    """
    Cheap markdown-ish stripper for word counting.

    Only approximate numbers are needed, so this relies on a handful of
    regular expressions rather than a real Markdown parser.

    Args:
        text: Raw page content (Markdown, possibly with inline HTML).

    Returns:
        ``text`` with code spans, link targets, HTML tags and markup
        characters replaced by spaces; ``""`` when ``text`` is empty or None.
    """
    if not text:
        return ""

    # Remove fenced code blocks
    text = re.sub(r"```.*?```", " ", text, flags=re.DOTALL)
    # Remove inline code
    text = re.sub(r"`[^`]+`", " ", text)
    # [text](url) → text
    text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
    # Strip simple HTML tags. This must happen before the marker pass below:
    # that pass removes every ">", after which no tag could be recognised and
    # tag names/attributes would be counted as words. The pattern requires a
    # tag name right after "<" so that prose such as "a < b" or a later
    # blockquote ">" is not mistaken for a tag.
    text = re.sub(r"</?[A-Za-z][^<>]*>", " ", text)
    # Remove emphasis markers, headings, blockquote markers, etc.
    text = re.sub(r"[#*_>]+", " ", text)

    return text


def _count_words(self, text: str) -> int:
    """Return the approximate number of words in ``text`` once markup is stripped."""
    text = self._strip_markdown(text)
    return len(re.findall(r"\b\w+\b", text, flags=re.UNICODE))


def gather_stats(self):
    """
    Compute all the numbers the Statistics dialog needs in one place.

    Returns:
        A 10-tuple, in this order:

        0. pages_with_content: number of dates returned by
           ``dates_with_content()`` (0 if that call fails).
        1. total_revisions: total number of rows in ``versions``.
        2. page_most_revisions: date string of the page with the most
           revisions (earliest date wins a tie), or None.
        3. page_most_revisions_count: revision count of that page.
        4. words_by_date: ``datetime.date`` -> word count of each entry
           returned by ``get_all_entries()``.
        5. total_words: sum of the word counts of those entries.
        6. unique_tags: number of rows in ``tags``.
        7. page_most_tags: date string of the page with the most tags
           (earliest date wins a tie), or None.
        8. page_most_tags_count: tag count of that page.
        9. revisions_by_date: ``datetime.date`` -> revision count.
    """

    def _parse_date(date_iso):
        # Dates that are not valid ISO strings (malformed text, NULL) are left
        # out of the per-date maps instead of aborting the whole computation.
        try:
            return _dt.date.fromisoformat(date_iso)
        except (TypeError, ValueError):
            return None

    # 1) pages with content (current version only)
    try:
        pages_with_content_list = self.dates_with_content()
    except Exception:
        # Best effort: a failure here only zeroes this one figure
        pages_with_content_list = []
    pages_with_content = len(pages_with_content_list)

    cur = self.conn.cursor()

    # 2 & 3) total revisions + page with most revisions + per-date counts
    total_revisions = 0
    page_most_revisions = None
    page_most_revisions_count = 0
    revisions_by_date: Dict[_dt.date, int] = {}

    rows = cur.execute(
        """
        SELECT date, COUNT(*) AS c
        FROM versions
        GROUP BY date
        ORDER BY date;
        """
    ).fetchall()

    for r in rows:
        date_iso = r["date"]
        c = int(r["c"])
        total_revisions += c

        # Strict ">" keeps the earliest date when several pages tie
        if c > page_most_revisions_count:
            page_most_revisions_count = c
            page_most_revisions = date_iso

        d = _parse_date(date_iso)
        if d is not None:
            revisions_by_date[d] = c

    # 4) total words + per-date words (current version only)
    total_words = 0
    words_by_date: Dict[_dt.date, int] = {}

    for date_iso, content in self.get_all_entries():
        wc = self._count_words(content or "")
        total_words += wc
        d = _parse_date(date_iso)
        if d is not None:
            words_by_date[d] = wc

    # tags + page with most tags
    rows = cur.execute("SELECT COUNT(*) AS total_unique FROM tags;").fetchall()
    unique_tags = int(rows[0]["total_unique"]) if rows else 0

    rows = cur.execute(
        """
        SELECT page_date, COUNT(*) AS c
        FROM page_tags
        GROUP BY page_date
        ORDER BY c DESC, page_date ASC
        LIMIT 1;
        """
    ).fetchall()

    if rows:
        page_most_tags = rows[0]["page_date"]
        page_most_tags_count = int(rows[0]["c"])
    else:
        page_most_tags = None
        page_most_tags_count = 0

    return (
        pages_with_content,
        total_revisions,
        page_most_revisions,
        page_most_revisions_count,
        words_by_date,
        total_words,
        unique_tags,
        page_most_tags,
        page_most_tags_count,
        revisions_by_date,
    )
def _open_statistics(self):
    """Open the statistics dialog via ``exec()``; do nothing without a usable database."""
    database = getattr(self, "db", None)
    # A missing database object, or one whose connection is None, means the
    # request is silently ignored.
    if database and self.db.conn is not None:
        stats_dialog = StatisticsDialog(self.db, self)
        stats_dialog.exec()
# ---------- Activity heatmap ----------


class DateHeatmap(QWidget):
    """
    Small calendar heatmap for activity by date.

    Data is a mapping: datetime.date -> integer value.

    Each column is one week and each row one weekday, starting from the
    Monday of the week that contains the earliest date (``date.weekday()`` is
    0 on Mondays). Month names (``%b``) are drawn underneath the grid.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self._data: Dict[_dt.date, int] = {}
        self._start: _dt.date | None = None  # first day of the first column
        self._end: _dt.date | None = None  # latest date present in the data
        self._max_value: int = 0  # value that maps to the strongest colour

        # Geometry, in pixels
        self._cell = 12
        self._gap = 3
        self._margin_left = 10
        self._margin_top = 10
        self._margin_bottom = 24  # leaves room for the month labels
        self._margin_right = 10

        self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed)

    def set_data(self, data: Dict[_dt.date, int]) -> None:
        """
        Replace dataset and recompute layout.

        Entries whose value is None are dropped; the remaining values are
        coerced with ``int()``. Passing None or an empty mapping clears the
        heatmap.
        """
        self._data = {k: int(v) for k, v in (data or {}).items() if v is not None}
        if not self._data:
            self._start = self._end = None
            self._max_value = 0
        else:
            earliest = min(self._data)
            # Back up to the start of the earliest date's week so that every
            # column begins on the same weekday
            self._start = earliest - _dt.timedelta(days=earliest.weekday())
            self._end = max(self._data)
            self._max_value = max(self._data.values())

        self.updateGeometry()
        self.update()

    # Layout helpers ------------------------------------------------------

    def _step(self) -> int:
        """Distance in pixels between the origins of two adjacent cells."""
        return self._cell + self._gap

    def _week_count(self) -> int:
        """Number of week columns covering ``_start`` .. ``_end`` (both must be set)."""
        day_count = (self._end - self._start).days + 1
        return (day_count + 6) // 7  # ceil

    # QWidget overrides ---------------------------------------------------

    def sizeHint(self) -> QSize:
        """Size needed to show every week column, or a 20-column default when empty."""
        step = self._step()
        if not self._start or not self._end:
            return QSize(
                self._margin_left + self._margin_right + 20 * step,
                self._margin_top + self._margin_bottom + 7 * step,
            )

        width = (
            self._margin_left
            + self._margin_right
            + self._week_count() * step
            + self._gap
        )
        height = self._margin_top + self._margin_bottom + 7 * step + self._gap
        return QSize(width, height)

    def minimumSizeHint(self) -> QSize:
        """Full height of the grid, with the width capped at 300 pixels."""
        # NOTE(review): StatisticsDialog hosts this widget in a QScrollArea with
        # setWidgetResizable(True). Capping the minimum width may allow the
        # scroll area to shrink the widget instead of showing a scroll bar, which
        # would hide the most recent weeks of a long range - confirm on real data.
        sz = self.sizeHint()
        return QSize(min(300, sz.width()), sz.height())

    def paintEvent(self, event):
        """Draw the week/weekday grid and the month labels."""
        super().paintEvent(event)

        # Nothing to draw: return before a QPainter is opened on the widget, so
        # that no painter is left active on this path.
        if not self._start or not self._end:
            return

        painter = QPainter(self)
        try:
            painter.setRenderHint(QPainter.Antialiasing, True)
            self._paint_cells(painter)
            self._paint_month_labels(painter)
        finally:
            # Always release the paint device, even if drawing raised
            painter.end()

    # Painting helpers ----------------------------------------------------

    def _paint_cells(self, painter: QPainter) -> None:
        """Fill one square per day, shaded by its value relative to the maximum."""
        palette = self.palette()
        bg_no_data = palette.base().color()
        active = palette.highlight().color()
        step = self._step()

        painter.setPen(QPen(Qt.NoPen))

        for week in range(self._week_count()):
            for dow in range(7):
                date = self._start + _dt.timedelta(days=week * 7 + dow)
                # Days after the last recorded date only pad out the final week
                value = 0 if date > self._end else self._data.get(date, 0)

                if value <= 0 or self._max_value <= 0:
                    color = bg_no_data
                else:
                    # Clamp to at least 0.1 so that small values stay visible
                    ratio = max(0.1, min(1.0, value / float(self._max_value)))
                    # Lighter for low values, darker for high values
                    lighten = 150 - int(50 * ratio)  # 150 ≈ light, 100 ≈ original
                    color = QColor(active).lighter(lighten)

                painter.fillRect(
                    self._margin_left + week * step,
                    self._margin_top + dow * step,
                    self._cell,
                    self._cell,
                    QBrush(color),
                )

    def _paint_month_labels(self, painter: QPainter) -> None:
        """Write the month name under the first column of each run of weeks in a month."""
        painter.setPen(self.palette().text().color())
        fm = painter.fontMetrics()
        step = self._step()
        y = self._margin_top + 7 * step + fm.ascent()

        prev_month = None
        for week in range(self._week_count()):
            date = self._start + _dt.timedelta(days=week * 7)
            if date > self._end:
                break

            # Label only the first column of consecutive weeks in one month
            if prev_month == date.month:
                continue
            prev_month = date.month

            label = date.strftime("%b")
            x_center = self._margin_left + week * step + self._cell / 2
            painter.drawText(
                int(x_center - fm.horizontalAdvance(label) / 2),
                int(y),
                label,
            )


# ---------- Statistics dialog itself ----------


class StatisticsDialog(QDialog):
    """
    Shows aggregate statistics and the date heatmap with a metric switcher.

    All figures are read once, at construction time, from
    ``DBManager.gather_stats()``.
    """

    def __init__(self, db: DBManager, parent=None):
        super().__init__(parent)
        self._db = db

        self.setWindowTitle(strings._("statistics"))

        root = QVBoxLayout(self)

        (
            pages_with_content,
            total_revisions,
            page_most_revisions,
            page_most_revisions_count,
            words_by_date,
            total_words,
            unique_tags,
            page_most_tags,
            page_most_tags_count,
            revisions_by_date,
        ) = self._gather_stats()

        # --- Numeric summary at the top ----------------------------------
        form = QFormLayout()
        root.addLayout(form)

        summary_rows = (
            ("stats_pages_with_content", str(pages_with_content)),
            ("stats_total_revisions", str(total_revisions)),
            (
                "stats_page_most_revisions",
                self._top_page_text(page_most_revisions, page_most_revisions_count),
            ),
            ("stats_total_words", str(total_words)),
            # Unique tag names
            ("stats_unique_tags", str(unique_tags)),
            (
                "stats_page_most_tags",
                self._top_page_text(page_most_tags, page_most_tags_count),
            ),
        )
        for string_key, value in summary_rows:
            form.addRow(strings._(string_key), QLabel(value))

        # --- Heatmap with switcher ---------------------------------------
        if words_by_date or revisions_by_date:
            group = QGroupBox(strings._("stats_activity_heatmap"))
            group_layout = QVBoxLayout(group)

            # Metric selector; the item data ("words" / "revisions") is what
            # _apply_metric() receives
            combo_row = QHBoxLayout()
            combo_row.addWidget(QLabel(strings._("stats_heatmap_metric")))
            self.metric_combo = QComboBox()
            self.metric_combo.addItem(strings._("stats_metric_words"), "words")
            self.metric_combo.addItem(strings._("stats_metric_revisions"), "revisions")
            combo_row.addWidget(self.metric_combo)
            combo_row.addStretch(1)
            group_layout.addLayout(combo_row)

            self._heatmap = DateHeatmap()
            self._words_by_date = words_by_date
            self._revisions_by_date = revisions_by_date

            scroll = QScrollArea()
            scroll.setWidgetResizable(True)
            scroll.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
            scroll.setVerticalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
            scroll.setWidget(self._heatmap)
            group_layout.addWidget(scroll)

            root.addWidget(group)

            # Default to "words"
            self._apply_metric("words")
            self.metric_combo.currentIndexChanged.connect(self._on_metric_changed)
        else:
            root.addWidget(QLabel(strings._("stats_no_data")))

    # ---------- internal helpers ----------

    @staticmethod
    def _top_page_text(page, count) -> str:
        """Format a "page (count)" summary, or an em dash when there is no page."""
        return f"{page} ({count})" if page else "—"

    def _apply_metric(self, metric: str) -> None:
        """Feed the heatmap with revision counts for "revisions", word counts otherwise."""
        if metric == "revisions":
            self._heatmap.set_data(self._revisions_by_date)
        else:
            self._heatmap.set_data(self._words_by_date)

    def _on_metric_changed(self, index: int) -> None:
        """Slot for the metric combo box; ``index`` is unused, the item data is read instead."""
        metric = self.metric_combo.currentData()
        if metric:
            self._apply_metric(metric)

    def _gather_stats(self):
        """Return the statistics tuple produced by the database manager."""
        return self._db.gather_stats()