Add tag relationship visualisation graph tool
All checks were successful
CI / test (push) Successful in 3m43s
Lint / test (push) Successful in 28s
Trivy / test (push) Successful in 24s

This commit is contained in:
Miguel Jacq 2025-11-18 17:29:57 +11:00
parent 90d871246b
commit 01997aee90
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
8 changed files with 473 additions and 78 deletions

View file

@ -746,6 +746,49 @@ class DBManager:
revisions_by_date,
)
def get_tag_cooccurrences(self):
    """
    Gather the data needed to draw the tag relationship graph.

    Two tags "co-occur" when both are attached to the same page.

    Returns a 3-tuple:
        tags_by_id: dict[int, TagRow]      # tag id -> row returned by list_tags()
        edges: list[(int, int, int)]       # (tag_id1, tag_id2, shared page count),
                                           # always with tag_id1 < tag_id2
        tag_page_counts: dict[int, int]    # tag id -> number of distinct pages
                                           # the tag is attached to
    """
    cursor = self.conn.cursor()

    # Nodes: every known tag, keyed by the first column of its row (the id).
    tags_by_id: dict[int, TagRow] = {row[0]: row for row in self.list_tags()}

    # Node weight: distinct pages per tag. Tags without any page_tags row
    # simply do not appear in this mapping.
    usage_rows = cursor.execute(
        """
SELECT tag_id, COUNT(DISTINCT page_date) AS c
FROM page_tags
GROUP BY tag_id;
"""
    ).fetchall()
    tag_page_counts = {}
    for usage in usage_rows:
        tag_page_counts[usage["tag_id"]] = usage["c"]

    # Edges: self-join page_tags on the page. The "<" condition keeps one row
    # per unordered pair and excludes a tag pairing with itself.
    pair_rows = cursor.execute(
        """
SELECT
pt1.tag_id AS tag1,
pt2.tag_id AS tag2,
COUNT(DISTINCT pt1.page_date) AS c
FROM page_tags AS pt1
JOIN page_tags AS pt2
ON pt1.page_date = pt2.page_date
AND pt1.tag_id < pt2.tag_id
GROUP BY pt1.tag_id, pt2.tag_id;
"""
    ).fetchall()
    edges = []
    for pair in pair_rows:
        edges.append((pair["tag1"], pair["tag2"], pair["c"]))

    return tags_by_id, edges, tag_page_counts
def close(self) -> None:
if self.conn is not None:
self.conn.close()

View file

@ -135,6 +135,7 @@
"delete_tag": "Delete tag",
"delete_tag_confirm": "Are you sure you want to delete the tag '{name}'? This will remove it from all pages.",
"tag_already_exists_with_that_name": "A tag already exists with that name",
"tag_graph": "Tag relationship graph",
"statistics": "Statistics",
"main_window_statistics_accessible_flag": "Stat&istics",
"stats_pages_with_content": "Pages with content (current version)",

View file

@ -14,8 +14,9 @@ from PySide6.QtWidgets import (
)
from .db import DBManager
from sqlcipher3.dbapi2 import IntegrityError
from .tag_graph_dialog import TagGraphDialog
from . import strings
from sqlcipher3.dbapi2 import IntegrityError
class TagBrowserDialog(QDialog):
@ -71,6 +72,10 @@ class TagBrowserDialog(QDialog):
self.delete_btn.setEnabled(False)
btn_row.addWidget(self.delete_btn)
self.tag_graph_btn = QPushButton(strings._("tag_graph"))
self.tag_graph_btn.clicked.connect(self._open_tag_graph)
btn_row.addWidget(self.tag_graph_btn)
btn_row.addStretch(1)
layout.addLayout(btn_row)
@ -251,3 +256,9 @@ class TagBrowserDialog(QDialog):
self._db.delete_tag(tag_id)
self._populate(None)
self.tagsModified.emit()
# ------------ Tag graph handler --------------- #
def _open_tag_graph(self):
    """Open the tag relationship graph in an 800x600 dialog owned by this one."""
    graph_dialog = TagGraphDialog(self._db, self)
    graph_dialog.resize(800, 600)
    # exec() returns only once the dialog has been closed.
    graph_dialog.exec()

309
bouquin/tag_graph_dialog.py Normal file
View file

@ -0,0 +1,309 @@
import networkx as nx
import numpy as np
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore
from PySide6.QtWidgets import QDialog, QVBoxLayout, QToolTip
from PySide6.QtGui import QFont, QCursor, QColor
from .db import DBManager
from . import strings
class DraggableGraphItem(pg.GraphItem):
    """A ``pg.GraphItem`` whose nodes can be dragged with the left mouse button.

    Two optional callbacks connect the item to its owner:

    * ``on_position_changed(pos)`` is invoked after each drag step, with the
      item's current position array.
    * ``on_hover(index, ev)`` is invoked on every hover event, with the index
      of the node under the cursor, or ``None`` when there is no node there
      or the cursor has left the item.
    """

    def __init__(self, on_position_changed=None, on_hover=None, **kwds):
        # Every attribute below MUST exist before super().__init__ runs,
        # because GraphItem.__init__ will call self.setData(...)
        self._on_position_changed = on_position_changed
        self._on_hover = on_hover
        self._drag_index = None   # index of the node being dragged, if any
        self._drag_offset = None  # node position minus grab point, fixed at drag start
        self._data_kwargs = {}    # cache of last setData kwargs
        self.pos = None
        super().__init__(**kwds)
        self.setAcceptHoverEvents(True)

    def setData(self, **kwds):
        """Merge ``kwds`` into the cached kwargs and re-apply the full set.

        Re-sending everything means a call that only carries ``pos`` (as made
        while dragging) does not lose size/adj/brush settings.
        """
        try:
            self.pos = kwds["pos"]
        except KeyError:
            pass
        self._data_kwargs.update(kwds)
        super().setData(**self._data_kwargs)

    @staticmethod
    def _as_xy(point):
        """Return ``point`` as a float array, whether it exposes x()/y() or is a sequence."""
        if hasattr(point, "x"):
            return np.array([point.x(), point.y()], dtype=float)
        return np.array(point, dtype=float)

    def _start_drag(self, ev):
        """Handle the first drag event: pick the node under the press position."""
        if ev.button() != QtCore.Qt.MouseButton.LeftButton:
            ev.ignore()
            return

        press_pos = ev.buttonDownPos()
        hits = self.scatter.pointsAt(press_pos)
        # pointsAt may return an empty list/array
        if hits is None or len(hits) == 0:
            ev.ignore()
            return

        self._drag_index = hits[0].index()
        grabbed = np.array(self.pos[self._drag_index], dtype=float)
        # Remember where inside the node the user grabbed it, so the node
        # does not jump to be centred on the cursor.
        self._drag_offset = grabbed - self._as_xy(press_pos)
        ev.accept()

    def _end_drag(self, ev):
        """Handle the last drag event: forget the dragged node."""
        self._drag_index = None
        self._drag_offset = None
        ev.accept()

    def _continue_drag(self, ev):
        """Handle an intermediate drag event: move the dragged node."""
        if self._drag_index is None:
            ev.ignore()
            return

        self.pos[self._drag_index] = self._as_xy(ev.pos()) + self._drag_offset  # mutate in-place

        # Repaint graph, preserving all the other kwargs (size, adj, colours, ...)
        self.setData(pos=self.pos)

        notify = self._on_position_changed
        if notify is not None:
            notify(self.pos)
        ev.accept()

    def mouseDragEvent(self, ev):
        """Dispatch a drag event to the start / finish / in-progress handler."""
        if ev.isStart():
            self._start_drag(ev)
        elif ev.isFinish():
            self._end_drag(ev)
        else:
            self._continue_drag(ev)

    def hoverEvent(self, ev):
        """Report which node (if any) is under the mouse while hovering."""
        hovered = None
        # When the cursor leaves the item entirely there is nothing to look up.
        if not ev.isExit():
            hits = self.scatter.pointsAt(ev.pos())
            if hits is not None and len(hits) > 0:
                hovered = hits[0].index()

        if self._on_hover is not None:
            self._on_hover(hovered, ev)
class TagGraphDialog(QDialog):
    """Dialog that draws tags as draggable nodes and co-occurrence as edges.

    Node size grows with the number of pages a tag appears on; edge opacity
    and width grow with the number of pages two tags share. Each node has a
    translucent "halo" behind it and a text label above it, both of which
    follow the node when it is dragged.
    """

    # Edge colour as RGB (bright cyan); alpha and width are set per edge.
    _EDGE_RGB = (56, 189, 248)
    # Layout of the per-edge pen record array understood by pg.GraphItem.
    _EDGE_PEN_DTYPE = [
        ("red", np.ubyte),
        ("green", np.ubyte),
        ("blue", np.ubyte),
        ("alpha", np.ubyte),
        ("width", float),
    ]
    # Vertical offset of a label from its node, in layout (data) coordinates.
    _LABEL_OFFSET = 0.30

    def __init__(self, db: DBManager, parent=None):
        """Build the plot widgets and populate them from ``db``."""
        super().__init__(parent)
        self.setWindowTitle(strings._("tag_graph"))

        layout = QVBoxLayout(self)
        self.view = pg.GraphicsLayoutWidget()
        layout.addWidget(self.view)

        self.plot = self.view.addPlot()
        self.plot.hideAxis("bottom")
        self.plot.hideAxis("left")

        # Dark-ish background, Grafana / neon style
        self.view.setBackground("#050816")
        self.plot.setMouseEnabled(x=True, y=True)
        self.plot.getViewBox().setDefaultPadding(0.15)

        # State for tags / edges / labels / halo
        self._label_items = []
        self._tag_ids = []
        self._tag_names = {}
        self._tag_page_counts = {}
        self._halo_sizes = []
        self._halo_brushes = []

        self.graph_item = DraggableGraphItem(
            on_position_changed=self._on_positions_changed,
            on_hover=self._on_hover_index,
        )
        self.plot.addItem(self.graph_item)

        # Separate scatter for "halo" glow behind nodes
        self._halo_item = pg.ScatterPlotItem(pxMode=True)
        self._halo_item.setZValue(-1)  # draw behind nodes/labels
        self.plot.addItem(self._halo_item)

        self._populate_graph(db)

    @staticmethod
    def _compute_layout(tag_ids, edges):
        """Return an (N, 2) float array of node positions, ordered like ``tag_ids``."""
        if not edges:
            # Fallback: random-ish blob
            return np.random.normal(size=(len(tag_ids), 2))

        # Weighted spring layout: strongly co-occurring tags are pulled
        # together, which tends to form "topic islands".
        graph = nx.Graph()
        graph.add_nodes_from(tag_ids)
        graph.add_weighted_edges_from(edges, weight="weight")
        pos_by_tag = nx.spring_layout(graph, weight="weight", k=1.2, iterations=80)
        return np.array([pos_by_tag[tid] for tid in tag_ids], dtype=float)

    @classmethod
    def _edge_pens(cls, edges):
        """Return one pen record per edge, scaled by the edge's relative weight.

        pg.GraphItem takes per-edge styling through its ``pen`` argument as a
        record array with fields (red, green, blue, alpha, width); a list of
        QPen objects is not a supported form.
        """
        weights = np.array([w for _, _, w in edges], dtype=float)
        max_w = weights.max() or 1.0  # avoid dividing by zero
        factors = (weights / max_w).clip(0.0, 1.0)

        pens = np.empty(len(edges), dtype=cls._EDGE_PEN_DTYPE)
        pens["red"], pens["green"], pens["blue"] = cls._EDGE_RGB
        pens["alpha"] = (40 + 160 * factors).astype(np.ubyte)  # 40-200
        pens["width"] = 0.7 + 2.3 * factors  # 0.7-3.0
        return pens

    def _populate_graph(self, db: DBManager):
        """Query ``db`` for tags and co-occurrences and draw nodes, edges and labels.

        Does nothing when there are no tags.
        """
        tags_by_id, edges, tag_page_counts = db.get_tag_cooccurrences()
        if not tags_by_id:
            return

        # Map tag_id -> node index
        tag_ids = list(tags_by_id)
        self._tag_ids = tag_ids
        self._tag_page_counts = dict(tag_page_counts)
        self._tag_names = {tid: tags_by_id[tid][1] for tid in tag_ids}
        idx_of = {tid: i for i, tid in enumerate(tag_ids)}

        # Ignore edges that reference a tag id missing from the tag list;
        # they cannot be mapped to a node index.
        edges = [e for e in edges if e[0] in idx_of and e[1] in idx_of]

        pos = self._compute_layout(tag_ids, edges)

        # Adjacency (edges) expressed as pairs of node indices
        adj = np.array([[idx_of[t1], idx_of[t2]] for t1, t2, _ in edges], dtype=int)

        # Node sizes: proportional to how often the tag is used (10-30 px)
        max_pages = max(tag_page_counts.values(), default=1) or 1
        sizes = np.array(
            [10 + 20 * (tag_page_counts.get(tid, 0) / max_pages) for tid in tag_ids],
            dtype=float,
        )

        # ---- Neon-style nodes ----
        # Inner fill: dark; outline: the tag's own colour
        dark_fill = (5, 8, 22, 230)  # almost background, slightly lighter
        colors = [tags_by_id[tid][2] for tid in tag_ids]
        node_brushes = [pg.mkBrush(dark_fill) for _ in tag_ids]
        node_pens = [pg.mkPen(color, width=2.5) for color in colors]

        # Halo: semi-transparent version of the tag colour, larger than the node
        halo_brushes = []
        for color in colors:
            qcol = QColor(color)
            qcol.setAlpha(90)
            halo_brushes.append(pg.mkBrush(qcol))
        self._halo_sizes = [size * 1.8 for size in sizes]
        self._halo_brushes = halo_brushes

        # Assign data to GraphItem (this will set self.graph_item.pos)
        graph_kwargs = {
            "pos": pos,
            "adj": adj,
            "size": sizes,
            "symbolBrush": node_brushes,
            "symbolPen": node_pens,
            "pxMode": True,
        }
        if edges:
            # Softer neon-ish lines, opacity / width based on co-occurrence
            graph_kwargs["pen"] = self._edge_pens(edges)
        self.graph_item.setData(**graph_kwargs)

        # ---- Add text labels for each tag ----
        self._label_items = []  # reset
        font = QFont()
        font.setPointSize(8)
        for tid, color in zip(tag_ids, colors):
            label = pg.TextItem(
                text=self._tag_names[tid], color=color, anchor=(0.5, 0.5)
            )
            label.setFont(font)
            self.plot.addItem(label)
            self._label_items.append(label)

        # Initial placement of labels and halos
        self._on_positions_changed(pos)

    def _on_positions_changed(self, pos):
        """Called by DraggableGraphItem whenever node positions change.

        ``pos`` is the (N, 2) position array; labels and halos are moved to
        match it.
        """
        if not self._label_items:
            return

        # Update labels
        for label, (x, y) in zip(self._label_items, pos):
            label.setPos(float(x), float(y) + self._LABEL_OFFSET)

        # Update halo positions to match nodes
        if self._halo_sizes and self._halo_brushes:
            self._halo_item.setData(
                x=[p[0] for p in pos],
                y=[p[1] for p in pos],
                size=self._halo_sizes,
                brush=self._halo_brushes,
                pen=None,
            )

    def _on_hover_index(self, index, ev):
        """Show '<tag>: N pages' when hovering a node; hide the tooltip otherwise."""
        if index is None or not self._tag_ids:
            QToolTip.hideText()
            return

        tag_id = self._tag_ids[index]
        name = self._tag_names.get(tag_id, "")
        count = self._tag_page_counts.get(tag_id, 0)
        text = f"{name}: {count} page{'s' if count != 1 else ''}"
        QToolTip.showText(QCursor.pos(), text, self)