Add tag relationship visualisation graph tool
All checks were successful
CI / test (push) Successful in 3m43s
Lint / test (push) Successful in 28s
Trivy / test (push) Successful in 24s

This commit is contained in:
Miguel Jacq 2025-11-18 17:29:57 +11:00
parent 90d871246b
commit 01997aee90
Signed by: mig5
GPG key ID: 59B3F0C24135C6A9
8 changed files with 473 additions and 78 deletions

View file

@ -746,6 +746,49 @@ class DBManager:
revisions_by_date,
)
def get_tag_cooccurrences(
    self,
) -> "tuple[dict[int, TagRow], list[tuple[int, int, int]], dict[int, int]]":
    """
    Compute tag-tag co-occurrence across pages.

    Two tags co-occur when both are attached to the same page (same
    ``page_date`` in ``page_tags``).

    Returns a 3-tuple ``(tags_by_id, edges, tag_page_counts)``:
        tags_by_id: dict[int, TagRow]     # id -> (id, name, color)
        edges: list[(int, int, int)]      # (tag_id1, tag_id2, page_count)
        tag_page_counts: dict[int, int]   # tag_id -> number of pages it appears on

    Each unordered pair of tags is reported once, with tag_id1 < tag_id2,
    and edges are sorted by (tag_id1, tag_id2) so the result is stable
    between calls. Tags that have no row in ``page_tags`` still appear in
    tags_by_id but are absent from tag_page_counts; use
    ``tag_page_counts.get(tag_id, 0)`` when sizing nodes.
    """
    cur = self.conn.cursor()

    # 1) All tags (reuse existing helper); rows are keyed by their first field.
    all_tags: list[TagRow] = self.list_tags()
    tags_by_id: dict[int, TagRow] = {t[0]: t for t in all_tags}

    # 2) How many pages each tag appears on (for node sizing)
    rows = cur.execute(
        """
        SELECT tag_id, COUNT(DISTINCT page_date) AS c
        FROM page_tags
        GROUP BY tag_id;
        """
    ).fetchall()
    tag_page_counts = {r["tag_id"]: r["c"] for r in rows}

    # 3) Co-occurrence of tag pairs on the same page.
    #    The self-join with pt1.tag_id < pt2.tag_id yields every pair exactly
    #    once and never pairs a tag with itself. ORDER BY is explicit because
    #    SQL gives no ordering guarantee for GROUP BY output on its own.
    rows = cur.execute(
        """
        SELECT
            pt1.tag_id AS tag1,
            pt2.tag_id AS tag2,
            COUNT(DISTINCT pt1.page_date) AS c
        FROM page_tags AS pt1
        JOIN page_tags AS pt2
          ON pt1.page_date = pt2.page_date
         AND pt1.tag_id < pt2.tag_id
        GROUP BY pt1.tag_id, pt2.tag_id
        ORDER BY pt1.tag_id, pt2.tag_id;
        """
    ).fetchall()
    edges = [(r["tag1"], r["tag2"], r["c"]) for r in rows]

    return tags_by_id, edges, tag_page_counts
def close(self) -> None:
if self.conn is not None:
self.conn.close()