Update src/guideline_annotator.py
Browse files
- src/guideline_annotator.py +26 -68
src/guideline_annotator.py
CHANGED
|
@@ -1,16 +1,17 @@
|
|
| 1 |
# src/guideline_annotator.py
|
| 2 |
from __future__ import annotations
|
| 3 |
|
|
|
|
|
|
|
|
|
|
| 4 |
from dataclasses import dataclass
|
| 5 |
-
from typing import Dict, List,
|
| 6 |
-
import os
|
| 7 |
|
| 8 |
-
#
|
| 9 |
try:
|
| 10 |
-
from src
|
| 11 |
-
except Exception:
|
| 12 |
-
|
| 13 |
-
search_index = None # type: ignore
|
| 14 |
|
| 15 |
from .explainability import segment_claims
|
| 16 |
|
|
@@ -21,38 +22,21 @@ class GuidelineRef:
|
|
| 21 |
excerpt: str
|
| 22 |
score: float
|
| 23 |
|
| 24 |
-
def
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def _dedup_refs(refs: List[GuidelineRef]) -> List[GuidelineRef]:
|
| 28 |
-
seen = set()
|
| 29 |
-
out: List[GuidelineRef] = []
|
| 30 |
-
for r in refs:
|
| 31 |
-
key = (r.doc, r.page, r.excerpt.strip()[:80])
|
| 32 |
-
if key in seen:
|
| 33 |
-
continue
|
| 34 |
-
seen.add(key)
|
| 35 |
-
out.append(r)
|
| 36 |
-
return out
|
| 37 |
-
|
| 38 |
-
def find_guideline_refs_for_section(
|
| 39 |
-
section_text: str,
|
| 40 |
-
*,
|
| 41 |
-
top_k_per_claim: int = 3,
|
| 42 |
-
max_refs_per_claim: int = 2,
|
| 43 |
-
) -> List[GuidelineRef]:
|
| 44 |
-
"""Retrieve post-hoc guideline references for a section by claim-level retrieval."""
|
| 45 |
-
claims = segment_claims(section_text)
|
| 46 |
-
if not claims:
|
| 47 |
-
return []
|
| 48 |
|
|
|
|
|
|
|
|
|
|
| 49 |
refs: List[GuidelineRef] = []
|
|
|
|
| 50 |
|
| 51 |
-
|
|
|
|
| 52 |
try:
|
| 53 |
-
embedder, bundle = load_index_bundle()
|
| 54 |
for c in claims:
|
| 55 |
-
results = search_index(c, embedder, bundle, top_k=top_k_per_claim) # type: ignore
|
| 56 |
for r in (results or [])[:max_refs_per_claim]:
|
| 57 |
refs.append(GuidelineRef(
|
| 58 |
doc=str(r.get("doc_name") or r.get("doc") or r.get("source") or "Guideline"),
|
|
@@ -61,39 +45,13 @@ def find_guideline_refs_for_section(
|
|
| 61 |
score=float(r.get("score", 0.0)),
|
| 62 |
))
|
| 63 |
except Exception:
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
# Fallback mock if index isn't available
|
| 68 |
-
if not refs:
|
| 69 |
-
# Provide deterministic placeholders so the UI remains usable.
|
| 70 |
-
stub = [
|
| 71 |
-
GuidelineRef(doc="ACC/AHA 2022", page=7, excerpt="Use high-intensity statins unless contraindicated.", score=0.71),
|
| 72 |
-
GuidelineRef(doc="ESC 2021", page=12, excerpt="Consider PCSK9 inhibitors for very high LDL-C despite therapy.", score=0.66),
|
| 73 |
-
GuidelineRef(doc="KDIGO 2020", page=3, excerpt="Dose-adjust renally cleared drugs in CKD stages 3-5.", score=0.64),
|
| 74 |
-
]
|
| 75 |
-
for c in claims:
|
| 76 |
-
refs.extend(stub[:max_refs_per_claim])
|
| 77 |
|
| 78 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
-
def assign_endnotes(registry: Dict[str, int], refs: List[GuidelineRef]) -> Tuple[Dict[str, int], List[Dict[str, Any]]]:
|
| 81 |
-
"""Assign stable endnote numbers using (doc,page) keys. Returns updated registry and serializable refs."""
|
| 82 |
-
reg = dict(registry or {})
|
| 83 |
-
next_n = 1 + max(reg.values()) if reg else 1
|
| 84 |
-
out: List[Dict[str, Any]] = []
|
| 85 |
-
for r in refs:
|
| 86 |
-
key = f"{r.doc}::p{r.page}"
|
| 87 |
-
if key not in reg:
|
| 88 |
-
reg[key] = next_n
|
| 89 |
-
next_n += 1
|
| 90 |
-
out.append({
|
| 91 |
-
"n": reg[key],
|
| 92 |
-
"doc": r.doc,
|
| 93 |
-
"page": r.page,
|
| 94 |
-
"excerpt": r.excerpt,
|
| 95 |
-
"score": round(r.score, 3),
|
| 96 |
-
})
|
| 97 |
-
# Sort by endnote number
|
| 98 |
-
out.sort(key=lambda d: int(d["n"]))
|
| 99 |
-
return reg, out
|
|
|
|
| 1 |
# src/guideline_annotator.py
|
| 2 |
from __future__ import annotations
|
| 3 |
|
| 4 |
+
# NOTE (V2): This annotator is POST-HOC ONLY.
|
| 5 |
+
# Do not pre-generate citations; use after the Plan is generated and unedited.
|
| 6 |
+
|
| 7 |
from dataclasses import dataclass
|
| 8 |
+
from typing import Dict, List, Any
|
|
|
|
| 9 |
|
| 10 |
+
# Optional FAISS/RAG integration is not required for tests; we provide a safe fallback.
|
| 11 |
try:
|
| 12 |
+
from src import rag_index # type: ignore
|
| 13 |
+
except Exception: # pragma: no cover - optional
|
| 14 |
+
rag_index = None # type: ignore
|
|
|
|
| 15 |
|
| 16 |
from .explainability import segment_claims
|
| 17 |
|
|
|
|
| 22 |
excerpt: str
|
| 23 |
score: float
|
| 24 |
|
| 25 |
+
def annotate_guidelines(plan_text: str, top_k_per_claim: int = 3, max_refs_per_claim: int = 2) -> Dict[str, Any]:
|
| 26 |
+
"""Return guideline references for the plan text, post-hoc only.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
If a FAISS index is unavailable, returns an empty list and a helpful warning.
|
| 29 |
+
"""
|
| 30 |
+
claims = segment_claims(plan_text)
|
| 31 |
refs: List[GuidelineRef] = []
|
| 32 |
+
warning: str = ""
|
| 33 |
|
| 34 |
+
# Attempt a search when a working index is present
|
| 35 |
+
if rag_index and hasattr(rag_index, "load_index_bundle") and hasattr(rag_index, "search_index"):
|
| 36 |
try:
|
| 37 |
+
embedder, bundle = rag_index.load_index_bundle() # type: ignore
|
| 38 |
for c in claims:
|
| 39 |
+
results = rag_index.search_index(c, embedder, bundle, top_k=top_k_per_claim) # type: ignore
|
| 40 |
for r in (results or [])[:max_refs_per_claim]:
|
| 41 |
refs.append(GuidelineRef(
|
| 42 |
doc=str(r.get("doc_name") or r.get("doc") or r.get("source") or "Guideline"),
|
|
|
|
| 45 |
score=float(r.get("score", 0.0)),
|
| 46 |
))
|
| 47 |
except Exception:
|
| 48 |
+
warning = "⚠️ No guideline index found – Build one → Go to RAG Prep page"
|
| 49 |
+
else:
|
| 50 |
+
warning = "⚠️ No guideline index found – Build one → Go to RAG Prep page"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
return {
|
| 53 |
+
"refs": [vars(r) for r in refs],
|
| 54 |
+
"warning": warning,
|
| 55 |
+
"endnotes": [{"n": i+1, "doc": r.doc, "page": r.page} for i, r in enumerate(refs)],
|
| 56 |
+
}
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|