Cardiosense-AG committed on
Commit
4a9df78
·
verified ·
1 Parent(s): bcde322

Update src/guideline_annotator.py

Browse files
Files changed (1) hide show
  1. src/guideline_annotator.py +26 -68
src/guideline_annotator.py CHANGED
@@ -1,16 +1,17 @@
1
  # src/guideline_annotator.py
2
  from __future__ import annotations
3
 
 
 
 
4
  from dataclasses import dataclass
5
- from typing import Dict, List, Tuple, Any
6
- import os
7
 
8
- # We depend on the project's FAISS index helpers if available.
9
  try:
10
- from src.rag_index import load_index_bundle, search_index # type: ignore
11
- except Exception:
12
- load_index_bundle = None # type: ignore
13
- search_index = None # type: ignore
14
 
15
  from .explainability import segment_claims
16
 
@@ -21,38 +22,21 @@ class GuidelineRef:
21
  excerpt: str
22
  score: float
23
 
24
- def _bundle_ok() -> bool:
25
- return callable(load_index_bundle) and callable(search_index)
26
-
27
- def _dedup_refs(refs: List[GuidelineRef]) -> List[GuidelineRef]:
28
- seen = set()
29
- out: List[GuidelineRef] = []
30
- for r in refs:
31
- key = (r.doc, r.page, r.excerpt.strip()[:80])
32
- if key in seen:
33
- continue
34
- seen.add(key)
35
- out.append(r)
36
- return out
37
-
38
- def find_guideline_refs_for_section(
39
- section_text: str,
40
- *,
41
- top_k_per_claim: int = 3,
42
- max_refs_per_claim: int = 2,
43
- ) -> List[GuidelineRef]:
44
- """Retrieve post-hoc guideline references for a section by claim-level retrieval."""
45
- claims = segment_claims(section_text)
46
- if not claims:
47
- return []
48
 
 
 
 
49
  refs: List[GuidelineRef] = []
 
50
 
51
- if _bundle_ok():
 
52
  try:
53
- embedder, bundle = load_index_bundle()
54
  for c in claims:
55
- results = search_index(c, embedder, bundle, top_k=top_k_per_claim) # type: ignore
56
  for r in (results or [])[:max_refs_per_claim]:
57
  refs.append(GuidelineRef(
58
  doc=str(r.get("doc_name") or r.get("doc") or r.get("source") or "Guideline"),
@@ -61,39 +45,13 @@ def find_guideline_refs_for_section(
61
  score=float(r.get("score", 0.0)),
62
  ))
63
  except Exception:
64
- # fallthrough to mock
65
- pass
66
-
67
- # Fallback mock if index isn't available
68
- if not refs:
69
- # Provide deterministic placeholders so the UI remains usable.
70
- stub = [
71
- GuidelineRef(doc="ACC/AHA 2022", page=7, excerpt="Use high-intensity statins unless contraindicated.", score=0.71),
72
- GuidelineRef(doc="ESC 2021", page=12, excerpt="Consider PCSK9 inhibitors for very high LDL-C despite therapy.", score=0.66),
73
- GuidelineRef(doc="KDIGO 2020", page=3, excerpt="Dose-adjust renally cleared drugs in CKD stages 3-5.", score=0.64),
74
- ]
75
- for c in claims:
76
- refs.extend(stub[:max_refs_per_claim])
77
 
78
- return _dedup_refs(refs)
 
 
 
 
79
 
80
- def assign_endnotes(registry: Dict[str, int], refs: List[GuidelineRef]) -> Tuple[Dict[str, int], List[Dict[str, Any]]]:
81
- """Assign stable endnote numbers using (doc,page) keys. Returns updated registry and serializable refs."""
82
- reg = dict(registry or {})
83
- next_n = 1 + max(reg.values()) if reg else 1
84
- out: List[Dict[str, Any]] = []
85
- for r in refs:
86
- key = f"{r.doc}::p{r.page}"
87
- if key not in reg:
88
- reg[key] = next_n
89
- next_n += 1
90
- out.append({
91
- "n": reg[key],
92
- "doc": r.doc,
93
- "page": r.page,
94
- "excerpt": r.excerpt,
95
- "score": round(r.score, 3),
96
- })
97
- # Sort by endnote number
98
- out.sort(key=lambda d: int(d["n"]))
99
- return reg, out
 
1
  # src/guideline_annotator.py
2
  from __future__ import annotations
3
 
4
+ # NOTE (V2): This annotator is POST-HOC ONLY.
5
+ # Do not pre-generate citations; use after the Plan is generated and unedited.
6
+
7
  from dataclasses import dataclass
8
+ from typing import Dict, List, Any
 
9
 
10
+ # Optional FAISS/RAG integration is not required for tests; we provide a safe fallback.
11
  try:
12
+ from src import rag_index # type: ignore
13
+ except Exception: # pragma: no cover - optional
14
+ rag_index = None # type: ignore
 
15
 
16
  from .explainability import segment_claims
17
 
 
22
  excerpt: str
23
  score: float
24
 
25
+ def annotate_guidelines(plan_text: str, top_k_per_claim: int = 3, max_refs_per_claim: int = 2) -> Dict[str, Any]:
26
+ """Return guideline references for the plan text, post-hoc only.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
+ If a FAISS index is unavailable, returns an empty list and a helpful warning.
29
+ """
30
+ claims = segment_claims(plan_text)
31
  refs: List[GuidelineRef] = []
32
+ warning: str = ""
33
 
34
+ # Attempt a search when a working index is present
35
+ if rag_index and hasattr(rag_index, "load_index_bundle") and hasattr(rag_index, "search_index"):
36
  try:
37
+ embedder, bundle = rag_index.load_index_bundle() # type: ignore
38
  for c in claims:
39
+ results = rag_index.search_index(c, embedder, bundle, top_k=top_k_per_claim) # type: ignore
40
  for r in (results or [])[:max_refs_per_claim]:
41
  refs.append(GuidelineRef(
42
  doc=str(r.get("doc_name") or r.get("doc") or r.get("source") or "Guideline"),
 
45
  score=float(r.get("score", 0.0)),
46
  ))
47
  except Exception:
48
+ warning = "⚠️ No guideline index found – Build one → Go to RAG Prep page"
49
+ else:
50
+ warning = "⚠️ No guideline index found – Build one → Go to RAG Prep page"
 
 
 
 
 
 
 
 
 
 
51
 
52
+ return {
53
+ "refs": [vars(r) for r in refs],
54
+ "warning": warning,
55
+ "endnotes": [{"n": i+1, "doc": r.doc, "page": r.page} for i, r in enumerate(refs)],
56
+ }
57