Spaces:
Running
Running
Upload 21 files
Browse files- app.py +26 -23
- backend/endpoint_client.py +26 -0
- backend/pdf_utils.py +14 -0
- backend/rag_engine.py +30 -17
- backend/soap_generator.py +9 -5
- data/guidelines/internal_med/0_PCOS.txt +1 -0
- data/guidelines/internal_med/1_AUB.txt +1 -0
- data/guidelines/internal_med/2_Chest Pain.txt +1 -0
- data/guidelines/internal_med/3_Knee Pain.txt +1 -0
- data/guidelines/obgyn/0_PCOS.txt +1 -0
- data/guidelines/obgyn/1_AUB.txt +1 -0
- data/guidelines/obgyn/2_Chest Pain.txt +1 -0
- data/guidelines/obgyn/3_Knee Pain.txt +1 -0
- data/guidelines/orthopedics/0_PCOS.txt +1 -0
- data/guidelines/orthopedics/1_AUB.txt +1 -0
- data/guidelines/orthopedics/2_Chest Pain.txt +1 -0
- data/guidelines/orthopedics/3_Knee Pain.txt +1 -0
- requirements.txt +1 -0
- utils/constants.py +10 -5
- utils/helpers.py +13 -0
- utils/persona.py +10 -0
app.py
CHANGED
|
@@ -1,35 +1,38 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
from backend.rag_engine import get_embedder,get_chroma,retrieve,seed_index
|
| 3 |
from backend.soap_generator import compose_soap
|
| 4 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
st.set_page_config(page_title='MediAssist v13',page_icon='🩺',layout='wide')
|
| 7 |
@st.cache_resource
|
| 8 |
def emb():return get_embedder()
|
| 9 |
@st.cache_resource
|
| 10 |
def col():return get_chroma()[1]
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
if isinstance(d,list) and "generated_text" in d[0]:
|
| 18 |
-
return d[0]["generated_text"]
|
| 19 |
-
return str(d)
|
| 20 |
|
| 21 |
-
st.
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
| 26 |
|
| 27 |
-
|
| 28 |
-
if st.button("Generate
|
| 29 |
-
items=retrieve(col(),emb(),txt,
|
| 30 |
soap=compose_soap(txt,items)
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
reply=chat(prompt)
|
| 34 |
-
st.subheader("AI Draft Report");st.write(reply)
|
| 35 |
-
st.subheader("SOAP");st.json(soap)
|
|
|
|
| 1 |
+
|
| 2 |
+
"""MediAssist Streamlit entry point: RAG retrieval, SOAP drafting, AI chat, PDF export."""
import os,json,time,streamlit as st
from backend.rag_engine import get_embedder,get_chroma,retrieve,seed_index
from backend.soap_generator import compose_soap
from backend.pdf_utils import generate_pdf
from backend.endpoint_client import call_endpoint
from utils.constants import DOCS_DIR,RETRIEVAL_K_DEFAULT
from utils.persona import AI_GYNO_PERSONA_V2

st.set_page_config(page_title="MediAssist v14.2 Clean",page_icon="🩺",layout="wide")

@st.cache_resource
def emb():
    # Cached: building the sentence-transformer is expensive.
    return get_embedder()

@st.cache_resource
def col():
    # Cached: get_chroma() returns (client, collection); keep only the collection.
    return get_chroma()[1]

st.title("🩺 MediAssist v14.2 — Clean Stable Build")

txt=st.text_area("Patient narrative")
k=st.slider("Results",1,10,RETRIEVAL_K_DEFAULT)
ep=st.text_input("Endpoint override")

if st.button("Generate OPD"):
    items=retrieve(col(),emb(),txt,k)
    soap=compose_soap(txt,items)
    st.json(soap)

if st.button("AI Chat"):
    prompt=f"{AI_GYNO_PERSONA_V2}\nPatient:{txt}\nAssistant:"
    reply,_=call_endpoint(prompt,endpoint=ep or None)
    st.write(reply)

summ=st.text_area("Doctor summary")
if st.button("Generate PDF"):
    items=retrieve(col(),emb(),txt,3)  # fixed k=3 keeps the report short — TODO confirm intended
    soap=compose_soap(txt,items)
    generate_pdf("report.pdf","MediAssist Report",soap,summ)
    # Read the bytes inside a context manager instead of handing Streamlit an
    # open file handle that is never closed (resource leak on every rerun).
    with open("report.pdf","rb") as fh:
        pdf_bytes=fh.read()
    st.download_button("Download PDF",pdf_bytes,file_name="report.pdf",mime="application/pdf")
|
|
|
|
|
|
|
|
|
backend/endpoint_client.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import os,time,requests,json
|
| 3 |
+
from utils.constants import CHAT_ENDPOINT,MAX_RETRIES_DEFAULT,RETRY_BACKOFF_SECONDS_DEFAULT,REQUEST_TIMEOUT_SECONDS_DEFAULT
|
| 4 |
+
|
| 5 |
+
def call_endpoint(prompt,endpoint=None,token=None,max_retries=None,backoff=None,timeout=None,logs=None):
    """Call a HF text-generation endpoint and return (reply_text, meta_dict).

    Errors are reported as user-facing strings, never raised.

    Args:
        prompt: full prompt, sent as {"inputs": prompt}.
        endpoint: override URL; falls back to $CHAT_ENDPOINT env var, then constant.
        token: bearer token; falls back to $HF_API_TOKEN.
        max_retries/backoff/timeout: None -> module defaults (an explicit 0 is honored).
        logs: accepted for interface compatibility; currently unused.
    """
    url=endpoint or os.getenv("CHAT_ENDPOINT") or CHAT_ENDPOINT
    tok=token or os.getenv("HF_API_TOKEN")
    if not tok:return "❌ Missing HF_API_TOKEN.",{}
    # `x if x is not None else default` instead of `x or default`: a caller
    # passing 0 would otherwise silently get the default.
    mr=max_retries if max_retries is not None else MAX_RETRIES_DEFAULT
    bf=backoff if backoff is not None else RETRY_BACKOFF_SECONDS_DEFAULT
    to=timeout if timeout is not None else REQUEST_TIMEOUT_SECONDS_DEFAULT

    h={"Authorization":f"Bearer {tok}","Content-Type":"application/json"}
    for a in range(1,mr+1):
        try:
            r=requests.post(url,headers=h,json={"inputs":prompt},timeout=to)
            try:
                data=r.json()
            except ValueError:  # non-JSON body (HTML error page, plain text, ...)
                return "⚠️ Non-JSON:\n"+r.text,{}
            if isinstance(data,list) and data and "generated_text" in data[0]:
                return data[0]["generated_text"],{}
            if isinstance(data,dict) and "generated_text" in data:
                return data["generated_text"],{}
            return "⚠️ Unexpected:"+json.dumps(data)[:500],{}
        except requests.exceptions.RequestException:
            # Network failure / timeout only — a bare except here would also
            # swallow KeyboardInterrupt and programming errors.
            time.sleep(bf*a)  # linear backoff: bf, 2*bf, 3*bf, ...
    return "❌ Endpoint unavailable",{}
|
backend/pdf_utils.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from reportlab.pdfgen import canvas
|
| 3 |
+
from reportlab.lib.pagesizes import A4
|
| 4 |
+
from reportlab.lib.units import mm
|
| 5 |
+
|
| 6 |
+
def generate_pdf(path,title,soap,summary):
    """Render the SOAP dict plus doctor summary to a PDF at *path*.

    Starts a new page when the cursor would run past the bottom margin
    (the original single-page layout clipped long reports).
    """
    c=canvas.Canvas(path,pagesize=A4)
    x,y=20*mm,270*mm
    c.setFont("Helvetica-Bold",16);c.drawString(x,y,title);y-=15
    c.setFont("Helvetica",10)

    def _line(text):
        # Emit one text line; page-break when the cursor reaches the margin.
        nonlocal y
        if y<20*mm:
            c.showPage()
            c.setFont("Helvetica",10)  # fonts reset on a new page
            y=270*mm
        c.drawString(x,y,text);y-=10

    for k,v in soap.items():
        _line(f"{k}: {v}")
    _line(f"Doctor Summary: {summary}")
    c.save()
|
backend/rag_engine.py
CHANGED
|
@@ -1,32 +1,45 @@
|
|
| 1 |
-
|
| 2 |
-
|
|
|
|
|
|
|
| 3 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 4 |
from utils.constants import CHROMA_DIR,DOCS_DIR,COLLECTION,EMB_MODEL_NAME
|
|
|
|
|
|
|
| 5 |
def get_embedder():
|
| 6 |
-
w=models.Transformer(EMB_MODEL_NAME)
|
|
|
|
| 7 |
return SentenceTransformer(modules=[w,p])
|
|
|
|
| 8 |
def get_chroma():
|
| 9 |
c=chromadb.PersistentClient(path=CHROMA_DIR)
|
| 10 |
-
return c,c.get_or_create_collection(COLLECTION
|
| 11 |
-
|
|
|
|
|
|
|
| 12 |
def seed_index(col,m,folder):
|
| 13 |
-
|
| 14 |
-
paths=glob.glob(folder+'
|
| 15 |
ids,docs,meta=[],[],[]
|
| 16 |
for p in paths:
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
em=embed(m,docs)
|
| 22 |
try:col.add(ids=ids,documents=docs,metadatas=meta,embeddings=em)
|
| 23 |
-
except:
|
|
|
|
|
|
|
|
|
|
| 24 |
return len(docs)
|
|
|
|
| 25 |
def retrieve(col,m,q,k):
|
| 26 |
em=embed(m,[q])[0]
|
| 27 |
r=col.query(query_embeddings=[em],n_results=k,include=["documents","metadatas"])
|
| 28 |
-
|
| 29 |
-
if r.get("ids"):
|
| 30 |
-
for i in range(len(r["ids"][0])):
|
| 31 |
-
out.append({"text":r["documents"][0][i],"title":r["metadatas"][0][i]["title"],"source":r["metadatas"][0][i]["source"]})
|
| 32 |
-
return out
|
|
|
|
| 1 |
+
|
| 2 |
+
import os,glob
|
| 3 |
+
import chromadb
|
| 4 |
+
from sentence_transformers import SentenceTransformer, models
|
| 5 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 6 |
from utils.constants import CHROMA_DIR,DOCS_DIR,COLLECTION,EMB_MODEL_NAME
|
| 7 |
+
from utils.helpers import to_safe_items
|
| 8 |
+
|
| 9 |
def get_embedder():
    """Assemble a SentenceTransformer: clinical transformer backbone + mean pooling."""
    backbone=models.Transformer(EMB_MODEL_NAME)
    pooling=models.Pooling(backbone.get_word_embedding_dimension())
    return SentenceTransformer(modules=[backbone,pooling])
|
| 13 |
+
|
| 14 |
def get_chroma():
    """Open the persistent Chroma store; return (client, collection)."""
    client=chromadb.PersistentClient(path=CHROMA_DIR)
    collection=client.get_or_create_collection(COLLECTION)
    return client,collection
|
| 17 |
+
|
| 18 |
+
def embed(m,t):
    """Encode texts *t* with sentence-transformer *m*; return plain nested lists."""
    vectors=m.encode(t,convert_to_numpy=True)
    return vectors.tolist()
|
| 19 |
+
|
| 20 |
def seed_index(col,m,folder):
    """Chunk every .txt under *folder*, embed, and add to Chroma collection *col*.

    Returns the number of chunks indexed (0 when nothing readable was found).
    """
    spl=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=150)
    paths=glob.glob(folder+'/**/*.txt',recursive=True)
    ids,docs,meta=[],[],[]
    for p in paths:
        try:
            # Context manager (no leaked handle) + explicit encoding so the
            # result doesn't depend on the host locale.
            with open(p,encoding="utf-8") as fh:
                txt=fh.read()
        except (OSError,UnicodeDecodeError):
            continue  # unreadable file: skip it, don't abort the whole seed
        title=os.path.basename(p).replace('.txt','')
        for i,ch in enumerate(spl.split_text(txt)):
            ids.append(f"{title}-{i}")
            docs.append(ch)
            meta.append({"title":title,"source":p})
    if not docs:return 0
    em=embed(m,docs)
    try:
        col.add(ids=ids,documents=docs,metadatas=meta,embeddings=em)
    except Exception:
        # Most likely duplicate ids from an earlier seed: replace in place.
        try:col.delete(ids=ids)
        except Exception:pass  # best-effort cleanup; the re-add below will surface real errors
        col.add(ids=ids,documents=docs,metadatas=meta,embeddings=em)
    return len(docs)
|
| 41 |
+
|
| 42 |
def retrieve(col,m,q,k):
    """Embed query *q* and return the top-*k* collection hits as safe item dicts."""
    query_vec=embed(m,[q])[0]
    raw=col.query(query_embeddings=[query_vec],n_results=k,include=["documents","metadatas"])
    return to_safe_items(raw)
|
|
|
|
|
|
|
|
|
|
|
|
backend/soap_generator.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
|
|
| 1 |
def compose_soap(n,items):
|
| 2 |
-
a=[];p=["Follow-up in
|
| 3 |
t=n.lower()
|
| 4 |
-
if "
|
| 5 |
-
if "
|
| 6 |
-
if not a:a.append("General
|
| 7 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
def compose_soap(n,items):
    """Build a minimal SOAP-note dict from narrative *n* and retrieved *items*.

    Assessment lines are keyword-triggered; *items* contribute citation titles.
    """
    narrative_lower=n.lower()
    triggers=(("pain","Pain evaluation"),("bleed","Bleeding assessment"))
    assessment=[label for keyword,label in triggers if keyword in narrative_lower]
    if not assessment:
        assessment=["General evaluation"]
    plan=["Follow-up in 3–7 days.","Safety-net instructions."]
    return {
        "subjective":n,
        "objective":"Vitals stable",
        "assessment":assessment,
        "plan":plan,
        "citations":[entry["title"] for entry in items],
    }
|
data/guidelines/internal_med/0_PCOS.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
PCOS guideline for internal_med
|
data/guidelines/internal_med/1_AUB.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
AUB guideline for internal_med
|
data/guidelines/internal_med/2_Chest Pain.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Chest Pain guideline for internal_med
|
data/guidelines/internal_med/3_Knee Pain.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Knee Pain guideline for internal_med
|
data/guidelines/obgyn/0_PCOS.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
PCOS guideline for obgyn
|
data/guidelines/obgyn/1_AUB.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
AUB guideline for obgyn
|
data/guidelines/obgyn/2_Chest Pain.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Chest Pain guideline for obgyn
|
data/guidelines/obgyn/3_Knee Pain.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Knee Pain guideline for obgyn
|
data/guidelines/orthopedics/0_PCOS.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
PCOS guideline for orthopedics
|
data/guidelines/orthopedics/1_AUB.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
AUB guideline for orthopedics
|
data/guidelines/orthopedics/2_Chest Pain.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Chest Pain guideline for orthopedics
|
data/guidelines/orthopedics/3_Knee Pain.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Knee Pain guideline for orthopedics
|
requirements.txt
CHANGED
|
@@ -3,3 +3,4 @@ chromadb
|
|
| 3 |
sentence-transformers
|
| 4 |
langchain-text-splitters
|
| 5 |
requests
|
|
|
|
|
|
| 3 |
sentence-transformers
|
| 4 |
langchain-text-splitters
|
| 5 |
requests
|
| 6 |
+
reportlab
|
utils/constants.py
CHANGED
|
@@ -1,5 +1,10 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Central configuration for MediAssist (paths, model names, endpoint tuning).
CHROMA_DIR = "./data/chroma"  # on-disk Chroma persistence directory
DOCS_DIR = "./data/guidelines"  # root folder of guideline .txt files
COLLECTION = "med_guidelines_multispeciality"  # Chroma collection name
EMB_MODEL_NAME = "medicalai/ClinicalBERT"  # transformer backbone for embeddings
RETRIEVAL_K_DEFAULT = 5  # default number of chunks returned by retrieval
CHAT_ENDPOINT = "https://api-inference.huggingface.co/models/openai/gpt-oss-120b"  # default HF inference URL
MAX_RETRIES_DEFAULT = 6  # endpoint call attempts before giving up
RETRY_BACKOFF_SECONDS_DEFAULT = 3  # multiplied by attempt number between retries
REQUEST_TIMEOUT_SECONDS_DEFAULT = 60  # per-request HTTP timeout (seconds)
|
utils/helpers.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from typing import List,Dict
|
| 3 |
+
|
| 4 |
+
def to_safe_items(res)->List[Dict]:
    """Flatten a Chroma query result into [{'text', 'title', 'source'}, ...].

    Missing metadata fields fall back to placeholders; an empty or id-less
    result yields an empty list.
    """
    if not res or not res.get("ids"):
        return []
    docs=res["documents"][0]
    metas=res["metadatas"][0]
    return [
        {
            "text":docs[idx],
            "title":metas[idx].get("title","(untitled)"),
            "source":metas[idx].get("source",""),
        }
        for idx in range(len(res["ids"][0]))
    ]
|
utils/persona.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
AI_GYNO_PERSONA_V2 = """
|
| 3 |
+
You are AIgyno, a multispeciality clinical assistant for doctors.
|
| 4 |
+
|
| 5 |
+
- Provide safe, accurate, careful medical reasoning.
|
| 6 |
+
- Use retrieved context.
|
| 7 |
+
- Ask clarifying questions if uncertain.
|
| 8 |
+
- Provide differentials and next steps.
|
| 9 |
+
- Doctor review required for final diagnosis.
|
| 10 |
+
"""
|