NandanData committed on
Commit 136d8e3 · verified · 1 Parent(s): 4660189

Upload 21 files
app.py CHANGED
@@ -1,35 +1,38 @@
- import os, json, requests, streamlit as st
  from backend.rag_engine import get_embedder,get_chroma,retrieve,seed_index
  from backend.soap_generator import compose_soap
- from utils.constants import DOCS_DIR,CHAT_ENDPOINT

- st.set_page_config(page_title='MediAssist v13',page_icon='🩺',layout='wide')
  @st.cache_resource
  def emb():return get_embedder()
  @st.cache_resource
  def col():return get_chroma()[1]

- def chat(prompt):
-     token=os.getenv('HF_API_TOKEN')
-     if not token:return 'Missing HF_API_TOKEN'
-     r=requests.post(CHAT_ENDPOINT,headers={"Authorization":f"Bearer {token}"},json={"inputs":prompt},timeout=200)
-     d=r.json()
-     if isinstance(d,list) and "generated_text" in d[0]:
-         return d[0]["generated_text"]
-     return str(d)

- st.title("🩺 MediAssist v13 — AI Gynae Assistant")

- with st.sidebar:
-     if st.button("Seed Index"):
-         n=seed_index(col(),emb(),DOCS_DIR);st.success(f"Indexed {n} chunks")

- txt=st.text_area("Patient narrative")
- if st.button("Generate Report"):
-     items=retrieve(col(),emb(),txt,5)
      soap=compose_soap(txt,items)
-     ctx="\n".join([i["text"] for i in items])
-     prompt=f"Use this context to create a refined clinical report:\n{ctx}\nPatient: {txt}"
-     reply=chat(prompt)
-     st.subheader("AI Draft Report");st.write(reply)
-     st.subheader("SOAP");st.json(soap)

+
+ import os,json,time,streamlit as st
  from backend.rag_engine import get_embedder,get_chroma,retrieve,seed_index
  from backend.soap_generator import compose_soap
+ from backend.pdf_utils import generate_pdf
+ from backend.endpoint_client import call_endpoint
+ from utils.constants import DOCS_DIR,RETRIEVAL_K_DEFAULT
+ from utils.persona import AI_GYNO_PERSONA_V2
+
+ st.set_page_config(page_title="MediAssist v14.2 Clean",page_icon="🩺",layout="wide")

  @st.cache_resource
  def emb():return get_embedder()
  @st.cache_resource
  def col():return get_chroma()[1]

+ st.title("🩺 MediAssist v14.2 — Clean Stable Build")
+
+ txt=st.text_area("Patient narrative")
+ k=st.slider("Results",1,10,RETRIEVAL_K_DEFAULT)
+ ep=st.text_input("Endpoint override")
+
+ if st.button("Generate OPD"):
+     items=retrieve(col(),emb(),txt,k)
+     soap=compose_soap(txt,items)
+     st.json(soap)
+
+ if st.button("AI Chat"):
+     prompt=f"{AI_GYNO_PERSONA_V2}\nPatient:{txt}\nAssistant:"
+     reply,_=call_endpoint(prompt,endpoint=ep or None)
+     st.write(reply)
+
+ summ=st.text_area("Doctor summary")
+ if st.button("Generate PDF"):
+     items=retrieve(col(),emb(),txt,3)
      soap=compose_soap(txt,items)
+     generate_pdf("report.pdf","MediAssist Report",soap,summ)
+     st.download_button("Download PDF",open("report.pdf","rb"),file_name="report.pdf",mime="application/pdf")
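Review note: a minimal headless sketch of the new "Generate OPD" path (run outside Streamlit; `txt` stands in for the text-area value, and it assumes the index has already been seeded):

    from backend.rag_engine import get_embedder, get_chroma, retrieve
    from backend.soap_generator import compose_soap
    from utils.constants import RETRIEVAL_K_DEFAULT

    txt = "35F with heavy bleeding for two weeks"    # stand-in for st.text_area
    items = retrieve(get_chroma()[1], get_embedder(), txt, RETRIEVAL_K_DEFAULT)
    soap = compose_soap(txt, items)                   # the same dict st.json(soap) renders
    print(soap)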
 
 
 
backend/endpoint_client.py ADDED
@@ -0,0 +1,26 @@
+
+ import os,time,requests,json
+ from utils.constants import CHAT_ENDPOINT,MAX_RETRIES_DEFAULT,RETRY_BACKOFF_SECONDS_DEFAULT,REQUEST_TIMEOUT_SECONDS_DEFAULT
+
+ def call_endpoint(prompt,endpoint=None,token=None,max_retries=None,backoff=None,timeout=None,logs=None):
+     url=endpoint or os.getenv("CHAT_ENDPOINT") or CHAT_ENDPOINT
+     tok=token or os.getenv("HF_API_TOKEN")
+     if not tok:return "❌ Missing HF_API_TOKEN.",{}
+     mr=max_retries or MAX_RETRIES_DEFAULT
+     bf=backoff or RETRY_BACKOFF_SECONDS_DEFAULT
+     to=timeout or REQUEST_TIMEOUT_SECONDS_DEFAULT
+
+     h={"Authorization":f"Bearer {tok}","Content-Type":"application/json"}
+     for a in range(1,mr+1):
+         try:
+             r=requests.post(url,headers=h,json={"inputs":prompt},timeout=to)
+             try:data=r.json()
+             except ValueError:return "⚠️ Non-JSON:\n"+r.text,{}
+             if isinstance(data,list) and data and "generated_text" in data[0]:
+                 return data[0]["generated_text"],{}
+             if isinstance(data,dict) and "generated_text" in data:
+                 return data["generated_text"],{}
+             return "⚠️ Unexpected:"+json.dumps(data)[:500],{}
+         except requests.RequestException:
+             time.sleep(bf*a)
+     return "❌ Endpoint unavailable",{}
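A short usage sketch for the new retry helper (the override values below are illustrative, not required; the second element of the returned tuple is reserved and is always `{}` in this build):

    from backend.endpoint_client import call_endpoint

    reply, _ = call_endpoint(
        "Summarize: 35F with pelvic pain.",
        endpoint=None,    # falls back to the CHAT_ENDPOINT env var, then the constant
        max_retries=3,    # instead of MAX_RETRIES_DEFAULT (6)
        backoff=2,        # sleeps backoff * attempt seconds between attempts
        timeout=30,       # per-request timeout in seconds
    )
    print(reply)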
backend/pdf_utils.py ADDED
@@ -0,0 +1,14 @@
+
+ from reportlab.pdfgen import canvas
+ from reportlab.lib.pagesizes import A4
+ from reportlab.lib.units import mm
+
+ def generate_pdf(path,title,soap,summary):
+     c=canvas.Canvas(path,pagesize=A4)
+     x,y=20*mm,270*mm
+     c.setFont("Helvetica-Bold",16);c.drawString(x,y,title);y-=15
+     c.setFont("Helvetica",10)
+     for k,v in soap.items():
+         c.drawString(x,y,f"{k}: {v}");y-=10
+     c.drawString(x,y,f"Doctor Summary: {summary}");y-=10
+     c.save()
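A quick sketch of calling `generate_pdf` with a dict shaped like `compose_soap`'s output (note the current implementation draws each value on a single line, so long lists run off the page rather than wrap):

    from backend.pdf_utils import generate_pdf

    soap = {
        "subjective": "Heavy bleeding for two weeks",
        "objective": "Vitals stable",
        "assessment": ["Bleeding assessment"],
        "plan": ["Follow-up in 3–7 days.", "Safety-net instructions."],
        "citations": ["1_AUB"],
    }
    generate_pdf("report.pdf", "MediAssist Report", soap, "Review labs at follow-up.")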
backend/rag_engine.py CHANGED
@@ -1,32 +1,45 @@
- import os,glob,chromadb
- from sentence_transformers import SentenceTransformer,models
  from langchain_text_splitters import RecursiveCharacterTextSplitter
  from utils.constants import CHROMA_DIR,DOCS_DIR,COLLECTION,EMB_MODEL_NAME
  def get_embedder():
-     w=models.Transformer(EMB_MODEL_NAME);p=models.Pooling(w.get_word_embedding_dimension())
      return SentenceTransformer(modules=[w,p])
  def get_chroma():
      c=chromadb.PersistentClient(path=CHROMA_DIR)
-     return c,c.get_or_create_collection(COLLECTION,metadata={"hnsw:space":"cosine"})
- def embed(m,txts):return m.encode(txts,convert_to_numpy=True).tolist()
  def seed_index(col,m,folder):
-     sp=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=150)
-     paths=glob.glob(folder+'/*.txt')
      ids,docs,meta=[],[],[]
      for p in paths:
-         t=os.path.basename(p).replace('.txt','')
-         with open(p) as f:tx=f.read()
-         for i,ch in enumerate(sp.split_text(tx)):
-             ids.append(f"{t}-{i}");docs.append(ch);meta.append({"title":t,"source":p})
      em=embed(m,docs)
      try:col.add(ids=ids,documents=docs,metadatas=meta,embeddings=em)
-     except:col.delete(ids=ids);col.add(ids=ids,documents=docs,metadatas=meta,embeddings=em)
      return len(docs)
  def retrieve(col,m,q,k):
      em=embed(m,[q])[0]
      r=col.query(query_embeddings=[em],n_results=k,include=["documents","metadatas"])
-     out=[]
-     if r.get("ids"):
-         for i in range(len(r["ids"][0])):
-             out.append({"text":r["documents"][0][i],"title":r["metadatas"][0][i]["title"],"source":r["metadatas"][0][i]["source"]})
-     return out

+
+ import os,glob
+ import chromadb
+ from sentence_transformers import SentenceTransformer, models
  from langchain_text_splitters import RecursiveCharacterTextSplitter
  from utils.constants import CHROMA_DIR,DOCS_DIR,COLLECTION,EMB_MODEL_NAME
+ from utils.helpers import to_safe_items
+
  def get_embedder():
+     w=models.Transformer(EMB_MODEL_NAME)
+     p=models.Pooling(w.get_word_embedding_dimension())
      return SentenceTransformer(modules=[w,p])
+
  def get_chroma():
      c=chromadb.PersistentClient(path=CHROMA_DIR)
+     return c,c.get_or_create_collection(COLLECTION)
+
+ def embed(m,t):return m.encode(t,convert_to_numpy=True).tolist()
+
  def seed_index(col,m,folder):
+     spl=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=150)
+     paths=glob.glob(folder+'/**/*.txt',recursive=True)
      ids,docs,meta=[],[],[]
      for p in paths:
+         try:txt=open(p).read()
+         except OSError:continue
+         title=os.path.basename(p).replace('.txt','')
+         chunks=spl.split_text(txt)
+         for i,ch in enumerate(chunks):
+             ids.append(f"{title}-{i}")
+             docs.append(ch)
+             meta.append({"title":title,"source":p})
+     if not docs:return 0
      em=embed(m,docs)
      try:col.add(ids=ids,documents=docs,metadatas=meta,embeddings=em)
+     except Exception:
+         try:col.delete(ids=ids)
+         except Exception:pass
+         col.add(ids=ids,documents=docs,metadatas=meta,embeddings=em)
      return len(docs)
+
  def retrieve(col,m,q,k):
      em=embed(m,[q])[0]
      r=col.query(query_embeddings=[em],n_results=k,include=["documents","metadatas"])
+     return to_safe_items(r)
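A sketch exercising the new recursive seeding, which now walks the per-speciality subfolders added below (`data/guidelines/**/`):

    from backend.rag_engine import get_embedder, get_chroma, seed_index, retrieve
    from utils.constants import DOCS_DIR

    m = get_embedder()
    _, col = get_chroma()
    print(f"indexed {seed_index(col, m, DOCS_DIR)} chunks")
    for item in retrieve(col, m, "knee pain after a fall", k=3):
        print(item["title"], "->", item["source"])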
 
 
 
 
backend/soap_generator.py CHANGED
@@ -1,7 +1,11 @@
  def compose_soap(n,items):
-     a=[];p=["Follow-up in 1 week","Safety net advice"]
      t=n.lower()
-     if "bleed" in t:a.append("Abnormal uterine bleeding")
-     if "pain" in t:a.append("Pelvic pain evaluation")
-     if not a:a.append("General gynae evaluation")
-     return {"subjective":n,"assessment":a,"plan":p,"citations":[i["title"] for i in items]}

+
  def compose_soap(n,items):
+     a=[];p=["Follow-up in 3–7 days.","Safety-net instructions."]
      t=n.lower()
+     if "pain" in t:a.append("Pain evaluation")
+     if "bleed" in t:a.append("Bleeding assessment")
+     if not a:a.append("General evaluation")
+     return {
+         "subjective":n,"objective":"Vitals stable","assessment":a,"plan":p,
+         "citations":[i["title"] for i in items]
+     }
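A small sketch of the new output shape (empty retrieval list for brevity; the "pain" keyword drives the assessment):

    from backend.soap_generator import compose_soap

    soap = compose_soap("Lower abdominal pain since yesterday", items=[])
    # -> {"subjective": "Lower abdominal pain since yesterday",
    #     "objective": "Vitals stable",
    #     "assessment": ["Pain evaluation"],
    #     "plan": ["Follow-up in 3–7 days.", "Safety-net instructions."],
    #     "citations": []}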
data/guidelines/internal_med/0_PCOS.txt ADDED
@@ -0,0 +1 @@
+ PCOS guideline for internal_med

data/guidelines/internal_med/1_AUB.txt ADDED
@@ -0,0 +1 @@
+ AUB guideline for internal_med

data/guidelines/internal_med/2_Chest Pain.txt ADDED
@@ -0,0 +1 @@
+ Chest Pain guideline for internal_med

data/guidelines/internal_med/3_Knee Pain.txt ADDED
@@ -0,0 +1 @@
+ Knee Pain guideline for internal_med

data/guidelines/obgyn/0_PCOS.txt ADDED
@@ -0,0 +1 @@
+ PCOS guideline for obgyn

data/guidelines/obgyn/1_AUB.txt ADDED
@@ -0,0 +1 @@
+ AUB guideline for obgyn

data/guidelines/obgyn/2_Chest Pain.txt ADDED
@@ -0,0 +1 @@
+ Chest Pain guideline for obgyn

data/guidelines/obgyn/3_Knee Pain.txt ADDED
@@ -0,0 +1 @@
+ Knee Pain guideline for obgyn

data/guidelines/orthopedics/0_PCOS.txt ADDED
@@ -0,0 +1 @@
+ PCOS guideline for orthopedics

data/guidelines/orthopedics/1_AUB.txt ADDED
@@ -0,0 +1 @@
+ AUB guideline for orthopedics

data/guidelines/orthopedics/2_Chest Pain.txt ADDED
@@ -0,0 +1 @@
+ Chest Pain guideline for orthopedics

data/guidelines/orthopedics/3_Knee Pain.txt ADDED
@@ -0,0 +1 @@
+ Knee Pain guideline for orthopedics
requirements.txt CHANGED
@@ -3,3 +3,4 @@ chromadb
  sentence-transformers
  langchain-text-splitters
  requests
+ reportlab
utils/constants.py CHANGED
@@ -1,5 +1,10 @@
- CHROMA_DIR='./data/chroma'
- DOCS_DIR='./data/guidelines'
- COLLECTION='gynae_guidelines'
- EMB_MODEL_NAME='medicalai/ClinicalBERT'
- CHAT_ENDPOINT='https://router.huggingface.co/models/openai/gpt-oss-120b'

+
+ CHROMA_DIR = "./data/chroma"
+ DOCS_DIR = "./data/guidelines"
+ COLLECTION = "med_guidelines_multispeciality"
+ EMB_MODEL_NAME = "medicalai/ClinicalBERT"
+ RETRIEVAL_K_DEFAULT = 5
+ CHAT_ENDPOINT = "https://api-inference.huggingface.co/models/openai/gpt-oss-120b"
+ MAX_RETRIES_DEFAULT = 6
+ RETRY_BACKOFF_SECONDS_DEFAULT = 3
+ REQUEST_TIMEOUT_SECONDS_DEFAULT = 60
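These defaults can be overridden per deployment without code changes: `call_endpoint` resolves its URL as explicit argument, then the `CHAT_ENDPOINT` environment variable, then this constant. A sketch with a placeholder URL:

    import os
    os.environ["CHAT_ENDPOINT"] = "https://example.com/my-endpoint"  # placeholder URL

    from backend.endpoint_client import call_endpoint
    reply, _ = call_endpoint("ping")   # uses the env var, not the constant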
utils/helpers.py ADDED
@@ -0,0 +1,13 @@
+
+ from typing import List,Dict
+
+ def to_safe_items(res)->List[Dict]:
+     items=[]
+     if res and res.get("ids"):
+         for i in range(len(res["ids"][0])):
+             items.append({
+                 "text":res["documents"][0][i],
+                 "title":res["metadatas"][0][i].get("title","(untitled)"),
+                 "source":res["metadatas"][0][i].get("source",""),
+             })
+     return items
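`to_safe_items` flattens Chroma's column-oriented query result into row dicts; a sketch against a mocked result:

    from utils.helpers import to_safe_items

    mock = {
        "ids": [["1_AUB-0"]],
        "documents": [["AUB guideline for obgyn"]],
        "metadatas": [[{"title": "1_AUB", "source": "data/guidelines/obgyn/1_AUB.txt"}]],
    }
    print(to_safe_items(mock))
    # [{"text": "AUB guideline for obgyn", "title": "1_AUB",
    #   "source": "data/guidelines/obgyn/1_AUB.txt"}]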
utils/persona.py ADDED
@@ -0,0 +1,10 @@
+
+ AI_GYNO_PERSONA_V2 = """
+ You are AIgyno, a multispeciality clinical assistant for doctors.
+
+ - Provide safe, accurate, careful medical reasoning.
+ - Use retrieved context.
+ - Ask clarifying questions if uncertain.
+ - Provide differentials and next steps.
+ - Doctor review required for final diagnosis.
+ """