Spaces:

prithvi1029
/

agentic-document-intelligence

Sleeping

App Files Files Community

prithvi1029 committed 9 days ago

Commit

e10610a

verified ·

1 Parent(s): 6b30ef7

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -60

app.py CHANGED Viewed

@@ -1,29 +1,31 @@
 import os
 import re
-import gradio as gr
-import faiss
 import numpy as np
 from pypdf import PdfReader
 from sentence_transformers import SentenceTransformer
-from huggingface_hub import InferenceClient
 # -----------------------------
 # Config
 # -----------------------------
# Hugging Face access token: the first variable that is set and non-empty
# wins. When neither is set the token is "" so it can be checked with a plain
# truthiness test. (A former second lookup applied .replace("-", "_") to a
# name containing no hyphens, i.e. it repeated the first lookup; removed.)
HF_TOKEN = (
    os.getenv("HUGGINGFACEHUB_API_TOKEN")
    or os.getenv("HF_TOKEN")
    or ""
).strip()
# Repo id of the text-generation model used for answering.
HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "HuggingFaceH4/zephyr-7b-beta").strip()
# Sentence-transformers model used to embed chunks and queries.
EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
# Number of chunks retrieved per question. A blank or non-numeric TOP_K falls
# back to the default instead of crashing at import time, and values below 1
# are clamped to 1 because a top-k search needs at least one result.
try:
    TOP_K = max(1, int(os.getenv("TOP_K", "4")))
except ValueError:
    TOP_K = 4
 # -----------------------------
 # Helpers
@@ -56,15 +58,15 @@ def pdf_to_text(pdf_path: str) -> str:
     return "\n".join(pages)
def build_faiss_index(chunks, embedder):
    """Embed ``chunks`` with ``embedder`` and load them into a FAISS index.

    Args:
        chunks: Text chunks to embed.
        embedder: Object whose ``encode`` method returns a 2-D NumPy array of
            embeddings when called with ``convert_to_numpy=True``.

    Returns:
        A ``faiss.IndexFlatIP`` holding one float32 vector per chunk.
    """
    embeddings = embedder.encode(
        chunks,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    # The embeddings are normalized, so inner product equals cosine similarity.
    flat_index = faiss.IndexFlatIP(embeddings.shape[1])
    flat_index.add(embeddings.astype(np.float32))
    return flat_index
-def retrieve(query, embedder, index, chunks, k=TOP_K):
     qv = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
     scores, ids = index.search(qv, k)
     hits = []
@@ -75,74 +77,65 @@ def retrieve(query, embedder, index, chunks, k=TOP_K):
     return hits
def hf_generate_text(prompt: str) -> str:
    """Generate text for ``prompt`` with the configured Hugging Face model.

    Uses the normal HF serverless inference client (not the Inference
    Providers router). Failures are reported as text, not raised: a missing
    token or a failed request yields a Markdown troubleshooting message in
    place of the model output.

    Args:
        prompt: Full prompt text sent to ``HF_LLM_MODEL``.

    Returns:
        The stripped generated text, or a help message when ``HF_TOKEN`` is
        empty or the call raises.
    """
    if not HF_TOKEN:
        missing_token_help = (
            "HF token not found.\n\n"
            "Go to **Space → Settings → Variables and secrets → New secret**\n"
            "Name: `HUGGINGFACEHUB_API_TOKEN`\n"
            "Value: your hf_... token\n"
            "Then restart the Space."
        )
        return missing_token_help

    generation_options = {
        "max_new_tokens": 450,
        "temperature": 0.2,
        "top_p": 0.9,
        "repetition_penalty": 1.08,
        "return_full_text": False,
    }
    hf_client = InferenceClient(model=HF_LLM_MODEL, token=HF_TOKEN)
    try:
        generated = hf_client.text_generation(prompt=prompt, **generation_options)
        return (generated or "").strip()
    except Exception as err:
        # UI boundary: turn any client/network error into a readable message.
        failure_report = [
            "LLM call failed.\n\n",
            f"**Model:** `{HF_LLM_MODEL}`\n",
            f"**Error:** `{type(err).__name__}: {err}`\n\n",
            "✅ Fix checklist:\n",
            "1) Confirm `HF_LLM_MODEL` is exactly correct (copy-paste repo id).\n",
            "2) If model is gated, open the model page and click **Agree / Request access**.\n",
            "3) Recreate token with **Read** (usually enough) and ensure it’s pasted correctly in Space secrets.\n",
            "4) Restart Space.\n",
        ]
        return "".join(failure_report)
 # -----------------------------
-# App logic (cached state)
 # -----------------------------
-embedder = SentenceTransformer(EMBED_MODEL_NAME)
def on_upload(pdf_path):
    """Extract, chunk and index an uploaded PDF.

    Args:
        pdf_path: Path of the uploaded PDF; falsy when nothing is uploaded.

    Returns:
        ``(index, chunks, status)``. On success ``index`` is the FAISS index
        built over ``chunks``. On any failure both are ``None`` and ``status``
        explains the problem, so a previously indexed document is not left in
        place to be queried by mistake.
    """
    if not pdf_path:
        return None, None, "Please upload a PDF."

    try:
        text = pdf_to_text(pdf_path)
    except Exception as e:
        # UI boundary: an unreadable or corrupt file is reported through the
        # status output (and the state reset) instead of raising out of the
        # callback, which would skip the state reset below.
        return None, None, f"Could not read the PDF: {type(e).__name__}: {e}"

    if not text.strip():
        return None, None, "Could not extract text (scanned PDF). Use a text-based PDF or add OCR."

    chunks = chunk_text(text)
    if len(chunks) < 2:
        return None, None, "Not enough text to build RAG index."

    index = build_faiss_index(chunks, embedder)
    return index, chunks, f"✅ Indexed {len(chunks)} chunks. Now ask a question."
 def answer_question(index, chunks, question):
     if index is None or chunks is None:
-        return "Upload and index a PDF first."
     if not question or not question.strip():
         return "Type a question."
-    hits = retrieve(question, embedder, index, chunks, k=TOP_K)
     context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
     prompt = f"""You are a helpful assistant. Answer using ONLY the context.
@@ -155,23 +148,24 @@ Context:
 Answer:"""
-    ans = hf_generate_text(prompt)
     sources = "\n\n".join(
-        [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:600]}..." for i in range(len(hits))]
     )
     return f"### Answer\n{ans}\n\n---\n### Retrieved Sources\n{sources}"
 # -----------------------------
-# UI
 # -----------------------------
-with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
     gr.Markdown(
-        "# 📄 Agentic Document Intelligence\n"
-        "Upload a PDF and ask questions (RAG).\n\n"
-        f"**Model:** `{HF_LLM_MODEL}`"
     )
     pdf = gr.File(label="Upload PDF", type="filepath")
@@ -180,11 +174,11 @@ with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
     index_state = gr.State(None)
     chunks_state = gr.State(None)
-    pdf.change(fn=on_upload, inputs=[pdf], outputs=[index_state, chunks_state, status])
-    question = gr.Textbox(label="Ask a question", placeholder="e.g., Give a summary of the PDF")
     out = gr.Markdown()
-    btn = gr.Button("Run")
     btn.click(fn=answer_question, inputs=[index_state, chunks_state, question], outputs=[out])

 import os
 import re
 import numpy as np
+import faiss
+import gradio as gr
 from pypdf import PdfReader
 from sentence_transformers import SentenceTransformer
+from openai import OpenAI
+# -----------------------------
+# Stability
+# -----------------------------
+os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
 # -----------------------------
 # Config
 # -----------------------------
# Together.ai credentials and endpoint. The key is "" when unset so it can be
# checked with a plain truthiness test.
TOGETHER_API_KEY = (os.getenv("TOGETHER_API_KEY") or "").strip()
TOGETHER_BASE_URL = os.getenv("TOGETHER_BASE_URL", "https://api.together.xyz/v1").strip()
TOGETHER_MODEL = os.getenv("TOGETHER_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1").strip()
# Sentence-transformers model used to embed chunks and queries.
EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
# Number of chunks retrieved per question. A blank or non-numeric TOP_K falls
# back to the default instead of crashing at import time, and values below 1
# are clamped to 1 because a top-k search needs at least one result.
try:
    TOP_K = max(1, int(os.getenv("TOP_K", "4")))
except ValueError:
    TOP_K = 4
+# Load embedder once
+embedder = SentenceTransformer(EMBED_MODEL_NAME)
 # -----------------------------
 # Helpers
     return "\n".join(pages)
def build_faiss_index(chunks):
    """Embed ``chunks`` with the module-level ``embedder`` and index them.

    Args:
        chunks: Text chunks to embed.

    Returns:
        A ``faiss.IndexFlatIP`` holding one float32 vector per chunk.
    """
    embeddings = embedder.encode(
        chunks,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )
    # The embeddings are normalized, so inner product equals cosine similarity.
    flat_index = faiss.IndexFlatIP(embeddings.shape[1])
    flat_index.add(embeddings.astype(np.float32))
    return flat_index
+def retrieve(query, index, chunks, k=TOP_K):
     qv = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
     scores, ids = index.search(qv, k)
     hits = []
     return hits
def llm_generate(prompt: str) -> str:
    """Ask the configured Together model to answer ``prompt``.

    The request goes through the OpenAI-compatible client pointed at
    ``TOGETHER_BASE_URL``. Failures are reported as text, not raised: a
    missing API key or a failed request yields a diagnostic message in place
    of the model output.

    Args:
        prompt: User message sent after a fixed system message.

    Returns:
        The stripped reply text, or a help/diagnostic message when
        ``TOGETHER_API_KEY`` is empty or the call raises.
    """
    if not TOGETHER_API_KEY:
        setup_help = (
            "❌ TOGETHER_API_KEY not found.\n\n"
            "Go to Space → Settings → Variables and secrets → New secret:\n"
            "Name: TOGETHER_API_KEY\n"
            "Value: your Together key\n"
            "Then restart the Space."
        )
        return setup_help

    conversation = [
        {"role": "system", "content": "You are a helpful assistant. Follow instructions carefully."},
        {"role": "user", "content": prompt},
    ]
    together = OpenAI(api_key=TOGETHER_API_KEY, base_url=TOGETHER_BASE_URL)
    try:
        completion = together.chat.completions.create(
            model=TOGETHER_MODEL,
            messages=conversation,
            temperature=0.2,
            top_p=0.9,
            max_tokens=450,
        )
        reply = completion.choices[0].message.content
        return (reply or "").strip()
    except Exception as err:
        # UI boundary: turn any client/network error into a readable message.
        failure_report = [
            "❌ LLM call failed.\n\n",
            f"Base URL: {TOGETHER_BASE_URL}\n",
            f"Model: {TOGETHER_MODEL}\n",
            f"Error: {type(err).__name__}: {err}",
        ]
        return "".join(failure_report)
 # -----------------------------
+# Space logic
 # -----------------------------
def index_pdf(pdf_file):
    """Extract, chunk and index an uploaded PDF for retrieval.

    Args:
        pdf_file: Path of the uploaded PDF, or ``None`` when nothing is
            uploaded.

    Returns:
        ``(index, chunks, status)``. On success ``index`` is the FAISS index
        built over ``chunks``. On any failure both are ``None`` and ``status``
        explains the problem, so a previously indexed document is not left in
        place to be queried by mistake.
    """
    if pdf_file is None:
        return None, None, "Please upload a PDF."

    try:
        text = pdf_to_text(pdf_file)
    except Exception as e:
        # UI boundary: an unreadable or corrupt file is reported through the
        # status output (and the state reset) instead of raising out of the
        # callback, which would skip the state reset below.
        return None, None, f"❌ Could not read the PDF: {type(e).__name__}: {e}"

    if not text.strip():
        return None, None, "Could not extract text. If it’s scanned, you need OCR."

    chunks = chunk_text(text)
    if len(chunks) < 2:
        return None, None, "Not enough text to build RAG index."

    index = build_faiss_index(chunks)
    return index, chunks, f"✅ Indexed {len(chunks)} chunks. Now ask a question."
 def answer_question(index, chunks, question):
     if index is None or chunks is None:
+        return "Upload a PDF first and wait for indexing."
     if not question or not question.strip():
         return "Type a question."
+    hits = retrieve(question, index, chunks, k=TOP_K)
     context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
     prompt = f"""You are a helpful assistant. Answer using ONLY the context.
 Answer:"""
+    ans = llm_generate(prompt)
     sources = "\n\n".join(
+        [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:700]}..." for i in range(len(hits))]
     )
     return f"### Answer\n{ans}\n\n---\n### Retrieved Sources\n{sources}"
 # -----------------------------
+# UI (Gradio)
 # -----------------------------
+with gr.Blocks(title="PDF RAG (Together.ai)") as demo:
     gr.Markdown(
+        "# 📄 PDF RAG (Together.ai)\n"
+        "Upload a PDF, build a FAISS index, and ask questions.\n\n"
+        f"**LLM:** `{TOGETHER_MODEL}`  \n"
+        f"**Embedder:** `{EMBED_MODEL_NAME}`"
     )
     pdf = gr.File(label="Upload PDF", type="filepath")
     index_state = gr.State(None)
     chunks_state = gr.State(None)
+    pdf.change(fn=index_pdf, inputs=[pdf], outputs=[index_state, chunks_state, status])
+    question = gr.Textbox(label="Question", placeholder="e.g., Summarize the document")
     out = gr.Markdown()
+    btn = gr.Button("Ask")
     btn.click(fn=answer_question, inputs=[index_state, chunks_state, question], outputs=[out])