prithvi1029 committed on
Commit
e10610a
·
verified ·
1 Parent(s): 6b30ef7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -60
app.py CHANGED
@@ -1,29 +1,31 @@
1
  import os
2
  import re
3
- import gradio as gr
4
- import faiss
5
  import numpy as np
 
 
6
 
7
  from pypdf import PdfReader
8
  from sentence_transformers import SentenceTransformer
9
- from huggingface_hub import InferenceClient
10
 
 
 
 
 
11
 
12
  # -----------------------------
13
  # Config
14
  # -----------------------------
15
- HF_TOKEN = (
16
- os.getenv("HUGGINGFACEHUB_API_TOKEN")
17
- or os.getenv("HUGGINGFACEHUB_API_TOKEN".replace("-", "_"))
18
- or os.getenv("HF_TOKEN")
19
- or ""
20
- ).strip()
21
-
22
- HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "HuggingFaceH4/zephyr-7b-beta").strip()
23
 
24
  EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
25
  TOP_K = int(os.getenv("TOP_K", "4"))
26
 
 
 
 
27
 
28
  # -----------------------------
29
  # Helpers
@@ -56,15 +58,15 @@ def pdf_to_text(pdf_path: str) -> str:
56
  return "\n".join(pages)
57
 
58
 
59
- def build_faiss_index(chunks, embedder):
60
  vectors = embedder.encode(chunks, convert_to_numpy=True, normalize_embeddings=True)
61
  dim = vectors.shape[1]
62
- index = faiss.IndexFlatIP(dim) # cosine similarity since normalized
63
  index.add(vectors.astype(np.float32))
64
  return index
65
 
66
 
67
- def retrieve(query, embedder, index, chunks, k=TOP_K):
68
  qv = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
69
  scores, ids = index.search(qv, k)
70
  hits = []
@@ -75,74 +77,65 @@ def retrieve(query, embedder, index, chunks, k=TOP_K):
75
  return hits
76
 
77
 
78
- def hf_generate_text(prompt: str) -> str:
79
- """
80
- Uses NORMAL HF serverless inference (no Inference Providers router).
81
- This avoids router 404 / supported-tasks errors you were getting.
82
- """
83
- if not HF_TOKEN:
84
  return (
85
- "HF token not found.\n\n"
86
- "Go to **Space β†’ Settings β†’ Variables and secrets β†’ New secret**\n"
87
- "Name: `HUGGINGFACEHUB_API_TOKEN`\n"
88
- "Value: your hf_... token\n"
89
  "Then restart the Space."
90
  )
91
 
92
- client = InferenceClient(model=HF_LLM_MODEL, token=HF_TOKEN)
93
 
94
  try:
95
- out = client.text_generation(
96
- prompt=prompt,
97
- max_new_tokens=450,
 
 
 
98
  temperature=0.2,
99
  top_p=0.9,
100
- repetition_penalty=1.08,
101
- return_full_text=False,
102
  )
103
- return (out or "").strip()
104
  except Exception as e:
105
  return (
106
- "LLM call failed.\n\n"
107
- f"**Model:** `{HF_LLM_MODEL}`\n"
108
- f"**Error:** `{type(e).__name__}: {e}`\n\n"
109
- "βœ… Fix checklist:\n"
110
- "1) Confirm `HF_LLM_MODEL` is exactly correct (copy-paste repo id).\n"
111
- "2) If model is gated, open the model page and click **Agree / Request access**.\n"
112
- "3) Recreate token with **Read** (usually enough) and ensure it’s pasted correctly in Space secrets.\n"
113
- "4) Restart Space.\n"
114
  )
115
 
116
 
117
  # -----------------------------
118
- # App logic (cached state)
119
  # -----------------------------
120
- embedder = SentenceTransformer(EMBED_MODEL_NAME)
121
-
122
-
123
- def on_upload(pdf_path):
124
- if not pdf_path:
125
  return None, None, "Please upload a PDF."
126
 
127
- text = pdf_to_text(pdf_path)
128
  if not text.strip():
129
- return None, None, "Could not extract text (scanned PDF). Use a text-based PDF or add OCR."
130
 
131
  chunks = chunk_text(text)
132
  if len(chunks) < 2:
133
  return None, None, "Not enough text to build RAG index."
134
 
135
- index = build_faiss_index(chunks, embedder)
136
  return index, chunks, f"βœ… Indexed {len(chunks)} chunks. Now ask a question."
137
 
138
 
139
  def answer_question(index, chunks, question):
140
  if index is None or chunks is None:
141
- return "Upload and index a PDF first."
142
  if not question or not question.strip():
143
  return "Type a question."
144
 
145
- hits = retrieve(question, embedder, index, chunks, k=TOP_K)
146
  context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
147
 
148
  prompt = f"""You are a helpful assistant. Answer using ONLY the context.
@@ -155,23 +148,24 @@ Context:
155
 
156
  Answer:"""
157
 
158
- ans = hf_generate_text(prompt)
159
 
160
  sources = "\n\n".join(
161
- [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:600]}..." for i in range(len(hits))]
162
  )
163
 
164
  return f"### Answer\n{ans}\n\n---\n### Retrieved Sources\n{sources}"
165
 
166
 
167
  # -----------------------------
168
- # UI
169
  # -----------------------------
170
- with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
171
  gr.Markdown(
172
- "# πŸ“„ Agentic Document Intelligence\n"
173
- "Upload a PDF and ask questions (RAG).\n\n"
174
- f"**Model:** `{HF_LLM_MODEL}`"
 
175
  )
176
 
177
  pdf = gr.File(label="Upload PDF", type="filepath")
@@ -180,11 +174,11 @@ with gr.Blocks(title="Agentic Document Intelligence (HF RAG)") as demo:
180
  index_state = gr.State(None)
181
  chunks_state = gr.State(None)
182
 
183
- pdf.change(fn=on_upload, inputs=[pdf], outputs=[index_state, chunks_state, status])
184
 
185
- question = gr.Textbox(label="Ask a question", placeholder="e.g., Give a summary of the PDF")
186
  out = gr.Markdown()
187
- btn = gr.Button("Run")
188
 
189
  btn.click(fn=answer_question, inputs=[index_state, chunks_state, question], outputs=[out])
190
 
 
1
  import os
2
  import re
 
 
3
  import numpy as np
4
+ import faiss
5
+ import gradio as gr
6
 
7
  from pypdf import PdfReader
8
  from sentence_transformers import SentenceTransformer
9
+ from openai import OpenAI
10
 
11
+ # -----------------------------
12
+ # Stability
13
+ # -----------------------------
14
+ os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
15
 
16
  # -----------------------------
17
  # Config
18
  # -----------------------------
19
+ TOGETHER_API_KEY = (os.getenv("TOGETHER_API_KEY") or "").strip()
20
+ TOGETHER_BASE_URL = os.getenv("TOGETHER_BASE_URL", "https://api.together.xyz/v1").strip()
21
+ TOGETHER_MODEL = os.getenv("TOGETHER_MODEL", "mistralai/Mixtral-8x7B-Instruct-v0.1").strip()
 
 
 
 
 
22
 
23
  EMBED_MODEL_NAME = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
24
  TOP_K = int(os.getenv("TOP_K", "4"))
25
 
26
+ # Load embedder once
27
+ embedder = SentenceTransformer(EMBED_MODEL_NAME)
28
+
29
 
30
  # -----------------------------
31
  # Helpers
 
58
  return "\n".join(pages)
59
 
60
 
61
def build_faiss_index(chunks):
    """Build an in-memory FAISS inner-product index over the embedded chunks.

    The chunks are embedded with the module-level ``embedder`` using
    normalized embeddings, so inner-product search ranks results by cosine
    similarity.

    Args:
        chunks: Non-empty sequence of text chunks to embed.

    Returns:
        A ``faiss.IndexFlatIP`` holding one vector per chunk, in chunk order.

    Raises:
        ValueError: If ``chunks`` is empty (there is no embedding dimension
            to size the index with).
    """
    # Fail early with a clear message instead of an IndexError on shape[1].
    if len(chunks) == 0:
        raise ValueError("Cannot build a FAISS index from an empty chunk list.")

    vectors = embedder.encode(chunks, convert_to_numpy=True, normalize_embeddings=True)
    dim = vectors.shape[1]
    index = faiss.IndexFlatIP(dim)  # cosine similarity because vectors are normalized
    index.add(vectors.astype(np.float32))
    return index
67
 
68
 
69
+ def retrieve(query, index, chunks, k=TOP_K):
70
  qv = embedder.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
71
  scores, ids = index.search(qv, k)
72
  hits = []
 
77
  return hits
78
 
79
 
80
def llm_generate(prompt: str) -> str:
    """Send ``prompt`` to the configured Together.ai chat model and return the reply.

    Talks to ``TOGETHER_BASE_URL`` through the OpenAI-compatible client using
    ``TOGETHER_MODEL``. Configuration problems and API failures are returned
    as human-readable text (rather than raised) so the UI can display them.

    Args:
        prompt: Full user prompt, including any retrieved context.

    Returns:
        The stripped model reply, or an error/help message starting with "❌".
    """
    if not TOGETHER_API_KEY:
        return (
            "❌ TOGETHER_API_KEY not found.\n\n"
            "Go to Space → Settings → Variables and secrets → New secret:\n"
            "Name: TOGETHER_API_KEY\n"
            "Value: your Together key\n"
            "Then restart the Space."
        )

    try:
        # Client construction is inside the try so that a bad base URL or
        # client configuration is reported the same way as a failed request.
        client = OpenAI(api_key=TOGETHER_API_KEY, base_url=TOGETHER_BASE_URL)
        resp = client.chat.completions.create(
            model=TOGETHER_MODEL,
            messages=[
                {"role": "system", "content": "You are a helpful assistant. Follow instructions carefully."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.2,
            top_p=0.9,
            max_tokens=450,
        )
        # Guard against an empty choices list so the user sees a meaningful
        # message instead of "IndexError: list index out of range".
        if not resp.choices:
            raise RuntimeError("The model returned no choices.")
        return (resp.choices[0].message.content or "").strip()
    except Exception as e:  # UI boundary: surface any failure as text
        return (
            "❌ LLM call failed.\n\n"
            f"Base URL: {TOGETHER_BASE_URL}\n"
            f"Model: {TOGETHER_MODEL}\n"
            f"Error: {type(e).__name__}: {e}"
        )
111
 
112
 
113
  # -----------------------------
114
+ # Space logic
115
  # -----------------------------
116
def index_pdf(pdf_file):
    """Extract text from an uploaded PDF, chunk it, and build a FAISS index.

    Used as the Gradio upload callback; the three return values feed the
    index state, the chunks state, and the status display.

    Args:
        pdf_file: Path of the uploaded PDF, or a falsy value when nothing
            is uploaded (e.g. the upload was cleared).

    Returns:
        A tuple ``(index, chunks, status)``. On any failure ``index`` and
        ``chunks`` are ``None`` and ``status`` explains what went wrong.
    """
    if not pdf_file:
        return None, None, "Please upload a PDF."

    try:
        text = pdf_to_text(pdf_file)
    except Exception as e:  # UI boundary: a corrupt/unreadable PDF must not crash the callback
        return None, None, f"Could not read the PDF. Error: {type(e).__name__}: {e}"

    if not text.strip():
        return None, None, "Could not extract text. If it’s scanned, you need OCR."

    chunks = chunk_text(text)
    if len(chunks) < 2:
        return None, None, "Not enough text to build RAG index."

    index = build_faiss_index(chunks)
    return index, chunks, f"✅ Indexed {len(chunks)} chunks. Now ask a question."
130
 
131
 
132
  def answer_question(index, chunks, question):
133
  if index is None or chunks is None:
134
+ return "Upload a PDF first and wait for indexing."
135
  if not question or not question.strip():
136
  return "Type a question."
137
 
138
+ hits = retrieve(question, index, chunks, k=TOP_K)
139
  context = "\n\n".join([f"[{i+1}] {h[1]}" for i, h in enumerate(hits)])
140
 
141
  prompt = f"""You are a helpful assistant. Answer using ONLY the context.
 
148
 
149
  Answer:"""
150
 
151
+ ans = llm_generate(prompt)
152
 
153
  sources = "\n\n".join(
154
+ [f"**Source {i+1} (score={hits[i][0]:.3f})**\n{hits[i][1][:700]}..." for i in range(len(hits))]
155
  )
156
 
157
  return f"### Answer\n{ans}\n\n---\n### Retrieved Sources\n{sources}"
158
 
159
 
160
  # -----------------------------
161
+ # UI (Gradio)
162
  # -----------------------------
163
+ with gr.Blocks(title="PDF RAG (Together.ai)") as demo:
164
  gr.Markdown(
165
+ "# πŸ“„ PDF RAG (Together.ai)\n"
166
+ "Upload a PDF, build a FAISS index, and ask questions.\n\n"
167
+ f"**LLM:** `{TOGETHER_MODEL}` \n"
168
+ f"**Embedder:** `{EMBED_MODEL_NAME}`"
169
  )
170
 
171
  pdf = gr.File(label="Upload PDF", type="filepath")
 
174
  index_state = gr.State(None)
175
  chunks_state = gr.State(None)
176
 
177
+ pdf.change(fn=index_pdf, inputs=[pdf], outputs=[index_state, chunks_state, status])
178
 
179
+ question = gr.Textbox(label="Question", placeholder="e.g., Summarize the document")
180
  out = gr.Markdown()
181
+ btn = gr.Button("Ask")
182
 
183
  btn.click(fn=answer_question, inputs=[index_state, chunks_state, question], outputs=[out])
184