Cheh Kit Hong committed
Commit aa018e3 Β· 1 Parent(s): 0fc97a4

fixing gradio

README.md CHANGED
@@ -2,14 +2,12 @@ rag_agent/
 β”œβ”€β”€ app.py                  # Main Gradio application entry point
 β”œβ”€β”€ config.py               # Configuration hub (models, chunk sizes, providers)
 β”œβ”€β”€ util.py                 # PDF to markdown conversion
-β”œβ”€β”€ document_chunker.py     # Chunking strategy
 β”œβ”€β”€ core/                   # Core RAG components orchestration
 β”‚   β”œβ”€β”€ chat_interface.py
 β”‚   β”œβ”€β”€ document_manager.py
 β”‚   └── rag_system.py
-β”œβ”€β”€ knowledge_base/         # Storage management
-β”‚   β”œβ”€β”€ chroma.py           # Parent chunks storage (JSON)
-β”‚   └── vector_db_manager.py
+β”œβ”€β”€ knowledge_base/         # Scripts that build the ChromaDB index
+β”œβ”€β”€ chroma_data/            # Chroma vector store data
 β”œβ”€β”€ agent_logic/            # LangGraph agent workflow
 β”‚   β”œβ”€β”€ edges.py            # Conditional routing logic
 β”‚   β”œβ”€β”€ graph.py            # Graph construction and compilation
agent/graph.py CHANGED
@@ -13,12 +13,14 @@ def create_agent_graph(llm, vectordb, search_tools) -> StateGraph:
     graph = StateGraph(AgentState)
     checkpointer = MemorySaver()
 
+    llm_with_tools = llm.bind_tools(search_tools)
     web_search_tool_node = ToolNode(search_tools)
 
     # --- Nodes ---
     graph.add_node("router_node", partial(router_node, llm=llm))
     graph.add_node("vectordb_node", partial(vectordb_node, vectorstore=vectordb))
-    graph.add_node("web_search_node", web_search_tool_node)
+    graph.add_node("web_search_agent_node", partial(web_search_agent_node, llm=llm_with_tools))
+    graph.add_node("web_search_tool_node", web_search_tool_node)
     graph.add_node("generate_node", partial(generate_node, llm=llm))
 
     # --- Edges ---
@@ -28,16 +30,23 @@ def create_agent_graph(llm, vectordb, search_tools) -> StateGraph:
         "router_node",
         routing_logic,
         {
-            # Output from routing_logic -> Target Node Name
             "vectordb_node": "vectordb_node",
-            "web_search_node": "web_search_node",
+            "web_search_agent_node": "web_search_agent_node",
             "generate_node": "generate_node",
-            # If your logic has an 'else' that returns END, you don't list it here.
+        }
+    )
+
+    graph.add_conditional_edges(
+        "web_search_agent_node",
+        tools_condition,
+        {
+            "tools": "web_search_tool_node",  # tools_condition returns "tools" when the AIMessage carries tool_calls
+            "__end__": "generate_node",       # and "__end__" when it carries none
         }
     )
 
     graph.add_edge("vectordb_node", "generate_node")
-    graph.add_edge("web_search_node", "generate_node")
+    graph.add_edge("web_search_tool_node", "generate_node")
 
     graph.add_edge("generate_node", END)
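For reviewers unfamiliar with the prebuilt helpers: `tools_condition` inspects the last `AIMessage` and returns `"tools"` if it carries `tool_calls`, `"__end__"` otherwise, which is why the mapping above uses those two keys. Below is a minimal sketch of the same agent-to-tools wiring, assuming any chat model `llm` that supports `.bind_tools`; the toy `wikipedia_search` tool is a stand-in, not the repo's real tool.

```python
from langchain_core.tools import tool
from langgraph.graph import StateGraph, MessagesState, START, END
from langgraph.prebuilt import ToolNode, tools_condition

@tool
def wikipedia_search(query: str) -> str:
    """Toy stand-in for a real search tool."""
    return f"Results for: {query}"

def build_search_loop(llm):
    """Wire an agent node to a ToolNode via tools_condition."""
    llm_with_tools = llm.bind_tools([wikipedia_search])

    def agent(state: MessagesState):
        # The bound model emits an AIMessage; if it chose to search,
        # the message carries tool_calls and tools_condition routes to "tools".
        return {"messages": [llm_with_tools.invoke(state["messages"])]}

    g = StateGraph(MessagesState)
    g.add_node("agent", agent)
    g.add_node("tools", ToolNode([wikipedia_search]))
    g.add_edge(START, "agent")
    g.add_conditional_edges("agent", tools_condition)  # -> "tools" or END
    g.add_edge("tools", "agent")  # feed tool results back to the agent
    return g.compile()
```

Note the design difference: the sketch loops tool output back to the agent, while this commit routes `web_search_tool_node` straight to `generate_node`, which caps the flow at a single search round.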
agent/more_nodes.py DELETED
@@ -1,97 +0,0 @@
-from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, RemoveMessage
-from typing import Literal
-
-from .state import AgentState, QueryAnalysis
-from .prompts import *
-
-def analyze_chat_and_summarize(state: AgentState, llm):
-    """
-    Analyzes chat history and summarizes key points for context.
-    """
-    if len(state["messages"]) < 4:  # Need some history to summarize
-        return {"conversation_summary": ""}
-
-    # Extract relevant messages (excluding current query and system messages)
-    relevant_msgs = [
-        msg for msg in state["messages"][:-1]  # Exclude current query
-        if isinstance(msg, (HumanMessage, AIMessage))
-        and not getattr(msg, "tool_calls", None)
-    ]
-
-    if not relevant_msgs:
-        return {"conversation_summary": ""}
-
-    summary_prompt = """**Summarize the key topics and context from this conversation concisely (1-2 sentences max).**
-    Discard irrelevant information, such as misunderstandings or off-topic queries/responses.
-    If there are no key topics, return an empty string.
-
-    """
-    for msg in relevant_msgs[-6:]:  # Last 6 messages for context
-        role = "User" if isinstance(msg, HumanMessage) else "Assistant"
-        summary_prompt += f"{role}: {msg.content}\n"
-
-    summary_prompt += "\nBrief Summary:"
-    summary_response = llm.with_config(temperature=0.3).invoke([SystemMessage(content=summary_prompt)])
-    return {"conversation_summary": summary_response.content}
-
-def analyze_and_rewrite_query(state: AgentState, llm):
-    """
-    Analyzes user query and rewrites it for clarity, optionally using conversation context.
-    """
-    last_message = state["messages"][-1]
-    conversation_summary = state.get("conversation_summary", "")
-
-    context_section = (
-        f"**Conversation Context:**\n{conversation_summary}"
-        if conversation_summary.strip()
-        else "**Conversation Context:**\n[First query in conversation]"
-    )
-
-    # Create analysis prompt
-    query_analysis_prompt = get_query_analysis_prompt(last_message.content, conversation_summary)
-
-    llm_with_structure = llm.with_config(temperature=0.3).with_structured_output(QueryAnalysis)
-    response = llm_with_structure.invoke([SystemMessage(content=query_analysis_prompt)])
-
-    if response.is_clear:
-        # Remove all non-system messages
-        delete_all = [
-            RemoveMessage(id=m.id)
-            for m in state["messages"]
-            if not isinstance(m, SystemMessage)
-        ]
-
-        # Format rewritten query
-        rewritten = (
-            "\n".join([f"{i+1}. {q}" for i, q in enumerate(response.questions)])
-            if len(response.questions) > 1
-            else response.questions[0]
-        )
-        return {
-            "questionIsClear": True,
-            "messages": delete_all + [HumanMessage(content=rewritten)]
-        }
-    else:
-        clarification = response.clarification_needed or "I need more information to understand your question."
-        return {
-            "questionIsClear": False,
-            "messages": [AIMessage(content=clarification)]
-        }
-
-def human_input_node(state: AgentState):
-    """Placeholder node for human-in-the-loop interruption"""
-    return {}
-
-def route_after_rewrite(state: AgentState) -> Literal["agent", "human_input"]:
-    """Route to agent if question is clear, otherwise wait for human input"""
-    return "agent" if state.get("questionIsClear", False) else "human_input"
-
-def agent_node(state: AgentState, llm_with_tools):
-    """Main agent node that processes queries using tools"""
-    system_prompt = get_system_prompt()
-    messages = [system_prompt] + state["messages"]
-    response = llm_with_tools.invoke(messages)
-    return {"messages": [response]}
-
-if __name__ == "__main__":
-    pass
agent/nodes.py CHANGED
@@ -1,4 +1,4 @@
-from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, RemoveMessage
+from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage, RemoveMessage
 from typing import Literal
 from langgraph.graph import START, END
 
@@ -13,17 +13,17 @@ def router_node(state: AgentState, llm):
     """
     query = state["messages"][-1].content
    rag_method_prompt = determine_rag_method_prompt()
-    rag_method_result = llm.invoke([rag_method_prompt, HumanMessage(content=query)])
+    rag_method_result = llm.invoke([SystemMessage(content=rag_method_prompt), HumanMessage(content=query)])
     rag_method = rag_method_result.content.strip().upper()
     state["rag_method"] = rag_method
     return state
 
-def routing_logic(self, state: AgentState) -> str:
+def routing_logic(state: AgentState) -> str:
     rag_method = state["rag_method"]
     if rag_method == "RAG":
         return "vectordb_node"
     elif rag_method == "WEBSEARCH":
-        return "web_search_node"
+        return "web_search_agent_node"
     elif rag_method == "GENERAL":
         return "generate_node"  # fall back to generate_node when the question needs neither RAG nor web search
     else:
@@ -31,7 +31,7 @@ def routing_logic(self, state: AgentState) -> str:
         print(f"ERROR: Router returned unclassified intent: {rag_method}. Terminating flow.")
         return END
 
-def vectordb_node(state: AgentState, llm, vectorstore):
+def vectordb_node(state: AgentState, vectorstore):
     """
     Use vectordb to answer the query.
     """
@@ -43,12 +43,40 @@ def vectordb_node(state: AgentState, llm, vectorstore):
     state["context"] = context
     return state
 
+def web_search_agent_node(state: AgentState, llm):
+    """
+    LLM agent that decides which web search tools to call.
+    This generates an AIMessage with tool_calls.
+    """
+    messages = state["messages"]
+
+    # Add an instruction to use the tools
+    system_msg = SystemMessage(content="""You are a web search assistant.
+    Use the available search tools (web_search_tavily, wikipedia_search) to find information about the user's query.
+    Call the appropriate tool with the query.""")
+
+    messages_with_system = [system_msg] + messages
+
+    # The tool-bound LLM generates an AIMessage with tool_calls
+    response = llm.invoke(messages_with_system)
+
+    return {"messages": [response]}
+
 def generate_node(state: AgentState, llm):
     messages = state["messages"][-10:]  # Limit to the last 10 messages to stay within the token limit
     context = state.get("context", [])
 
     system_content = get_system_prompt()
 
+    # Extract web search results from ToolMessages if available
+    if not context:
+        context = ""  # start from a string so the += below concatenates cleanly
+        for msg in reversed(messages):
+            if isinstance(msg, ToolMessage):
+                # Web search results arrive as ToolMessage content
+                if msg.content:
+                    context += f"\n\n{msg.content}"
+
     if context:
         system_content += f"\n\nRelevant Context:\n{context}"
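The new `generate_node` branch relies on tool results arriving as `ToolMessage` entries in the message list. A self-contained sketch of that fold, with a hand-built history standing in for a real tool round-trip (all contents are placeholders):

```python
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage

# Simulated history after one tool round-trip: the agent's tool call,
# then the ToolNode's result.
messages = [
    HumanMessage(content="Latest AI news in 2025"),
    AIMessage(content="", tool_calls=[{
        "name": "web_search_tavily",
        "args": {"query": "latest AI news 2025"},
        "id": "call_1",
    }]),
    ToolMessage(content="Tavily result: ...", tool_call_id="call_1"),
]

# The same fold generate_node performs: gather tool output into context.
context = ""
for msg in reversed(messages):
    if isinstance(msg, ToolMessage) and msg.content:
        context += f"\n\n{msg.content}"

print(context.strip())  # -> "Tavily result: ..."
```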
agent/prompts.py CHANGED
@@ -2,87 +2,31 @@ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
 
 def get_system_prompt() -> SystemMessage:
     """Generate the system prompt for the RAG agent."""
-    return SystemMessage(content="""
-    You are an intelligent assistant that MUST use the available tools to answer questions.
-
-    **MANDATORY WORKFLOW β€” Follow these steps for EVERY question:**
-    1. **Call `search_chroma`** with the user's query (K = 3–7) to find the most relevant chunks in the Chroma vector store.
-    2. **Review the retrieved chunks** and identify the relevant ones. The chunks will contain content and metadata (such as `parent_id` and `source`).
-    3. **If additional context is needed**, retrieve more details from the source tools (e.g., Wikipedia or Arxiv) to provide the full answer.
-    4. **Use metadata** such as `source` and `parent_id` to help clarify or support the answer when applicable.
-    5. **Answer using ONLY the retrieved information**:
-       - Combine relevant chunks and use metadata (e.g., citation sources) as needed to clarify or support the response.
-    6. **If no relevant information is found**, rewrite the query into an **answer-focused declarative statement** and search again **only once** using `search_chroma`.
-    7. **Return the final answer** derived from the most relevant results.
-    """)
+    return """
+    You are a helpful assistant tasked with answering questions using a set of tools.
+    Follow the ReAct framework: iteratively reason through the problem step by step, use tools when necessary, and refine your approach based on tool outputs.
+    You will be provided with relevant context from the knowledge base when required. Use this context to inform your response, but feel free to supplement it with your own knowledge when appropriate. Context is provided in the state under the 'context' key.
+    You also have access to web search tools such as Tavily, Wikipedia, and Arxiv.
+    DO NOT make any assumptions.
+    """
 
 def determine_rag_method_prompt() -> str:
-    return SystemMessage(content="""
-    You are an rag method classification model. Given the user's query, you must classify the method to use
+    return """
+    You are a query classification model. Given the user's query, you must classify the method to use
     as one and only one of the following options:
 
-    1. **RAG**: The query likely relates to the internal, domain-specific documents you have access to.
-    2. **WEBSEARCH**: The query requires real-time facts, general knowledge, or external information not in your documents.
-    3. **GENERAL**: The query can be answered based on your existing knowledge without external resources.
-
-    Respond STRICTLY with only one of these words: RAG, WEBSEARCH, or GENERAL. Do not include any punctuation, explanation, or extra text.
-    """
-    )
-
-def get_conversation_summary_prompt(messages):
-    """Generate a prompt for conversation summarization."""
-    summary_prompt = """**Summarize the key topics and context from this conversation concisely (1-2 sentences max).**
-    Discard irrelevant information, such as misunderstandings or off-topic queries/responses.
-    If there are no key topics, return an empty string.
-
-    """
-
-    for msg in messages[-6:]:
-        role = "User" if isinstance(msg, HumanMessage) else "Assistant"
-        summary_prompt += f"{role}: {msg.content}\n"
-
-    summary_prompt += "\n**Brief Summary:**"
-    return summary_prompt
-
-def get_query_analysis_prompt(query: str, conversation_summary: str = "") -> str:
-    """Generate a prompt for query analysis and rewriting."""
-    context_section = (
-        f"**Conversation Context:**\n{conversation_summary}"
-        if conversation_summary.strip()
-        else "**Conversation Context:**\n[First query in conversation]"
-    )
-
-    return f"""
-    **Rewrite the user's query** to be clear, self-contained, and optimized for information retrieval.
-
-    **User Query:**
-    "{query}"
-
-    {context_section}
-
-    **Instructions:**
-
-    1. **Resolve references for follow-ups:**
-       - If the query uses pronouns or refers to previous topics, use the context to make it self-contained.
-
-    2. **Ensure clarity for new queries:**
-       - Make the query specific, concise, and unambiguous.
-
-    3. **Correct errors and interpret intent:**
-       - If the query is grammatically incorrect, contains typos, or has abbreviations, correct it and infer the intended meaning.
-
-    4. **Split only when necessary:**
-       - If multiple distinct questions exist, split into **up to 3 focused sub-queries** to avoid over-segmentation.
-       - Each sub-query must still be meaningful on its own.
-
-    5. **Optimize for search:**
-       - Use **keywords, proper nouns, numbers, dates, and technical terms**.
-       - Remove conversational filler, vague words, and redundancies.
-       - Make the query concise and focused for information retrieval.
-
-    6. **Mark as unclear if intent is missing:**
-       - This includes nonsense, gibberish, insults, or statements without an apparent question.
+    1. **RAG**: The query asks about specific documents, papers, or systems such as DeepAnalyze, AgentMem, SAM3, SAM 3, SAM3D, or DeepSeek-OCR, or about technical architecture/implementation details from research papers.
+    2. **WEBSEARCH**: The query asks for current events, the latest news, real-time information after January 2024, or general factual knowledge not covered by the specialized documents.
+    3. **GENERAL**: The query is a simple calculation, definition, reasoning task, or common-knowledge question that doesn't need external data.
+
+    **Examples:**
+    - "What is DeepAnalyze?" β†’ RAG
+    - "Explain SAM 3 architecture" β†’ RAG
+    - "Latest AI news in 2025" β†’ WEBSEARCH
+    - "What is 15 times 7?" β†’ GENERAL
+
+    Respond STRICTLY with only one word: RAG, WEBSEARCH, or GENERAL. No punctuation or extra text.
     """
-
 if __name__ == "__main__":
     pass
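Since the router trusts the model to answer with a bare label, a thin wrapper that normalizes and guards the reply is cheap insurance. The sketch below mirrors `router_node` in agent/nodes.py; the fallback to GENERAL is an illustration added here, not part of this commit.

```python
from langchain_core.messages import HumanMessage, SystemMessage

VALID_ROUTES = {"RAG", "WEBSEARCH", "GENERAL"}

def classify_query(llm, prompt_text: str, query: str) -> str:
    """Run the classifier prompt and normalize the one-word reply."""
    reply = llm.invoke([
        SystemMessage(content=prompt_text),
        HumanMessage(content=query),
    ])
    label = reply.content.strip().upper()
    # Defensive default so a chatty, non-compliant reply cannot crash routing.
    return label if label in VALID_ROUTES else "GENERAL"
```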
agent/state.py CHANGED
@@ -18,7 +18,7 @@ class AgentState(TypedDict):
     conversation_summary: str = ""
 
 
-
+# Implement later if needed; omitted for now
 class QueryAnalysis(BaseModel):
     """Structured output for query analysis"""
     is_clear: bool = Field(description="Indicates if the user's question is clear and answerable")
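If `QueryAnalysis` is revived, the deleted more_nodes.py shows how it was consumed: through `with_structured_output`. A minimal sketch under that assumption; the `questions` and `clarification_needed` fields are taken from that deleted code, not from this diff's context.

```python
from pydantic import BaseModel, Field

class QueryAnalysis(BaseModel):
    """Structured output for query analysis."""
    is_clear: bool = Field(description="Indicates if the user's question is clear and answerable")
    questions: list[str] = Field(default_factory=list, description="Rewritten, self-contained sub-queries")
    clarification_needed: str | None = Field(default=None, description="Follow-up question when intent is unclear")

# Usage as in the deleted more_nodes.py, assuming a LangChain chat model `llm`:
#   analyzer = llm.with_structured_output(QueryAnalysis)
#   result = analyzer.invoke("rewrite and assess this query ...")
#   if result.is_clear:
#       ...use result.questions...
```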
config.py CHANGED
@@ -4,7 +4,10 @@ configs = {
     "DATA_PATH": "./docs/markdowns",
     "PERSIST_PATH": "./chroma_data",
     "EMBEDDING_MODEL_NAME": "sentence-transformers/all-mpnet-base-v2",
-    "COLLECTION_NAME": "langchain_mpnet_collection"
+    "COLLECTION_NAME": "langchain_mpnet_collection",
+    "LLM_MODEL_NAME": "gemini-2.0-flash",
+    "TEMPERATURE": 0.2,
+    "MAX_TOKENS": 2048,
 }
 
 if __name__ == "__main__":
core/rag_agent.py CHANGED
@@ -1,16 +1,21 @@
 import uuid
 from langchain_google_genai import ChatGoogleGenerativeAI
-import config
+from config import configs
 from agent.tools import *
 from agent.graph import create_agent_graph
 
+from dotenv import load_dotenv
+
+load_dotenv()
+
 class RAGAgent:
     def __init__(self):
         self.thread_id = str(uuid.uuid4())
 
         self.llm = ChatGoogleGenerativeAI(
-            model=config.LLM_MODEL,
-            temperature=config.LLM_TEMPERATURE
+            model=configs["LLM_MODEL_NAME"],
+            temperature=configs["TEMPERATURE"],
+            max_tokens=configs["MAX_TOKENS"]
         )
 
         vectordb = intialize_chroma_vectorstore()
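The `load_dotenv()` call only helps if a `.env` file supplies the credential that `ChatGoogleGenerativeAI` reads from the environment. A minimal sketch, assuming the standard `GOOGLE_API_KEY` variable (the `.env` file itself is not part of this commit):

```python
# .env (project root, never committed):
#   GOOGLE_API_KEY=your-key-here

import os
from dotenv import load_dotenv

load_dotenv()  # populates os.environ from .env without overriding existing vars

assert os.getenv("GOOGLE_API_KEY"), "Set GOOGLE_API_KEY in .env or the shell"
```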
requirements.txt CHANGED
@@ -13,4 +13,5 @@ langchain-community
 langchain_text_splitters
 pymupdf-layout
 sentence_transformers
-gradio
+gradio
+python-dotenv
test_scripts.py ADDED
@@ -0,0 +1,349 @@
+"""
+Test script for RAG Agent logic.
+Tests the agent workflow, nodes, state management, and retrieval.
+"""
+
+import sys
+from pathlib import Path
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parent))
+
+from langchain_core.messages import HumanMessage, AIMessage
+from agent.state import AgentState
+from core.rag_agent import RAGAgent
+
+
+def print_separator(title: str):
+    """Print a visual separator."""
+    print("\n" + "=" * 70)
+    print(f"  {title}")
+    print("=" * 70 + "\n")
+
+
+def test_agent_initialization():
+    """Test that RAGAgent can be initialized properly."""
+    print_separator("TEST 1: Agent Initialization")
+
+    try:
+        agent = RAGAgent()
+        print("βœ“ RAGAgent initialized successfully")
+        print(f"  - Thread ID: {agent.thread_id}")
+        print(f"  - LLM Model: {agent.llm.model_name if hasattr(agent.llm, 'model_name') else 'initialized'}")
+        print(f"  - Graph: {type(agent.agent_graph).__name__}")
+        return agent
+    except Exception as e:
+        print(f"βœ— Failed to initialize RAGAgent: {e}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+
+def test_simple_query(agent: RAGAgent):
+    """Test a simple query execution."""
+    print_separator("TEST 2: Simple Query")
+
+    if agent is None:
+        print("βœ— Skipping - agent not initialized")
+        return False
+
+    try:
+        query = "What is DeepAnalyze?"
+        print(f"Query: '{query}'")
+
+        initial_state = {
+            "messages": [HumanMessage(content=query)],
+        }
+
+        result = agent.agent_graph.invoke(
+            initial_state,
+            config=agent.get_config()
+        )
+
+        messages = result.get("messages", [])
+        ai_messages = [m for m in messages if isinstance(m, AIMessage)]
+
+        if ai_messages:
+            print("βœ“ Query executed successfully")
+            print(f"  Total messages: {len(messages)}")
+            print(f"  Response length: {len(ai_messages[-1].content)} chars")
+            print("\n  Response preview:")
+            print(f"  {ai_messages[-1].content[:300]}...")
+            return True
+        else:
+            print("βœ— No AI response generated")
+            return False
+
+    except Exception as e:
+        print(f"βœ— Query execution failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_rag_query(agent: RAGAgent):
+    """Test a query that should use RAG (local documents)."""
+    print_separator("TEST 3: RAG Query")
+
+    if agent is None:
+        print("βœ— Skipping - agent not initialized")
+        return False
+
+    try:
+        query = "Explain the architecture of SAM 3"
+        print(f"Query: '{query}' (should use local documents)")
+
+        initial_state = {
+            "messages": [HumanMessage(content=query)],
+        }
+
+        result = agent.agent_graph.invoke(
+            initial_state,
+            config=agent.get_config()
+        )
+
+        messages = result.get("messages", [])
+        rag_method = result.get("rag_method", "UNKNOWN")
+        ai_messages = [m for m in messages if isinstance(m, AIMessage)]
+
+        print(f"  Routing decision: {rag_method}")
+
+        if ai_messages:
+            print("βœ“ RAG query executed")
+            print("  Response preview:")
+            print(f"  {ai_messages[-1].content[:300]}...")
+            return True
+        else:
+            print("βœ— No response generated")
+            return False
+
+    except Exception as e:
+        print(f"βœ— RAG query failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_web_search_query(agent: RAGAgent):
+    """Test a query that should use web search."""
+    print_separator("TEST 4: Web Search Query")
+
+    if agent is None:
+        print("βœ— Skipping - agent not initialized")
+        return False
+
+    try:
+        query = "What's the latest news about AI in 2025?"
+        print(f"Query: '{query}' (should use web search)")
+
+        initial_state = {
+            "messages": [HumanMessage(content=query)],
+        }
+
+        result = agent.agent_graph.invoke(
+            initial_state,
+            config=agent.get_config()
+        )
+
+        messages = result.get("messages", [])
+        rag_method = result.get("rag_method", "UNKNOWN")
+        ai_messages = [m for m in messages if isinstance(m, AIMessage)]
+
+        print(f"  Routing decision: {rag_method}")
+
+        if ai_messages:
+            print("βœ“ Web search query executed")
+            print("  Response preview:")
+            print(f"  {ai_messages[-1].content[:300]}...")
+            return True
+        else:
+            print("βœ— No response generated")
+            return False
+
+    except Exception as e:
+        print(f"βœ— Web search query failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_general_query(agent: RAGAgent):
+    """Test a general query that doesn't need RAG or web search."""
+    print_separator("TEST 5: General Query")
+
+    if agent is None:
+        print("βœ— Skipping - agent not initialized")
+        return False
+
+    try:
+        query = "What is 15 multiplied by 7?"
+        print(f"Query: '{query}' (should use general LLM)")
+
+        initial_state = {
+            "messages": [HumanMessage(content=query)],
+        }
+
+        result = agent.agent_graph.invoke(
+            initial_state,
+            config=agent.get_config()
+        )
+
+        messages = result.get("messages", [])
+        rag_method = result.get("rag_method", "UNKNOWN")
+        ai_messages = [m for m in messages if isinstance(m, AIMessage)]
+
+        print(f"  Routing decision: {rag_method}")
+
+        if ai_messages:
+            print("βœ“ General query executed")
+            print(f"  Response: {ai_messages[-1].content}")
+            return True
+        else:
+            print("βœ— No response generated")
+            return False
+
+    except Exception as e:
+        print(f"βœ— General query failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_conversation_memory(agent: RAGAgent):
+    """Test multi-turn conversation with memory."""
+    print_separator("TEST 6: Conversation Memory")
+
+    if agent is None:
+        print("βœ— Skipping - agent not initialized")
+        return False
+
+    try:
+        # Reset the thread for a clean test
+        agent.reset_thread()
+
+        # First turn
+        print("Turn 1: 'What is DeepAnalyze?'")
+        state1 = {
+            "messages": [HumanMessage(content="What is DeepAnalyze?")],
+        }
+        result1 = agent.agent_graph.invoke(state1, config=agent.get_config())
+
+        ai_msg_1 = [m for m in result1["messages"] if isinstance(m, AIMessage)]
+        if not ai_msg_1:
+            print("βœ— No response in turn 1")
+            return False
+
+        print(f"βœ“ Turn 1 response: {ai_msg_1[-1].content[:100]}...")
+
+        # Second turn - follow-up question
+        print("\nTurn 2: 'What are its main features?' (requires context)")
+        state2 = {
+            "messages": [HumanMessage(content="What are its main features?")],
+        }
+        result2 = agent.agent_graph.invoke(state2, config=agent.get_config())
+
+        ai_msg_2 = [m for m in result2["messages"] if isinstance(m, AIMessage)]
+        if not ai_msg_2:
+            print("βœ— No response in turn 2")
+            return False
+
+        print(f"βœ“ Turn 2 response: {ai_msg_2[-1].content[:100]}...")
+
+        # Check if the response makes sense in context
+        response = ai_msg_2[-1].content.lower()
+        if "deepanalyze" in response or "feature" in response or "agent" in response:
+            print("βœ“ Conversation memory working - response uses context")
+            return True
+        else:
+            print("⚠ Response may not be using conversation context properly")
+            return True  # Still pass, as it generated a response
+
+    except Exception as e:
+        print(f"βœ— Conversation memory test failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def test_thread_reset(agent: RAGAgent):
+    """Test thread reset functionality."""
+    print_separator("TEST 7: Thread Reset")
+
+    if agent is None:
+        print("βœ— Skipping - agent not initialized")
+        return False
+
+    try:
+        old_thread_id = agent.thread_id
+        print(f"Old thread ID: {old_thread_id}")
+
+        agent.reset_thread()
+
+        new_thread_id = agent.thread_id
+        print(f"New thread ID: {new_thread_id}")
+
+        if old_thread_id != new_thread_id:
+            print("βœ“ Thread reset successfully")
+            return True
+        else:
+            print("βœ— Thread ID unchanged after reset")
+            return False
+
+    except Exception as e:
+        print(f"βœ— Thread reset failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def run_all_tests():
+    """Run all tests and provide a summary."""
+    print("\n" + "β–ˆ" * 70)
+    print("  RAG AGENT TEST SUITE")
+    print("β–ˆ" * 70)
+
+    # Initialize the agent once
+    agent = test_agent_initialization()
+
+    if agent is None:
+        print("\nβœ— Cannot proceed - agent initialization failed")
+        return False
+
+    tests = [
+        ("Simple Query", lambda: test_simple_query(agent)),
+        ("RAG Query", lambda: test_rag_query(agent)),
+        ("Web Search Query", lambda: test_web_search_query(agent)),
+        ("General Query", lambda: test_general_query(agent)),
+        ("Conversation Memory", lambda: test_conversation_memory(agent)),
+        ("Thread Reset", lambda: test_thread_reset(agent)),
+    ]
+
+    results = {}
+    for name, test_func in tests:
+        try:
+            results[name] = test_func()
+        except Exception as e:
+            print(f"\nβœ— Test '{name}' crashed: {e}")
+            import traceback
+            traceback.print_exc()
+            results[name] = False
+
+    # Print summary
+    print_separator("TEST SUMMARY")
+    passed = sum(results.values())
+    total = len(results)
+
+    for name, passed_test in results.items():
+        status = "βœ“ PASS" if passed_test else "βœ— FAIL"
+        print(f"{status}: {name}")
+
+    print(f"\n{'='*70}")
+    print(f"  TOTAL: {passed}/{total} tests passed ({passed/total*100:.1f}%)")
+    print(f"{'='*70}\n")
+
+    return passed == total
+
+
+if __name__ == "__main__":
+    success = run_all_tests()
+    sys.exit(0 if success else 1)
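The suite is plain print-based rather than pytest: run it with `python test_scripts.py`. Since `run_all_tests` returns an overall boolean and `sys.exit` turns it into an exit code, the script can gate a CI step; note that every test after initialization hits the live LLM, so the API key must be configured first.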
ui/gradio_components.py CHANGED
@@ -1,4 +1,5 @@
 import gradio as gr
+from langchain_core.messages import HumanMessage, AIMessage
 from core.rag_agent import RAGAgent
 
 # Initialize components
@@ -19,81 +20,143 @@ def chat_with_agent(message, history):
     try:
         agent = initialize_agent()
 
-        # Stream the agent's response
-        response_text = ""
-        for event in agent.agent_graph.stream(
-            {"messages": [("user", message)]},
-            agent.get_config(),
-            stream_mode="values"
-        ):
-            if "messages" in event and len(event["messages"]) > 0:
-                last_message = event["messages"][-1]
-                if hasattr(last_message, "content"):
-                    response_text = last_message.content
+        # Convert Gradio history format to LangChain messages
+        messages = []
+        for user_msg, assistant_msg in history:
+            messages.append(HumanMessage(content=user_msg))
+            if assistant_msg:
+                messages.append(AIMessage(content=assistant_msg))
 
-        if not response_text:
-            response_text = "I apologize, but I couldn't generate a response. Please try again."
+        # Add the current user message
+        messages.append(HumanMessage(content=message))
 
-        return response_text
-
+        # Create the initial state
+        initial_state = {
+            "messages": messages,
+        }
+
+        # Invoke the agent graph
+        result = agent.agent_graph.invoke(
+            initial_state,
+            config=agent.get_config()
+        )
+
+        # Extract the AI response
+        result_messages = result.get("messages", [])
+        ai_messages = [m for m in result_messages if isinstance(m, AIMessage)]
+
+        if ai_messages:
+            # Get the last AI message
+            response = ai_messages[-1].content
+
+            # Append routing info as metadata (optional)
+            rag_method = result.get("rag_method", "UNKNOWN")
+            response_with_metadata = f"{response}\n\n*[Source: {rag_method}]*"
+
+            # Return history in Gradio's format: [[user, bot], [user, bot], ...]
+            new_history = history + [[message, response_with_metadata]]
+            return new_history
+        else:
+            new_history = history + [[message, "⚠️ No response generated. Please try again."]]
+            return new_history
+
     except Exception as e:
-        return f"Error: {str(e)}"
+        error_msg = f"❌ Error: {str(e)}"
+        print(f"Chat error: {e}")
+        import traceback
+        traceback.print_exc()
+
+        new_history = history + [[message, error_msg]]
+        return new_history
 
 def reset_conversation():
     """Reset the conversation thread"""
     global rag_agent
     if rag_agent:
         rag_agent.reset_thread()
-    return None  # Clear chat history
+    return []  # Clear chat history
 
 def create_gradio_ui():
     """Create the complete Gradio interface"""
 
-    with gr.Blocks(title="RAG Agent with Agentic Memory", theme=gr.themes.Soft()) as demo:
+    with gr.Blocks(title="RAG Agent with Agentic Memory") as demo:
         gr.Markdown("""
         # πŸ€– RAG Agent with Agentic Memory
 
         Chat with an intelligent agent that uses:
-        - πŸ“š **Local Knowledge Base** (ChromaDB)
-        - πŸ” **Web Search** (Tavily)
-        - πŸ“– **Wikipedia**
-        - πŸŽ“ **ArXiv** (Academic Papers)
+        - πŸ“š **Local Knowledge Base** (ChromaDB) - Research papers on DeepAnalyze, AgentMem, SAM3, etc.
+        - πŸ” **Web Search** (Tavily) - Real-time information and current events
+        - πŸ“– **Wikipedia** - General knowledge
+        - πŸŽ“ **ArXiv** - Academic papers
        """)
 
-        gr.Markdown("### Chat with Your Documents")
-        gr.Markdown("Ask questions about your documents or any topic. The agent will search multiple sources.")
-
-        chatbot = gr.Chatbot(
-            label="Conversation",
-            height=500,
-            show_label=True,
-            avatar_images=(None, "πŸ€–")
-        )
-
         with gr.Row():
-            msg = gr.Textbox(
-                label="Your Message",
-                placeholder="Ask me anything about your documents or general knowledge...",
-                scale=4
-            )
-            submit_btn = gr.Button("Send", variant="primary", scale=1)
+            with gr.Column(scale=4):
+                gr.Markdown("### πŸ’¬ Chat Interface")
+
+                chatbot = gr.Chatbot(
+                    label="Conversation",
+                    height=500,
+                    show_label=False,
+                )
+
+                with gr.Row():
+                    msg = gr.Textbox(
+                        label="Your Message",
+                        placeholder="Ask me anything about your documents or general knowledge...",
+                        scale=5,
+                        show_label=False
+                    )
+                    submit_btn = gr.Button("Send πŸ“€", variant="primary", scale=1)
+
+                with gr.Row():
+                    clear_btn = gr.Button("πŸ”„ Reset Conversation", variant="secondary")
+
+            with gr.Column(scale=1):
+                gr.Markdown("### πŸ“Š Agent Status")
+                status_box = gr.Markdown("*Ready*")
+
+                gr.Markdown("### πŸ’‘ Example Queries")
+                gr.Markdown("""
+                **Local Documents (RAG):**
+                - What is DeepAnalyze?
+                - Explain SAM 3 architecture
+                - What is AgentMem?
+
+                **Web Search:**
+                - Latest AI news in 2025
+                - Current events in technology
+
+                **General:**
+                - What is 15 Γ— 7?
+                - Explain machine learning
+                """)
 
-        with gr.Row():
-            clear_chat_btn = gr.Button("πŸ”„ Reset Conversation")
-            gr.Markdown("*Note: Resetting clears the conversation history*")
+        # Event handlers
+        def submit_message(message, history):
+            """Handle message submission and clear the input box"""
+            if not message.strip():
+                return history, ""
+
+            # Get the response
+            new_history = chat_with_agent(message, history)
+
+            return new_history, ""
 
-        # Chat interface
-        chat_interface = gr.ChatInterface(
-            fn=chat_with_agent,
-            chatbot=chatbot,
-            textbox=msg,
-            submit_btn=submit_btn,
-            retry_btn=None,
-            undo_btn=None,
-            clear_btn=None
+        # Wire up events
+        msg.submit(
+            fn=submit_message,
+            inputs=[msg, chatbot],
+            outputs=[chatbot, msg]
         )
 
-        clear_chat_btn.click(
+        submit_btn.click(
+            fn=submit_message,
+            inputs=[msg, chatbot],
+            outputs=[chatbot, msg]
+        )
+
+        clear_btn.click(
            fn=reset_conversation,
            outputs=[chatbot]
        )
@@ -101,16 +164,27 @@ def create_gradio_ui():
        gr.Markdown("""
        ---
        ### πŸ”§ How it works:
-        1. **Ask questions** in the chat
+        1. **Type your question** in the text box
        2. The agent will:
-           - Analyze your query
-           - Search relevant sources (ChromaDB, Web, Wikipedia, ArXiv)
-           - Provide comprehensive answers with citations
-        3. Use **Reset Conversation** to start fresh
+           - 🧠 Analyze your query to determine the best source
+           - πŸ” Search relevant sources (Local docs, Web, Wikipedia)
+           - πŸ“ Generate a comprehensive answer
+           - πŸ’Ύ Remember conversation context for follow-up questions
+        3. Use **Reset Conversation** to start a new thread
+
+        ---
+        *Powered by LangGraph + LangChain + ChromaDB + Google Gemini*
        """)
 
    return demo
 
 if __name__ == "__main__":
    demo = create_gradio_ui()
-    demo.launch(share=False, server_name="127.0.0.1", server_port=7860)
+    print("πŸš€ Starting Gradio interface...")
+    print("πŸ“ Running on: http://127.0.0.1:7860")
+    demo.launch(
+        share=False,
+        server_name="127.0.0.1",
+        server_port=7860,
+        show_error=True
+    )
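The core of the Gradio fix is the translation between the chatbot's pair-based history and LangChain message objects. A self-contained sketch of that conversion, mirroring `chat_with_agent` above; note that this assumes the classic tuple history layout, while newer Gradio releases also offer an OpenAI-style `type="messages"` format:

```python
from langchain_core.messages import AIMessage, HumanMessage

def history_to_messages(history, current):
    """Convert Gradio's [[user, bot], ...] pairs into LangChain messages."""
    messages = []
    for user_msg, assistant_msg in history:
        messages.append(HumanMessage(content=user_msg))
        if assistant_msg:  # the bot slot can be None while a reply is pending
            messages.append(AIMessage(content=assistant_msg))
    messages.append(HumanMessage(content=current))  # the current turn goes last
    return messages

print(history_to_messages([["hi", "hello!"]], "What is SAM 3?"))
```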