mtyrrell committed
Commit ea45e0c · 1 Parent(s): 9374fdf
Files changed (2)
  1. app/main.py +4 -380
  2. app/nodes.py +305 -1
app/main.py CHANGED
@@ -1,31 +1,26 @@
 #CHATFED_ORCHESTRATOR
 import gradio as gr
-from fastapi import FastAPI, UploadFile, File, Form, Request
+from fastapi import FastAPI, UploadFile, File, Form
 from fastapi.responses import StreamingResponse
 from langserve import add_routes
 from langgraph.graph import StateGraph, START, END
-from typing import Optional, Dict, Any, List
+from typing import Optional
 from typing_extensions import TypedDict
-from gradio_client import Client, file
 import uvicorn
 import os
 from datetime import datetime
 import logging
 from contextlib import asynccontextmanager
-# import threading
 from langchain_core.runnables import RunnableLambda
-# import tempfile
-# import mimetypes
 import asyncio
 from typing import Generator
 import json
 import httpx
-# import ast
 from functools import partial
 
 from utils import getconfig, convert_context_to_list
-from nodes import detect_file_type_node, ingest_node, geojson_direct_result_node, retrieve_node
-from models import GraphState, ChatFedInput, ChatFedOutput, ChatUIInput
+from nodes import detect_file_type_node, ingest_node, geojson_direct_result_node, retrieve_node, generate_node_streaming, route_workflow, process_query_streaming
+from models import GraphState, ChatUIInput
 
 config = getconfig("params.cfg")
 RETRIEVER = config.get("retriever", "RETRIEVER", fallback="https://giz-chatfed-retriever.hf.space")
@@ -39,250 +34,6 @@ logger = logging.getLogger(__name__)
 
 
 
-# MAIN STREAMING GENERATOR
-async def generate_node_streaming(state: GraphState) -> Generator[GraphState, None, None]:
-    """Streaming version that calls generator's FastAPI endpoint"""
-    start_time = datetime.now()
-    logger.info(f"Generation (streaming): {state['query'][:50]}...")
-
-    try:
-        # Get MAX_CONTEXT_CHARS at the beginning so it's available throughout the function
-        MAX_CONTEXT_CHARS = int(config.get("general", "MAX_CONTEXT_CHARS"))
-
-        # Combine retriever context with ingestor context
-        retrieved_context = state.get("context", "")
-        ingestor_context = state.get("ingestor_context", "")
-
-        # Convert contexts to list format expected by generator
-        context_list = []
-
-        if ingestor_context:
-            # Add ingestor context
-            context_list.append({
-                "answer": ingestor_context,
-                "answer_metadata": {
-                    "filename": state.get("filename", "Uploaded Document"),
-                    "page": "Unknown",
-                    "year": "Unknown",
-                    "source": "Ingestor"
-                }
-            })
-
-        if retrieved_context:
-            # Convert retrieved context to list and add
-            retrieved_list = convert_context_to_list(retrieved_context)
-            context_list.extend(retrieved_list)
-
-        # Prepare the request payload
-        payload = {
-            "query": state["query"],
-            "context": context_list
-        }
-
-        # Determine generator URL - handle both Hugging Face and direct URLs
-        generator_url = GENERATOR
-
-        if not generator_url.startswith('http'):
-            # Allows for easy specification of space in config (converts to URL)
-            # Replace '/' with '-' for Hugging Face space URLs
-            # Force the replacement to ensure it works
-            space_name = generator_url.replace('/', '-').replace('_', '-')
-            generator_url = f"https://{space_name}.hf.space"
-
-
-        # Try FastAPI endpoint first, fallback to Gradio if needed
-        fastapi_success = False
-
-        try:
-            # Make streaming request to generator's FastAPI endpoint
-            async with httpx.AsyncClient(timeout=300.0, verify=False) as client:
-
-                async with client.stream(
-                    "POST",
-                    f"{generator_url}/generate/stream",
-                    json=payload,
-                    headers={"Content-Type": "application/json"}
-                ) as response:
-                    if response.status_code != 200:
-                        error_text = await response.aread()
-                        raise Exception(f"FastAPI endpoint returned status {response.status_code}")
-
-                    current_text = ""
-                    sources = None
-                    event_type = None
-
-                    async for line in response.aiter_lines():
-                        if not line.strip():
-                            continue
-
-                        # Parse SSE format
-                        if line.startswith("event: "):
-                            event_type = line[7:].strip()
-                            continue
-                        elif line.startswith("data: "):
-                            data_content = line[6:].strip()
-
-                            if event_type == "data":
-                                # Text chunk
-                                try:
-                                    chunk = json.loads(data_content)
-                                    if isinstance(chunk, str):
-                                        current_text += chunk
-
-                                        metadata = state.get("metadata", {})
-                                        metadata.update({
-                                            "generation_duration": (datetime.now() - start_time).total_seconds(),
-                                            "result_length": len(current_text),
-                                            "generation_success": True,
-                                            "streaming": True,
-                                            "generator_type": "fastapi"
-                                        })
-
-                                        yield {
-                                            "result": chunk,  # Send only the new chunk
-                                            "metadata": metadata
-                                        }
-                                except json.JSONDecodeError:
-                                    # Handle plain text chunks
-                                    current_text += data_content
-
-                                    metadata = state.get("metadata", {})
-                                    metadata.update({
-                                        "generation_duration": (datetime.now() - start_time).total_seconds(),
-                                        "result_length": len(current_text),
-                                        "generation_success": True,
-                                        "streaming": True,
-                                        "generator_type": "fastapi"
-                                    })
-
-                                    yield {
-                                        "result": data_content,
-                                        "metadata": metadata
-                                    }
-
-                            elif event_type == "sources":
-                                # Sources data
-                                try:
-                                    sources_data = json.loads(data_content)
-                                    sources = sources_data.get("sources", [])
-
-                                    # Update state with sources
-                                    metadata = state.get("metadata", {})
-                                    metadata.update({
-                                        "sources_received": True,
-                                        "sources_count": len(sources)
-                                    })
-
-                                    yield {
-                                        "sources": sources,
-                                        "metadata": metadata
-                                    }
-                                except json.JSONDecodeError:
-                                    logger.warning(f"Failed to parse sources data: {data_content}")
-
-                            elif event_type == "end":
-                                # Stream ended
-                                logger.info("Generator stream ended")
-                                fastapi_success = True
-                                break
-
-                            elif event_type == "error":
-                                # Error occurred
-                                try:
-                                    error_data = json.loads(data_content)
-                                    raise Exception(error_data.get("error", "Unknown error"))
-                                except json.JSONDecodeError:
-                                    raise Exception(data_content)
-
-        # GRADIO FALLBACK
-        except Exception as fastapi_error:
-            logger.warning(f"FastAPI endpoint failed: {fastapi_error}")
-            logger.info("Falling back to Gradio client")
-
-            # # Fallback to Gradio client
-            # try:
-            #     from gradio_client import Client
-
-            #     # Convert context back to string for Gradio
-            #     combined_context = ""
-            #     if ingestor_context and retrieved_context:
-            #         # Limit context size to prevent token overflow
-            #         ingestor_truncated = ingestor_context[:MAX_CONTEXT_CHARS//2] if len(ingestor_context) > MAX_CONTEXT_CHARS//2 else ingestor_context
-            #         retrieved_truncated = retrieved_context[:MAX_CONTEXT_CHARS//2] if len(retrieved_context) > MAX_CONTEXT_CHARS//2 else retrieved_context
-            #         combined_context = f"=== UPLOADED DOCUMENT CONTEXT ===\n{ingestor_truncated}\n\n=== RETRIEVED CONTEXT ===\n{retrieved_truncated}"
-            #     elif ingestor_context:
-            #         ingestor_truncated = ingestor_context[:MAX_CONTEXT_CHARS] if len(ingestor_context) > MAX_CONTEXT_CHARS else ingestor_context
-            #         combined_context = f"=== UPLOADED DOCUMENT CONTEXT ===\n{ingestor_truncated}"
-            #     elif retrieved_context:
-            #         combined_context = retrieved_context[:MAX_CONTEXT_CHARS] if len(retrieved_context) > MAX_CONTEXT_CHARS else retrieved_context
-
-            #     logger.info(f"Using Gradio client for generator at: {generator_url}")
-            #     client = Client(generator_url)
-
-            #     # Use streaming prediction
-            #     job = client.submit(
-            #         query=state["query"],
-            #         context=combined_context,
-            #         api_name="/generate"
-            #     )
-
-            #     # Track previous result to send only deltas
-            #     previous_result = ""
-
-            #     # Stream the results - each result is likely the full accumulated response
-            #     for result in job:
-            #         if result is not None:
-            #             current_result = result
-
-            #             # Calculate the delta (new content only)
-            #             if len(current_result) > len(previous_result):
-            #                 delta = current_result[len(previous_result):]
-            #                 previous_result = current_result
-
-            #                 # Yield only the new content
-            #                 metadata = state.get("metadata", {})
-            #                 metadata.update({
-            #                     "generation_duration": (datetime.now() - start_time).total_seconds(),
-            #                     "result_length": len(current_result),
-            #                     "generation_success": True,
-            #                     "streaming": True,
-            #                     "generator_type": "gradio_fallback"
-            #                 })
-
-            #                 yield {
-            #                     "result": delta,  # Send only the delta, not full result
-            #                     "metadata": metadata
-            #                 }
-
-            #     fastapi_success = True  # Mark as successful since Gradio worked
-
-            # except Exception as gradio_error:
-            #     logger.error(f"Both FastAPI and Gradio failed. FastAPI: {fastapi_error}, Gradio: {gradio_error}")
-            #     raise Exception(f"Both generation methods failed. FastAPI: {fastapi_error}, Gradio: {gradio_error}")

-        # if not fastapi_success:
-        #     raise Exception("Generation failed - no successful response received")
-
-    except Exception as e:
-        duration = (datetime.now() - start_time).total_seconds()
-        logger.error(f"Streaming generation failed: {str(e)}")
-
-        metadata = state.get("metadata", {})
-        metadata.update({
-            "generation_duration": duration,
-            "generation_success": False,
-            "generation_error": str(e),
-            "streaming": True
-        })
-        yield {"result": f"Error: {str(e)}", "metadata": metadata}
-
-# Conditional routing function
-def route_workflow(state: GraphState) -> str:
-    """Route to appropriate workflow based on file type"""
-    workflow_type = state.get("workflow_type", "standard")
-    return workflow_type
-
-
 #----------------------------------------
 # CORE WORKFLOW GRAPH
 #----------------------------------------
@@ -318,134 +69,7 @@ workflow.add_edge("geojson_direct", END)
 compiled_graph = workflow.compile()
 
 
-async def process_query_streaming(query: str, file_upload, reports_filter: str = "", sources_filter: str = "",
-                                  subtype_filter: str = "", year_filter: str = "",
-                                  output_format: str = "structured"):
-    """
-    Unified streaming function that yields partial results
-
-    Args:
-        output_format: "structured" for dict format, "gradio" for plain text format
-    """
-    file_content = None
-    filename = None
-
-    if file_upload is not None:
-        try:
-            with open(file_upload.name, 'rb') as f:
-                file_content = f.read()
-            filename = os.path.basename(file_upload.name)
-            logger.info(f"File uploaded: {filename}, size: {len(file_content)} bytes")
-        except Exception as e:
-            logger.error(f"Error reading uploaded file: {str(e)}")
-            if output_format == "structured":
-                yield {"type": "error", "content": f"Error reading file: {str(e)}"}
-            else:
-                yield f"Error reading file: {str(e)}"
-            return
-
-    start_time = datetime.now()
-    session_id = f"gradio_{start_time.strftime('%Y%m%d_%H%M%S')}"
-
-    try:
-        # Process ingestion first (non-streaming)
-        initial_state = {
-            "query": query,
-            "context": "",
-            "ingestor_context": "",
-            "result": "",
-            "sources": [],
-            "reports_filter": reports_filter or "",
-            "sources_filter": sources_filter or "",
-            "subtype_filter": subtype_filter or "",
-            "year_filter": year_filter or "",
-            "file_content": file_content,
-            "filename": filename,
-            "file_type": "unknown",
-            "workflow_type": "standard",
-            "metadata": {
-                "session_id": session_id,
-                "start_time": start_time.isoformat(),
-                "has_file_attachment": file_content is not None
-            }
-        }
-
-        # Detect file type - merge the returned state with initial state
-        state_after_detect = {**initial_state, **detect_file_type_node(initial_state)}
-
-        # Ingest if file provided - merge the returned state
-        state_after_ingest = {**state_after_detect, **ingest_node(state_after_detect)}
-
-        # Route workflow
-        workflow_type = route_workflow(state_after_ingest)
-
-        if workflow_type == "geojson_direct":
-            # For GeoJSON, return direct result
-            final_state = geojson_direct_result_node(state_after_ingest)
-            if output_format == "structured":
-                yield {"type": "data", "content": final_state["result"]}
-                yield {"type": "end", "content": ""}
-            else:
-                yield final_state["result"]
-        else:
-            # For standard workflow, retrieve first - merge the returned state
-            state_after_retrieve = {**state_after_ingest, **retrieve_node(state_after_ingest)}
-
-            # Initialize variables for both output formats
-            sources_collected = None
-            accumulated_response = "" if output_format == "gradio" else None
-
-            # Then stream generation
-            async for partial_state in generate_node_streaming(state_after_retrieve):
-                if "result" in partial_state:
-                    if output_format == "structured":
-                        yield {"type": "data", "content": partial_state["result"]}
-                    else:
-                        # Accumulate the content and yield the full accumulated response
-                        accumulated_response += partial_state["result"]
-                        yield accumulated_response
-
-                # Collect sources for later
-                if "sources" in partial_state:
-                    sources_collected = partial_state["sources"]
-
-            # Handle sources based on output format
-            if sources_collected:
-                if output_format == "structured":
-                    yield {"type": "sources", "content": sources_collected}
-                else:
-                    # Append sources to accumulated response
-                    sources_text = "\n\n**Sources:**\n"
-                    for i, source in enumerate(sources_collected, 1):
-                        if isinstance(source, dict):
-                            title = source.get('title', 'Unknown')
-                            link = source.get('link', '#')
-                            sources_text += f"{i}. [{title}]({link})\n"
-                        else:
-                            sources_text += f"{i}. {source}\n"
-
-                    accumulated_response += sources_text
-                    yield accumulated_response
-
-        if output_format == "structured":
-            yield {"type": "end", "content": ""}
-
-    except Exception as e:
-        logger.error(f"Streaming pipeline failed: {str(e)}")
-        if output_format == "structured":
-            yield {"type": "error", "content": f"Error: {str(e)}"}
-        else:
-            yield f"Error: {str(e)}"
 
-# # Convenience wrapper for Gradio compatibility
-# async def process_query_gradio_streaming(query: str, file_upload, reports_filter: str = "", sources_filter: str = "",
-#                                          subtype_filter: str = "", year_filter: str = ""):
-#     """Streaming version for Gradio UI - wrapper around unified function"""
-#     async for result in process_query_streaming(
-#         query, file_upload, reports_filter, sources_filter,
-#         subtype_filter, year_filter, output_format="gradio"
-#     ):
-#         yield result
 
 
 async def chatui_adapter(data):
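
For orientation, a minimal consumer sketch (not part of the commit; the handler name is hypothetical, everything else comes from the imports added above): with output_format="gradio", process_query_streaming yields the full accumulated response text on each step, which suits a Gradio streaming callback.

# Hypothetical sketch, assuming main.py's new imports from nodes
from nodes import process_query_streaming

async def gradio_stream_handler(query, file_upload):
    # Each yield is the accumulated answer so far; a "**Sources:**"
    # section is appended once generation finishes.
    async for accumulated in process_query_streaming(
        query, file_upload, output_format="gradio"
    ):
        yield accumulated
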
app/nodes.py CHANGED
@@ -7,6 +7,10 @@ from gradio_client import Client, file
 import logging
 from utils import getconfig
 import dotenv
+from typing_extensions import TypedDict
+import httpx
+import json
+from typing import Generator
 
 dotenv.load_dotenv()
 
@@ -180,4 +184,304 @@ def retrieve_node(state: GraphState) -> GraphState:
             "retrieval_success": False,
             "retrieval_error": str(e)
         })
-        return {"context": "", "metadata": metadata}
+        return {"context": "", "metadata": metadata}
+
+
+
+
+# MAIN STREAMING GENERATOR
+async def generate_node_streaming(state: GraphState) -> Generator[GraphState, None, None]:
+    """Streaming version that calls generator's FastAPI endpoint"""
+    start_time = datetime.now()
+    logger.info(f"Generation (streaming): {state['query'][:50]}...")
+
+    try:
+        # Get MAX_CONTEXT_CHARS at the beginning so it's available throughout the function
+        MAX_CONTEXT_CHARS = int(config.get("general", "MAX_CONTEXT_CHARS"))
+
+        # Combine retriever context with ingestor context
+        retrieved_context = state.get("context", "")
+        ingestor_context = state.get("ingestor_context", "")
+
+        # Convert contexts to list format expected by generator
+        context_list = []
+
+        if ingestor_context:
+            # Add ingestor context
+            context_list.append({
+                "answer": ingestor_context,
+                "answer_metadata": {
+                    "filename": state.get("filename", "Uploaded Document"),
+                    "page": "Unknown",
+                    "year": "Unknown",
+                    "source": "Ingestor"
+                }
+            })
+
+        if retrieved_context:
+            # Convert retrieved context to list and add
+            retrieved_list = convert_context_to_list(retrieved_context)
+            context_list.extend(retrieved_list)
+
+        # Prepare the request payload
+        payload = {
+            "query": state["query"],
+            "context": context_list
+        }
+
+        # Determine generator URL - handle both Hugging Face and direct URLs
+        generator_url = GENERATOR
+
+        if not generator_url.startswith('http'):
+            # Allows for easy specification of space in config (converts to URL)
+            # Replace '/' with '-' for Hugging Face space URLs
+            # Force the replacement to ensure it works
+            space_name = generator_url.replace('/', '-').replace('_', '-')
+            generator_url = f"https://{space_name}.hf.space"
+
+
+        # Try FastAPI endpoint first, fallback to Gradio if needed
+        fastapi_success = False
+
+        try:
+            # Make streaming request to generator's FastAPI endpoint
+            async with httpx.AsyncClient(timeout=300.0, verify=False) as client:
+
+                async with client.stream(
+                    "POST",
+                    f"{generator_url}/generate/stream",
+                    json=payload,
+                    headers={"Content-Type": "application/json"}
+                ) as response:
+                    if response.status_code != 200:
+                        error_text = await response.aread()
+                        raise Exception(f"FastAPI endpoint returned status {response.status_code}")
+
+                    current_text = ""
+                    sources = None
+                    event_type = None
+
+                    async for line in response.aiter_lines():
+                        if not line.strip():
+                            continue
+
+                        # Parse SSE format
+                        if line.startswith("event: "):
+                            event_type = line[7:].strip()
+                            continue
+                        elif line.startswith("data: "):
+                            data_content = line[6:].strip()
+
+                            if event_type == "data":
+                                # Text chunk
+                                try:
+                                    chunk = json.loads(data_content)
+                                    if isinstance(chunk, str):
+                                        current_text += chunk
+
+                                        metadata = state.get("metadata", {})
+                                        metadata.update({
+                                            "generation_duration": (datetime.now() - start_time).total_seconds(),
+                                            "result_length": len(current_text),
+                                            "generation_success": True,
+                                            "streaming": True,
+                                            "generator_type": "fastapi"
+                                        })
+
+                                        yield {
+                                            "result": chunk,  # Send only the new chunk
+                                            "metadata": metadata
+                                        }
+                                except json.JSONDecodeError:
+                                    # Handle plain text chunks
+                                    current_text += data_content
+
+                                    metadata = state.get("metadata", {})
+                                    metadata.update({
+                                        "generation_duration": (datetime.now() - start_time).total_seconds(),
+                                        "result_length": len(current_text),
+                                        "generation_success": True,
+                                        "streaming": True,
+                                        "generator_type": "fastapi"
+                                    })
+
+                                    yield {
+                                        "result": data_content,
+                                        "metadata": metadata
+                                    }
+
+                            elif event_type == "sources":
+                                # Sources data
+                                try:
+                                    sources_data = json.loads(data_content)
+                                    sources = sources_data.get("sources", [])
+
+                                    # Update state with sources
+                                    metadata = state.get("metadata", {})
+                                    metadata.update({
+                                        "sources_received": True,
+                                        "sources_count": len(sources)
+                                    })
+
+                                    yield {
+                                        "sources": sources,
+                                        "metadata": metadata
+                                    }
+                                except json.JSONDecodeError:
+                                    logger.warning(f"Failed to parse sources data: {data_content}")
+
+                            elif event_type == "end":
+                                # Stream ended
+                                logger.info("Generator stream ended")
+                                fastapi_success = True
+                                break
+
+                            elif event_type == "error":
+                                # Error occurred
+                                try:
+                                    error_data = json.loads(data_content)
+                                    raise Exception(error_data.get("error", "Unknown error"))
+                                except json.JSONDecodeError:
+                                    raise Exception(data_content)
+
+
+    except Exception as e:
+        duration = (datetime.now() - start_time).total_seconds()
+        logger.error(f"Streaming generation failed: {str(e)}")
+
+        metadata = state.get("metadata", {})
+        metadata.update({
+            "generation_duration": duration,
+            "generation_success": False,
+            "generation_error": str(e),
+            "streaming": True
+        })
+        yield {"result": f"Error: {str(e)}", "metadata": metadata}
+
+# Conditional routing function
+def route_workflow(state: GraphState) -> str:
+    """Route to appropriate workflow based on file type"""
+    workflow_type = state.get("workflow_type", "standard")
+    return workflow_type
+
+
+
+
+async def process_query_streaming(query: str, file_upload, reports_filter: str = "", sources_filter: str = "",
+                                  subtype_filter: str = "", year_filter: str = "",
+                                  output_format: str = "structured"):
+    """
+    Unified streaming function that yields partial results
+
+    Args:
+        output_format: "structured" for dict format, "gradio" for plain text format
+    """
+    file_content = None
+    filename = None
+
+    if file_upload is not None:
+        try:
+            with open(file_upload.name, 'rb') as f:
+                file_content = f.read()
+            filename = os.path.basename(file_upload.name)
+            logger.info(f"File uploaded: {filename}, size: {len(file_content)} bytes")
+        except Exception as e:
+            logger.error(f"Error reading uploaded file: {str(e)}")
+            if output_format == "structured":
+                yield {"type": "error", "content": f"Error reading file: {str(e)}"}
+            else:
+                yield f"Error reading file: {str(e)}"
+            return
+
+    start_time = datetime.now()
+    session_id = f"gradio_{start_time.strftime('%Y%m%d_%H%M%S')}"
+
+    try:
+        # Process ingestion first (non-streaming)
+        initial_state = {
+            "query": query,
+            "context": "",
+            "ingestor_context": "",
+            "result": "",
+            "sources": [],
+            "reports_filter": reports_filter or "",
+            "sources_filter": sources_filter or "",
+            "subtype_filter": subtype_filter or "",
+            "year_filter": year_filter or "",
+            "file_content": file_content,
+            "filename": filename,
+            "file_type": "unknown",
+            "workflow_type": "standard",
+            "metadata": {
+                "session_id": session_id,
+                "start_time": start_time.isoformat(),
+                "has_file_attachment": file_content is not None
+            }
+        }
+
+        # Detect file type - merge the returned state with initial state
+        state_after_detect = {**initial_state, **detect_file_type_node(initial_state)}
+
+        # Ingest if file provided - merge the returned state
+        state_after_ingest = {**state_after_detect, **ingest_node(state_after_detect)}
+
+        # Route workflow
+        workflow_type = route_workflow(state_after_ingest)
+
+        if workflow_type == "geojson_direct":
+            # For GeoJSON, return direct result
+            final_state = geojson_direct_result_node(state_after_ingest)
+            if output_format == "structured":
+                yield {"type": "data", "content": final_state["result"]}
+                yield {"type": "end", "content": ""}
+            else:
+                yield final_state["result"]
+        else:
+            # For standard workflow, retrieve first - merge the returned state
+            state_after_retrieve = {**state_after_ingest, **retrieve_node(state_after_ingest)}
+
+            # Initialize variables for both output formats
+            sources_collected = None
+            accumulated_response = "" if output_format == "gradio" else None
+
+            # Then stream generation
+            async for partial_state in generate_node_streaming(state_after_retrieve):
+                if "result" in partial_state:
+                    if output_format == "structured":
+                        yield {"type": "data", "content": partial_state["result"]}
+                    else:
+                        # Accumulate the content and yield the full accumulated response
+                        accumulated_response += partial_state["result"]
+                        yield accumulated_response
+
+                # Collect sources for later
+                if "sources" in partial_state:
+                    sources_collected = partial_state["sources"]
+
+            # Handle sources based on output format
+            if sources_collected:
+                if output_format == "structured":
+                    yield {"type": "sources", "content": sources_collected}
+                else:
+                    # Append sources to accumulated response
+                    sources_text = "\n\n**Sources:**\n"
+                    for i, source in enumerate(sources_collected, 1):
+                        if isinstance(source, dict):
+                            title = source.get('title', 'Unknown')
+                            link = source.get('link', '#')
+                            sources_text += f"{i}. [{title}]({link})\n"
+                        else:
+                            sources_text += f"{i}. {source}\n"
+
+                    accumulated_response += sources_text
+                    yield accumulated_response
+
+        if output_format == "structured":
+            yield {"type": "end", "content": ""}
+
+    except Exception as e:
+        logger.error(f"Streaming pipeline failed: {str(e)}")
+        if output_format == "structured":
+            yield {"type": "error", "content": f"Error: {str(e)}"}
+        else:
+            yield f"Error: {str(e)}"