Spaces:

JoarP
/

enhancing-llama-with-icl-demo

Sleeping

App Files Files Community

Joar Paganus committed 15 days ago

Commit

efb82bb

1 Parent(s): 80c0059

update UI

Browse files

Files changed (2) hide show

agent.py +17 -3
app.py +158 -32

agent.py CHANGED Viewed

@@ -13,14 +13,13 @@ from llama_cpp import Llama
 def get_weather(location: str) -> str:
     """This tool returns the current weather situation.
     Args:
-        location: The city or place to check
     Returns:
         str: Weather situation (e.g. cloudy, rainy, sunny)
     """
     weather_situations = ["cloudy", "rainy", "sunny", "foobar"]
     return random.choice(weather_situations)
 def get_temperature(location: str) -> str:
     """This tool returns the current temperature.
     Args:
@@ -31,8 +30,23 @@ def get_temperature(location: str) -> str:
     temperature = ["-10", "0", "20", "30"]
     return random.choice(temperature)
-TOOLS = [get_weather, get_temperature]
 TOOL_REGISTRY = {f.__name__: f for f in TOOLS}

 def get_weather(location: str) -> str:
     """This tool returns the current weather situation.
     Args:
+        location: The city or place to check
     Returns:
         str: Weather situation (e.g. cloudy, rainy, sunny)
     """
+    # Placeholder data: one entry is picked at random; `location` is not consulted.
     weather_situations = ["cloudy", "rainy", "sunny", "foobar"]
     return random.choice(weather_situations)
 def get_temperature(location: str) -> str:
     """This tool returns the current temperature.
     Args:
     temperature = ["-10", "0", "20", "30"]
     return random.choice(temperature)
+def get_weather_forecast(location: str, days_ahead: str) -> str:
+    """This tool returns the weather forecast for the specified days ahead.
+    Args:
+        location: The city or place to check
+        days_ahead: How many days ahead of today
+    Returns:
+        str: Weather situation (e.g. cloudy, rainy, sunny)
+    """
+    # `location` is accepted but not used: the forecast is a fixed placeholder
+    # that depends only on `days_ahead`.
+    #
+    # `days_ahead` is annotated as str, so convert it before comparing.
+    # Comparing a str to an int raises TypeError. int() also accepts a real
+    # int unchanged and raises ValueError for non-numeric text.
+    days = int(days_ahead)
+    # Check the larger threshold first; with `> 0` tested first, the `> 10`
+    # branch could never be reached.
+    if days > 10:
+        return "Hurricane"
+    if days > 0:
+        return "Thunderstorm"
+    # Zero or negative offsets fall back to the default forecast.
+    return "Storm"
+# Tool functions collected in one list; TOOL_REGISTRY maps each function's
+# __name__ to the function object so a tool can be looked up by name.
+TOOLS = [get_weather, get_temperature, get_weather_forecast]
 TOOL_REGISTRY = {f.__name__: f for f in TOOLS}

app.py CHANGED Viewed

@@ -1,5 +1,3 @@
-# app.py
 import subprocess
 import sys
@@ -21,9 +19,10 @@ N_CTX = 2048
 N_THREADS = None
 import gradio as gr
-from agent import respond
 from llama_cpp import Llama
 # ------------- LOAD MODELS ON CPU --------------
 print("Loading finetuned model")
@@ -37,6 +36,8 @@ AVAILABLE_MODELS = {
     "Fine-tuned: Llama 3.2 3B FineTome (q4_k_m)": llm_ft,
 }
 # print("Loading base model...")
 # llm_base = Llama.from_pretrained(
 #     repo_id=BASE_REPO_ID,
@@ -64,8 +65,10 @@ AVAILABLE_MODELS = {
 #     FT_LOAD_ERROR = str(e)
 #     print(f"Could not load fine-tuned model yet: {e}")
 # System message:
-SYSTEM_MESSAGE = """
 You are a helpful assistant that answers user questions using any external information provided in the system message.
 The system message may include a section like:
@@ -75,7 +78,7 @@ followed by one or more lines of the form:
 Instructions:
 - Treat these tool results as ground truth for the current reply.
-- Use them to give a clear, concise, and friendly answer to the user’s latest question.
 - Do not repeat the raw tool logs verbatim unless it is natural to do so.
 - You may summarize or rephrase the results in natural language.
 - If multiple results are present, combine them into a single coherent answer.
@@ -96,10 +99,35 @@ Assistant:
 It's sunny in Berlin right now, with a temperature of about 20 degrees.
 """
 # ------------- WRAPPER FUNCTION ----------------
 # Needed to be able to pass the llm to respond() inside agent.py
-def app_respond(message, history, system_message, model_choice):
     """
     Wrapper used by Gradio.
     - model_choice: string from the dropdown (key in AVAILABLE_MODELS)
@@ -113,38 +141,136 @@ def app_respond(message, history, system_message, model_choice):
     for chunk in respond(message, history, system_message, llm):
         yield chunk
-# ------------- GRADIO UI ----------------
-model_dropdown = gr.Dropdown(
-    label="Model",
-    choices=list(AVAILABLE_MODELS.keys()),
-    value=list(AVAILABLE_MODELS.keys())[0],
-    interactive=True,
-)
-chatbot = gr.ChatInterface(
-    fn=app_respond,
-    type="messages",
-    additional_inputs=[
-        gr.State(SYSTEM_MESSAGE),
-        model_dropdown,
-    ],
-)
 with gr.Blocks() as demo:
-    gr.Markdown("# Llama 3.2 3B (CPU, GGUF) Base vs FineTome — Tool-Using Agent")
-    intro_text = (
-        "This Space runs GGUF-quantized Llama 3.2 3B models **on CPU** using `llama-cpp-python`,\n"
-        "and demonstrates a simple agent that can call Python tools like `get_weather` and `get_temperature`.\n\n"
-        "- **Base model**: Unsloth Llama-3.2-3B-Instruct (q4_k_m GGUF)\n"
-        "- **Fine-tuned model**: Llama-3.2-3B-Instruct fine tuned on FineTome (q4_k_m GGUF).\n\n"
-        "Ask things like:\n"
-        "- `What is the weather like in Berlin?`\n"
-        "- `What's the temperature in Stockholm?`\n"
     )
-    gr.Markdown(intro_text)
-    chatbot.render()
 if __name__ == "__main__":

 import subprocess
 import sys
 N_THREADS = None
 import gradio as gr
+from agent import respond, build_prompt
 from llama_cpp import Llama
 # ------------- LOAD MODELS ON CPU --------------
 print("Loading finetuned model")
     "Fine-tuned: Llama 3.2 3B FineTome (q4_k_m)": llm_ft,
 }
+# ------------- Using one model for faster deployment during development -------------------
 # print("Loading base model...")
 # llm_base = Llama.from_pretrained(
 #     repo_id=BASE_REPO_ID,
 #     FT_LOAD_ERROR = str(e)
 #     print(f"Could not load fine-tuned model yet: {e}")
 # System message:
+SYSTEM_MESSAGE_WEATHER = """
 You are a helpful assistant that answers user questions using any external information provided in the system message.
 The system message may include a section like:
 Instructions:
 - Treat these tool results as ground truth for the current reply.
+- Use them to give a clear, concise, and friendly answer to the user's latest question.
 - Do not repeat the raw tool logs verbatim unless it is natural to do so.
 - You may summarize or rephrase the results in natural language.
 - If multiple results are present, combine them into a single coherent answer.
 It's sunny in Berlin right now, with a temperature of about 20 degrees.
 """
+# System prompt for the plain chat mode: a general-purpose assistant persona
+# that is told not to claim access to tools or real-time data.
+SYSTEM_MESSAGE_GENERAL = """
+You are a friendly, helpful, and knowledgeable AI assistant.
+Your goals:
+- Give clear, accurate, and concise answers.
+- Be honest when you don't know something.
+- Use the conversation history to stay consistent.
+- Ask clarifying questions when the user’s request is ambiguous.
+- Avoid unnecessary repetition or overly long explanations.
+- Be polite, neutral, and informative.
+You can answer questions on any topic, including:
+- general knowledge
+- mathematics and reasoning
+- writing and summarization
+- programming and debugging
+- everyday advice and explanations
+Do not claim access to external tools, APIs, the internet, or real-time data.
+All your responses must be based only on your internal knowledge and the conversation context.
+Your tone: helpful, calm, and professional.
+"""
 # ------------- WRAPPER FUNCTION ----------------
 # Needed to be able to pass the llm to respond() inside agent.py
+def app_respond(message, history, system_message, model_choice="Base: Llama 3.2 3B Instruct (q4_k_m)"):
     """
     Wrapper used by Gradio.
     - model_choice: string from the dropdown (key in AVAILABLE_MODELS)
     for chunk in respond(message, history, system_message, llm):
         yield chunk
+# ------------- No agent and just a single LLM call ----------------
+def respond_fast(message, history, system_message, model_choice):
+    """
+    Tool-free chat path: one streaming completion per user message.
+
+    Resolves the model named by `model_choice` (falling back to the first
+    entry of AVAILABLE_MODELS when the key is unknown), builds the prompt
+    from the system message, history and new message, and yields the reply
+    text accumulated so far after every streamed chunk.
+    """
+    # Resolve the dropdown selection; unknown keys fall back to the first model.
+    selected = AVAILABLE_MODELS.get(model_choice)
+    llm = next(iter(AVAILABLE_MODELS.values())) if selected is None else selected
+
+    # Sampling settings for the single generation call.
+    generation_kwargs = {
+        "max_tokens": 256,
+        "temperature": 0.7,
+        "top_p": 0.9,
+        "stop": ["User:", "System:"],
+        "stream": True,
+    }
+
+    # Yield the cumulative text so the caller always receives the full reply so far.
+    text_so_far = ""
+    for piece in llm(build_prompt(system_message, history, message), **generation_kwargs):
+        text_so_far += piece["choices"][0]["text"]
+        yield text_so_far
+# ------------- GRADIO UI ----------------
 with gr.Blocks() as demo:
+    # Two-tab layout: tab 1 chats through respond_fast with the general
+    # system prompt, tab 2 chats through app_respond with the weather prompt.
+    gr.Markdown(
+        "# Finetuned Llama 3.2 3B (CPU, GGUF) in an Agentic Framework\n"
+        "Switch between a general assistant and a live weather assistant."
     )
+    with gr.Tabs():
+        # -------- TAB 1: GENERAL LLM ASSISTANT --------
+        with gr.Tab("💬 General Assistant"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown(
+                        "### General Assistant\n"
+                        "Chat with the base or fine-tuned model. Use this mode for any kind of question."
+                    )
+                    model_dropdown = gr.Dropdown(
+                        label="Model",
+                        choices=list(AVAILABLE_MODELS.keys()),
+                        value=list(AVAILABLE_MODELS.keys())[0],
+                        interactive=True,
+                    )
+                with gr.Column(scale=3, elem_id="general-chat"):
+                    # The system prompt and the dropdown value are passed to
+                    # respond_fast as extra arguments after (message, history).
+                    general_chatbot = gr.ChatInterface(
+                        fn=respond_fast,
+                        additional_inputs=[
+                            gr.State(SYSTEM_MESSAGE_GENERAL),
+                            model_dropdown,
+                        ],
+                    )
+        # -------- TAB 2: LIVE WEATHER ASSISTANT --------
+        with gr.Tab("☀️ LIVE Weather Assistant"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown("### Live Weather Assistant\n"
+                            "Fetches up to date weather data"
+                    )
+                    # NOTE(review): this rebinds the name `model_dropdown` used in
+                    # tab 1. Tab 1's ChatInterface was already given its own
+                    # dropdown object above, so each tab uses a separate component;
+                    # a distinct name would be clearer.
+                    model_dropdown = gr.Dropdown(
+                        label="Model",
+                        choices=list(AVAILABLE_MODELS.keys()),
+                        value=list(AVAILABLE_MODELS.keys())[0],
+                        interactive=True,
+                    )
+                with gr.Column(scale=3, elem_id="weather-chat"):
+                    # 🌤️ ASSISTANT HEADER (name + avatar + tagline)
+                    gr.HTML(
+                        """
+                        <div style="
+                            display: flex;
+                            align-items: center;
+                            gap: 15px;
+                            padding: 12px 16px;
+                            border-radius: 12px;
+                            margin-bottom: 10px;
+                        ">
+                            <!-- Avatar circle -->
+                            <div style="
+                                width: 64px;
+                                height: 64px;
+                                border-radius: 50%;
+                                background: radial-gradient(circle at 30% 30%, #facc15, #eab308, #ca8a04);
+                                display: flex;
+                                align-items: center;
+                                justify-content: center;
+                                font-weight: 700;
+                                font-size: 26px;
+                                color: #1f2937;
+                                box-shadow: 0 4px 10px rgba(0,0,0,0.15);
+                            ">
+                                M
+                            </div>
+                            <!-- Name + description -->
+                            <div>
+                                <div style="font-size: 20px; font-weight: 700; color: #333;">
+                                    Meteo-Mila
+                                </div>
+                                <div style="font-size: 14px; color: #555; margin-top: 2px;">
+                                    I know everything about the current weather and temperature.<br>
+                                    I can also provide forecasts into the future! 🌦️
+                                </div>
+                            </div>
+                        </div>
+                        """
+                    )
+                    # NOTE(review): the name `general_chatbot` is reused here for
+                    # the weather chat, which goes through app_respond instead
+                    # of respond_fast.
+                    general_chatbot = gr.ChatInterface(
+                        fn=app_respond,
+                        additional_inputs=[
+                            gr.State(SYSTEM_MESSAGE_WEATHER),
+                            model_dropdown,
+                        ],
+                    )
 if __name__ == "__main__":