Spaces:

JoarP
/

enhancing-llama-with-icl-demo

Sleeping

App Files Files Community

Joar Paganus committed on 17 days ago

Commit

80c0059

1 Parent(s): 60e4df4

add agent and dummy tools

Browse files

Files changed (2) hide show

agent.py +251 -0
app.py +96 -119

agent.py ADDED Viewed

	@@ -0,0 +1,251 @@

+import random
+import inspect
+import re
+from llama_cpp import Llama
+# ------------- TOOLS / FUNCTIONS --------------
+# Some of the structure of the agent has been inspired by:
+# https://github.com/Pirner/zettelkasten/blob/main/main_notes/1_0_tool_calling_with_llama.py?source=post_page-----23e3d783a6d8---------------------------------------
def get_weather(location: str) -> str:
    """This tool returns the current weather situation.
    Args:
        location: The city or place to check
    Returns:
        str: Weather situation (e.g. cloudy, rainy, sunny)
    """
    # NOTE: the docstring above is read through ``__doc__`` and sent to the
    # model as the tool description, so its wording is part of the prompt.
    # Dummy tool: ``location`` is ignored and a canned answer is drawn at random.
    return random.choice(["cloudy", "rainy", "sunny", "foobar"])
def get_temperature(location: str) -> str:
    """This tool returns the current temperature.
    Args:
        location: The city or place to check
    Returns:
        str: Temperature
    """
    # NOTE: the docstring above is read through ``__doc__`` and sent to the
    # model as the tool description, so its wording is part of the prompt.
    # Dummy tool: ``location`` is ignored; the value is returned as a string.
    return random.choice(["-10", "0", "20", "30"])
# Every callable the agent is allowed to invoke.
TOOLS = [get_weather, get_temperature]
# Name -> callable lookup used when executing the calls chosen by the model.
TOOL_REGISTRY = {tool.__name__: tool for tool in TOOLS}
def function_to_json(func) -> dict:
    """Describe a Python function as a JSON-serializable tool schema.

    The function's signature is inspected and converted into a
    ``{"type": "function", "function": {...}}`` dictionary holding the
    function name, its docstring and its parameters.

    Args:
        func: The callable to describe. ``func.__name__`` becomes the tool
            name and ``func.__doc__`` (or ``""`` when missing) the description.

    Returns:
        dict: The schema. ``parameters.properties`` maps each named parameter
        to its JSON type (``"string"`` when the annotation is missing or
        unknown); ``parameters.required`` lists the parameters that have no
        default value. ``*args`` / ``**kwargs`` catch-alls are left out
        because they cannot be supplied as a single named argument.

    Raises:
        ValueError: If ``inspect.signature`` cannot produce a signature.
    """
    type_map = {
        str: "string",
        int: "integer",
        float: "number",
        bool: "boolean",
        list: "array",
        dict: "object",
        type(None): "null",
    }
    # Annotations can be plain strings (e.g. when the defining module uses
    # ``from __future__ import annotations``), so also allow lookup by name.
    name_map = {py_type.__name__: json_type for py_type, json_type in type_map.items()}
    name_map["None"] = "null"

    try:
        signature = inspect.signature(func)
    except ValueError as e:
        raise ValueError(
            f"Failed to get signature for function {func.__name__}: {str(e)}"
        ) from e

    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    parameters = {}
    required = []
    for param in signature.parameters.values():
        if param.kind in variadic_kinds:
            continue
        annotation = param.annotation
        if isinstance(annotation, str):
            param_type = name_map.get(annotation, "string")
        else:
            try:
                param_type = type_map.get(annotation, "string")
            except TypeError:
                # Unhashable annotation object: fall back to the default type.
                param_type = "string"
        parameters[param.name] = {"type": param_type}
        # Identity check against the public sentinel; ``==`` could invoke an
        # arbitrary ``__eq__`` defined by the default value.
        if param.default is inspect.Parameter.empty:
            required.append(param.name)

    return {
        "type": "function",
        "function": {
            "name": func.__name__,
            "description": func.__doc__ or "",
            "parameters": {
                "type": "object",
                "properties": parameters,
                "required": required,
            },
        },
    }
# Schema for every registered tool, in the same order as TOOLS.
TOOLS_SCHEMA = list(map(function_to_json, TOOLS))
# Start of a call: an identifier immediately followed by "(".
_CALL_START_RE = re.compile(r"(\w+)\(")
_QUOTE_CHARS = "\"'"


def _find_args_end(text: str, start: int) -> int:
    """Return the index of the ")" closing the argument list, or -1 if none."""
    quote = None
    for idx in range(start, len(text)):
        ch = text[idx]
        if quote is not None:
            if ch == quote:
                quote = None
        elif ch in _QUOTE_CHARS:
            quote = ch
        elif ch == ")":
            return idx
    # Unbalanced quotes (e.g. a stray apostrophe): fall back to the first ")".
    return text.find(")", start)


def _split_args(arg_str: str) -> list:
    """Split an argument string on commas that are not inside quotes."""
    parts = []
    current = []
    quote = None
    for ch in arg_str:
        if quote is not None:
            if ch == quote:
                quote = None
        elif ch in _QUOTE_CHARS:
            quote = ch
        elif ch == ",":
            parts.append("".join(current))
            current = []
            continue
        current.append(ch)
    if quote is not None:
        # Unbalanced quotes: quoting cannot be trusted, split on every comma.
        return re.split(r",\s*", arg_str)
    parts.append("".join(current))
    return parts


def parse_tool_calls(tool_output: str):
    """Parse model output such as ``[get_weather(location="Berlin")]``.

    Every ``name(...)`` occurrence is treated as a call. Arguments are read as
    ``key=value`` pairs; pieces without ``=`` are ignored. Commas and closing
    parentheses inside a quoted value belong to that value, so
    ``location="Paris, France"`` stays one argument.

    Args:
        tool_output: Raw text produced by the tool-selection prompt.

    Returns:
        list: ``(func_name, kwargs)`` tuples in order of appearance. Values
        have surrounding quotes removed and are converted to ``int`` or
        ``float`` when they look numeric, otherwise kept as strings.
    """
    calls = []
    pos = 0
    while True:
        match = _CALL_START_RE.search(tool_output, pos)
        if match is None:
            break
        args_end = _find_args_end(tool_output, match.end())
        if args_end == -1:
            # No ")" anywhere after this point, so no further call can close.
            break
        func_name = match.group(1).strip()
        arg_str = tool_output[match.end():args_end].strip()
        kwargs = {}
        if arg_str:
            for part in _split_args(arg_str):
                if "=" not in part:
                    continue
                key, val = part.split("=", 1)
                key = key.strip()
                val = val.strip().strip('"').strip("'")
                # Try to cast numbers, else keep as string
                try:
                    parsed_val = float(val) if "." in val else int(val)
                except ValueError:
                    parsed_val = val
                kwargs[key] = parsed_val
        calls.append((func_name, kwargs))
        pos = args_end + 1
    return calls
+# ------------- HELPER: GENERATION -------------
def generate_non_stream(llm, prompt, max_tokens=256, temperature=0.2, top_p=0.95):
    """Run a single non-streaming completion and return its text.

    Args:
        llm: Callable model object; invoked as ``llm(prompt, **options)``.
        prompt: Full prompt text to complete.
        max_tokens: Forwarded to ``llm`` as ``max_tokens``.
        temperature: Forwarded to ``llm`` as ``temperature``.
        top_p: Forwarded to ``llm`` as ``top_p``.

    Returns:
        The ``"text"`` field of the first entry in the result's ``"choices"``.
    """
    options = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        # Role markers of the prompt format, passed as stop sequences.
        "stop": ["User:", "System:"],
        "stream": False,
    }
    completion = llm(prompt, **options)
    first_choice = completion["choices"][0]
    return first_choice["text"]
def build_prompt(system_message, history, user_message):
    """Assemble the plain-text chat prompt sent to the model.

    Args:
        system_message: Text placed on the leading ``System:`` line.
        history: Iterable of earlier turns, each a mapping with ``"role"``
            and ``"content"`` keys; the role is capitalized to form the label.
        user_message: The newest user message.

    Returns:
        str: The prompt, ending with an open ``Assistant:`` label for the
        model to continue.
    """
    segments = [f"System: {system_message}\n"]
    for turn in history:
        speaker, text = turn["role"], turn["content"]
        segments.append(f"{speaker.capitalize()}: {text}\n")
    segments.append(f"User: {user_message}\nAssistant:")
    return "".join(segments)
def select_tools_with_llm(llm, user_message: str) -> list:
    """Ask the model which tools to call for a user message.

    A dedicated tool-selection prompt containing the tool schema is sent to
    the model, and the reply is parsed into tool calls.

    Args:
        llm: Model object passed through to ``generate_non_stream``.
        user_message: The user's latest message.

    Returns:
        list: ``(func_name, kwargs)`` tuples as produced by
        ``parse_tool_calls``; empty when the reply contains no call.
    """
    import json

    # The prompt announces the tool list as JSON, so serialize it as real JSON
    # rather than interpolating the Python repr (single-quoted dicts).
    tools_json = json.dumps(TOOLS_SCHEMA)
    tool_selection_system = f"""
You are an expert in composing functions.
You are given a user question and a set of possible functions (tools).
Your job is to decide which tools to call and with what arguments.
Rules:
- If you decide to invoke any function(s), you MUST put them in the format:
  [func_name1(param1=value1, param2=value2), func_name2(param1=value1)]
- If none of the functions are suitable, respond with: []
- Do NOT include any explanation or extra text, only the list.
- If the question lacks required parameters, respond with [].
Here is a list of functions in JSON format that you can invoke:
{tools_json}
"""
    prompt = (
        f"System: {tool_selection_system}\n"
        f"User: {user_message}\n"
        f"Assistant:"
    )
    raw = generate_non_stream(
        llm,
        prompt,
        max_tokens=256,
        temperature=0.2,
        top_p=0.95,
    )
    return parse_tool_calls(raw)
def call_tools(tool_calls):
    """Run each requested tool and collect the outcomes.

    Args:
        tool_calls: Iterable of ``(func_name, kwargs)`` pairs.

    Returns:
        list: One dict per call with keys ``"name"``, ``"args"`` and
        ``"result"``. For a name missing from ``TOOL_REGISTRY`` or a tool
        that raises, ``"result"`` holds an explanatory message instead.
    """
    outcomes = []
    for name, arguments in tool_calls:
        tool = TOOL_REGISTRY.get(name)
        if tool is not None:
            # Best effort: a failing tool is reported as text rather than
            # aborting the whole reply.
            try:
                value = tool(**arguments)
            except Exception as exc:
                value = f"Error while calling {name}: {exc}"
        else:
            value = f"Unknown tool '{name}'."
        outcomes.append({"name": name, "args": arguments, "result": value})
    return outcomes
+# ------------- CHAT + AGENT LOGIC -------------
def respond(message, history, system_message, llm):
    """Answer a chat message, optionally using tool results, as a stream.

    Args:
        message: The user's latest message.
        history: Earlier turns, forwarded to ``build_prompt``.
        system_message: Base system prompt; a log of executed tools is
            appended to it when any tool was run.
        llm: Callable model object used for tool selection and the answer.

    Yields:
        str: The answer accumulated so far, once per streamed chunk.
    """
    # 1) Let the model pick tools, then execute whatever it selected.
    requested = select_tools_with_llm(llm, message)
    executed = call_tools(requested) if requested else []

    # 2) Append the tool log to the system message, if there is one.
    final_system_message = system_message
    if executed:
        log_lines = [
            f"- {entry['name']}({entry['args']}) -> {entry['result']}\n"
            for entry in executed
        ]
        final_system_message = (
            system_message
            + "\nYou have executed the following tools (name, args, result):\n"
            + "".join(log_lines)
        )

    # 3) Stream the final answer from a normal chat-style prompt.
    prompt = build_prompt(final_system_message, history, message)
    chunks = llm(
        prompt,
        max_tokens=256,
        temperature=0.7,
        top_p=0.9,
        stop=["User:", "System:"],
        stream=True,
    )
    answer = ""
    for chunk in chunks:
        answer += chunk["choices"][0]["text"]
        yield answer

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import subprocess
 import sys
@@ -7,20 +9,6 @@ subprocess.run(
     check=True,
 )
-import gradio as gr
-import llama_cpp
-from llama_cpp import Llama
-# --- Workaround for llama-cpp-python shutdown bug on HF Spaces ---
-# Avoid calling C-level free_model after the module is partially torn down.
-def _llama_noop_del(self):
-    # Intentionally do nothing on interpreter shutdown to avoid:
-    # TypeError: 'NoneType' object is not callable in free_model
-    pass
-Llama.__del__ = _llama_noop_del
-# -----------------------------------------------------------------
 # ---------------- CONFIG ----------------
 BASE_REPO_ID = "unsloth/Llama-3.2-3B-Instruct-GGUF"
@@ -32,103 +20,98 @@ FT_FILENAME = "v1"
 N_CTX = 2048
 N_THREADS = None
-# ---------------- SYSTEM MESSAGE WITH ICL ----------------
-SYSTEM_MESSAGE = """
-You are a helpful, knowledgeable assistant fine-tuned on the FineTome dataset.
-When answering:
-- Use the user's selected latitude and longitude to provide location-aware insights.
-- Be concise, factual, and structured.
-- If the user asks a geography-, travel-, or environment-related question, incorporate the location.
-- If the location is missing, answer normally.
-### Example interaction:
-User selected location: latitude 46.02000, longitude 7.74900
-User: "What can I do here?"
-Assistant: "This location is in the Alps near Zermatt, Switzerland. Popular activities include skiing, mountaineering, and high-alpine hiking."
-### Example interaction:
-User selected location: latitude 59.32930, longitude 18.06860
-User: "Tell me something about this place."
-Assistant: "This point is in central Stockholm, Sweden. Attractions include Gamla Stan, the Royal Palace, and the surrounding archipelago."
-""".strip()
 # ------------- LOAD MODELS ON CPU --------------
-print("Loading base model...")
-llm_base = Llama.from_pretrained(
-    repo_id=BASE_REPO_ID,
-    filename=BASE_FILENAME,
-    n_ctx=N_CTX,
-    n_threads=N_THREADS,
-)
-AVAILABLE_MODELS = {
-    "Base: Llama 3.2 3B Instruct (q4_k_m)": llm_base,
-}
-try:
-    print("Attempting to load fine-tuned model...")
-    llm_ft = Llama.from_pretrained(
         repo_id=FT_REPO_ID,
         filename=FT_FILENAME,
         n_ctx=N_CTX,
         n_threads=N_THREADS,
     )
-    AVAILABLE_MODELS["Fine-tuned: Llama 3.2 3B FineTome (q4_k_m)"] = llm_ft
-    FT_LOAD_ERROR = None
-except Exception as e:
-    llm_ft = None
-    FT_LOAD_ERROR = str(e)
-    print(f"Could not load fine-tuned model yet: {e}")
-# ------------- PROMPT + CHAT LOGIC -------------
-def build_prompt(system_message, history, user_message):
-    prompt = f"System: {system_message}\n"
-    for turn in history:
-        role = turn["role"]
-        content = turn["content"]
-        prompt += f"{role.capitalize()}: {content}\n"
-    prompt += f"User: {user_message}\nAssistant:"
-    return prompt
-def respond(message, history, model_choice, coords):
-    # Start with the fixed system message
-    system_message = SYSTEM_MESSAGE
-    # Inject coordinates if user clicked on the map
-    if coords is not None and len(coords) == 2:
-        lat, lon = coords
-        system_message += (
-            f"\n\nUser selected the location with latitude {lat:.5f} "
-            f"and longitude {lon:.5f}."
-        )
-    # Pick the model
-    llm = AVAILABLE_MODELS.get(model_choice, llm_base)
-    prompt = build_prompt(system_message, history, message)
-    stream = llm(
-        prompt,
-        max_tokens=256,
-        temperature=0.7,
-        top_p=0.9,
-        stop=["User:", "System:"],
-        stream=True,
-    )
-    partial = ""
-    for out in stream:
-        token = out["choices"][0]["text"]
-        partial += token
-        yield partial
 # ------------- GRADIO UI ----------------
@@ -139,36 +122,30 @@ model_dropdown = gr.Dropdown(
     interactive=True,
 )
-location_map = gr.Map(
-    label="Click on the map to choose a location",
-    interactive=True,
-)
 chatbot = gr.ChatInterface(
-    fn=respond,
     type="messages",
     additional_inputs=[
         model_dropdown,
-        location_map,
     ],
 )
 with gr.Blocks() as demo:
-    gr.Markdown("# Llama 3.2 3B (CPU, GGUF) — Base vs FineTome (with Location Awareness)")
     intro_text = (
-        "This Space runs GGUF-quantized Llama 3.2 3B models **on CPU** using `llama-cpp-python`.\n\n"
         "- **Base model**: Unsloth Llama-3.2-3B-Instruct (q4_k_m GGUF)\n"
-        "- **Fine-tuned model**: Llama-3.2-3B-Instruct fine tuned on FineTome (q4_k_m GGUF)\n\n"
-        "Click anywhere on the map to give the assistant a location context."
     )
-    if FT_LOAD_ERROR is not None:
-        intro_text += (
-            f"\n\n⚠️ Fine-tuned model is not loaded:\n`{FT_LOAD_ERROR}`\n"
-            "Only the base model is available."
-        )
     gr.Markdown(intro_text)
     chatbot.render()
 if __name__ == "__main__":
-    demo.launch()

+# app.py
 import subprocess
 import sys
     check=True,
 )
 # ---------------- CONFIG ----------------
 BASE_REPO_ID = "unsloth/Llama-3.2-3B-Instruct-GGUF"
 N_CTX = 2048
 N_THREADS = None
+import gradio as gr
+from agent import respond
+from llama_cpp import Llama
 # ------------- LOAD MODELS ON CPU --------------
+print("Loading finetuned model")
+llm_ft = Llama.from_pretrained(
         repo_id=FT_REPO_ID,
         filename=FT_FILENAME,
         n_ctx=N_CTX,
         n_threads=N_THREADS,
     )
+AVAILABLE_MODELS = {
+    "Fine-tuned: Llama 3.2 3B FineTome (q4_k_m)": llm_ft,
+}
+# print("Loading base model...")
+# llm_base = Llama.from_pretrained(
+#     repo_id=BASE_REPO_ID,
+#     filename=BASE_FILENAME,
+#     n_ctx=N_CTX,
+#     n_threads=N_THREADS,
+# )
+# AVAILABLE_MODELS = {
+#     "Base: Llama 3.2 3B Instruct (q4_k_m)": llm_base,
+# }
+# try:
+#     print("Attempting to load fine-tuned model...")
+#     llm_ft = Llama.from_pretrained(
+#         repo_id=FT_REPO_ID,
+#         filename=FT_FILENAME,
+#         n_ctx=N_CTX,
+#         n_threads=N_THREADS,
+#     )
+#     AVAILABLE_MODELS["Fine-tuned: Llama 3.2 3B FineTome (q4_k_m)"] = llm_ft
+#     FT_LOAD_ERROR = None
+# except Exception as e:
+#     llm_ft = None
+#     FT_LOAD_ERROR = str(e)
+#     print(f"Could not load fine-tuned model yet: {e}")
+# System message:
+SYSTEM_MESSAGE = """
+You are a helpful assistant that answers user questions using any external information provided in the system message.
+The system message may include a section like:
+"You have executed the following tools (name, args, result):"
+followed by one or more lines of the form:
+- tool_name(args_dict) -> result_value
+Instructions:
+- Treat these tool results as ground truth for the current reply.
+- Use them to give a clear, concise, and friendly answer to the user’s latest question.
+- Do not repeat the raw tool logs verbatim unless it is natural to do so.
+- You may summarize or rephrase the results in natural language.
+- If multiple results are present, combine them into a single coherent answer.
+- If no tool results are present, answer the question based on your own knowledge and the conversation history.
+- Do not mention that you are using “tools” or “tool calls”; just speak as a normal assistant.
+=== EXAMPLE ===
+System (excerpt):
+You have executed the following tools (name, args, result):
+- get_temperature({'location': 'Berlin'}) -> 20
+- get_weather({'location': 'Berlin'}) -> sunny
+User:
+What is it like in Berlin right now?
+Assistant:
+It's sunny in Berlin right now, with a temperature of about 20 degrees.
+"""
+# ------------- WRAPPER FUNCTION ----------------
+# Needed to be able to pass the llm to respond() inside agent.py
def app_respond(message, history, system_message, model_choice):
    """Gradio-facing wrapper that resolves the model and streams the reply.

    Args:
        message: The user's latest message.
        history: Earlier chat turns.
        system_message: System prompt handed on to ``respond``.
        model_choice: Dropdown string, expected to be a key of
            ``AVAILABLE_MODELS``.

    Yields:
        Whatever ``respond`` yields for the resolved model.
    """
    selected = AVAILABLE_MODELS.get(model_choice)
    # Unknown choice: fall back to the first registered model.
    llm = next(iter(AVAILABLE_MODELS.values())) if selected is None else selected
    # respond() needs the llm object itself, not the dropdown label.
    yield from respond(message, history, system_message, llm)
 # ------------- GRADIO UI ----------------
     interactive=True,
 )
 chatbot = gr.ChatInterface(
+    fn=app_respond,
     type="messages",
     additional_inputs=[
+        gr.State(SYSTEM_MESSAGE),
         model_dropdown,
     ],
 )
 with gr.Blocks() as demo:
+    gr.Markdown("# Llama 3.2 3B (CPU, GGUF) Base vs FineTome — Tool-Using Agent")
     intro_text = (
+        "This Space runs GGUF-quantized Llama 3.2 3B models **on CPU** using `llama-cpp-python`,\n"
+        "and demonstrates a simple agent that can call Python tools like `get_weather` and `get_temperature`.\n\n"
         "- **Base model**: Unsloth Llama-3.2-3B-Instruct (q4_k_m GGUF)\n"
+        "- **Fine-tuned model**: Llama-3.2-3B-Instruct fine tuned on FineTome (q4_k_m GGUF).\n\n"
+        "Ask things like:\n"
+        "- `What is the weather like in Berlin?`\n"
+        "- `What's the temperature in Stockholm?`\n"
     )
     gr.Markdown(intro_text)
     chatbot.render()
 if __name__ == "__main__":
+    demo.launch()