Joar Paganus committed on
Commit 80c0059 · 1 Parent(s): 60e4df4

add agent and dummy tools

Files changed (2)
  1. agent.py +251 -0
  2. app.py +96 -119
agent.py ADDED
@@ -0,0 +1,251 @@
+ import random
+ import inspect
+ import re
+
+ from llama_cpp import Llama
+
+
+ # ------------- TOOLS / FUNCTIONS --------------
+ # Some of the structure of the agent has been inspired by:
+ # https://github.com/Pirner/zettelkasten/blob/main/main_notes/1_0_tool_calling_with_llama.py
+
+
+ def get_weather(location: str) -> str:
+     """This tool returns the current weather situation.
+     Args:
+         location: The city or place to check
+     Returns:
+         str: Weather situation (e.g. cloudy, rainy, sunny)
+     """
+     weather_situations = ["cloudy", "rainy", "sunny", "foobar"]
+     return random.choice(weather_situations)
+
+
+ def get_temperature(location: str) -> str:
+     """This tool returns the current temperature.
+     Args:
+         location: The city or place to check
+     Returns:
+         str: Temperature
+     """
+     temperature = ["-10", "0", "20", "30"]
+     return random.choice(temperature)
+
+
+ TOOLS = [get_weather, get_temperature]
+ TOOL_REGISTRY = {f.__name__: f for f in TOOLS}
+
+
+ def function_to_json(func) -> dict:
+     """
+     Converts a Python function into a JSON-serializable dictionary
+     that describes the function's signature, including its name,
+     description, and parameters.
+     """
+     type_map = {
+         str: "string",
+         int: "integer",
+         float: "number",
+         bool: "boolean",
+         list: "array",
+         dict: "object",
+         type(None): "null",
+     }
+
+     try:
+         signature = inspect.signature(func)
+     except ValueError as e:
+         raise ValueError(
+             f"Failed to get signature for function {func.__name__}: {str(e)}"
+         )
+
+     parameters = {}
+     for param in signature.parameters.values():
+         param_type = type_map.get(param.annotation, "string")
+         parameters[param.name] = {"type": param_type}
+
+     required = [
+         param.name
+         for param in signature.parameters.values()
+         if param.default == inspect._empty
+     ]
+
+     return {
+         "type": "function",
+         "function": {
+             "name": func.__name__,
+             "description": func.__doc__ or "",
+             "parameters": {
+                 "type": "object",
+                 "properties": parameters,
+                 "required": required,
+             },
+         },
+     }
+
+
+ TOOLS_SCHEMA = [function_to_json(f) for f in TOOLS]
+
+
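+ # Illustrative example: function_to_json(get_weather) produces a schema of this
+ # shape (description abridged here; at runtime it is the full docstring):
+ # {
+ #     "type": "function",
+ #     "function": {
+ #         "name": "get_weather",
+ #         "description": "This tool returns the current weather situation. ...",
+ #         "parameters": {
+ #             "type": "object",
+ #             "properties": {"location": {"type": "string"}},
+ #             "required": ["location"],
+ #         },
+ #     },
+ # }
+
+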
+ def parse_tool_calls(tool_output: str):
+     """
+     Very simple parser for outputs like:
+         [get_weather(location="Berlin")]
+     Returns a list of (func_name, kwargs) tuples.
+     """
+     calls = []
+     # Find patterns like func_name(...)
+     for match in re.finditer(r"(\w+)\((.*?)\)", tool_output, re.DOTALL):
+         func_name, arg_str = match.groups()
+         func_name = func_name.strip()
+         kwargs = {}
+
+         arg_str = arg_str.strip()
+         if arg_str:
+             parts = re.split(r",\s*", arg_str)
+             for part in parts:
+                 if "=" not in part:
+                     continue
+                 key, val = part.split("=", 1)
+                 key = key.strip()
+                 val = val.strip().strip('"').strip("'")
+
+                 # Try to cast numbers, else keep as string
+                 try:
+                     if "." in val:
+                         parsed_val = float(val)
+                     else:
+                         parsed_val = int(val)
+                 except ValueError:
+                     parsed_val = val
+                 kwargs[key] = parsed_val
+
+         calls.append((func_name, kwargs))
+
+     return calls
+
+
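+ # Illustrative example, assuming the model emitted the expected list format:
+ #     parse_tool_calls('[get_weather(location="Berlin"), get_temperature(location="Oslo")]')
+ # returns:
+ #     [("get_weather", {"location": "Berlin"}), ("get_temperature", {"location": "Oslo"})]
+
+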
+ # ------------- HELPER: GENERATION -------------
+
+ def generate_non_stream(llm, prompt, max_tokens=256, temperature=0.2, top_p=0.95):
+     """One-shot generation for internal agent/tool prompts."""
+     out = llm(
+         prompt,
+         max_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+         stop=["User:", "System:"],
+         stream=False,
+     )
+     return out["choices"][0]["text"]
+
+
+ def build_prompt(system_message, history, user_message):
+     prompt = f"System: {system_message}\n"
+     for turn in history:
+         role = turn["role"]
+         content = turn["content"]
+         prompt += f"{role.capitalize()}: {content}\n"
+     prompt += f"User: {user_message}\nAssistant:"
+     return prompt
+
+
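+ # Illustrative example:
+ #     build_prompt("Be brief.", [{"role": "user", "content": "Hi"},
+ #                                {"role": "assistant", "content": "Hello!"}], "How are you?")
+ # produces:
+ #     "System: Be brief.\nUser: Hi\nAssistant: Hello!\nUser: How are you?\nAssistant:"
+
+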
+ def select_tools_with_llm(llm, user_message: str) -> list:
+     """
+     Ask the model which tools to call.
+     Returns a list of (func_name, kwargs) from parse_tool_calls.
+     """
+     tool_selection_system = f"""
+ You are an expert in composing functions.
+ You are given a user question and a set of possible functions (tools).
+
+ Your job is to decide which tools to call and with what arguments.
+
+ Rules:
+ - If you decide to invoke any function(s), you MUST put them in the format:
+ [func_name1(param1=value1, param2=value2), func_name2(param1=value1)]
+ - If none of the functions are suitable, respond with: []
+ - Do NOT include any explanation or extra text, only the list.
+ - If the question lacks required parameters, respond with [].
+
+ Here is a list of functions in JSON format that you can invoke:
+ {TOOLS_SCHEMA}
+ """
+
+     prompt = (
+         f"System: {tool_selection_system}\n"
+         f"User: {user_message}\n"
+         f"Assistant:"
+     )
+
+     raw = generate_non_stream(
+         llm,
+         prompt,
+         max_tokens=256,
+         temperature=0.2,
+         top_p=0.95,
+     )
+
+     return parse_tool_calls(raw)
+
+
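+ # Illustrative example: for "What is the weather like in Berlin?" the model is
+ # expected (though not guaranteed) to emit something like:
+ #     [get_weather(location="Berlin")]
+ # which parse_tool_calls turns into [("get_weather", {"location": "Berlin"})].
+
+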
+ def call_tools(tool_calls):
+     """
+     Execute the tools chosen by the model.
+     Returns a list of dicts: {name, args, result}.
+     """
+     results = []
+     for func_name, kwargs in tool_calls:
+         func = TOOL_REGISTRY.get(func_name)
+         if func is None:
+             results.append(
+                 {
+                     "name": func_name,
+                     "args": kwargs,
+                     "result": f"Unknown tool '{func_name}'.",
+                 }
+             )
+             continue
+
+         try:
+             res = func(**kwargs)
+         except Exception as e:
+             res = f"Error while calling {func_name}: {e}"
+
+         results.append({"name": func_name, "args": kwargs, "result": res})
+     return results
+
+
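+ # Illustrative example: call_tools([("get_weather", {"location": "Berlin"})])
+ # might return (the result is random, since the tools are dummies):
+ #     [{"name": "get_weather", "args": {"location": "Berlin"}, "result": "sunny"}]
+
+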
+ # ------------- CHAT + AGENT LOGIC -------------
+
+ def respond(message, history, system_message, llm):
+     # ---- 1) Let the model decide if any tools should be used ----
+     tool_calls = select_tools_with_llm(llm, message)
+     tool_results = call_tools(tool_calls) if tool_calls else []
+
+     # ---- 2) Build final system message including tool results ----
+     if tool_results:
+         tool_info_str = "\nYou have executed the following tools (name, args, result):\n"
+         for tr in tool_results:
+             tool_info_str += f"- {tr['name']}({tr['args']}) -> {tr['result']}\n"
+         final_system_message = system_message + tool_info_str
+     else:
+         final_system_message = system_message
+
+     # ---- 3) Use normal chat-style prompt to answer the user ----
+     prompt = build_prompt(final_system_message, history, message)
+
+     stream = llm(
+         prompt,
+         max_tokens=256,
+         temperature=0.7,
+         top_p=0.9,
+         stop=["User:", "System:"],
+         stream=True,
+     )
+
+     partial = ""
+     for out in stream:
+         token = out["choices"][0]["text"]
+         partial += token
+         yield partial
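+
+ # Illustrative usage, assuming an already-loaded llama_cpp.Llama instance `llm`:
+ #     for partial in respond("What is the weather in Berlin?", [], "You are helpful.", llm):
+ #         print(partial)  # prints progressively longer partial answers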
app.py CHANGED
@@ -1,3 +1,5 @@
+ # app.py
+
import subprocess
import sys

@@ -7,20 +9,6 @@ subprocess.run(
    check=True,
)

- import gradio as gr
- import llama_cpp
- from llama_cpp import Llama
-
- # --- Workaround for llama-cpp-python shutdown bug on HF Spaces ---
- # Avoid calling C-level free_model after the module is partially torn down.
- def _llama_noop_del(self):
-     # Intentionally do nothing on interpreter shutdown to avoid:
-     #   TypeError: 'NoneType' object is not callable in free_model
-     pass
-
- Llama.__del__ = _llama_noop_del
- # -----------------------------------------------------------------
-
# ---------------- CONFIG ----------------

BASE_REPO_ID = "unsloth/Llama-3.2-3B-Instruct-GGUF"
@@ -32,103 +20,98 @@ FT_FILENAME = "v1"
N_CTX = 2048
N_THREADS = None

- # ---------------- SYSTEM MESSAGE WITH ICL ----------------
-
- SYSTEM_MESSAGE = """
- You are a helpful, knowledgeable assistant fine-tuned on the FineTome dataset.
-
- When answering:
- - Use the user's selected latitude and longitude to provide location-aware insights.
- - Be concise, factual, and structured.
- - If the user asks a geography-, travel-, or environment-related question, incorporate the location.
- - If the location is missing, answer normally.
-
- ### Example interaction:
- User selected location: latitude 46.02000, longitude 7.74900
- User: "What can I do here?"
- Assistant: "This location is in the Alps near Zermatt, Switzerland. Popular activities include skiing, mountaineering, and high-alpine hiking."
-
- ### Example interaction:
- User selected location: latitude 59.32930, longitude 18.06860
- User: "Tell me something about this place."
- Assistant: "This point is in central Stockholm, Sweden. Attractions include Gamla Stan, the Royal Palace, and the surrounding archipelago."
- """.strip()
-
+ import gradio as gr
+ from agent import respond
+ from llama_cpp import Llama

# ------------- LOAD MODELS ON CPU --------------

- print("Loading base model...")
- llm_base = Llama.from_pretrained(
-     repo_id=BASE_REPO_ID,
-     filename=BASE_FILENAME,
-     n_ctx=N_CTX,
-     n_threads=N_THREADS,
- )
-
- AVAILABLE_MODELS = {
-     "Base: Llama 3.2 3B Instruct (q4_k_m)": llm_base,
- }
-
- try:
-     print("Attempting to load fine-tuned model...")
-     llm_ft = Llama.from_pretrained(
+ print("Loading finetuned model")
+ llm_ft = Llama.from_pretrained(
    repo_id=FT_REPO_ID,
    filename=FT_FILENAME,
    n_ctx=N_CTX,
    n_threads=N_THREADS,
)
- AVAILABLE_MODELS["Fine-tuned: Llama 3.2 3B FineTome (q4_k_m)"] = llm_ft
- FT_LOAD_ERROR = None
- except Exception as e:
-     llm_ft = None
-     FT_LOAD_ERROR = str(e)
-     print(f"Could not load fine-tuned model yet: {e}")
-
-
- # ------------- PROMPT + CHAT LOGIC -------------
-
- def build_prompt(system_message, history, user_message):
-     prompt = f"System: {system_message}\n"
-     for turn in history:
-         role = turn["role"]
-         content = turn["content"]
-         prompt += f"{role.capitalize()}: {content}\n"
-     prompt += f"User: {user_message}\nAssistant:"
-     return prompt
-
-
- def respond(message, history, model_choice, coords):
-     # Start with the fixed system message
-     system_message = SYSTEM_MESSAGE
-
-     # Inject coordinates if user clicked on the map
-     if coords is not None and len(coords) == 2:
-         lat, lon = coords
-         system_message += (
-             f"\n\nUser selected the location with latitude {lat:.5f} "
-             f"and longitude {lon:.5f}."
-         )
-
-     # Pick the model
-     llm = AVAILABLE_MODELS.get(model_choice, llm_base)
-
-     prompt = build_prompt(system_message, history, message)
-
-     stream = llm(
-         prompt,
-         max_tokens=256,
-         temperature=0.7,
-         top_p=0.9,
-         stop=["User:", "System:"],
-         stream=True,
-     )
-
-     partial = ""
-     for out in stream:
-         token = out["choices"][0]["text"]
-         partial += token
-         yield partial
-
+ AVAILABLE_MODELS = {
+     "Fine-tuned: Llama 3.2 3B FineTome (q4_k_m)": llm_ft,
+ }
+
+ # print("Loading base model...")
+ # llm_base = Llama.from_pretrained(
+ #     repo_id=BASE_REPO_ID,
+ #     filename=BASE_FILENAME,
+ #     n_ctx=N_CTX,
+ #     n_threads=N_THREADS,
+ # )
+
+ # AVAILABLE_MODELS = {
+ #     "Base: Llama 3.2 3B Instruct (q4_k_m)": llm_base,
+ # }
+
+ # try:
+ #     print("Attempting to load fine-tuned model...")
+ #     llm_ft = Llama.from_pretrained(
+ #         repo_id=FT_REPO_ID,
+ #         filename=FT_FILENAME,
+ #         n_ctx=N_CTX,
+ #         n_threads=N_THREADS,
+ #     )
+ #     AVAILABLE_MODELS["Fine-tuned: Llama 3.2 3B FineTome (q4_k_m)"] = llm_ft
+ #     FT_LOAD_ERROR = None
+ # except Exception as e:
+ #     llm_ft = None
+ #     FT_LOAD_ERROR = str(e)
+ #     print(f"Could not load fine-tuned model yet: {e}")
+
+ # System message:
+ SYSTEM_MESSAGE = """
+ You are a helpful assistant that answers user questions using any external information provided in the system message.
+
+ The system message may include a section like:
+ "You have executed the following tools (name, args, result):"
+ followed by one or more lines of the form:
+ - tool_name(args_dict) -> result_value
+
+ Instructions:
+ - Treat these tool results as ground truth for the current reply.
+ - Use them to give a clear, concise, and friendly answer to the user’s latest question.
+ - Do not repeat the raw tool logs verbatim unless it is natural to do so.
+ - You may summarize or rephrase the results in natural language.
+ - If multiple results are present, combine them into a single coherent answer.
+ - If no tool results are present, answer the question based on your own knowledge and the conversation history.
+ - Do not mention that you are using “tools” or “tool calls”; just speak as a normal assistant.
+
+ === EXAMPLE ===
+
+ System (excerpt):
+ You have executed the following tools (name, args, result):
+ - get_temperature({'location': 'Berlin'}) -> 20
+ - get_weather({'location': 'Berlin'}) -> sunny
+
+ User:
+ What is it like in Berlin right now?
+
+ Assistant:
+ It's sunny in Berlin right now, with a temperature of about 20 degrees.
+ """
+
+ # ------------- WRAPPER FUNCTION ----------------
+ # Needed to be able to pass the llm to respond() inside agent.py
+
+ def app_respond(message, history, system_message, model_choice):
+     """
+     Wrapper used by Gradio.
+     - model_choice: string from the dropdown (key in AVAILABLE_MODELS)
+     """
+     llm = AVAILABLE_MODELS.get(model_choice)
+     if llm is None:
+         # Fallback: first model in dict
+         llm = next(iter(AVAILABLE_MODELS.values()))
+
+     # Delegate to the core agent logic (which expects an llm object)
+     for chunk in respond(message, history, system_message, llm):
+         yield chunk

# ------------- GRADIO UI ----------------

@@ -139,36 +122,30 @@ model_dropdown = gr.Dropdown(
    interactive=True,
)

- location_map = gr.Map(
-     label="Click on the map to choose a location",
-     interactive=True,
- )
-
chatbot = gr.ChatInterface(
-     fn=respond,
+     fn=app_respond,
    type="messages",
    additional_inputs=[
+         gr.State(SYSTEM_MESSAGE),
        model_dropdown,
-         location_map,
    ],
)

with gr.Blocks() as demo:
-     gr.Markdown("# Llama 3.2 3B (CPU, GGUF) Base vs FineTome (with Location Awareness)")
+     gr.Markdown("# Llama 3.2 3B (CPU, GGUF) Base vs FineTome Tool-Using Agent")
    intro_text = (
-         "This Space runs GGUF-quantized Llama 3.2 3B models **on CPU** using `llama-cpp-python`.\n\n"
+         "This Space runs GGUF-quantized Llama 3.2 3B models **on CPU** using `llama-cpp-python`,\n"
+         "and demonstrates a simple agent that can call Python tools like `get_weather` and `get_temperature`.\n\n"
        "- **Base model**: Unsloth Llama-3.2-3B-Instruct (q4_k_m GGUF)\n"
-         "- **Fine-tuned model**: Llama-3.2-3B-Instruct fine tuned on FineTome (q4_k_m GGUF)\n\n"
-         "Click anywhere on the map to give the assistant a location context."
+         "- **Fine-tuned model**: Llama-3.2-3B-Instruct fine tuned on FineTome (q4_k_m GGUF).\n\n"
+         "Ask things like:\n"
+         "- `What is the weather like in Berlin?`\n"
+         "- `What's the temperature in Stockholm?`\n"
    )
-     if FT_LOAD_ERROR is not None:
-         intro_text += (
-             f"\n\n⚠️ Fine-tuned model is not loaded:\n`{FT_LOAD_ERROR}`\n"
-             "Only the base model is available."
-         )
+
    gr.Markdown(intro_text)
    chatbot.render()


if __name__ == "__main__":
-     demo.launch()
+     demo.launch()