Joar Paganus committed on
Commit efb82bb · 1 Parent(s): 80c0059
Files changed (2)
  1. agent.py +17 -3
  2. app.py +158 -32
agent.py CHANGED
@@ -13,14 +13,13 @@ from llama_cpp import Llama
 def get_weather(location: str) -> str:
     """This tool returns the current weather situation.
     Args:
-        location: The city or place to check
+        location: The city or place to chec
     Returns:
         str: Weather situation (e.g. cloudy, rainy, sunny)
     """
     weather_situations = ["cloudy", "rainy", "sunny", "foobar"]
     return random.choice(weather_situations)
 
-
 def get_temperature(location: str) -> str:
     """This tool returns the current temperature.
     Args:
@@ -31,8 +30,23 @@ def get_temperature(location: str) -> str:
     temperature = ["-10", "0", "20", "30"]
     return random.choice(temperature)
 
+def get_weather_forecast(location: str, days_ahead: str) -> str:
+    """This tool returns the weather forecast for the specified days ahead.
+    Args:
+        location: The city or place to check
+        days_ahead: How many days ahead of today
+    Returns:
+        str: Weather situation (e.g. cloudy, rainy, sunny)
+    """
+    test = "Storm"
+    if days_ahead > 0:
+        test = "Thunderstorm"
+    elif days_ahead > 10:
+        test = "Hurricane"
+    return test
+
 
-TOOLS = [get_weather, get_temperature]
+TOOLS = [get_weather, get_temperature, get_weather_forecast]
 TOOL_REGISTRY = {f.__name__: f for f in TOOLS}
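Two notes on the new get_weather_forecast tool as committed. Its days_ahead parameter is annotated str but compared against integers, which raises a TypeError in Python 3, and the elif days_ahead > 10 branch is unreachable, since any value greater than 10 also satisfies > 0 and the first branch always wins. A corrected sketch (my reordering and cast, not part of this commit):

def get_weather_forecast(location: str, days_ahead: str) -> str:
    """Dummy forecast: dates further out get more dramatic weather."""
    days = int(days_ahead)   # tool arguments typically arrive as strings
    if days > 10:            # test the stricter condition first,
        return "Hurricane"   # otherwise this branch can never be reached
    elif days > 0:
        return "Thunderstorm"
    return "Storm"

The diff also does not show how respond consumes TOOL_REGISTRY, so the dispatch step below is an assumption: a minimal sketch of looking a tool up by name and calling it with keyword arguments, where the tool name and argument values are purely illustrative stand-ins for whatever the model emits:

from agent import TOOL_REGISTRY

# Hypothetical parsed tool call; in the real agent this comes from the model.
tool_name = "get_weather"
tool_args = {"location": "Berlin"}

# Look the function up by name and invoke it with the parsed arguments.
tool_fn = TOOL_REGISTRY.get(tool_name)
result = tool_fn(**tool_args) if tool_fn else f"Unknown tool: {tool_name}"
print(result)  # e.g. "sunny"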
app.py CHANGED
@@ -1,5 +1,3 @@
-# app.py
-
 import subprocess
 import sys
 
@@ -21,9 +19,10 @@ N_CTX = 2048
 N_THREADS = None
 
 import gradio as gr
-from agent import respond
+from agent import respond, build_prompt
 from llama_cpp import Llama
 
+
 # ------------- LOAD MODELS ON CPU --------------
 
 print("Loading finetuned model")
@@ -37,6 +36,8 @@ AVAILABLE_MODELS = {
     "Fine-tuned: Llama 3.2 3B FineTome (q4_k_m)": llm_ft,
 }
 
+# ------------- Using one model for faster deployment during development -------------------
+
 # print("Loading base model...")
 # llm_base = Llama.from_pretrained(
 #     repo_id=BASE_REPO_ID,
@@ -64,8 +65,10 @@ AVAILABLE_MODELS = {
 #     FT_LOAD_ERROR = str(e)
 #     print(f"Could not load fine-tuned model yet: {e}")
 
+
+
 # System message:
-SYSTEM_MESSAGE = """
+SYSTEM_MESSAGE_WEATHER = """
 You are a helpful assistant that answers user questions using any external information provided in the system message.
 
 The system message may include a section like:
@@ -75,7 +78,7 @@ followed by one or more lines of the form:
 
 Instructions:
 - Treat these tool results as ground truth for the current reply.
-- Use them to give a clear, concise, and friendly answer to the users latest question.
+- Use them to give a clear, concise, and friendly answer to the user's latest question.
 - Do not repeat the raw tool logs verbatim unless it is natural to do so.
 - You may summarize or rephrase the results in natural language.
 - If multiple results are present, combine them into a single coherent answer.
@@ -96,10 +99,35 @@ Assistant:
 It's sunny in Berlin right now, with a temperature of about 20 degrees.
 """
 
+SYSTEM_MESSAGE_GENERAL = """
+You are a friendly, helpful, and knowledgeable AI assistant.
+
+Your goals:
+- Give clear, accurate, and concise answers.
+- Be honest when you don't know something.
+- Use the conversation history to stay consistent.
+- Ask clarifying questions when the user’s request is ambiguous.
+- Avoid unnecessary repetition or overly long explanations.
+- Be polite, neutral, and informative.
+
+You can answer questions on any topic, including:
+- general knowledge
+- mathematics and reasoning
+- writing and summarization
+- programming and debugging
+- everyday advice and explanations
+
+Do not claim access to external tools, APIs, the internet, or real-time data.
+All your responses must be based only on your internal knowledge and the conversation context.
+
+Your tone: helpful, calm, and professional.
+"""
+
+
 # ------------- WRAPPER FUNCTION ----------------
 # Needed to be able to pass the llm to respond() inside agent.py
 
-def app_respond(message, history, system_message, model_choice):
+def app_respond(message, history, system_message, model_choice="Base: Llama 3.2 3B Instruct (q4_k_m)"):
     """
     Wrapper used by Gradio.
     - model_choice: string from the dropdown (key in AVAILABLE_MODELS)
@@ -113,38 +141,136 @@ def app_respond(message, history, system_message, model_choice):
     for chunk in respond(message, history, system_message, llm):
         yield chunk
 
-# ------------- GRADIO UI ----------------
-
-model_dropdown = gr.Dropdown(
-    label="Model",
-    choices=list(AVAILABLE_MODELS.keys()),
-    value=list(AVAILABLE_MODELS.keys())[0],
-    interactive=True,
-)
-
-chatbot = gr.ChatInterface(
-    fn=app_respond,
-    type="messages",
-    additional_inputs=[
-        gr.State(SYSTEM_MESSAGE),
-        model_dropdown,
-    ],
-)
+# ------------- No agent, just a single LLM call ----------------
+def respond_fast(message, history, system_message, model_choice):
+    """
+    Fast path: no tools, no agent. Just a single LLM call with the
+    given system message and chat history.
+    """
+    # Pick model from dropdown
+    llm = AVAILABLE_MODELS.get(model_choice)
+    if llm is None:
+        llm = next(iter(AVAILABLE_MODELS.values()))
+
+    # Build a simple chat-style prompt
+    prompt = build_prompt(system_message, history, message)
+
+    # Single streaming generation
+    stream = llm(
+        prompt,
+        max_tokens=256,
+        temperature=0.7,
+        top_p=0.9,
+        stop=["User:", "System:"],
+        stream=True,
+    )
+
+    partial = ""
+    for out in stream:
+        token = out["choices"][0]["text"]
+        partial += token
+        yield partial
+
+
+# ------------- GRADIO UI ----------------
 
 with gr.Blocks() as demo:
-    gr.Markdown("# Llama 3.2 3B (CPU, GGUF) Base vs FineTome — Tool-Using Agent")
-    intro_text = (
-        "This Space runs GGUF-quantized Llama 3.2 3B models **on CPU** using `llama-cpp-python`,\n"
-        "and demonstrates a simple agent that can call Python tools like `get_weather` and `get_temperature`.\n\n"
-        "- **Base model**: Unsloth Llama-3.2-3B-Instruct (q4_k_m GGUF)\n"
-        "- **Fine-tuned model**: Llama-3.2-3B-Instruct fine tuned on FineTome (q4_k_m GGUF).\n\n"
-        "Ask things like:\n"
-        "- `What is the weather like in Berlin?`\n"
-        "- `What's the temperature in Stockholm?`\n"
+    gr.Markdown(
+        "# Finetuned Llama 3.2 3B (CPU, GGUF) in an Agentic Framework\n"
+        "Switch between a general assistant and a live weather assistant."
     )
 
-    gr.Markdown(intro_text)
-    chatbot.render()
+    with gr.Tabs():
+        # -------- TAB 1: GENERAL LLM ASSISTANT --------
+        with gr.Tab("💬 General Assistant"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown(
+                        "### General Assistant\n"
+                        "Chat with the base or fine-tuned model. Use this mode for any kind of question."
+                    )
+                    model_dropdown = gr.Dropdown(
+                        label="Model",
+                        choices=list(AVAILABLE_MODELS.keys()),
+                        value=list(AVAILABLE_MODELS.keys())[0],
+                        interactive=True,
+                    )
+
+                with gr.Column(scale=3, elem_id="general-chat"):
+                    general_chatbot = gr.ChatInterface(
+                        fn=respond_fast,
+                        additional_inputs=[
+                            gr.State(SYSTEM_MESSAGE_GENERAL),
+                            model_dropdown,
+                        ],
+                    )
+
+        # -------- TAB 2: LIVE WEATHER ASSISTANT --------
+        with gr.Tab("☀️ LIVE Weather Assistant"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    gr.Markdown(
+                        "### Live Weather Assistant\n"
+                        "Fetches up-to-date weather data."
+                    )
+                    model_dropdown = gr.Dropdown(
+                        label="Model",
+                        choices=list(AVAILABLE_MODELS.keys()),
+                        value=list(AVAILABLE_MODELS.keys())[0],
+                        interactive=True,
+                    )
+
+                with gr.Column(scale=3, elem_id="weather-chat"):
+                    # 🌤️ ASSISTANT HEADER (name + avatar + tagline)
+                    gr.HTML(
+                        """
+                        <div style="
+                            display: flex;
+                            align-items: center;
+                            gap: 15px;
+                            padding: 12px 16px;
+                            border-radius: 12px;
+                            margin-bottom: 10px;
+                        ">
+
+                            <!-- Avatar circle -->
+                            <div style="
+                                width: 64px;
+                                height: 64px;
+                                border-radius: 50%;
+                                background: radial-gradient(circle at 30% 30%, #facc15, #eab308, #ca8a04);
+                                display: flex;
+                                align-items: center;
+                                justify-content: center;
+                                font-weight: 700;
+                                font-size: 26px;
+                                color: #1f2937;
+                                box-shadow: 0 4px 10px rgba(0,0,0,0.15);
+                            ">
+                                M
+                            </div>
+
+                            <!-- Name + description -->
+                            <div>
+                                <div style="font-size: 20px; font-weight: 700; color: #333;">
+                                    Meteo-Mila
+                                </div>
+                                <div style="font-size: 14px; color: #555; margin-top: 2px;">
+                                    I know everything about the current weather and temperature.<br>
+                                    I can also provide forecasts into the future! 🌦️
+                                </div>
+                            </div>
+
+                        </div>
+                        """
+                    )
+
+                    general_chatbot = gr.ChatInterface(
+                        fn=app_respond,
+                        additional_inputs=[
+                            gr.State(SYSTEM_MESSAGE_WEATHER),
+                            model_dropdown,
+                        ],
+                    )
 
 
 if __name__ == "__main__":
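
app.py now imports build_prompt from agent, but its definition is not part of this commit, so its exact shape is an assumption. Below is a minimal sketch consistent with how respond_fast uses it: plain "System:"/"User:"/"Assistant:" turns (matching the stop=["User:", "System:"] tokens above), with history assumed to arrive as (user, assistant) pairs, which is what gr.ChatInterface passes by default:

def build_prompt(system_message: str, history, message: str) -> str:
    """Flatten the system message, prior turns, and the new user message
    into one plain-text prompt ending with an open assistant turn."""
    parts = [f"System: {system_message.strip()}"]
    for user_turn, assistant_turn in history or []:
        parts.append(f"User: {user_turn}")
        parts.append(f"Assistant: {assistant_turn}")
    parts.append(f"User: {message}")
    parts.append("Assistant:")  # the model completes from here
    return "\n".join(parts)

The stop tokens then cut generation off if the model starts hallucinating the next "User:" or "System:" turn itself.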