kashif HF Staff committed
Commit c1cd11a · 1 Parent(s): 56bd97c

example scripts

example_online_mode.py ADDED
@@ -0,0 +1,466 @@
"""
Example usage of Online mode with warmup

This demonstrates:
1. Warmup phase (generate N sequences to calibrate threshold)
2. Threshold computation (DeepConf-low or DeepConf-high)
3. Final generation with calibrated early stopping
"""

from typing import Optional

import numpy as np
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig


def extract_answer(text: str) -> Optional[str]:
    """
    Extract boxed answer from LaTeX text

    Looks for \\boxed{answer} pattern in generated text.
    """
    if "boxed" in text:
        ans = text.split("boxed")[-1]
        if len(ans) == 0:
            return ""
        elif ans[0] == "{":
            stack = 1
            a = ""
            for c in ans[1:]:
                if c == "{":
                    stack += 1
                    a += c
                elif c == "}":
                    stack -= 1
                    if stack == 0:
                        break
                    a += c
                else:
                    a += c
        else:
            a = ans.split("$")[0].strip()
        return a.strip()

    return None
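
# Illustrative behavior of extract_answer (deterministic, easy to check by hand):
#   extract_answer("The result is \\boxed{42}.") returns "42" (balanced braces after "boxed");
#   extract_answer("no final answer given") returns None.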


def compute_least_grouped(confs: list, group_size: int) -> list:
    """
    Compute sliding window mean confidence

    Args:
        confs: List of per-token confidence values
        group_size: Size of sliding window

    Returns:
        List of mean confidences for each window position
    """
    if len(confs) < group_size:
        return [sum(confs) / len(confs)] if confs else [0]

    sliding_means = []
    for i in range(len(confs) - group_size + 1):
        window = confs[i : i + group_size]
        sliding_means.append(round(sum(window) / len(window), 3))
    return sliding_means
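
# Example: compute_least_grouped([1.0, 2.0, 3.0, 4.0], group_size=2) returns
# [1.5, 2.5, 3.5] (the mean of each length-2 window); when confs has fewer
# values than group_size, a single overall mean is returned instead.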


def process_single_output(
    sequence, confidences, tokenizer, window_size: int, threshold: Optional[float] = None
) -> dict:
    """
    Process a single generated sequence

    Args:
        sequence: Generated token IDs
        confidences: Per-token confidence values (list or tensor)
        tokenizer: Tokenizer for decoding
        window_size: Size of sliding window for confidence
        threshold: Optional threshold for early stopping detection

    Returns:
        Dictionary with trace data
    """
    # Convert to list if tensor
    if hasattr(confidences, "tolist"):
        confs = confidences.tolist()
    else:
        confs = list(confidences)

    # Decode text
    text = tokenizer.decode(sequence, skip_special_tokens=True)

    # Compute sliding window statistics
    sliding_window = compute_least_grouped(confs, window_size)
    min_conf = min(sliding_window) if sliding_window else 0

    # Determine if early stopping would have triggered
    stopped_early = False
    stop_position = None

    if threshold is not None:
        for pos, window_mean in enumerate(sliding_window):
            if window_mean < threshold:
                stopped_early = True
                stop_position = pos + window_size  # Position in original sequence
                break

    # Extract answer if present
    extracted_answer = extract_answer(text)

    return {
        "text": text,
        "confs": confs,
        "group_confs": sliding_window,
        "min_conf": min_conf,
        "stopped_early": stopped_early,
        "stop_position": stop_position,
        "extracted_answer": extracted_answer,
        "num_tokens": len(confs),
        "token_ids": sequence.tolist() if hasattr(sequence, "tolist") else list(sequence),
    }
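
# Worked example of the early-stop check above: with confidences
# [2.0, 1.0, 0.2, 0.3], window_size=2 and threshold=0.5, the window means are
# [1.5, 0.6, 0.25]; the first mean below the threshold sits at window index 2,
# so stopped_early=True and stop_position = 2 + window_size = 4.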


def process_batch_results(outputs, tokenizer, window_size: int = 2048, threshold: Optional[float] = None) -> dict:
    """
    Process batch generation outputs

    This function provides post-processing capabilities for batch-generated
    sequences, allowing analysis of confidence patterns and early stopping
    behavior after generation is complete.

    Args:
        outputs: GenerateDecoderOnlyOutput from model.generate()
        tokenizer: Tokenizer for decoding sequences
        window_size: Size of sliding window for confidence computation
        threshold: Optional threshold for detecting where early stopping would occur

    Returns:
        Dictionary containing:
            - traces: List of processed trace dictionaries
            - min_confs: List of minimum confidences per trace
            - total_tokens: Total tokens across all traces
            - num_traces: Number of traces processed
    """
    if not hasattr(outputs, "sequences"):
        raise ValueError("outputs must have 'sequences' attribute")

    if not hasattr(outputs, "confidences") or outputs.confidences is None:
        raise ValueError("outputs must have 'confidences' attribute. Set output_confidences=True in generation_config")

    sequences = outputs.sequences
    confidences = outputs.confidences

    # Process each sequence
    traces = []
    min_confs = []
    total_tokens = 0

    for i in range(sequences.shape[0]):
        trace_data = process_single_output(sequences[i], confidences[i], tokenizer, window_size, threshold)

        traces.append(trace_data)
        min_confs.append(trace_data["min_conf"])
        total_tokens += trace_data["num_tokens"]

    return {"traces": traces, "min_confs": min_confs, "total_tokens": total_tokens, "num_traces": len(traces)}
169
+
170
+
171
+ def compute_warmup_threshold(min_confs: list, variant: str = "low", eta: Optional[float] = None) -> float:
172
+ """
173
+ Compute threshold from warmup confidences
174
+
175
+ Args:
176
+ min_confs: List of minimum confidences from warmup sequences
177
+ variant: "low" (aggressive) or "high" (permissive)
178
+ eta: Optional manual eta value (overrides variant default)
179
+
180
+ Returns:
181
+ Computed threshold value
182
+ """
183
+ if eta is None:
184
+ eta = 0.1 if variant == "low" else 0.9 if variant == "high" else 0.5
185
+
186
+ confs = np.asarray(min_confs, dtype=np.float32)
187
+ pct = max(0.0, min(100.0, 100.0 - (eta * 100.0)))
188
+ threshold = float(np.percentile(confs, pct))
189
+
190
+ return threshold
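
# Example: with min_confs=[0.5, 1.0, 1.5, 2.0], variant="low" (eta=0.1) takes the
# 90th percentile of the warmup minima (~1.85), a high bar that stops traces
# aggressively; variant="high" (eta=0.9) takes the 10th percentile (~0.65) and
# lets most traces run longer before stopping.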
191
+
192
+
193
+ # ============================================================================
194
+ # Example Functions
195
+ # ============================================================================
196
+
197
+
198
+ def prepare_prompt(question: str, tokenizer):
199
+ """Prepare prompt using chat template"""
200
+ messages = [{"role": "user", "content": question}]
201
+
202
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
203
+
204
+ return prompt
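
# Note: for Qwen2.5-Instruct tokenizers the rendered prompt typically looks like
#   <|im_start|>system\n...<|im_end|>\n<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n
# but the exact text depends on the tokenizer's chat template.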
205
+
206
+
207
+ def run_online_mode_example(
208
+ question: str,
209
+ ground_truth: Optional[str] = None,
210
+ warmup_traces: int = 8,
211
+ confidence_variant: str = "low", # "low" or "high"
212
+ window_size: int = 10,
213
+ max_tokens: int = 128,
214
+ temperature: float = 0.7,
215
+ top_p: float = 0.95,
216
+ ):
217
+ """
218
+ Run DeepConf in online mode
219
+
220
+ Args:
221
+ question: Question to answer
222
+ ground_truth: Optional ground truth answer for evaluation
223
+ warmup_traces: Number of warmup sequences (default: 8)
224
+ confidence_variant: "low" (aggressive) or "high" (permissive)
225
+ window_size: Sliding window size for confidence
226
+ max_tokens: Max tokens per generation
227
+ temperature: Sampling temperature
228
+ top_p: Top-p sampling
229
+ """
230
+
231
+ # Load model (use local cache to avoid HF Hub timeouts)
232
+ model_name = "Qwen/Qwen2.5-0.5B-Instruct"
233
+ print(f"Loading model: {model_name}")
234
+ model = AutoModelForCausalLM.from_pretrained(
235
+ model_name,
236
+ torch_dtype=torch.float16,
237
+ device_map="auto",
238
+ local_files_only=True, # Use cached model
239
+ )
240
+ tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=True)
241
+
242
+ # Prepare prompt
243
+ prompt = prepare_prompt(question, tokenizer)
244
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
245
+
246
+ print("\n" + "=" * 80)
247
+ print("DEEPCONF ONLINE MODE - FOLLOWING OFFICIAL PATTERN")
248
+ print("=" * 80)
249
+ print(f"\nQuestion: {question}")
250
+ if ground_truth:
251
+ print(f"Ground truth: {ground_truth}")
252
+ print("\nConfiguration:")
253
+ print(f" - Warmup traces: {warmup_traces}")
254
+ print(f" - Variant: DeepConf-{confidence_variant}")
255
+ print(f" - Window size: {window_size}")
256
+ print(f" - Max tokens: {max_tokens}")
257
+ print(f" - Temperature: {temperature}")
258
+ print(f" - Top-p: {top_p}")
259
+
260
+ # ============================================================
261
+ # PHASE 1: WARMUP - Generate multiple sequences to calibrate
262
+ # ============================================================
263
+ print("\n" + "=" * 80)
264
+ print(f"PHASE 1: WARMUP (Generating {warmup_traces} sequences for calibration)")
265
+ print("=" * 80)
266
+
267
+ warmup_config = GenerationConfig(
268
+ do_sample=True,
269
+ temperature=temperature,
270
+ top_p=top_p,
271
+ max_new_tokens=max_tokens,
272
+ enable_conf=True,
273
+ enable_early_stopping=False, # No stopping during warmup
274
+ output_confidences=True,
275
+ return_dict_in_generate=True,
276
+ pad_token_id=tokenizer.eos_token_id,
277
+ )
278
+
279
+ # Expand inputs for batch generation
280
+ expanded_ids = inputs.input_ids.repeat(warmup_traces, 1)
281
+ if "attention_mask" in inputs and inputs.attention_mask is not None:
282
+ expanded_mask = inputs.attention_mask.repeat(warmup_traces, 1)
283
+ else:
284
+ expanded_mask = None
285
+
286
+ print(f"Generating {warmup_traces} warmup sequences...")
287
+ warmup_outputs = model.generate(
288
+ input_ids=expanded_ids,
289
+ attention_mask=expanded_mask,
290
+ generation_config=warmup_config,
291
+ custom_generate="kashif/DeepConf",
292
+ trust_remote_code=True,
293
+ )
294
+
295
+ # Process warmup results
296
+ warmup_results = process_batch_results(warmup_outputs, tokenizer, window_size=window_size)
297
+
298
+ print("\nWarmup complete!")
299
+ print(f" - Total tokens: {warmup_results['total_tokens']}")
300
+ print(f" - Min confidences: {[round(c, 3) for c in warmup_results['min_confs']]}")
301
+
302
+ # Show warmup traces
303
+ print("\nWarmup Traces:")
304
+ print("-" * 80)
305
+ for i, trace in enumerate(warmup_results["traces"]):
306
+ text = trace["text"][len(prompt) :].strip()
307
+ answer = extract_answer(text)
308
+ print(f"\nTrace {i + 1}:")
309
+ print(f" Tokens: {trace['num_tokens']}, Min conf: {trace['min_conf']:.3f}")
310
+ print(f" Text: {text[:80]}..." if len(text) > 80 else f" Text: {text}")
311
+ if answer:
312
+ print(f" Answer: {answer}")
313
+ if ground_truth:
314
+ correct = answer.strip() == ground_truth.strip()
315
+ print(f" Correct: {'βœ“' if correct else 'βœ—'}")
316
+
317
+ # ============================================================
318
+ # PHASE 2: THRESHOLD COMPUTATION
319
+ # ============================================================
320
+ print("\n" + "=" * 80)
321
+ print("PHASE 2: THRESHOLD COMPUTATION")
322
+ print("=" * 80)
323
+
324
+ threshold = compute_warmup_threshold(warmup_results["min_confs"], variant=confidence_variant)
325
+
326
+ eta = 0.1 if confidence_variant == "low" else 0.9
327
+ percentile = (1.0 - eta) * 100
328
+
329
+ print("\nComputed threshold from warmup:")
330
+ print(f" - Variant: DeepConf-{confidence_variant} (eta={eta})")
331
+ print(f" - Percentile: {percentile:.0f}th")
332
+ print(f" - Threshold: {threshold:.3f}")
333
+ print("\nInterpretation:")
334
+ if confidence_variant == "low":
335
+ print(" DeepConf-low is AGGRESSIVE - stops early to save tokens")
336
+ else:
337
+ print(" DeepConf-high is PERMISSIVE - allows longer generation")
338
+
339
+ # ============================================================
340
+ # PHASE 3: FINAL GENERATION with calibrated threshold
341
+ # ============================================================
342
+ print("\n" + "=" * 80)
343
+ print("PHASE 3: FINAL GENERATION (With calibrated early stopping)")
344
+ print("=" * 80)
345
+
346
+ final_config = GenerationConfig(
347
+ do_sample=True,
348
+ temperature=temperature,
349
+ top_p=top_p,
350
+ max_new_tokens=max_tokens,
351
+ enable_conf=True,
352
+ enable_early_stopping=True, # Online stopping with calibrated threshold
353
+ threshold=threshold,
354
+ window_size=window_size,
355
+ output_confidences=True,
356
+ return_dict_in_generate=True,
357
+ pad_token_id=tokenizer.eos_token_id,
358
+ )
359
+
360
+ print(f"Generating with DeepConf-{confidence_variant} (threshold={threshold:.3f})...")
361
+ final_output = model.generate(
362
+ **inputs,
363
+ generation_config=final_config,
364
+ custom_generate="kashif/DeepConf",
365
+ trust_remote_code=True,
366
+ )
367
+
368
+ final_text = tokenizer.decode(final_output.sequences[0], skip_special_tokens=True)
369
+ final_tokens = final_output.sequences.shape[1] - inputs.input_ids.shape[1]
370
+ final_answer = extract_answer(final_text)
371
+
372
+ # Calculate min confidence if available
373
+ if hasattr(final_output, "confidences") and final_output.confidences is not None:
374
+ min_conf = final_output.confidences.min().item()
375
+ mean_conf = final_output.confidences.mean().item()
376
+ else:
377
+ min_conf = None
378
+ mean_conf = None
379
+
380
+ print("\nFinal generation complete!")
381
+ print(f" - Tokens generated: {final_tokens}")
382
+ if min_conf is not None:
383
+ print(f" - Min confidence: {min_conf:.3f}")
384
+ print(f" - Mean confidence: {mean_conf:.3f}")
385
+
386
+ print("\nGenerated text:")
387
+ print("-" * 80)
388
+ print(final_text)
389
+ print("-" * 80)
390
+
391
+ if final_answer:
392
+ print(f"\nExtracted answer: {final_answer}")
393
+ if ground_truth:
394
+ correct = final_answer.strip() == ground_truth.strip()
395
+ print(f"Correct: {'βœ“' if correct else 'βœ—'}")
396
+
397
+ # ============================================================
398
+ # SUMMARY
399
+ # ============================================================
400
+ print("\n" + "=" * 80)
401
+ print("SUMMARY")
402
+ print("=" * 80)
403
+
404
+ total_warmup_tokens = warmup_results["total_tokens"]
405
+ total_tokens = total_warmup_tokens + final_tokens
406
+
407
+ print(f"Total tokens: {total_tokens}")
408
+ print(f" - Warmup: {total_warmup_tokens} ({warmup_traces} sequences)")
409
+ print(f" - Final: {final_tokens}")
410
+
411
+ # Check if we would have used more tokens without early stopping
412
+ avg_warmup_tokens = total_warmup_tokens / warmup_traces
413
+ potential_savings = avg_warmup_tokens - final_tokens
414
+ if potential_savings > 0:
415
+ print("\nToken savings from early stopping:")
416
+ print(f" - Average warmup length: {avg_warmup_tokens:.1f} tokens")
417
+ print(f" - Final length: {final_tokens} tokens")
418
+ print(f" - Saved: {potential_savings:.1f} tokens ({potential_savings / avg_warmup_tokens * 100:.1f}%)")
419
+
420
+ print("\n" + "=" * 80)
421
+ print("Example complete!")
422
+ print("=" * 80)
423
+
424
+
425
+ if __name__ == "__main__":
426
+ # Example 1: Simple math problem
427
+ print("\n\n" + "β–ˆ" * 80)
428
+ print("EXAMPLE 1: Simple Math Problem")
429
+ print("β–ˆ" * 80)
430
+
431
+ run_online_mode_example(
432
+ question="What is 15 * 8? Show your work step by step.",
433
+ ground_truth="120",
434
+ warmup_traces=4,
435
+ confidence_variant="low",
436
+ window_size=5,
437
+ max_tokens=64,
438
+ )
439
+
440
+ # Example 2: Square root problem
441
+ print("\n\n" + "β–ˆ" * 80)
442
+ print("EXAMPLE 2: Square Root Problem")
443
+ print("β–ˆ" * 80)
444
+
445
+ run_online_mode_example(
446
+ question="What is the square root of 144? Express your answer in the form \\boxed{answer}.",
447
+ ground_truth="12",
448
+ warmup_traces=4,
449
+ confidence_variant="high",
450
+ window_size=5,
451
+ max_tokens=64,
452
+ )
453
+
454
+ # Example 3: Word problem
455
+ print("\n\n" + "β–ˆ" * 80)
456
+ print("EXAMPLE 3: Word Problem")
457
+ print("β–ˆ" * 80)
458
+
459
+ run_online_mode_example(
460
+ question="If a train travels 60 miles per hour for 2.5 hours, how far does it travel?",
461
+ ground_truth="150",
462
+ warmup_traces=4,
463
+ confidence_variant="low",
464
+ window_size=5,
465
+ max_tokens=96,
466
+ )
example_simple_generations.py ADDED
@@ -0,0 +1,153 @@
"""
Simple examples showing DeepConf sample generations
"""

import torch

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig


def generate_with_deepconf(
    question: str,
    enable_early_stopping: bool = True,
    threshold: float = 10.0,
    window_size: int = 10,
    max_tokens: int = 128,
):
    """Generate with DeepConf and show results"""

    # Load model (cached)
    model_name = "Qwen/Qwen2.5-0.5B-Instruct"
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16, device_map="auto", local_files_only=True
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, local_files_only=True)

    # Prepare prompt
    messages = [{"role": "user", "content": question}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Configure generation
    gen_config = GenerationConfig(
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        max_new_tokens=max_tokens,
        enable_conf=True,
        enable_early_stopping=enable_early_stopping,
        threshold=threshold,
        window_size=window_size,
        output_confidences=True,
        return_dict_in_generate=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Generate
    outputs = model.generate(**inputs, generation_config=gen_config, custom_generate="kashif/DeepConf", trust_remote_code=True)

    # Extract results
    generated_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
    tokens_generated = outputs.sequences.shape[1] - inputs.input_ids.shape[1]

    if hasattr(outputs, "confidences") and outputs.confidences is not None:
        min_conf = outputs.confidences.min().item()
        max_conf = outputs.confidences.max().item()
        mean_conf = outputs.confidences.mean().item()
    else:
        min_conf = max_conf = mean_conf = None

    return {
        "text": generated_text,
        "tokens": tokens_generated,
        "min_conf": min_conf,
        "max_conf": max_conf,
        "mean_conf": mean_conf,
    }


def print_result(title: str, question: str, result: dict):
    """Pretty print generation result"""
    print(f"\n{'=' * 80}")
    print(f"{title}")
    print(f"{'=' * 80}")
    print(f"Question: {question}")
    print(f"\nGenerated ({result['tokens']} tokens):")
    print(f"{'-' * 80}")
    print(result["text"])
    print(f"{'-' * 80}")

    if result["min_conf"] is not None:
        print("\nConfidence stats:")
        print(f" Min: {result['min_conf']:.3f}")
        print(f" Max: {result['max_conf']:.3f}")
        print(f" Mean: {result['mean_conf']:.3f}")


if __name__ == "__main__":
    print("\n" + "█" * 80)
    print("DEEPCONF SAMPLE GENERATIONS")
    print("█" * 80)

    # Example 1: Math with aggressive early stopping
    result = generate_with_deepconf(
        "What is 25 * 4?", enable_early_stopping=True, threshold=8.0, window_size=5, max_tokens=64
    )
    print_result("Example 1: Math (Aggressive Early Stopping)", "What is 25 * 4?", result)

    # Example 2: Math with permissive early stopping
    result = generate_with_deepconf(
        "What is 25 * 4?", enable_early_stopping=True, threshold=15.0, window_size=5, max_tokens=64
    )
    print_result("Example 2: Math (Permissive Early Stopping)", "What is 25 * 4?", result)

    # Example 3: Math without early stopping
    result = generate_with_deepconf("What is 25 * 4?", enable_early_stopping=False, max_tokens=64)
    print_result("Example 3: Math (No Early Stopping)", "What is 25 * 4?", result)

    # Example 4: Reasoning question
    result = generate_with_deepconf(
        "If 5 apples cost $10, how much do 3 apples cost?",
        enable_early_stopping=True,
        threshold=8.0,
        window_size=5,
        max_tokens=96,
    )
    print_result("Example 4: Word Problem", "If 5 apples cost $10, how much do 3 apples cost?", result)

    # Example 5: Factual question
    result = generate_with_deepconf(
        "Who wrote Romeo and Juliet?", enable_early_stopping=True, threshold=6.0, window_size=5, max_tokens=64
    )
    print_result("Example 5: Factual Question", "Who wrote Romeo and Juliet?", result)

    # Example 6: Calculation
    result = generate_with_deepconf(
        "Calculate: (15 + 8) × 2", enable_early_stopping=True, threshold=7.0, window_size=5, max_tokens=96
    )
    print_result("Example 6: Calculation", "Calculate: (15 + 8) × 2", result)

    # Example 7: Definition
    result = generate_with_deepconf(
        "Define photosynthesis in simple terms.",
        enable_early_stopping=True,
        threshold=10.0,
        window_size=10,
        max_tokens=128,
    )
    print_result("Example 7: Definition", "Define photosynthesis in simple terms.", result)

    # Example 8: Step-by-step
    result = generate_with_deepconf(
        "Solve: x + 5 = 12. Show your steps.", enable_early_stopping=True, threshold=8.0, window_size=5, max_tokens=96
    )
    print_result("Example 8: Step-by-step Solution", "Solve: x + 5 = 12. Show your steps.", result)

    print(f"\n{'█' * 80}")
    print("ALL EXAMPLES COMPLETE")
    print("█" * 80)
    print("\nKey observations:")
    print("- Lower threshold → Earlier stopping (fewer tokens)")
    print("- Higher threshold → Later stopping (more tokens)")
    print("- No early stopping → Always generates max_tokens")
    print("- Confidence varies based on model certainty")