Add files using upload-large-folder tool
Browse files- Modelfile +53 -0
- quantize_all.sh +115 -0
- split_big_quants.sh +60 -0
Modelfile
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ollama Modelfile for the Q4_0 quantization of Ina v11.1.
# FROM points at a GGUF file that must sit next to this Modelfile.
FROM ./Ina-v11.1-Q4_0.gguf

# Chat template. It renders, in order:
#   * an optional system turn (the user's system prompt and, when tools are
#     supplied, a fixed tool-calling preamble),
#   * every message in .Messages as a user / assistant / ipython (tool) turn,
#   * an open assistant header after the last user or tool message so the
#     model continues from there.
# When tools are supplied, the last user message is wrapped in instructions
# asking for a JSON function call. Blank lines inside the template are part
# of the rendered prompt and must not be removed.
TEMPLATE """{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
{{- if .System }}

{{ .System }}
{{- end }}
{{- if .Tools }}

Cutting Knowledge Date: December 2023

When you receive a tool call response, use the output to format an answer to the orginal user question.

You are a helpful assistant with tool calling capabilities.
{{- end }}<|eot_id|>
{{- end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 }}
{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
{{- if and $.Tools $last }}

Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.

Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.

{{ range $.Tools }}
{{- . }}
{{ end }}
Question: {{ .Content }}<|eot_id|>
{{- else }}

{{ .Content }}<|eot_id|>
{{- end }}{{ if $last }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
{{- if .ToolCalls }}
{{ range .ToolCalls }}
{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
{{- else }}

{{ .Content }}
{{- end }}{{ if not $last }}<|eot_id|>{{ end }}
{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>

{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- end }}
{{- end }}"""

# Stop generating as soon as the model emits any of the template's control tokens.
PARAMETER stop <|start_header_id|>
PARAMETER stop <|end_header_id|>
PARAMETER stop <|eot_id|>
|
quantize_all.sh
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
#
# quantize_all.sh - batch-quantize the F16 GGUF model found in the current
# directory into every type listed in TYPES, using llama.cpp's llama-quantize.
# Any result larger than LIMIT_BYTES is split into shards (placed in a
# directory named after the output file) with llama-gguf-split, and the
# unsplit file is removed once the split succeeds.
#
# Usage: run from the directory that contains the *F16.gguf / *f16.gguf file.

# Source the environment (best effort: a missing venv is reported, not fatal)
VENV_ACTIVATE=~/git/llama.cpp/.venv/bin/activate
if [ -f "$VENV_ACTIVATE" ]; then
    # shellcheck disable=SC1090
    source "$VENV_ACTIVATE"
else
    echo "Warning: $VENV_ACTIVATE not found; continuing without it." >&2
fi

# Paths to the llama.cpp tools
QUANTIZER=~/git/llama.cpp/build/bin/llama-quantize
SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split

# Split threshold: 40 GiB = 40 * 1024^3 = 42949672960 bytes
LIMIT_BYTES=42949672960

# Print the logical CPU count. sysctl covers macOS; nproc/getconf cover other
# systems; 1 is the last resort so llama-quantize always receives a number.
detect_threads() {
    sysctl -n hw.logicalcpu 2>/dev/null \
        || nproc 2>/dev/null \
        || getconf _NPROCESSORS_ONLN 2>/dev/null \
        || echo 1
}

# Print the size of file $1 in bytes. Tries the BSD/macOS stat syntax first,
# then the GNU syntax. Prints nothing and returns non-zero if both fail.
file_size_bytes() {
    stat -f%z "$1" 2>/dev/null || stat -c%s "$1" 2>/dev/null
}

# Print the output filename for input file $1 and quantization type $2.
# Only the trailing "F16.gguf"/"f16.gguf" tag is swapped, so an "F16"/"f16"
# appearing earlier in the model name is left untouched. If the name does not
# end in that tag, the type is appended before the extension instead.
output_name_for() {
    local input=$1 quant=$2
    local stem=${input%[Ff]16.gguf}
    if [ "$stem" == "$input" ]; then
        printf '%s\n' "${input%.gguf}-$quant.gguf"
    else
        printf '%s\n' "$stem$quant.gguf"
    fi
}

# Split file $1 into shards inside the directory "${1%.gguf}".
# The split runs in a subshell so the caller's working directory is never
# changed, even when cd or the split tool fails. Returns the tool's status.
# NOTE(review): llama-gguf-split appears to treat "40G" as decimal gigabytes,
# i.e. slightly below LIMIT_BYTES - confirm against the tool's documentation.
split_into_dir() {
    local file=$1
    local dir=${file%.gguf}
    local prefix
    prefix=$(basename "$file" .gguf)

    mkdir -p "$dir" || return 1
    (
        cd "$dir" || exit 1
        # Flags first, then IN, then OUT prefix
        "$SPLIT_TOOL" --split-max-size 40G "../$file" "$prefix"
    )
}

# Detect thread count for max performance
THREADS=$(detect_threads)
echo "Detected $THREADS threads."

# Find the input file (looking for F16 or f16 in the name in the current directory)
INPUT_FILE=$(find . -maxdepth 1 -name "*[Ff]16.gguf" | head -n 1)

if [ -z "$INPUT_FILE" ]; then
    echo "Error: No F16 GGUF file found in the current directory."
    exit 1
fi

# Fail once, up front, instead of once per type with a misleading hint.
if [ ! -x "$QUANTIZER" ]; then
    echo "Error: llama-quantize not found or not executable: $QUANTIZER" >&2
    exit 1
fi

# Remove leading ./ for cleaner filenames
INPUT_FILE=${INPUT_FILE#./}

echo "Found input file: $INPUT_FILE"

# List of quantization types requested.
# NOTE(review): some entries (e.g. the *_L / *_XL names) may not be accepted
# by every llama-quantize build; such failures are reported and skipped below.
TYPES=(
    "IQ3_M"
    "IQ3_XS"
    "IQ3_XXS"
    "IQ4_NL"
    "IQ4_XS"
    "Q3_K_L"
    "Q3_K_M"
    "Q3_K_S"
    "Q3_K_XL"
    "Q4_0"
    "Q4_1"
    "Q4_K_L"
    "Q4_K_M"
    "Q4_K_S"
    "Q5_K_L"
    "Q5_K_M"
    "Q5_K_S"
    "Q6_K"
    "Q6_K_L"
    "Q8_0"
)

echo "Starting batch quantization..."
echo "----------------------------------------"

for TYPE in "${TYPES[@]}"; do
    OUTPUT_FILE=$(output_name_for "$INPUT_FILE" "$TYPE")

    echo "Quantizing to $TYPE..."
    "$QUANTIZER" "$INPUT_FILE" "$OUTPUT_FILE" "$TYPE" "$THREADS"
    EXIT_CODE=$?

    if [ "$EXIT_CODE" -ne 0 ]; then
        echo "❌ Failed to create $OUTPUT_FILE (Error code: $EXIT_CODE)"
        echo " (Note: '$TYPE' might not be a valid quantization type in this version of llama.cpp)"
        echo "----------------------------------------"
        continue
    fi

    echo "✅ Successfully created $OUTPUT_FILE"

    # Check the file size and split if it exceeds LIMIT_BYTES
    FILE_SIZE=$(file_size_bytes "$OUTPUT_FILE")

    if [ -z "$FILE_SIZE" ]; then
        echo "Warning: could not determine size of $OUTPUT_FILE; not splitting." >&2
    elif [ "$FILE_SIZE" -gt "$LIMIT_BYTES" ]; then
        echo "File size ($FILE_SIZE bytes) exceeds 40GB. Splitting into directory..."
        echo " Splitting '$OUTPUT_FILE' into '${OUTPUT_FILE%.gguf}/'..."

        if split_into_dir "$OUTPUT_FILE"; then
            echo "✅ Split successful. Removing original large file."
            rm -- "$OUTPUT_FILE"
        else
            echo "❌ Splitting failed. Keeping original file."
        fi
    fi

    echo "----------------------------------------"
done

echo "Batch quantization complete."
|
split_big_quants.sh
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
#
# split_big_quants.sh - split every .gguf file in the current directory that
# is larger than LIMIT_BYTES into shards with llama.cpp's llama-gguf-split.
# Shards are written to a directory named after the file (without .gguf), and
# the unsplit file is removed once the split succeeds.
#
# Environment:
#   SKIP_FILE  name of the file that must never be split or removed
#              (default: Ina-v11.1-F16.gguf, the original F16 model)

# Path to llama-gguf-split
SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split

# Define the 40GB limit in bytes
LIMIT_BYTES=42949672960 # 40GB (1024^3 * 40)

# Original F16 file to leave alone; overridable without editing the script.
SKIP_FILE=${SKIP_FILE:-Ina-v11.1-F16.gguf}

# Print the size of file $1 in bytes. Tries the BSD/macOS stat syntax first,
# then the GNU syntax. Prints nothing and returns non-zero if both fail.
file_size_bytes() {
    stat -f%z "$1" 2>/dev/null || stat -c%s "$1" 2>/dev/null
}

# Print byte count $1 in human-readable form. Uses numfmt when it is
# installed; otherwise falls back to the plain byte count, since numfmt is a
# GNU coreutils tool and may be missing (e.g. on a stock macOS).
human_size() {
    if command -v numfmt > /dev/null 2>&1; then
        numfmt --to=iec-i --suffix=B "$1"
    else
        printf '%s bytes\n' "$1"
    fi
}

# Split file $1 into shards inside existing directory $2.
# Runs in a subshell so the caller's working directory is never changed, even
# when cd or the split tool fails. Returns the split tool's exit status.
split_into_dir() {
    local file=$1 dir=$2
    local prefix
    prefix=$(basename "$file" .gguf)
    (
        cd "$dir" || exit 1
        # GGUF_IN is a relative path, GGUF_OUT is the basename used as prefix
        "$SPLIT_TOOL" --split-max-size 40G "../$file" "$prefix"
    )
}

echo "Starting GGUF splitting for files > 40GB..."
echo "----------------------------------------"

# Walk all regular .gguf files in the current directory (NUL-delimited so any
# filename is handled safely).
while IFS= read -r -d '' FILE; do
    # Remove leading ./ for cleaner filename display
    FILE_NAME=${FILE#./}

    # Skip the original F16 file
    if [[ "$FILE_NAME" == "$SKIP_FILE" ]]; then
        echo "Skipping original F16 file: $FILE_NAME"
        continue
    fi

    # Get file size; without it the threshold comparison is meaningless
    FILE_SIZE=$(file_size_bytes "$FILE_NAME")
    if [ -z "$FILE_SIZE" ]; then
        echo "Warning: could not determine size of $FILE_NAME; skipping." >&2
        echo "----------------------------------------"
        continue
    fi

    if [ "$FILE_SIZE" -gt "$LIMIT_BYTES" ]; then
        echo "Processing: $FILE_NAME (Size: $(human_size "$FILE_SIZE"))"

        # Create directory name (remove .gguf extension)
        DIR_NAME="${FILE_NAME%.gguf}"

        if mkdir -p "$DIR_NAME"; then
            echo " Created directory: $DIR_NAME"
            echo " Splitting '$FILE_NAME' into '$DIR_NAME/'..."
            split_into_dir "$FILE_NAME" "$DIR_NAME"
            SPLIT_EXIT=$?
        else
            SPLIT_EXIT=1
        fi

        if [ "$SPLIT_EXIT" -eq 0 ]; then
            echo "✅ Split successful. Removing original large file: $FILE_NAME"
            rm -- "$FILE_NAME"
        else
            echo "❌ Splitting failed for $FILE_NAME (Error code: $SPLIT_EXIT). Keeping original file."
        fi
    else
        echo "Skipping $FILE_NAME (Size: $(human_size "$FILE_SIZE")) - smaller than 40GB."
    fi
    echo "----------------------------------------"
done < <(find . -maxdepth 1 -type f -name "*.gguf" -print0)

echo "GGUF splitting complete."
|