ehartford committed on
Commit
793fac3
·
verified ·
1 Parent(s): e207395

Add files using upload-large-folder tool

Browse files
Files changed (3) hide show
  1. Modelfile +53 -0
  2. quantize_all.sh +115 -0
  3. split_big_quants.sh +60 -0
Modelfile ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Ollama Modelfile for Ina-v11.1 at Q4_0 quantization.
# The TEMPLATE below is the Llama 3.1 chat format with optional tool calling:
# an optional system turn (with tool instructions when .Tools is set),
# then one turn per message for the user / assistant / ipython (tool) roles.
# NOTE: whitespace inside TEMPLATE is significant — it is emitted verbatim
# into the prompt, so do not reflow it.
FROM ./Ina-v11.1-Q4_0.gguf

TEMPLATE """{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
{{- if .System }}

{{ .System }}
{{- end }}
{{- if .Tools }}

Cutting Knowledge Date: December 2023

When you receive a tool call response, use the output to format an answer to the original user question.

You are a helpful assistant with tool calling capabilities.
{{- end }}<|eot_id|>
{{- end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 }}
{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
{{- if and $.Tools $last }}

Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.

Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.

{{ range $.Tools }}
{{- . }}
{{ end }}
Question: {{ .Content }}<|eot_id|>
{{- else }}

{{ .Content }}<|eot_id|>
{{- end }}{{ if $last }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
{{- if .ToolCalls }}
{{ range .ToolCalls }}
{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
{{- else }}

{{ .Content }}
{{- end }}{{ if not $last }}<|eot_id|>{{ end }}
{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>

{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- end }}
{{- end }}"""

# Stop generation at the Llama 3 turn/header markers.
PARAMETER stop <|start_header_id|>
PARAMETER stop <|end_header_id|>
PARAMETER stop <|eot_id|>
quantize_all.sh ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Batch-quantize a single F16 GGUF model into a list of quantization types
# using llama.cpp's llama-quantize. Any output larger than 40 GiB is split
# into a directory of <=40G shards with llama-gguf-split and the oversized
# single file is removed.
#
# Expects exactly one "*F16.gguf" / "*f16.gguf" file in the current directory.

# -u catches typo'd variables; we do NOT use -e because a failed quantization
# of one type must not abort the remaining types (handled explicitly below).
set -u

# Source the llama.cpp Python environment (used by its helper scripts).
source ~/git/llama.cpp/.venv/bin/activate

# Paths to the llama.cpp tools (hoisted out of the loop — they never change).
QUANTIZER=~/git/llama.cpp/build/bin/llama-quantize
SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split

if [ ! -x "$QUANTIZER" ]; then
  echo "Error: llama-quantize not found or not executable at $QUANTIZER" >&2
  exit 1
fi

# Split threshold: 40 GiB = 40 * 1024^3 = 42949672960 bytes.
# (The original comment claimed 52949672960, which is wrong.)
readonly LIMIT_BYTES=42949672960

# Detect thread count for max performance (macOS sysctl, Linux nproc fallback).
THREADS=$(sysctl -n hw.logicalcpu 2>/dev/null || nproc)
echo "Detected $THREADS threads."

# Find the input file (F16 or f16 in the name, current directory only).
INPUT_FILE=$(find . -maxdepth 1 -name "*[Ff]16.gguf" | head -n 1)

if [ -z "$INPUT_FILE" ]; then
  echo "Error: No F16 GGUF file found in the current directory." >&2
  exit 1
fi

# Remove leading ./ for cleaner filenames.
INPUT_FILE=${INPUT_FILE#./}

echo "Found input file: $INPUT_FILE"

# Quantization types to produce.
TYPES=(
  "IQ3_M"
  "IQ3_XS"
  "IQ3_XXS"
  "IQ4_NL"
  "IQ4_XS"
  "Q3_K_L"
  "Q3_K_M"
  "Q3_K_S"
  "Q3_K_XL"
  "Q4_0"
  "Q4_1"
  "Q4_K_L"
  "Q4_K_M"
  "Q4_K_S"
  "Q5_K_L"
  "Q5_K_M"
  "Q5_K_S"
  "Q6_K"
  "Q6_K_L"
  "Q8_0"
)

echo "Starting batch quantization..."
echo "----------------------------------------"

for TYPE in "${TYPES[@]}"; do
  # Construct the output name by replacing F16/f16 with the quant type.
  OUTPUT_FILE="${INPUT_FILE/F16/$TYPE}"
  OUTPUT_FILE="${OUTPUT_FILE/f16/$TYPE}"

  # If neither substitution matched, append the type before the extension.
  if [ "$OUTPUT_FILE" == "$INPUT_FILE" ]; then
    OUTPUT_FILE="${INPUT_FILE%.gguf}-$TYPE.gguf"
  fi

  echo "Quantizing to $TYPE..."
  "$QUANTIZER" "$INPUT_FILE" "$OUTPUT_FILE" "$TYPE" "$THREADS"

  EXIT_CODE=$?
  if [ $EXIT_CODE -eq 0 ]; then
    echo "✅ Successfully created $OUTPUT_FILE"

    # File size: BSD/macOS stat first, GNU stat fallback.
    FILE_SIZE=$(stat -f%z "$OUTPUT_FILE" 2>/dev/null || stat -c%s "$OUTPUT_FILE")

    if [ "$FILE_SIZE" -gt "$LIMIT_BYTES" ]; then
      echo "File size ($FILE_SIZE bytes) exceeds 40GB. Splitting into directory..."

      # Directory named after the file, minus the .gguf extension.
      DIR_NAME="${OUTPUT_FILE%.gguf}"
      mkdir -p "$DIR_NAME"

      echo "  Splitting '$OUTPUT_FILE' into '$DIR_NAME/'..."

      # Run the split from inside the target directory.
      # llama-gguf-split argument order: flags, GGUF_IN, GGUF_OUT prefix.
      pushd "$DIR_NAME" > /dev/null

      "$SPLIT_TOOL" --split-max-size 40G "../$OUTPUT_FILE" "$(basename "$OUTPUT_FILE" .gguf)"

      SPLIT_EXIT=$?

      popd > /dev/null

      if [ $SPLIT_EXIT -eq 0 ]; then
        echo "✅ Split successful. Removing original large file."
        rm -- "$OUTPUT_FILE"
      else
        echo "❌ Splitting failed. Keeping original file." >&2
      fi
    fi

  else
    echo "❌ Failed to create $OUTPUT_FILE (Error code: $EXIT_CODE)" >&2
    echo "   (Note: '$TYPE' might not be a valid quantization type in this version of llama.cpp)" >&2
  fi
  echo "----------------------------------------"
done

echo "Batch quantization complete."
split_big_quants.sh ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Split every .gguf in the current directory larger than 40 GiB into a
# directory of <=40G shards using llama.cpp's llama-gguf-split, then remove
# the oversized single file. The original full-precision (F16) model file
# is always skipped.

# -u catches typo'd variables; no -e so one failed split does not abort
# the remaining files (failures are handled explicitly below).
set -u

# Path to llama-gguf-split.
SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split

# Split threshold: 40 GiB = 40 * 1024^3 = 42949672960 bytes.
readonly LIMIT_BYTES=42949672960

# File size in bytes: BSD/macOS stat first, GNU stat fallback.
file_size() {
  stat -f%z "$1" 2>/dev/null || stat -c%s "$1"
}

# Human-readable size; numfmt is GNU coreutils and absent on stock macOS,
# so fall back to the raw byte count.
human_size() {
  numfmt --to=iec-i --suffix=B "$1" 2>/dev/null || echo "$1 bytes"
}

echo "Starting GGUF splitting for files > 40GB..."
echo "----------------------------------------"

# Iterate over all .gguf files in the current directory, NUL-delimited so
# names with spaces survive.
find . -maxdepth 1 -name "*.gguf" -print0 | while IFS= read -r -d '' FILE; do
  # Remove leading ./ for cleaner filename display.
  FILE_NAME=${FILE#./}

  # Skip the original full-precision file — same *[Ff]16.gguf pattern that
  # quantize_all.sh uses to locate its input.
  case "$FILE_NAME" in
    *F16.gguf|*f16.gguf)
      echo "Skipping original F16 file: $FILE_NAME"
      continue
      ;;
  esac

  FILE_SIZE=$(file_size "$FILE_NAME")

  if [ "$FILE_SIZE" -gt "$LIMIT_BYTES" ]; then
    echo "Processing: $FILE_NAME (Size: $(human_size "$FILE_SIZE"))"

    # Directory named after the file, minus the .gguf extension.
    DIR_NAME="${FILE_NAME%.gguf}"
    mkdir -p "$DIR_NAME"

    echo "  Created directory: $DIR_NAME"
    echo "  Splitting '$FILE_NAME' into '$DIR_NAME/'..."

    # Run the split from inside the target directory.
    # llama-gguf-split argument order: flags, GGUF_IN, GGUF_OUT prefix.
    pushd "$DIR_NAME" > /dev/null

    "$SPLIT_TOOL" --split-max-size 40G "../$FILE_NAME" "$(basename "$FILE_NAME" .gguf)"

    SPLIT_EXIT=$?

    popd > /dev/null

    if [ $SPLIT_EXIT -eq 0 ]; then
      echo "✅ Split successful. Removing original large file: $FILE_NAME"
      rm -- "$FILE_NAME"
    else
      echo "❌ Splitting failed for $FILE_NAME (Error code: $SPLIT_EXIT). Keeping original file." >&2
    fi
  else
    echo "Skipping $FILE_NAME (Size: $(human_size "$FILE_SIZE")) - smaller than 40GB."
  fi
  echo "----------------------------------------"
done

echo "GGUF splitting complete."