ehartford committed on
Commit
793fac3
·
verified ·
1 Parent(s): e207395

Add files using upload-large-folder tool

Browse files
Files changed (3) hide show
  1. Modelfile +53 -0
  2. quantize_all.sh +115 -0
  3. split_big_quants.sh +60 -0
Modelfile ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Ollama Modelfile for Ina-v11.1 at Q4_0 quantization.
# The TEMPLATE below is the Llama 3.1 chat format with optional tool calling:
# an optional system turn (with tool instructions when .Tools is set),
# then one turn per message for the user / assistant / ipython (tool) roles.
# NOTE: whitespace inside TEMPLATE is significant — it is emitted verbatim
# into the prompt, so do not reflow it.
FROM ./Ina-v11.1-Q4_0.gguf

TEMPLATE """{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
{{- if .System }}

{{ .System }}
{{- end }}
{{- if .Tools }}

Cutting Knowledge Date: December 2023

When you receive a tool call response, use the output to format an answer to the original user question.

You are a helpful assistant with tool calling capabilities.
{{- end }}<|eot_id|>
{{- end }}
{{- range $i, $_ := .Messages }}
{{- $last := eq (len (slice $.Messages $i)) 1 }}
{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
{{- if and $.Tools $last }}

Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.

Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.

{{ range $.Tools }}
{{- . }}
{{ end }}
Question: {{ .Content }}<|eot_id|>
{{- else }}

{{ .Content }}<|eot_id|>
{{- end }}{{ if $last }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
{{- if .ToolCalls }}
{{ range .ToolCalls }}
{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
{{- else }}

{{ .Content }}
{{- end }}{{ if not $last }}<|eot_id|>{{ end }}
{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>

{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>

{{ end }}
{{- end }}
{{- end }}"""

# Stop generation at the Llama 3 turn/header markers.
PARAMETER stop <|start_header_id|>
PARAMETER stop <|end_header_id|>
PARAMETER stop <|eot_id|>
quantize_all.sh ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Batch-quantize a single F16 GGUF model into a list of quantization types
# using llama.cpp's llama-quantize. Any output larger than 40 GiB is split
# into a directory of <=40G shards with llama-gguf-split and the oversized
# single file is removed.
#
# Expects exactly one "*F16.gguf" / "*f16.gguf" file in the current directory.

# -u catches typo'd variables; we do NOT use -e because a failed quantization
# of one type must not abort the remaining types (handled explicitly below).
set -u

# Source the llama.cpp Python environment (used by its helper scripts).
source ~/git/llama.cpp/.venv/bin/activate

# Paths to the llama.cpp tools (hoisted out of the loop — they never change).
QUANTIZER=~/git/llama.cpp/build/bin/llama-quantize
SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split

if [ ! -x "$QUANTIZER" ]; then
  echo "Error: llama-quantize not found or not executable at $QUANTIZER" >&2
  exit 1
fi

# Split threshold: 40 GiB = 40 * 1024^3 = 42949672960 bytes.
# (The original comment claimed 52949672960, which is wrong.)
readonly LIMIT_BYTES=42949672960

# Detect thread count for max performance (macOS sysctl, Linux nproc fallback).
THREADS=$(sysctl -n hw.logicalcpu 2>/dev/null || nproc)
echo "Detected $THREADS threads."

# Find the input file (F16 or f16 in the name, current directory only).
INPUT_FILE=$(find . -maxdepth 1 -name "*[Ff]16.gguf" | head -n 1)

if [ -z "$INPUT_FILE" ]; then
  echo "Error: No F16 GGUF file found in the current directory." >&2
  exit 1
fi

# Remove leading ./ for cleaner filenames.
INPUT_FILE=${INPUT_FILE#./}

echo "Found input file: $INPUT_FILE"

# Quantization types to produce.
TYPES=(
  "IQ3_M"
  "IQ3_XS"
  "IQ3_XXS"
  "IQ4_NL"
  "IQ4_XS"
  "Q3_K_L"
  "Q3_K_M"
  "Q3_K_S"
  "Q3_K_XL"
  "Q4_0"
  "Q4_1"
  "Q4_K_L"
  "Q4_K_M"
  "Q4_K_S"
  "Q5_K_L"
  "Q5_K_M"
  "Q5_K_S"
  "Q6_K"
  "Q6_K_L"
  "Q8_0"
)

echo "Starting batch quantization..."
echo "----------------------------------------"

for TYPE in "${TYPES[@]}"; do
  # Construct the output name by replacing F16/f16 with the quant type.
  OUTPUT_FILE="${INPUT_FILE/F16/$TYPE}"
  OUTPUT_FILE="${OUTPUT_FILE/f16/$TYPE}"

  # If neither substitution matched, append the type before the extension.
  if [ "$OUTPUT_FILE" == "$INPUT_FILE" ]; then
    OUTPUT_FILE="${INPUT_FILE%.gguf}-$TYPE.gguf"
  fi

  echo "Quantizing to $TYPE..."
  "$QUANTIZER" "$INPUT_FILE" "$OUTPUT_FILE" "$TYPE" "$THREADS"

  EXIT_CODE=$?
  if [ $EXIT_CODE -eq 0 ]; then
    echo "✅ Successfully created $OUTPUT_FILE"

    # File size: BSD/macOS stat first, GNU stat fallback.
    FILE_SIZE=$(stat -f%z "$OUTPUT_FILE" 2>/dev/null || stat -c%s "$OUTPUT_FILE")

    if [ "$FILE_SIZE" -gt "$LIMIT_BYTES" ]; then
      echo "File size ($FILE_SIZE bytes) exceeds 40GB. Splitting into directory..."

      # Directory named after the file, minus the .gguf extension.
      DIR_NAME="${OUTPUT_FILE%.gguf}"
      mkdir -p "$DIR_NAME"

      echo "  Splitting '$OUTPUT_FILE' into '$DIR_NAME/'..."

      # Run the split from inside the target directory.
      # llama-gguf-split argument order: flags, GGUF_IN, GGUF_OUT prefix.
      pushd "$DIR_NAME" > /dev/null

      "$SPLIT_TOOL" --split-max-size 40G "../$OUTPUT_FILE" "$(basename "$OUTPUT_FILE" .gguf)"

      SPLIT_EXIT=$?

      popd > /dev/null

      if [ $SPLIT_EXIT -eq 0 ]; then
        echo "✅ Split successful. Removing original large file."
        rm -- "$OUTPUT_FILE"
      else
        echo "❌ Splitting failed. Keeping original file." >&2
      fi
    fi

  else
    echo "❌ Failed to create $OUTPUT_FILE (Error code: $EXIT_CODE)" >&2
    echo "   (Note: '$TYPE' might not be a valid quantization type in this version of llama.cpp)" >&2
  fi
  echo "----------------------------------------"
done

echo "Batch quantization complete."
split_big_quants.sh ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Split every .gguf in the current directory larger than 40 GiB into a
# directory of <=40G shards using llama.cpp's llama-gguf-split, then remove
# the oversized single file. The original full-precision (F16) model file
# is always skipped.

# -u catches typo'd variables; no -e so one failed split does not abort
# the remaining files (failures are handled explicitly below).
set -u

# Path to llama-gguf-split.
SPLIT_TOOL=~/git/llama.cpp/build/bin/llama-gguf-split

# Split threshold: 40 GiB = 40 * 1024^3 = 42949672960 bytes.
readonly LIMIT_BYTES=42949672960

# File size in bytes: BSD/macOS stat first, GNU stat fallback.
file_size() {
  stat -f%z "$1" 2>/dev/null || stat -c%s "$1"
}

# Human-readable size; numfmt is GNU coreutils and absent on stock macOS,
# so fall back to the raw byte count.
human_size() {
  numfmt --to=iec-i --suffix=B "$1" 2>/dev/null || echo "$1 bytes"
}

echo "Starting GGUF splitting for files > 40GB..."
echo "----------------------------------------"

# Iterate over all .gguf files in the current directory, NUL-delimited so
# names with spaces survive.
find . -maxdepth 1 -name "*.gguf" -print0 | while IFS= read -r -d '' FILE; do
  # Remove leading ./ for cleaner filename display.
  FILE_NAME=${FILE#./}

  # Skip the original full-precision file — same *[Ff]16.gguf pattern that
  # quantize_all.sh uses to locate its input.
  case "$FILE_NAME" in
    *F16.gguf|*f16.gguf)
      echo "Skipping original F16 file: $FILE_NAME"
      continue
      ;;
  esac

  FILE_SIZE=$(file_size "$FILE_NAME")

  if [ "$FILE_SIZE" -gt "$LIMIT_BYTES" ]; then
    echo "Processing: $FILE_NAME (Size: $(human_size "$FILE_SIZE"))"

    # Directory named after the file, minus the .gguf extension.
    DIR_NAME="${FILE_NAME%.gguf}"
    mkdir -p "$DIR_NAME"

    echo "  Created directory: $DIR_NAME"
    echo "  Splitting '$FILE_NAME' into '$DIR_NAME/'..."

    # Run the split from inside the target directory.
    # llama-gguf-split argument order: flags, GGUF_IN, GGUF_OUT prefix.
    pushd "$DIR_NAME" > /dev/null

    "$SPLIT_TOOL" --split-max-size 40G "../$FILE_NAME" "$(basename "$FILE_NAME" .gguf)"

    SPLIT_EXIT=$?

    popd > /dev/null

    if [ $SPLIT_EXIT -eq 0 ]; then
      echo "✅ Split successful. Removing original large file: $FILE_NAME"
      rm -- "$FILE_NAME"
    else
      echo "❌ Splitting failed for $FILE_NAME (Error code: $SPLIT_EXIT). Keeping original file." >&2
    fi
  else
    echo "Skipping $FILE_NAME (Size: $(human_size "$FILE_SIZE")) - smaller than 40GB."
  fi
  echo "----------------------------------------"
done

echo "GGUF splitting complete."