Commit 2473068 · Parent: b213f99

Fix: Handle None LLAMA_CPP_MODEL_PATH and pre-download model

Files changed:
- Dockerfile (+13 −1)
- config/settings.py (+4 −4)
- model_manager/llm_manager.py (+7 −2)
Dockerfile (CHANGED)

@@ -5,6 +5,7 @@ ENV PIP_NO_CACHE_DIR=1
 ENV DOCKER_CONTAINER=true
 ENV SPACE_APP_DATA=/data
 ENV HF_HOME=/data/huggingface
+ENV LLAMA_CPP_MODEL_PATH=/data/models/Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf
 
 # Optimize llama-cpp-python build for CPU only
 ENV CMAKE_ARGS="-DLLAMA_BLAS=0 -DLLAMA_CUBLAS=0"
@@ -37,6 +38,17 @@ RUN python -m spacy download en_core_web_sm
 # Create directories that your app expects
 RUN mkdir -p /data/models /data/uploads /data/cache /data/logs /data/huggingface
 
+# Download GGUF model during build (BEFORE copying app code)
+RUN python -c "from huggingface_hub import hf_hub_download; \
+    import shutil; \
+    downloaded = hf_hub_download( \
+        repo_id='NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF', \
+        filename='Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf', \
+        cache_dir='/data/huggingface' \
+    ); \
+    shutil.copy(downloaded, '/data/models/Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf')" && \
+    echo "Model downloaded to /data/models/"
+
 # Copy app code
 COPY . .
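Functionally, that single RUN step amounts to the standalone script below (an illustrative sketch, not a file in this repo; the script name and the mkdir call are additions, while the repo id, filename, cache dir, and target path mirror the Dockerfile):

# download_model.py - illustrative equivalent of the RUN step above
from pathlib import Path
import shutil

from huggingface_hub import hf_hub_download

MODELS_DIR = Path("/data/models")
TARGET = MODELS_DIR / "Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf"

def main() -> None:
    MODELS_DIR.mkdir(parents=True, exist_ok=True)
    # Download into the HF cache (HF_HOME=/data/huggingface), then copy the file
    # to the fixed path that LLAMA_CPP_MODEL_PATH points at.
    downloaded = hf_hub_download(
        repo_id="NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF",
        filename="Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf",
        cache_dir="/data/huggingface",
    )
    shutil.copy(downloaded, TARGET)
    print(f"Model downloaded to {TARGET}")

if __name__ == "__main__":
    main()

Doing this before COPY . . keeps the multi-gigabyte download in an early layer, so later edits to the app code do not invalidate the cached model layer.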
@@ -46,7 +58,7 @@ RUN chmod -R 755 /app && \
 
 # Health check
 HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
-    CMD curl -f http://localhost:7860/
+    CMD curl -f http://localhost:7860/api/v1/health || exit 1
 
 EXPOSE 7860
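The health check now probes /api/v1/health instead of the root path, and the explicit || exit 1 makes a failed curl mark the container unhealthy. The commit does not show the server side of that route; a minimal handler could look like the following sketch, where FastAPI and the router prefix are assumptions:

# Illustrative only: the app's actual framework and router layout are not shown
# in this commit; FastAPI and the "/api/v1" prefix are assumptions.
from fastapi import APIRouter

router = APIRouter(prefix="/api/v1")

@router.get("/health")
def health() -> dict:
    # Return HTTP 200 with a small JSON body so `curl -f` succeeds.
    return {"status": "ok"}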
config/settings.py (CHANGED)

@@ -91,10 +91,10 @@ class Settings(BaseSettings):
     HF_API_TOKEN : Optional[str] = None # HF token for gated models
 
     # LLM Generation Settings (Shared across providers)
-    LLM_TEMPERATURE : float = 0.1
-    LLM_MAX_TOKENS : int = 1024
-    LLM_TOP_P : float = 0.95
-    LLM_REPEAT_PENALTY : float = 1.1
+    LLM_TEMPERATURE : float = 0.1 # Default for all providers
+    LLM_MAX_TOKENS : int = 1024 # Default for all providers
+    LLM_TOP_P : float = 0.95 # Default top-p sampling
+    LLM_REPEAT_PENALTY : float = 1.1 # Default repeat penalty
     LLM_SYSTEM_PROMPT : str = "You are a helpful legal assistant specializing in contract analysis and risk assessment."
 
     # Analysis Limits
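These fields live on the Settings(BaseSettings) class, so each one can be overridden by an environment variable of the same name; that is why the new ENV LLAMA_CPP_MODEL_PATH line in the Dockerfile keeps that setting from being None in the first place. A trimmed sketch of the relevant fields follows (illustrative; only field names that appear in this commit come from the source, the types, defaults, and the BaseSettings import location depend on the project's pydantic version):

# Sketch of the relevant Settings fields; defaults other than those shown in the
# diff are assumptions.
from pathlib import Path
from typing import Optional

from pydantic import BaseSettings  # in pydantic v2 this moves to pydantic_settings


class Settings(BaseSettings):
    # Populated from the LLAMA_CPP_MODEL_PATH env var set in the Dockerfile;
    # stays None if the variable is absent (the case the llm_manager fix handles).
    LLAMA_CPP_MODEL_PATH: Optional[Path] = None
    MODEL_CACHE_DIR: Path = Path("/data/models")  # assumed default
    LLAMA_CPP_MODEL_FILE: str = "Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf"

    # LLM Generation Settings (Shared across providers)
    LLM_TEMPERATURE: float = 0.1
    LLM_MAX_TOKENS: int = 1024
    LLM_TOP_P: float = 0.95
    LLM_REPEAT_PENALTY: float = 1.1


settings = Settings()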
model_manager/llm_manager.py (CHANGED)

@@ -652,10 +652,15 @@ class LLMManager:
         """
         Lazy load the Llama.cpp model
         """
-
+        # Handle None model path
+        if settings.LLAMA_CPP_MODEL_PATH is None:
+            settings.LLAMA_CPP_MODEL_PATH = settings.MODEL_CACHE_DIR / settings.LLAMA_CPP_MODEL_FILE
+            log_info(f"Model path was None, set to: {settings.LLAMA_CPP_MODEL_PATH}")
+
+        log_info("Loading Llama.cpp model", model_path = str(settings.LLAMA_CPP_MODEL_PATH))
 
         # Ensure model exists, download if needed
-        if(
+        if (not settings.LLAMA_CPP_MODEL_PATH.exists()):
             self._download_llama_cpp_model()
 
         # Load model with appropriate GPU layers / CPU loading