satyaki-mitra committed
Commit 2473068 · 1 Parent(s): b213f99

Fix: Handle None LLAMA_CPP_MODEL_PATH and pre-download model

Files changed (3):
  1. Dockerfile +13 -1
  2. config/settings.py +4 -4
  3. model_manager/llm_manager.py +7 -2
Dockerfile CHANGED
@@ -5,6 +5,7 @@ ENV PIP_NO_CACHE_DIR=1
 ENV DOCKER_CONTAINER=true
 ENV SPACE_APP_DATA=/data
 ENV HF_HOME=/data/huggingface
+ENV LLAMA_CPP_MODEL_PATH=/data/models/Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf
 
 # Optimize llama-cpp-python build for CPU only
 ENV CMAKE_ARGS="-DLLAMA_BLAS=0 -DLLAMA_CUBLAS=0"
@@ -37,6 +38,17 @@ RUN python -m spacy download en_core_web_sm
 # Create directories that your app expects
 RUN mkdir -p /data/models /data/uploads /data/cache /data/logs /data/huggingface
 
+# Download GGUF model during build (BEFORE copying app code)
+RUN python -c "from huggingface_hub import hf_hub_download; \
+import shutil; \
+downloaded = hf_hub_download( \
+repo_id='NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF', \
+filename='Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf', \
+cache_dir='/data/huggingface' \
+); \
+shutil.copy(downloaded, '/data/models/Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf')" && \
+echo "Model downloaded to /data/models/"
+
 # Copy app code
 COPY . .
 
@@ -46,7 +58,7 @@ RUN chmod -R 755 /app && \
 
 # Health check
 HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
-    CMD curl -f http://localhost:7860/docs || exit 1 # Changed to /docs endpoint
+    CMD curl -f http://localhost:7860/api/v1/health || exit 1
 
 EXPOSE 7860
 
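For reference, the quoted one-liner in the new RUN instruction unrolls to roughly the script below. This is a sketch for exercising the download step outside the image build: hf_hub_download with repo_id/filename/cache_dir is the actual huggingface_hub API used in the diff, while the /data paths assume the container layout and would need adjusting for a local run.

    from pathlib import Path
    import shutil

    from huggingface_hub import hf_hub_download

    # Same repo, file, and cache directory as the RUN step above.
    downloaded = hf_hub_download(
        repo_id="NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF",
        filename="Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf",
        cache_dir="/data/huggingface",
    )

    # Copy out of the HF cache to the fixed path the app reads,
    # i.e. the LLAMA_CPP_MODEL_PATH default set in the ENV line above.
    target = Path("/data/models/Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf")
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(downloaded, target)
    print(f"Model downloaded to {target}")

Baking the multi-gigabyte GGUF file into the image trades a larger build for startups that never hit the network; the health check likewise moves off the generic /docs page to the app's own /api/v1/health endpoint.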
config/settings.py CHANGED
@@ -91,10 +91,10 @@ class Settings(BaseSettings):
     HF_API_TOKEN : Optional[str] = None # HF token for gated models
 
     # LLM Generation Settings (Shared across providers)
-    LLM_TEMPERATURE : float = 0.1 # Default for all providers
-    LLM_MAX_TOKENS : int = 1024 # Default for all providers
-    LLM_TOP_P : float = 0.95 # Default top-p sampling
-    LLM_REPEAT_PENALTY : float = 1.1 # Default repeat penalty
+    LLM_TEMPERATURE : float = 0.1 # Default for all providers
+    LLM_MAX_TOKENS : int = 1024 # Default for all providers
+    LLM_TOP_P : float = 0.95 # Default top-p sampling
+    LLM_REPEAT_PENALTY : float = 1.1 # Default repeat penalty
     LLM_SYSTEM_PROMPT : str = "You are a helpful legal assistant specializing in contract analysis and risk assessment."
 
     # Analysis Limits
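The config/settings.py hunk looks like a whitespace-only realignment: the four generation defaults carry the same values on both sides of the diff. Since they live on a pydantic BaseSettings subclass (per the hunk header), each one can still be overridden per deployment through an environment variable of the same name. A minimal sketch, assuming pydantic v1's BaseSettings import (pydantic v2 moves it into the separate pydantic-settings package):

    from pydantic import BaseSettings

    class Settings(BaseSettings):
        # Defaults mirror the diff; an env var of the same name wins,
        # e.g. `export LLM_TEMPERATURE=0.3` before startup.
        LLM_TEMPERATURE: float = 0.1
        LLM_MAX_TOKENS: int = 1024
        LLM_TOP_P: float = 0.95
        LLM_REPEAT_PENALTY: float = 1.1

    settings = Settings()
    print(settings.LLM_TEMPERATURE, settings.LLM_MAX_TOKENS)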
model_manager/llm_manager.py CHANGED
@@ -652,10 +652,15 @@ class LLMManager:
         """
         Lazy load the Llama.cpp model
         """
-        log_info("Loading Llama.cpp model", model_path=str(settings.LLAMA_CPP_MODEL_PATH))
+        # Handle None model path
+        if settings.LLAMA_CPP_MODEL_PATH is None:
+            settings.LLAMA_CPP_MODEL_PATH = settings.MODEL_CACHE_DIR / settings.LLAMA_CPP_MODEL_FILE
+            log_info(f"Model path was None, set to: {settings.LLAMA_CPP_MODEL_PATH}")
+
+        log_info("Loading Llama.cpp model", model_path = str(settings.LLAMA_CPP_MODEL_PATH))
 
         # Ensure model exists, download if needed
-        if( not settings.LLAMA_CPP_MODEL_PATH.exists()):
+        if (not settings.LLAMA_CPP_MODEL_PATH.exists()):
             self._download_llama_cpp_model()
 
         # Load model with appropriate GPU layers / CPU loading
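The llm_manager.py change is the defensive half of the fix: previously a None LLAMA_CPP_MODEL_PATH would fail at the .exists() check, whereas now the loader first falls back to MODEL_CACHE_DIR / LLAMA_CPP_MODEL_FILE. The same resolution flow in isolation, as a sketch (names are taken from the diff; the /data/models value assumes MODEL_CACHE_DIR matches where the Dockerfile copies the weights):

    from pathlib import Path
    from typing import Optional

    def resolve_model_path(configured: Optional[Path],
                           cache_dir: Path,
                           model_file: str) -> Path:
        # None falls back to cache_dir / model_file, mirroring the
        # new guard added before the load in the hunk above.
        return configured if configured is not None else cache_dir / model_file

    path = resolve_model_path(
        None,
        Path("/data/models"),
        "Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf",
    )
    assert path == Path("/data/models/Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf")

Between the ENV default baked into the image, the build-time download, and this runtime fallback, the model path should resolve to an existing file on every code path before Llama.cpp attempts to load it.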