File size: 2,324 Bytes
5f3adb3
 
 
 
 
 
 
 
 
 
 
 
 
3f97386
 
5f3adb3
3f97386
1260e28
 
3f97386
1260e28
 
3f97386
1260e28
3f97386
1260e28
 
 
 
3f97386
1260e28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os, requests, json, time
from utils.constants import CHAT_ENDPOINT_DEFAULT, REQUEST_TIMEOUT_SECONDS_DEFAULT, RETRIES_DEFAULT, BACKOFF_SECONDS_DEFAULT
from utils.persona import AI_GYNO_PERSONA_V2

def active_chat_endpoint():
    """Return the chat endpoint URL currently in effect.

    Resolution order: the HF_CHAT_ENDPOINT env var, then CHAT_ENDPOINT,
    then the package default CHAT_ENDPOINT_DEFAULT. Empty-string env
    values are treated as unset (falsy), matching the original `or` chain.
    """
    for var_name in ("HF_CHAT_ENDPOINT", "CHAT_ENDPOINT"):
        value = os.getenv(var_name)
        if value:
            return value
    return CHAT_ENDPOINT_DEFAULT

def _headers():
    tok = os.getenv("HF_API_TOKEN")
    return {"Authorization": f"Bearer {tok}","Content-Type":"application/json"} if tok else {}

def chat(user_message: str, mode: str = "patient"):
    """Send *user_message* to the HF text-generation endpoint and return the reply.

    Parameters
    ----------
    user_message : str
        Free-text narrative to embed in the prompt.
    mode : str
        "patient" selects simple, supportive language; any other value
        selects the clinician-oriented prompt (differentials, workup,
        red flags).

    Returns
    -------
    str
        Generated text on success, otherwise a user-facing warning/error
        string (emoji-prefixed) that callers display verbatim.
    """
    url = active_chat_endpoint()
    headers = _headers()
    if not headers:
        # _headers() returns {} only when HF_API_TOKEN is missing/empty.
        return "⚠ Add HF_API_TOKEN in Settings β†’ Secrets."

    system = AI_GYNO_PERSONA_V2 + ("\nUse simple, supportive language." if mode=='patient' else "\nProvide differentials, initial workup, and red flags.")
    prompt = f"""{system}

Patient narrative:
{user_message}

Assistant:"""

    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": 400, "temperature": 0.2, "return_full_text": False}
    }

    for attempt in range(1, RETRIES_DEFAULT + 1):
        try:
            r = requests.post(url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_SECONDS_DEFAULT)
            text = r.text
            try:
                data = r.json()
                # The router returns several response shapes; try each known one.
                if isinstance(data, list) and data and "generated_text" in data[0]:
                    return data[0]["generated_text"]
                if isinstance(data, dict) and "generated_text" in data:
                    return data["generated_text"]
                if isinstance(data, dict) and "outputs" in data and isinstance(data["outputs"], list) and data["outputs"]:
                    gt = data["outputs"][0].get("generated_text")
                    if gt:
                        return gt
                # Unknown JSON shape: surface a truncated dump for debugging.
                return json.dumps(data)[:1500]
            except Exception:
                # Non-JSON body (or unexpected structure). A "loading" message
                # means the model is warming up — back off linearly and retry.
                if "loading" in text.lower():
                    # BUGFIX: don't sleep after the final attempt — the
                    # original slept its longest backoff right before giving up.
                    if attempt < RETRIES_DEFAULT:
                        time.sleep(BACKOFF_SECONDS_DEFAULT * attempt)
                    continue
                if r.status_code == 404:
                    return "❌ 404 from router. Check model path: /hf-inference/text-generation/<MODEL>"
                return f"⚠ Non-JSON response:\n{text[:1000]}"
        except Exception:
            # Network/timeout failure: linear backoff, skipped on the last try
            # (same BUGFIX as above).
            if attempt < RETRIES_DEFAULT:
                time.sleep(BACKOFF_SECONDS_DEFAULT * attempt)
    return "❌ Endpoint unavailable after retries."