# pages/99_Test_Results.py
"""Streamlit page that runs the hybrid-mapping validation cases.

Each case is sent through ``ai_core.generate_soap_draft`` in ``"mapping"``
mode. Per-case metrics are appended to ``RESULTS_CSV`` and a plain-text trace
is appended to ``RUN_LOG``.
"""
import os
import time
from pathlib import Path

import pandas as pd
import streamlit as st

from src import ai_core

TEST_OUT_DIR = Path("/data/econsult/tests")
# Created at import time so both output files below can be opened for append.
TEST_OUT_DIR.mkdir(parents=True, exist_ok=True)
RESULTS_CSV = TEST_OUT_DIR / "results.csv"
RUN_LOG = TEST_OUT_DIR / "run_logs.txt"


def _write_run_log(text: str) -> None:
    """Append ``text`` to ``RUN_LOG`` as one entry ending in a single newline.

    Trailing whitespace on ``text`` is stripped before the newline is added.
    """
    with RUN_LOG.open("a", encoding="utf-8") as f:
        f.write(text.rstrip() + "\n")


def _cases() -> list:
    """Two stable cases for hybrid mapping validation.

    Returns:
        A list of ``(case_id, intake)`` tuples, where ``intake`` is a dict of
        string fields (age, sex, chief_complaint, history, medications,
        allergies, labs, question).
    """
    lipids = {
        "age": "58",
        "sex": "F",
        "chief_complaint": "Statin intolerance and LDL management",
        "history": (
            "Hx of type 2 diabetes (A1c 7.6), prior NSTEMI 2019, BMI 31. "
            "Reports myalgias on high-intensity statin; stopped atorvastatin 80 mg after 3 weeks. "
            "No rhabdo. Hypothyroidism on levothyroxine. Smokes <5 cigs/day."
        ),
        "medications": "levothyroxine 100 mcg daily; ezetimibe 10 mg daily; no current statin.",
        "allergies": "no known drug allergies",
        "labs": "LDL 162 mg/dL; HDL 41; TG 210; AST/ALT normal; TSH 2.1",
        "question": "Evidence-based plan for LDL lowering in ASCVD with prior statin myalgias.",
    }
    ckd_dose = {
        "age": "72",
        "sex": "M",
        "chief_complaint": "Medication dosing question in CKD",
        "history": (
            "Stage 4 CKD likely diabetic nephropathy; eGFR 22 ml/min. "
            "HTN, CAD s/p PCI (2021). Recent cellulitis requiring antibiotics."
        ),
        "medications": "metoprolol succinate 50 mg daily; lisinopril 20 mg daily; aspirin 81 mg daily; rosuvastatin 10 mg nightly.",
        "allergies": "penicillin (rash)",
        "labs": "Cr 2.8 mg/dL; eGFR 22; K 5.0; A1c 8.1; LFTs WNL",
        "question": "Recommend renal-adjusted antibiotic options and statin dosing safety.",
    }
    return [("lipids", lipids), ("ckd_dose", ckd_dose)]


def _run_case(case_id: str, intake: dict, max_new_tokens: int) -> dict:
    """Run one validation case, report it, and return its metrics row.

    Args:
        case_id: Short identifier used in the log, the UI and the CSV row.
        intake: Intake fields passed unchanged to ``ai_core.generate_soap_draft``.
        max_new_tokens: Generation budget forwarded to the model call.

    Returns:
        A dict with the columns written to ``RESULTS_CSV``.

    Raises:
        KeyError: If the generation output has no ``"soap"`` entry
            (assumes the output is dict-like — confirm against ``ai_core``).
    """
    _write_run_log(f"--- Running case: {case_id} ---")
    st.write(f"Running case: `{case_id}` ...")

    # perf_counter is monotonic, so the measured interval cannot go negative
    # or jump if the system clock is adjusted while the model is running.
    t0 = time.perf_counter()
    out = ai_core.generate_soap_draft(
        intake,
        mode="mapping",
        max_new_tokens=max_new_tokens,
        temperature=0.2,
        top_p=0.95,
        explain=False,  # default off
    )
    dt = time.perf_counter() - t0

    soap = out["soap"]
    mapping = out.get("mapping") or {}
    timings = out.get("timings") or {}

    assess_n = len(soap.get("assessment") or [])
    plan_n = len(soap.get("plan") or [])
    # A missing or zero "claims_count" falls back to the SOAP item total.
    annotated_n = mapping.get("claims_count") or (assess_n + plan_n)
    uniq_evid = mapping.get("unique_evidence_count") or 0
    cap = mapping.get("registry_cap")
    gen_secs = timings.get("generate_secs", 0)
    map_secs = timings.get("map_secs", 0)

    # Console telemetry mirrors prior style
    _write_run_log(
        f"Result: {{'case_id': '{case_id}', 'generate_secs': {gen_secs}, "
        f"'map_secs': {map_secs}, 'total_runtime': {dt:.3f}, "
        f"'assessment_items': {assess_n}, 'plan_items': {plan_n}, 'annotated_items': {annotated_n}, "
        f"'unique_evidence': {uniq_evid}, 'cache_stub': ''}}"
    )
    st.success(
        f"{case_id}: assessed {assess_n} / plan {plan_n} | claims {annotated_n} | unique evidence {uniq_evid} | "
        f"gen {gen_secs}s, map {map_secs}s"
    )
    print(f"[page99] MAP_MODE='validation' cap={cap} unique_evidence={uniq_evid}")

    return {
        "case_id": case_id,
        "generate_secs": gen_secs,
        "map_secs": map_secs,
        "total_runtime": round(dt, 3),
        "assessment_items": assess_n,
        "plan_items": plan_n,
        "annotated_items": annotated_n,
        "unique_evidence": uniq_evid,
    }


def _save_results(results: list) -> pd.DataFrame:
    """Append ``results`` to ``RESULTS_CSV`` and return the full table.

    Rows already in the CSV are kept and the new rows are added after them.
    An existing but empty file is treated as having no previous rows.
    """
    new_rows = pd.DataFrame(results)
    prev = None
    if RESULTS_CSV.exists():
        try:
            prev = pd.read_csv(RESULTS_CSV)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header to parse; start the table afresh
            # instead of losing the run that was just computed.
            prev = None

    if prev is None:
        df = new_rows
    else:
        df = pd.concat([prev, new_rows], ignore_index=True)
    df.to_csv(RESULTS_CSV, index=False)
    return df


def main():
    """Render the validation page and, on button press, run every case.

    Sets the ``MAP_MODE`` environment variable to ``"validation"`` for the
    current process, runs each case from ``_cases()``, appends the metrics to
    ``RESULTS_CSV`` and shows the accumulated table.
    """
    st.title("Step 99 — Hybrid Mapping Validation")

    # Pin validation mode
    # NOTE(review): this stays set for the rest of the process; presumably
    # ai_core reads it at call time — confirm.
    os.environ["MAP_MODE"] = "validation"
    st.info("MAP_MODE set to **validation** (registry cap = 20).")

    max_new_tokens = st.number_input("Max new tokens", min_value=300, max_value=1200, value=700, step=50)
    run_btn = st.button("Run validation")
    if not run_btn:
        st.stop()

    # One banner per run (not per case) so runs can be told apart in the log.
    _write_run_log("=== Hybrid Mapping Validation Run ===")
    results = [
        _run_case(case_id, intake, int(max_new_tokens))
        for case_id, intake in _cases()
    ]

    # Save results
    df = _save_results(results)
    _write_run_log(f"Results saved to: {RESULTS_CSV}")
    st.dataframe(df)
    st.success(f"Saved to: {RESULTS_CSV} and {RUN_LOG}")


if __name__ == "__main__":
    main()