// Hugging Face Space: amogneandualem/amogne-vlm-LLM (status: Running)
// File size: 5,694 Bytes
// Revision (presumably a commit hash): 99aa0fa

// Configuration
// NOTE(review): HF_TOKEN is not referenced anywhere in the visible part of this
// file. This script runs in the browser, so any value placed here is readable
// by every visitor — keep it a placeholder. TODO confirm it is still needed.
const HF_TOKEN = "YOUR_HUGGINGFACE_TOKEN"; // Get from https://huggingface.co/settings/tokens
// Base URL prefixed to every backend request in this file
// (/health, /analyze, /detailed-analysis).
const BACKEND_URL = "http://localhost:5000"; // Your Flask backend

// Shared capture state (module-level; mutated by the camera and upload code)
// Stream returned by getUserMedia while the camera is on; null while it is off.
let cameraStream = null;
// Image to analyze: the Blob produced by the canvas snapshot or the File picked
// in the upload input; null until the user has provided one.
let capturedImage = null;

/**
 * Queries the backend `/health` endpoint and mirrors its `hf_status` field
 * into the `#hfStatus` element: green when the value is 'Available', red for
 * anything else.
 *
 * Every failure (network error, non-2xx response, unparseable body) is shown
 * as a red 'Unavailable' instead of being propagated, so the function can be
 * called from a timer without producing unhandled rejections.
 *
 * @returns {Promise<void>}
 */
async function checkHFConnection() {
    const statusEl = document.getElementById('hfStatus');
    if (!statusEl) {
        // No indicator on this page: nothing to update, and touching a null
        // element would throw in both the try and the catch path.
        return;
    }

    const showStatus = (text, isAvailable) => {
        statusEl.textContent = text;
        statusEl.style.color = isAvailable ? 'green' : 'red';
    };

    try {
        const response = await fetch(`${BACKEND_URL}/health`);
        if (!response.ok) {
            // An error response must not be rendered as if it were a status.
            throw new Error(`HTTP error! status: ${response.status}`);
        }
        const data = await response.json();
        showStatus(data.hf_status, data.hf_status === 'Available');
    } catch (error) {
        showStatus('Unavailable', false);
    }
}

// Camera Functions
/**
 * Turns the camera on and shows its live feed.
 *
 * Requests a video stream (environment-facing camera preferred, 1280x720
 * ideal), stores it in the shared `cameraStream`, attaches it to the
 * `#cameraFeed` element, shows `#videoContainer` and hides the file input.
 * Failures are reported to the user with an alert; nothing is thrown.
 *
 * @returns {Promise<void>}
 */
async function startCamera() {
    // Release a stream left over from an earlier call. Overwriting the
    // reference without stopping its tracks would leave the camera running
    // with no way to turn it off.
    if (cameraStream) {
        cameraStream.getTracks().forEach(track => track.stop());
        cameraStream = null;
    }

    try {
        cameraStream = await navigator.mediaDevices.getUserMedia({
            video: {
                facingMode: 'environment',
                width: { ideal: 1280 },
                height: { ideal: 720 }
            }
        });

        const video = document.getElementById('cameraFeed');
        video.srcObject = cameraStream;
        document.getElementById('videoContainer').style.display = 'block';

        // Hide upload option when camera is active
        const fileInput = document.querySelector('input[type="file"]');
        if (fileInput) {
            fileInput.style.display = 'none';
        }
    } catch (error) {
        alert(`Camera Error: ${error.message}`);
    }
}

/**
 * Turns the camera off: stops every track of the active stream, clears the
 * shared `cameraStream` reference, hides `#videoContainer` and makes the file
 * input visible again. Does nothing when no stream is active.
 */
function stopCamera() {
    if (!cameraStream) {
        return;
    }

    for (const mediaTrack of cameraStream.getTracks()) {
        mediaTrack.stop();
    }
    cameraStream = null;

    const videoContainer = document.getElementById('videoContainer');
    videoContainer.style.display = 'none';

    const uploadInput = document.querySelector('input[type="file"]');
    uploadInput.style.display = 'inline-block';
}

/**
 * Takes a still from the live camera feed.
 *
 * Draws the current `#cameraFeed` frame onto an off-screen canvas at the
 * video's native resolution, encodes it as JPEG (quality 0.9), stores the
 * resulting Blob in the shared `capturedImage` and hands an object URL for it
 * to `displayCapturedImage`. The encode step is asynchronous, so the state is
 * updated after this function has returned.
 *
 * If there is no frame to capture, or encoding fails, the user gets an alert
 * and the previously captured image (if any) is left untouched.
 */
function captureImage() {
    const video = document.getElementById('cameraFeed');

    // videoWidth/videoHeight stay 0 until the stream has delivered a frame;
    // a 0x0 canvas cannot be encoded and would yield a null blob.
    if (!video || !video.videoWidth || !video.videoHeight) {
        alert('Camera is not ready yet. Please start the camera and try again.');
        return;
    }

    const canvas = document.createElement('canvas');
    canvas.width = video.videoWidth;
    canvas.height = video.videoHeight;

    const ctx = canvas.getContext('2d');
    ctx.drawImage(video, 0, 0, canvas.width, canvas.height);

    canvas.toBlob(blob => {
        // toBlob reports an encoding failure by passing null.
        if (!blob) {
            alert('Could not capture an image from the camera.');
            return;
        }
        capturedImage = blob;
        displayCapturedImage(URL.createObjectURL(blob));
    }, 'image/jpeg', 0.9);
}

// When the user picks a file in #fileUpload, make it the current image and
// preview it. An empty selection is ignored.
document.getElementById('fileUpload').addEventListener('change', (event) => {
    const selectedFile = event.target.files[0];
    if (!selectedFile) {
        return;
    }

    capturedImage = selectedFile;
    displayCapturedImage(URL.createObjectURL(selectedFile));
});

/**
 * Shows the given image in the preview area and turns the camera off.
 *
 * Sets `#previewImage`'s source to `url`, reveals `#imagePreview`, and calls
 * `stopCamera()`. If the preview was previously showing a different `blob:`
 * URL, that URL is revoked so the Blob behind it can be garbage-collected.
 *
 * @param {string} url - Image URL to display (callers in this file pass an
 *     object URL created with `URL.createObjectURL`).
 */
function displayCapturedImage(url) {
    const preview = document.getElementById('previewImage');
    const previousUrl = preview.src;
    preview.src = url;

    // Object URLs keep their Blob alive until revoked; without this every
    // capture/upload leaks the image it replaced.
    if (typeof previousUrl === 'string'
            && previousUrl !== url
            && previousUrl.startsWith('blob:')) {
        URL.revokeObjectURL(previousUrl);
    }

    document.getElementById('imagePreview').style.display = 'block';
    stopCamera(); // Stop camera when image is captured/uploaded
}

/**
 * Sends the current image to the backend `/analyze` endpoint and renders the
 * response.
 *
 * Reads the selected `#language`, `#vlmModel` and `#llamaModel` values, posts
 * them with the image as multipart form data, then fills `#basicDescription`,
 * `#detailedAnalysis`, `#translation` and `#jsonOutput` from the JSON reply
 * (falling back to placeholder text for missing fields).
 *
 * All four output fields are reset before the request starts, so results from
 * an earlier image are never shown next to a new image or a new error. On
 * failure the error message is written to `#basicDescription`; nothing is
 * thrown. If no image has been captured or uploaded the user is alerted and
 * no request is made.
 *
 * @returns {Promise<void>}
 */
async function generateDescription() {
    if (!capturedImage) {
        alert('Please capture or upload an image first.');
        return;
    }

    const setText = (id, text) => {
        document.getElementById(id).textContent = text;
    };

    // Show results section
    document.getElementById('results').style.display = 'block';

    // Drop output from a previous run so it cannot be mistaken for the
    // result of this request while it is pending or after it fails.
    setText('basicDescription', 'Analyzing image...');
    setText('detailedAnalysis', '');
    setText('translation', '');
    setText('jsonOutput', '');

    // Get user selections
    const language = document.getElementById('language').value;
    const vlmModel = document.getElementById('vlmModel').value;
    const llamaModel = document.getElementById('llamaModel').value;

    // Create FormData for the image and parameters
    // NOTE(review): uploaded files are also sent under the fixed name
    // 'captured.jpg' regardless of their real type — confirm the backend does
    // not rely on the extension.
    const formData = new FormData();
    formData.append('image', capturedImage, 'captured.jpg');
    formData.append('language', language);
    formData.append('vlm_model', vlmModel);
    formData.append('llama_model', llamaModel);

    try {
        // Send to backend
        const response = await fetch(`${BACKEND_URL}/analyze`, {
            method: 'POST',
            body: formData
        });

        if (!response.ok) {
            throw new Error(`HTTP error! status: ${response.status}`);
        }

        const results = await response.json();

        // Display results
        setText('basicDescription', results.basic_description || 'No description generated');
        setText('detailedAnalysis', results.detailed_analysis || 'No detailed analysis');
        setText('translation', results.translation || 'No translation');
        setText('jsonOutput', JSON.stringify(results, null, 2));
    } catch (error) {
        console.error('Error:', error);
        // The other fields were already cleared above; #results is already visible.
        setText('basicDescription', `Error: ${error.message}`);
    }
}

/**
 * Requests a detailed analysis of an image from the backend
 * `/detailed-analysis` endpoint.
 *
 * This is a simplified approach - in production, you'd want to:
 *   1. Use an object detector (like YOLO) to get bounding boxes
 *   2. Crop image regions
 *   3. Use BLIP/Llama to describe each region
 *   4. Synthesize into comprehensive description
 *
 * Never throws: every failure is returned as `{ error: string }`. For a
 * non-2xx response the backend's own JSON body is returned when it carries an
 * `error` field; otherwise a generic HTTP error is reported, so an error page
 * can neither pass as a successful result nor surface as a JSON parse error.
 *
 * @param {Blob} imageBlob - Image to analyze; sent as the `image` form field.
 * @param {string} llamaModel - Model identifier; sent as `llama_model`.
 * @returns {Promise<Object>} Parsed JSON reply, or `{ error }` on failure.
 */
async function getDetailedObjectAnalysis(imageBlob, llamaModel) {
    const formData = new FormData();
    formData.append('image', imageBlob);
    formData.append('llama_model', llamaModel);

    try {
        const response = await fetch(`${BACKEND_URL}/detailed-analysis`, {
            method: 'POST',
            body: formData
        });

        if (!response.ok) {
            const httpError = { error: `HTTP error! status: ${response.status}` };
            try {
                const body = await response.json();
                // Prefer the backend's explanation when it supplied one.
                return body && body.error ? body : httpError;
            } catch (parseError) {
                // Error body was not JSON (e.g. an HTML error page).
                return httpError;
            }
        }

        return await response.json();
    } catch (error) {
        return { error: error.message };
    }
}

// Startup: once the page has loaded, check backend health immediately and
// then keep re-checking on a fixed interval.
window.onload = () => {
    const healthPollIntervalMs = 30000; // 30 seconds between checks

    checkHFConnection();
    setInterval(checkHFConnection, healthPollIntervalMs);
};