// Camera-capture / image-upload frontend for a Flask image-analysis backend.
// --- Configuration ---
// SECURITY: never ship a real API token in client-side code — anyone can read
// it from the page source or dev tools. Keep the token server-side (the Flask
// backend already proxies Hugging Face). Note this constant is not used
// anywhere in this file.
const HF_TOKEN = "YOUR_HUGGINGFACE_TOKEN"; // Get from https://huggingface.co/settings/tokens
const BACKEND_URL = "http://localhost:5000"; // Your Flask backend

// --- Shared state ---
let cameraStream = null;  // active MediaStream while the camera is on, else null
let capturedImage = null; // Blob/File of the image awaiting analysis, else null
// Poll the backend's /health endpoint and reflect Hugging Face availability
// in the #hfStatus element (green when 'Available', red otherwise).
// Fix: the original treated non-2xx responses as success and read
// `data.hf_status` out of an error body; now any failure shows 'Unavailable'.
async function checkHFConnection() {
  const statusEl = document.getElementById('hfStatus');
  try {
    const response = await fetch(`${BACKEND_URL}/health`);
    if (!response.ok) {
      throw new Error(`health check failed with status ${response.status}`);
    }
    const data = await response.json();
    statusEl.textContent = data.hf_status;
    statusEl.style.color = data.hf_status === 'Available' ? 'green' : 'red';
  } catch (error) {
    // Backend unreachable, non-2xx, or malformed JSON — report as down.
    statusEl.textContent = 'Unavailable';
    statusEl.style.color = 'red';
  }
}
// --- Camera Functions ---

// Open the rear ('environment') camera and show the live preview, hiding the
// file-upload control while the camera is active.
// Fixes: stop any already-running stream first (the original leaked the old
// MediaStream when called twice), and fail with a clear message when the
// MediaDevices API is unavailable (insecure context or old browser) instead
// of surfacing a raw TypeError.
async function startCamera() {
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    alert('Camera Error: camera API is not available in this browser/context.');
    return;
  }
  stopCamera(); // release any previous stream before acquiring a new one
  try {
    cameraStream = await navigator.mediaDevices.getUserMedia({
      video: {
        facingMode: 'environment', // prefer the rear camera on mobile
        width: { ideal: 1280 },
        height: { ideal: 720 }
      }
    });
    const video = document.getElementById('cameraFeed');
    video.srcObject = cameraStream;
    document.getElementById('videoContainer').style.display = 'block';
    // Hide the upload option while the camera is active.
    document.querySelector('input[type="file"]').style.display = 'none';
  } catch (error) {
    // Permission denied, no camera, or constraints unsatisfiable.
    alert(`Camera Error: ${error.message}`);
  }
}
// Tear down the live camera preview: stop every track on the active stream,
// hide the video container, and restore the file-upload control.
// No-op when no stream is running.
function stopCamera() {
  if (!cameraStream) {
    return;
  }
  for (const track of cameraStream.getTracks()) {
    track.stop();
  }
  cameraStream = null;
  document.getElementById('videoContainer').style.display = 'none';
  document.querySelector('input[type="file"]').style.display = 'inline-block';
}
// Grab the current video frame onto an offscreen canvas and store it as a
// JPEG Blob in `capturedImage`, then show the preview.
// Fixes: bail out when the video has produced no frames yet (videoWidth is 0
// before metadata loads, which made the original capture an empty canvas),
// and guard against canvas.toBlob invoking its callback with null.
function captureImage() {
  const video = document.getElementById('cameraFeed');
  if (!video.videoWidth || !video.videoHeight) {
    alert('Camera is not ready yet — please wait a moment and try again.');
    return;
  }
  const canvas = document.createElement('canvas');
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;
  const ctx = canvas.getContext('2d');
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
  canvas.toBlob(blob => {
    if (!blob) {
      alert('Failed to capture the image. Please try again.');
      return;
    }
    capturedImage = blob;
    displayCapturedImage(URL.createObjectURL(blob));
  }, 'image/jpeg', 0.9); // 0.9 quality keeps the upload small with little visible loss
}
// Handle file upload: let the user pick a local image instead of using the
// camera. Fix: reject non-image files up front rather than sending arbitrary
// file types to the backend.
document.getElementById('fileUpload').addEventListener('change', function (e) {
  const file = e.target.files[0];
  if (!file) {
    return;
  }
  if (!file.type.startsWith('image/')) {
    alert('Please select an image file.');
    e.target.value = ''; // clear the input so the same file can be re-picked after fixing
    return;
  }
  capturedImage = file;
  displayCapturedImage(URL.createObjectURL(capturedImage));
});
// Show the captured/uploaded image in the preview pane and stop the live
// camera feed, which is no longer needed.
// Fix: revoke the previous blob object URL before replacing it — the
// original leaked one blob URL per capture for the lifetime of the page.
function displayCapturedImage(url) {
  const preview = document.getElementById('previewImage');
  const previousUrl = preview.src;
  if (previousUrl && previousUrl.startsWith('blob:')) {
    URL.revokeObjectURL(previousUrl);
  }
  preview.src = url;
  document.getElementById('imagePreview').style.display = 'block';
  stopCamera(); // stop camera once an image is captured/uploaded
}
// Main entry point: send the captured image plus the user's language/model
// selections to the backend's /analyze endpoint and render the returned
// description, detailed analysis, translation, and raw JSON.
// Fix: on a non-2xx response, include the backend's error body in the
// message instead of only the status code; drop the redundant re-show of
// #results in the catch (it is already made visible before the request).
async function generateDescription() {
  if (!capturedImage) {
    alert('Please capture or upload an image first.');
    return;
  }
  // Reveal the results section so progress/errors are visible immediately.
  document.getElementById('results').style.display = 'block';

  // Collect the user's selections.
  const language = document.getElementById('language').value;
  const vlmModel = document.getElementById('vlmModel').value;
  const llamaModel = document.getElementById('llamaModel').value;

  // Package the image and parameters as multipart form data.
  const formData = new FormData();
  formData.append('image', capturedImage, 'captured.jpg');
  formData.append('language', language);
  formData.append('vlm_model', vlmModel);
  formData.append('llama_model', llamaModel);

  try {
    const response = await fetch(`${BACKEND_URL}/analyze`, {
      method: 'POST',
      body: formData
    });
    if (!response.ok) {
      // Surface the backend's error text when it provides one.
      const detail = await response.text().catch(() => '');
      throw new Error(`HTTP error! status: ${response.status}${detail ? ` — ${detail}` : ''}`);
    }
    const results = await response.json();

    // Render each section, falling back to a placeholder when absent.
    document.getElementById('basicDescription').textContent =
      results.basic_description || 'No description generated';
    document.getElementById('detailedAnalysis').textContent =
      results.detailed_analysis || 'No detailed analysis';
    document.getElementById('translation').textContent =
      results.translation || 'No translation';
    document.getElementById('jsonOutput').textContent = JSON.stringify(results, null, 2);
  } catch (error) {
    console.error('Error:', error);
    document.getElementById('basicDescription').textContent = `Error: ${error.message}`;
  }
}
// Request a region-based detailed analysis of the image from the backend.
// This is a simplified approach — in production the pipeline would:
//   1. Run an object detector (e.g. YOLO) to get bounding boxes
//   2. Crop the image regions
//   3. Describe each region with BLIP/Llama
//   4. Synthesize a comprehensive description
// Returns the parsed JSON result, or `{ error }` on any failure (the
// best-effort, in-band error contract the original established).
// Fix: the original treated non-2xx responses as success and parsed their
// bodies as the result.
async function getDetailedObjectAnalysis(imageBlob, llamaModel) {
  const formData = new FormData();
  formData.append('image', imageBlob);
  formData.append('llama_model', llamaModel);
  try {
    const response = await fetch(`${BACKEND_URL}/detailed-analysis`, {
      method: 'POST',
      body: formData
    });
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    return await response.json();
  } catch (error) {
    return { error: error.message };
  }
}
// Initialize: start the backend health indicator once the page has loaded,
// then refresh it periodically.
// Fix: use addEventListener instead of assigning window.onload, which would
// silently clobber any other load handler registered on the page.
window.addEventListener('load', () => {
  checkHFConnection();
  setInterval(checkHFConnection, 30000); // re-check every 30 seconds
});