// amogne-vlm-LLM / app.js
// Configuration
const HF_TOKEN = "YOUR_HUGGINGFACE_TOKEN"; // From https://huggingface.co/settings/tokens (not read anywhere in this file; the Flask backend holds its own token)
const BACKEND_URL = "http://localhost:5000"; // Base URL of the Flask backend
// Application state
let cameraStream = null;  // MediaStream from getUserMedia, or null when the camera is off
let capturedImage = null; // Blob (camera capture) or File (upload) awaiting analysis
// Check the backend's /health endpoint and reflect Hugging Face availability in the UI
async function checkHFConnection() {
  const statusEl = document.getElementById('hfStatus');
  try {
    const response = await fetch(`${BACKEND_URL}/health`);
    const data = await response.json();
    statusEl.textContent = data.hf_status;
    statusEl.style.color = data.hf_status === 'Available' ? 'green' : 'red';
  } catch (error) {
    statusEl.textContent = 'Unavailable';
    statusEl.style.color = 'red';
  }
}
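// For reference, the /health response is assumed (from the fields read above)
// to look roughly like:
//
//   { "hf_status": "Available" }   // any other status string is rendered in red
//
// The exact payload is defined by the Flask backend, not by this file.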
// Camera functions
// Note: getUserMedia only works in a secure context (HTTPS or localhost).
async function startCamera() {
  try {
    cameraStream = await navigator.mediaDevices.getUserMedia({
      video: {
        facingMode: 'environment', // prefer the rear camera on mobile devices
        width: { ideal: 1280 },
        height: { ideal: 720 }
      }
    });
    const video = document.getElementById('cameraFeed');
    video.srcObject = cameraStream;
    document.getElementById('videoContainer').style.display = 'block';
    // Hide the upload option while the camera is active
    document.querySelector('input[type="file"]').style.display = 'none';
  } catch (error) {
    alert(`Camera Error: ${error.message}`);
  }
}
function stopCamera() {
  if (cameraStream) {
    cameraStream.getTracks().forEach(track => track.stop());
    cameraStream = null;
    document.getElementById('videoContainer').style.display = 'none';
    document.querySelector('input[type="file"]').style.display = 'inline-block';
  }
}
// Grab the current video frame onto an offscreen canvas and keep it as a JPEG blob
function captureImage() {
  const video = document.getElementById('cameraFeed');
  const canvas = document.createElement('canvas');
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;
  const ctx = canvas.getContext('2d');
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
  canvas.toBlob(blob => {
    capturedImage = blob;
    displayCapturedImage(URL.createObjectURL(blob));
  }, 'image/jpeg', 0.9); // 90% JPEG quality
}
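// This file defines startCamera/stopCamera/captureImage/generateDescription but
// never attaches them to buttons, so the page presumably uses inline onclick
// handlers. If it does not, a minimal wiring sketch (the element IDs below are
// assumptions, not taken from the real page) would be:
//
//   document.getElementById('startCameraBtn')?.addEventListener('click', startCamera);
//   document.getElementById('captureBtn')?.addEventListener('click', captureImage);
//   document.getElementById('analyzeBtn')?.addEventListener('click', generateDescription);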
// Handle file upload as an alternative to the camera
document.getElementById('fileUpload').addEventListener('change', function(e) {
  if (e.target.files[0]) {
    capturedImage = e.target.files[0];
    displayCapturedImage(URL.createObjectURL(capturedImage));
  }
});
function displayCapturedImage(url) {
  const preview = document.getElementById('previewImage');
  // Release the previous object URL (if any) so repeated captures don't leak memory
  if (preview.src.startsWith('blob:')) {
    URL.revokeObjectURL(preview.src);
  }
  preview.src = url;
  document.getElementById('imagePreview').style.display = 'block';
  stopCamera(); // Stop the camera once an image has been captured or uploaded
}
// Main flow: send the image and model choices to the backend and render the results
async function generateDescription() {
  if (!capturedImage) {
    alert('Please capture or upload an image first.');
    return;
  }
  // Show the results section
  document.getElementById('results').style.display = 'block';
  // Gather user selections
  const language = document.getElementById('language').value;
  const vlmModel = document.getElementById('vlmModel').value;
  const llamaModel = document.getElementById('llamaModel').value;
  // Package the image and parameters as multipart form data
  const formData = new FormData();
  formData.append('image', capturedImage, 'captured.jpg');
  formData.append('language', language);
  formData.append('vlm_model', vlmModel);
  formData.append('llama_model', llamaModel);
  try {
    // Send to the backend
    const response = await fetch(`${BACKEND_URL}/analyze`, {
      method: 'POST',
      body: formData
    });
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    const results = await response.json();
    // Render each part of the response, with fallbacks for missing fields
    document.getElementById('basicDescription').textContent = results.basic_description || 'No description generated';
    document.getElementById('detailedAnalysis').textContent = results.detailed_analysis || 'No detailed analysis';
    document.getElementById('translation').textContent = results.translation || 'No translation';
    document.getElementById('jsonOutput').textContent = JSON.stringify(results, null, 2);
  } catch (error) {
    console.error('Error:', error);
    document.getElementById('basicDescription').textContent = `Error: ${error.message}`;
  }
}
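// For reference, the /analyze response is assumed (from the fields rendered
// above) to look roughly like:
//
//   {
//     "basic_description": "...",
//     "detailed_analysis": "...",
//     "translation": "..."
//   }
//
// The real payload is defined by the Flask backend.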
// Enhanced object detection using region-based analysis.
// Note: this helper is defined but not currently called from generateDescription.
async function getDetailedObjectAnalysis(imageBlob, llamaModel) {
  // This is a simplified approach. In production you would want to:
  // 1. Run an object detector (such as YOLO) to get bounding boxes
  // 2. Crop the image regions
  // 3. Use BLIP/Llama to describe each region
  // 4. Synthesize the captions into one comprehensive description
  // (see the cropping sketch below for steps 1-2)
  const formData = new FormData();
  formData.append('image', imageBlob);
  formData.append('llama_model', llamaModel);
  try {
    const response = await fetch(`${BACKEND_URL}/detailed-analysis`, {
      method: 'POST',
      body: formData
    });
    return await response.json();
  } catch (error) {
    return { error: error.message };
  }
}
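// Minimal sketch of steps 1-2 above: crop detected regions client-side so each
// crop can be captioned separately. The /detect endpoint and its
// { boxes: [{ x, y, width, height, label }] } response shape are assumptions
// for illustration only; the real backend in this file exposes /health,
// /analyze, and /detailed-analysis.
async function cropDetectedRegions(imageBlob) {
  // Step 1 (assumed): ask a detector endpoint for bounding boxes
  const formData = new FormData();
  formData.append('image', imageBlob);
  const response = await fetch(`${BACKEND_URL}/detect`, { method: 'POST', body: formData });
  const { boxes } = await response.json();

  // Step 2: crop each box out of the original image with an offscreen canvas
  const bitmap = await createImageBitmap(imageBlob);
  const crops = [];
  for (const box of boxes) {
    const canvas = document.createElement('canvas');
    canvas.width = box.width;
    canvas.height = box.height;
    canvas.getContext('2d').drawImage(
      bitmap,
      box.x, box.y, box.width, box.height, // source rectangle in the original image
      0, 0, box.width, box.height          // destination rectangle on the canvas
    );
    const blob = await new Promise(resolve => canvas.toBlob(resolve, 'image/jpeg', 0.9));
    crops.push({ label: box.label, blob });
  }
  return crops; // each crop could then be passed through getDetailedObjectAnalysis
}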
// Initialize: check backend/HF availability on load and every 30 seconds thereafter
window.onload = function() {
  checkHFConnection();
  setInterval(checkHFConnection, 30000);
};