// amogne-vlm-LLM / app.js
// Configuration
const HF_TOKEN = "YOUR_HUGGINGFACE_TOKEN"; // From https://huggingface.co/settings/tokens (not read anywhere in this file; the Flask backend holds its own token)
const BACKEND_URL = "http://localhost:5000"; // Base URL of the Flask backend
// Application state
let cameraStream = null;  // MediaStream from getUserMedia, or null when the camera is off
let capturedImage = null; // Blob (camera capture) or File (upload) awaiting analysis
// Check the backend's /health endpoint and reflect Hugging Face availability in the UI
async function checkHFConnection() {
  const statusEl = document.getElementById('hfStatus');
  try {
    const response = await fetch(`${BACKEND_URL}/health`);
    const data = await response.json();
    statusEl.textContent = data.hf_status;
    statusEl.style.color = data.hf_status === 'Available' ? 'green' : 'red';
  } catch (error) {
    statusEl.textContent = 'Unavailable';
    statusEl.style.color = 'red';
  }
}
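// For reference, the /health response is assumed (from the fields read above)
// to look roughly like:
//
//   { "hf_status": "Available" }   // any other status string is rendered in red
//
// The exact payload is defined by the Flask backend, not by this file.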
// Camera functions
// Note: getUserMedia only works in a secure context (HTTPS or localhost).
async function startCamera() {
  try {
    cameraStream = await navigator.mediaDevices.getUserMedia({
      video: {
        facingMode: 'environment', // prefer the rear camera on mobile devices
        width: { ideal: 1280 },
        height: { ideal: 720 }
      }
    });
    const video = document.getElementById('cameraFeed');
    video.srcObject = cameraStream;
    document.getElementById('videoContainer').style.display = 'block';
    // Hide the upload option while the camera is active
    document.querySelector('input[type="file"]').style.display = 'none';
  } catch (error) {
    alert(`Camera Error: ${error.message}`);
  }
}
function stopCamera() {
  if (cameraStream) {
    cameraStream.getTracks().forEach(track => track.stop());
    cameraStream = null;
    document.getElementById('videoContainer').style.display = 'none';
    document.querySelector('input[type="file"]').style.display = 'inline-block';
  }
}
// Grab the current video frame onto an offscreen canvas and keep it as a JPEG blob
function captureImage() {
  const video = document.getElementById('cameraFeed');
  const canvas = document.createElement('canvas');
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;
  const ctx = canvas.getContext('2d');
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
  canvas.toBlob(blob => {
    capturedImage = blob;
    displayCapturedImage(URL.createObjectURL(blob));
  }, 'image/jpeg', 0.9); // 90% JPEG quality
}
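// This file defines startCamera/stopCamera/captureImage/generateDescription but
// never attaches them to buttons, so the page presumably uses inline onclick
// handlers. If it does not, a minimal wiring sketch (the element IDs below are
// assumptions, not taken from the real page) would be:
//
//   document.getElementById('startCameraBtn')?.addEventListener('click', startCamera);
//   document.getElementById('captureBtn')?.addEventListener('click', captureImage);
//   document.getElementById('analyzeBtn')?.addEventListener('click', generateDescription);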
// Handle file upload as an alternative to the camera
document.getElementById('fileUpload').addEventListener('change', function(e) {
  if (e.target.files[0]) {
    capturedImage = e.target.files[0];
    displayCapturedImage(URL.createObjectURL(capturedImage));
  }
});
function displayCapturedImage(url) {
  const preview = document.getElementById('previewImage');
  // Release the previous object URL (if any) so repeated captures don't leak memory
  if (preview.src.startsWith('blob:')) {
    URL.revokeObjectURL(preview.src);
  }
  preview.src = url;
  document.getElementById('imagePreview').style.display = 'block';
  stopCamera(); // Stop the camera once an image has been captured or uploaded
}
// Main flow: send the image and model choices to the backend and render the results
async function generateDescription() {
  if (!capturedImage) {
    alert('Please capture or upload an image first.');
    return;
  }
  // Show the results section
  document.getElementById('results').style.display = 'block';
  // Gather user selections
  const language = document.getElementById('language').value;
  const vlmModel = document.getElementById('vlmModel').value;
  const llamaModel = document.getElementById('llamaModel').value;
  // Package the image and parameters as multipart form data
  const formData = new FormData();
  formData.append('image', capturedImage, 'captured.jpg');
  formData.append('language', language);
  formData.append('vlm_model', vlmModel);
  formData.append('llama_model', llamaModel);
  try {
    // Send to the backend
    const response = await fetch(`${BACKEND_URL}/analyze`, {
      method: 'POST',
      body: formData
    });
    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }
    const results = await response.json();
    // Render each part of the response, with fallbacks for missing fields
    document.getElementById('basicDescription').textContent = results.basic_description || 'No description generated';
    document.getElementById('detailedAnalysis').textContent = results.detailed_analysis || 'No detailed analysis';
    document.getElementById('translation').textContent = results.translation || 'No translation';
    document.getElementById('jsonOutput').textContent = JSON.stringify(results, null, 2);
  } catch (error) {
    console.error('Error:', error);
    document.getElementById('basicDescription').textContent = `Error: ${error.message}`;
  }
}
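// For reference, the /analyze response is assumed (from the fields rendered
// above) to look roughly like:
//
//   {
//     "basic_description": "...",
//     "detailed_analysis": "...",
//     "translation": "..."
//   }
//
// The real payload is defined by the Flask backend.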
// Enhanced object detection using region-based analysis.
// Note: this helper is defined but not currently called from generateDescription.
async function getDetailedObjectAnalysis(imageBlob, llamaModel) {
  // This is a simplified approach. In production you would want to:
  // 1. Run an object detector (such as YOLO) to get bounding boxes
  // 2. Crop the image regions
  // 3. Use BLIP/Llama to describe each region
  // 4. Synthesize the captions into one comprehensive description
  // (see the cropping sketch below for steps 1-2)
  const formData = new FormData();
  formData.append('image', imageBlob);
  formData.append('llama_model', llamaModel);
  try {
    const response = await fetch(`${BACKEND_URL}/detailed-analysis`, {
      method: 'POST',
      body: formData
    });
    return await response.json();
  } catch (error) {
    return { error: error.message };
  }
}
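// Minimal sketch of steps 1-2 above: crop detected regions client-side so each
// crop can be captioned separately. The /detect endpoint and its
// { boxes: [{ x, y, width, height, label }] } response shape are assumptions
// for illustration only; the real backend in this file exposes /health,
// /analyze, and /detailed-analysis.
async function cropDetectedRegions(imageBlob) {
  // Step 1 (assumed): ask a detector endpoint for bounding boxes
  const formData = new FormData();
  formData.append('image', imageBlob);
  const response = await fetch(`${BACKEND_URL}/detect`, { method: 'POST', body: formData });
  const { boxes } = await response.json();

  // Step 2: crop each box out of the original image with an offscreen canvas
  const bitmap = await createImageBitmap(imageBlob);
  const crops = [];
  for (const box of boxes) {
    const canvas = document.createElement('canvas');
    canvas.width = box.width;
    canvas.height = box.height;
    canvas.getContext('2d').drawImage(
      bitmap,
      box.x, box.y, box.width, box.height, // source rectangle in the original image
      0, 0, box.width, box.height          // destination rectangle on the canvas
    );
    const blob = await new Promise(resolve => canvas.toBlob(resolve, 'image/jpeg', 0.9));
    crops.push({ label: box.label, blob });
  }
  return crops; // each crop could then be passed through getDetailedObjectAnalysis
}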
// Initialize: check backend/HF availability on load and every 30 seconds thereafter
window.onload = function() {
  checkHFConnection();
  setInterval(checkHFConnection, 30000);
};