Spaces:
Running
Running
"""
🌍 Multilingual Image Describer - SIMPLE
Uses a pre-trained multilingual model for direct captioning.
"""
| import streamlit as st | |
| import torch | |
| from PIL import Image | |
| import requests | |
| from io import BytesIO | |
| import time | |
| from datetime import datetime | |
| import pandas as pd | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
# Page title, icon and wide layout for the app.
st.set_page_config(page_title="Multilingual Image Describer", page_icon="🌍", layout="wide")

# Reserve the session slot that main() fills with the result of load_model().
if "model" not in st.session_state:
    st.session_state["model"] = None
# Supported languages, keyed by language code. Each entry carries the display
# name shown in the UI and the text prompt handed to the captioning model.
LANGUAGES = {
    code: {"name": name, "prompt": prompt}
    for code, name, prompt in (
        ("en", "English", "a photo of"),
        ("zh", "中文", "一张照片"),
        ("am", "አማርኛ", "የሚያሳይ ፎቶ"),
        ("es", "Español", "una foto de"),
        ("fr", "Français", "une photo de"),
        ("de", "Deutsch", "ein Foto von"),
        ("ar", "العربية", "صورة"),
        ("hi", "हिन्दी", "की एक तस्वीर"),
        ("ru", "Русский", "фотография"),
        ("ja", "日本語", "の写真"),
    )
}
def load_model():
    """Load the BLIP-2 processor and captioning model.

    Returns:
        A ``(processor, model)`` tuple. When CUDA is available the weights are
        loaded as float16 and moved to the GPU; otherwise they are loaded as
        float32. If anything fails, the (truncated) error is shown with
        ``st.error`` and ``(None, None)`` is returned instead of raising.
    """
    checkpoint = "Salesforce/blip2-opt-2.7b"
    try:
        # Imported inside the try block so that an import failure goes through
        # the same error path as a failed download or load.
        from transformers import Blip2Processor, Blip2ForConditionalGeneration

        has_gpu = torch.cuda.is_available()
        weight_dtype = torch.float16 if has_gpu else torch.float32

        processor = Blip2Processor.from_pretrained(checkpoint)
        model = Blip2ForConditionalGeneration.from_pretrained(
            checkpoint,
            torch_dtype=weight_dtype,
        )
        if has_gpu:
            model = model.to("cuda")
    except Exception as e:
        st.error(f"Model loading error: {str(e)[:100]}")
        return None, None
    else:
        return processor, model
def generate_multilingual_caption(image, language="en"):
    """Generate a caption for ``image`` using the prompt of ``language``.

    Args:
        image: Image handed to the processor (callers in this file pass an
            RGB PIL image).
        language: Key into ``LANGUAGES``. Unknown codes fall back to the
            English prompt.

    Returns:
        The decoded caption with the leading prompt removed. Returns
        ``"Model not loaded"`` when no usable model is in session state.
        On any error during processing or generation, returns a placeholder
        sentence containing the truncated error text; this function does
        not raise for those failures.
    """
    loaded = st.session_state.model
    # load_model() reports failure as (None, None), which is not None itself,
    # so both members have to be checked.
    if loaded is None or any(part is None for part in loaded):
        return "Model not loaded"
    processor, model = loaded

    try:
        prompt_text = LANGUAGES.get(language, LANGUAGES["en"])["prompt"]

        inputs = processor(image, text=prompt_text, return_tensors="pt")
        # Put the inputs on the model's device and cast floating-point tensors
        # (pixel_values) to the model's dtype. A float16 model on GPU rejects
        # float32 pixel values; integer token ids are left untouched.
        inputs = inputs.to(model.device, model.dtype)

        with torch.no_grad():
            outputs = model.generate(**inputs, max_length=50)

        # Strip first so leading whitespace cannot hide the echoed prompt.
        caption = processor.decode(outputs[0], skip_special_tokens=True).strip()
        if caption.lower().startswith(prompt_text.lower()):
            caption = caption[len(prompt_text):].strip()
        return caption
    except Exception as e:
        # Deliberate best effort: the UI shows a placeholder caption plus a
        # short error excerpt rather than crashing the whole page.
        return f"An image with various objects. (Error: {str(e)[:50]})"
def _model_ready():
    """Return True when session state holds a usable (processor, model) pair."""
    loaded = st.session_state.get("model")
    # load_model() reports failure as (None, None), so inspect the members too.
    return loaded is not None and all(part is not None for part in loaded)


def _render_sidebar():
    """Draw the sidebar and return (uploaded_file, selected_languages, generate_clicked)."""
    with st.sidebar:
        st.header("📸 Upload Image")
        uploaded_file = st.file_uploader(
            "Choose an image",
            type=["jpg", "jpeg", "png", "webp"],
            help="Upload any image file",
        )
        st.markdown("---")
        st.header("🌐 Select Languages")

        # One checkbox per language, laid out in two columns; only English is
        # ticked initially.
        selected_languages = []
        cols = st.columns(2)
        for i, (code, info) in enumerate(LANGUAGES.items()):
            with cols[i % 2]:
                if st.checkbox(f"{info['name']}", key=f"lang_{code}", value=(code == "en")):
                    selected_languages.append(code)
        if not selected_languages:
            selected_languages = ["en"]
            st.info("English selected by default")

        st.markdown("---")
        generate_clicked = st.button(
            "🚀 Generate Descriptions",
            type="primary",
            use_container_width=True,
            disabled=uploaded_file is None,
        )
        if st.button("🔄 Clear", use_container_width=True):
            # Drop the stored captions so the results panel is really emptied.
            st.session_state.pop("results", None)
            st.rerun()
    return uploaded_file, selected_languages, generate_clicked


def _generate_all(image, language_codes, image_key, image_name):
    """Caption ``image`` once per language code and return a storable result record."""
    captions = {}
    with st.spinner("Generating descriptions..."):
        progress_bar = st.progress(0)
        total = len(language_codes)
        for done, lang_code in enumerate(language_codes, start=1):
            lang_name = LANGUAGES[lang_code]["name"]
            captions[lang_name] = generate_multilingual_caption(image, lang_code)
            # Advance only after the caption exists, so the bar shows finished work.
            progress_bar.progress(done / total)
        progress_bar.empty()
    return {
        "image_key": image_key,
        "image_name": image_name,
        "generated_at": datetime.now(),
        "captions": captions,
    }


def _render_results(stored):
    """Show the table, per-language expanders and TXT export for a result record."""
    captions = stored["captions"]
    generated_at = stored["generated_at"]

    st.success(f"✅ Generated {len(captions)} descriptions")

    df_results = pd.DataFrame({
        "Language": list(captions.keys()),
        "Description": list(captions.values()),
    })
    st.dataframe(df_results, use_container_width=True, hide_index=True)

    st.markdown("### Descriptions by Language")
    for lang_name, description in captions.items():
        with st.expander(f"{lang_name}", expanded=(lang_name == "English")):
            st.markdown(f"**{description}**")

    st.markdown("---")
    st.markdown("### 💾 Export Results")
    header = (
        "Multilingual Image Descriptions\n"
        f"Generated: {generated_at.strftime('%Y-%m-%d %H:%M:%S')}\n"
        f"Image: {stored['image_name'] or 'Unknown'}\n"
    )
    export_text = header + "".join(
        f"\n{lang_name}:\n{description}\n" for lang_name, description in captions.items()
    )
    st.download_button(
        "📥 Download as TXT",
        export_text,
        f"descriptions_{generated_at.strftime('%Y%m%d_%H%M%S')}.txt",
        "text/plain",
    )


def main():
    """Render the app: load the model, collect inputs, and show captions.

    Generated captions are kept in ``st.session_state["results"]`` so that
    they survive the script reruns triggered by later widget interaction
    (for example clicking the download button). The "Clear" button removes
    them again.
    """
    st.title("🌍 Multilingual Image Describer")
    st.markdown("Upload an image to get descriptions in multiple languages")

    with st.spinner("Loading AI model..."):
        if not _model_ready():
            st.session_state.model = load_model()
    if not _model_ready():
        # Normalise a failed (None, None) load back to None so the next run
        # retries and other code can rely on the plain None check.
        st.session_state.model = None
        st.error("Failed to load model. Please refresh the page.")
        return

    uploaded_file, selected_languages, generate_clicked = _render_sidebar()

    col1, col2 = st.columns([1, 1])
    image = None
    with col1:
        st.subheader("Input Image")
        if uploaded_file:
            try:
                # Decode once and reuse the same image for preview and captioning.
                image = Image.open(uploaded_file).convert("RGB")
            except (OSError, ValueError) as e:
                st.error(f"Could not read the uploaded image: {str(e)[:100]}")
            else:
                st.image(image, use_column_width=True)
                st.caption(f"Size: {image.size[0]}×{image.size[1]} pixels")
        else:
            st.info("👈 Upload an image from the sidebar")
            st.image(
                "https://images.unsplash.com/photo-1579546929662-711aa81148cf?w=400&auto=format",
                caption="Sample background",
                use_column_width=True,
            )

    with col2:
        st.subheader("Results")
        # Name plus size identifies the upload well enough to avoid showing
        # captions that belong to a previously uploaded image.
        image_key = (uploaded_file.name, uploaded_file.size) if uploaded_file else None
        if generate_clicked and image is not None:
            st.session_state["results"] = _generate_all(
                image, selected_languages, image_key, uploaded_file.name
            )

        stored = st.session_state.get("results")
        if stored and image is not None and stored["image_key"] == image_key:
            _render_results(stored)
        elif image is not None:
            st.info("👈 Click 'Generate Descriptions' to analyze the image")

    st.markdown("---")
    st.caption(
        "**Powered by:** BLIP-2 Multilingual Model • **UCAS @2025** •\n"
        "Model: Salesforce/blip2-opt-2.7b"
    )
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()