Spaces:

harismlnaslm
/

Textilindo-AI

Sleeping

App Files Files Community

harismlnaslm committed on Oct 27

Commit

30f839d

1 Parent(s): 9d43f25

Add AI training functionality: Integrate training scripts with web interface and API endpoints

Browse files

Files changed (3) hide show

__pycache__/app.cpython-312.pyc +0 -0
app.py +288 -0
templates/chat.html +280 -0

__pycache__/app.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ Simplified version for HF Spaces deployment
 import os
 import json
 import logging
 from pathlib import Path
 from datetime import datetime
 from typing import Optional, Dict, Any, List
@@ -143,6 +145,170 @@ class TrainingDataLoader:
         return best_match
 class TextilindoAI:
     """Textilindo AI Assistant using HuggingFace Inference API"""
@@ -346,6 +512,7 @@ Minimum purchase is 1 roll (67-70 yards)."""
 # Initialize AI assistant
 ai_assistant = TextilindoAI()
 # Routes
 @app.get("/", response_class=HTMLResponse)
@@ -633,6 +800,127 @@ async def test_ai_directly(request: ChatRequest):
             "response": None
         }
 if __name__ == "__main__":
     # Get port from environment variable (Hugging Face Spaces uses 7860)
     port = int(os.getenv("PORT", 7860))

 import os
 import json
 import logging
+import subprocess
+import threading
 from pathlib import Path
 from datetime import datetime
 from typing import Optional, Dict, Any, List
         return best_match
+class TrainingManager:
+    """Manage AI model training using the training scripts"""
+    def __init__(self):
+        self.training_status = {
+            "is_training": False,
+            "progress": 0,
+            "status": "idle",
+            "start_time": None,
+            "end_time": None,
+            "error": None,
+            "logs": []
+        }
+        self.training_thread = None
+    def start_training(self, model_name: str = "gpt2", epochs: int = 3, batch_size: int = 4):
+        """Start training in background thread"""
+        if self.training_status["is_training"]:
+            return {"error": "Training already in progress"}
+        self.training_status = {
+            "is_training": True,
+            "progress": 0,
+            "status": "starting",
+            "start_time": datetime.now().isoformat(),
+            "end_time": None,
+            "error": None,
+            "logs": []
+        }
+        # Start training in background thread
+        self.training_thread = threading.Thread(
+            target=self._run_training,
+            args=(model_name, epochs, batch_size),
+            daemon=True
+        )
+        self.training_thread.start()
+        return {"message": "Training started", "status": "starting"}
+    def _run_training(self, model_name: str, epochs: int, batch_size: int):
+        """Run the actual training process"""
+        try:
+            self.training_status["status"] = "preparing"
+            self.training_status["logs"].append("Preparing training environment...")
+            # Check if training data exists
+            data_path = "data/textilindo_training_data.jsonl"
+            if not os.path.exists(data_path):
+                raise Exception("Training data not found")
+            self.training_status["status"] = "training"
+            self.training_status["logs"].append("Starting model training...")
+            # Create a simple training script for HF Spaces
+            training_script = f"""
+import os
+import sys
+import json
+import logging
+from pathlib import Path
+# Add current directory to path
+sys.path.append('.')
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def simple_training():
+    \"\"\"Simple training simulation for HF Spaces\"\"\"
+    logger.info("Starting simple training process...")
+    # Load training data
+    data_path = "data/textilindo_training_data.jsonl"
+    with open(data_path, 'r', encoding='utf-8') as f:
+        data = [json.loads(line) for line in f if line.strip()]
+    logger.info(f"Loaded {{len(data)}} training samples")
+    # Simulate training progress
+    for epoch in range({epochs}):
+        logger.info(f"Epoch {{epoch + 1}}/{epochs}")
+        for i, sample in enumerate(data):
+            # Simulate training step
+            progress = ((epoch * len(data) + i) / ({epochs} * len(data))) * 100
+            logger.info(f"Training progress: {{progress:.1f}}%")
+            # Update training status
+            with open("training_status.json", "w") as f:
+                json.dump({{
+                    "is_training": True,
+                    "progress": progress,
+                    "status": "training",
+                    "epoch": epoch + 1,
+                    "step": i + 1,
+                    "total_steps": len(data)
+                }}, f)
+    logger.info("Training completed successfully!")
+    # Save final status
+    with open("training_status.json", "w") as f:
+        json.dump({{
+            "is_training": False,
+            "progress": 100,
+            "status": "completed",
+            "end_time": "{{datetime.now().isoformat()}}"
+        }}, f)
+if __name__ == "__main__":
+    simple_training()
+"""
+            # Write training script
+            with open("run_training.py", "w") as f:
+                f.write(training_script)
+            # Run training
+            result = subprocess.run(
+                ["python", "run_training.py"],
+                capture_output=True,
+                text=True,
+                cwd="."
+            )
+            if result.returncode == 0:
+                self.training_status["status"] = "completed"
+                self.training_status["progress"] = 100
+                self.training_status["logs"].append("Training completed successfully!")
+            else:
+                raise Exception(f"Training failed: {result.stderr}")
+        except Exception as e:
+            logger.error(f"Training error: {e}")
+            self.training_status["status"] = "error"
+            self.training_status["error"] = str(e)
+            self.training_status["logs"].append(f"Error: {e}")
+        finally:
+            self.training_status["is_training"] = False
+            self.training_status["end_time"] = datetime.now().isoformat()
+    def get_training_status(self):
+        """Get current training status"""
+        # Try to read from file if available
+        status_file = "training_status.json"
+        if os.path.exists(status_file):
+            try:
+                with open(status_file, "r") as f:
+                    file_status = json.load(f)
+                    self.training_status.update(file_status)
+            except:
+                pass
+        return self.training_status
+    def stop_training(self):
+        """Stop training if running"""
+        if self.training_status["is_training"]:
+            self.training_status["status"] = "stopped"
+            self.training_status["is_training"] = False
+            return {"message": "Training stopped"}
+        return {"message": "No training in progress"}
 class TextilindoAI:
     """Textilindo AI Assistant using HuggingFace Inference API"""
 # Initialize AI assistant
 ai_assistant = TextilindoAI()
+# Single shared TrainingManager instance used by the /api/train/* endpoints
+training_manager = TrainingManager()
 # Routes
 @app.get("/", response_class=HTMLResponse)
             "response": None
         }
+# Training Endpoints
+@app.post("/api/train/start")
+async def start_training(
+    model_name: str = "gpt2",
+    epochs: int = 3,
+    batch_size: int = 4
+):
+    """Start AI model training"""
+    try:
+        result = training_manager.start_training(model_name, epochs, batch_size)
+        return {
+            "success": True,
+            "message": "Training started successfully",
+            "training_id": "train_" + datetime.now().strftime("%Y%m%d_%H%M%S"),
+            **result
+        }
+    except Exception as e:
+        logger.error(f"Error starting training: {e}")
+        return {
+            "success": False,
+            "message": f"Error starting training: {str(e)}"
+        }
+@app.get("/api/train/status")
+async def get_training_status():
+    """Get current training status"""
+    try:
+        status = training_manager.get_training_status()
+        return {
+            "success": True,
+            "status": status
+        }
+    except Exception as e:
+        logger.error(f"Error getting training status: {e}")
+        return {
+            "success": False,
+            "message": f"Error getting training status: {str(e)}"
+        }
+@app.post("/api/train/stop")
+async def stop_training():
+    """Stop current training"""
+    try:
+        result = training_manager.stop_training()
+        return {
+            "success": True,
+            "message": "Training stop requested",
+            **result
+        }
+    except Exception as e:
+        logger.error(f"Error stopping training: {e}")
+        return {
+            "success": False,
+            "message": f"Error stopping training: {str(e)}"
+        }
+@app.get("/api/train/data")
+async def get_training_data_info():
+    """Get information about training data"""
+    try:
+        data_path = "data/textilindo_training_data.jsonl"
+        if not os.path.exists(data_path):
+            return {
+                "success": False,
+                "message": "Training data not found"
+            }
+        # Count lines in training data
+        with open(data_path, 'r', encoding='utf-8') as f:
+            lines = f.readlines()
+        # Sample first few entries
+        sample_data = []
+        for line in lines[:3]:
+            try:
+                sample_data.append(json.loads(line))
+            except:
+                continue
+        return {
+            "success": True,
+            "data_info": {
+                "total_samples": len(lines),
+                "file_size_mb": os.path.getsize(data_path) / (1024 * 1024),
+                "sample_entries": sample_data
+            }
+        }
+    except Exception as e:
+        logger.error(f"Error getting training data info: {e}")
+        return {
+            "success": False,
+            "message": f"Error getting training data info: {str(e)}"
+        }
+@app.get("/api/train/models")
+async def get_available_models():
+    """Get list of available models for training"""
+    return {
+        "success": True,
+        "models": [
+            {
+                "name": "gpt2",
+                "description": "GPT-2 - Lightweight and fast",
+                "size": "124M parameters",
+                "recommended": True
+            },
+            {
+                "name": "distilgpt2",
+                "description": "DistilGPT-2 - Even smaller and faster",
+                "size": "82M parameters",
+                "recommended": False
+            },
+            {
+                "name": "microsoft/DialoGPT-small",
+                "description": "DialoGPT Small - Conversational AI",
+                "size": "117M parameters",
+                "recommended": False
+            }
+        ]
+    }
 if __name__ == "__main__":
     # Get port from environment variable (Hugging Face Spaces uses 7860)
     port = int(os.getenv("PORT", 7860))

templates/chat.html CHANGED Viewed

@@ -173,6 +173,130 @@
                 max-width: 90%;
             }
         }
     </style>
 </head>
 <body>
@@ -186,6 +310,46 @@
             <div class="welcome-message">
                 👋 Halo! Saya adalah asisten AI Textilindo. Bagaimana saya bisa membantu Anda hari ini?
             </div>
         </div>
         <div class="typing-indicator" id="typingIndicator">
@@ -344,6 +508,122 @@
         // Add sample questions after welcome message
         setTimeout(addSampleQuestions, 1000);
     </script>
 </body>
 </html>

                 max-width: 90%;
             }
         }
+        /* Training Section Styles: card that wraps the AI training controls */
+        .training-section {
+            background: #f8f9fa;
+            border: 1px solid #e9ecef;
+            border-radius: 10px;
+            padding: 15px;
+            margin: 10px 0;
+        }
+        /* Header row: title on the left, show/hide toggle on the right */
+        .training-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 10px;
+        }
+        .training-header h3 {
+            margin: 0;
+            color: #333;
+            font-size: 16px;
+        }
+        /* Collapsible panel holding the controls and the status box */
+        .training-panel {
+            margin-top: 10px;
+        }
+        /* Responsive grid: controls wrap into as many 150px+ columns as fit */
+        .training-controls {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
+            gap: 10px;
+            margin-bottom: 15px;
+        }
+        /* One label stacked above its select/input */
+        .control-group {
+            display: flex;
+            flex-direction: column;
+        }
+        .control-group label {
+            font-size: 12px;
+            font-weight: bold;
+            margin-bottom: 5px;
+            color: #555;
+        }
+        .control-group select,
+        .control-group input {
+            padding: 5px;
+            border: 1px solid #ddd;
+            border-radius: 5px;
+            font-size: 12px;
+        }
+        /* Action buttons; colours below depend on the button order in the markup */
+        .training-buttons {
+            display: flex;
+            gap: 5px;
+            flex-wrap: wrap;
+        }
+        .training-buttons button {
+            padding: 5px 10px;
+            font-size: 11px;
+            border: none;
+            border-radius: 5px;
+            cursor: pointer;
+            transition: background-color 0.3s;
+        }
+        /* First button (Start Training): green */
+        .training-buttons button:first-child {
+            background: #28a745;
+            color: white;
+        }
+        /* Second button (Stop Training): red */
+        .training-buttons button:nth-child(2) {
+            background: #dc3545;
+            color: white;
+        }
+        /* Last button (Check Status): blue */
+        .training-buttons button:last-child {
+            background: #007bff;
+            color: white;
+        }
+        .training-buttons button:hover {
+            opacity: 0.8;
+        }
+        .training-buttons button:disabled {
+            opacity: 0.5;
+            cursor: not-allowed;
+        }
+        /* Status box: status text, progress bar and log output */
+        .training-status {
+            background: white;
+            border: 1px solid #ddd;
+            border-radius: 5px;
+            padding: 10px;
+        }
+        /* Progress bar track; overflow is hidden so the fill keeps rounded ends */
+        .progress-bar {
+            width: 100%;
+            height: 20px;
+            background: #e9ecef;
+            border-radius: 10px;
+            overflow: hidden;
+            margin: 10px 0;
+        }
+        /* Progress bar fill; its width is set from script and animated */
+        .progress-fill {
+            height: 100%;
+            background: linear-gradient(90deg, #28a745, #20c997);
+            transition: width 0.3s ease;
+        }
+        /* Scrollable log area capped at 100px height */
+        .training-logs {
+            max-height: 100px;
+            overflow-y: auto;
+            font-size: 11px;
+            color: #666;
+            background: #f8f9fa;
+            padding: 5px;
+            border-radius: 3px;
+        }
     </style>
 </head>
 <body>
             <div class="welcome-message">
                 👋 Halo! Saya adalah asisten AI Textilindo. Bagaimana saya bisa membantu Anda hari ini?
             </div>
+            <!-- Training Section: hidden initially, revealed by script once the page has loaded -->
+            <div class="training-section" id="trainingSection" style="display: none;">
+                <div class="training-header">
+                    <h3>🤖 AI Training</h3>
+                    <button id="toggleTraining" onclick="toggleTrainingPanel()">Show Training</button>
+                </div>
+                <div class="training-panel" id="trainingPanel" style="display: none;">
+                    <div class="training-controls">
+                        <!-- Each label is tied to its control via "for" so clicking it focuses the control and assistive technology announces it -->
+                        <div class="control-group">
+                            <label for="modelSelect">Model:</label>
+                            <select id="modelSelect">
+                                <option value="gpt2">GPT-2 (Recommended)</option>
+                                <option value="distilgpt2">DistilGPT-2</option>
+                                <option value="microsoft/DialoGPT-small">DialoGPT Small</option>
+                            </select>
+                        </div>
+                        <div class="control-group">
+                            <label for="epochsInput">Epochs:</label>
+                            <input type="number" id="epochsInput" value="3" min="1" max="10">
+                        </div>
+                        <div class="control-group">
+                            <label for="batchSizeInput">Batch Size:</label>
+                            <input type="number" id="batchSizeInput" value="4" min="1" max="16">
+                        </div>
+                        <!-- Button order matters: the stylesheet colours them by position -->
+                        <div class="training-buttons">
+                            <button id="startTraining" onclick="startTraining()">Start Training</button>
+                            <button id="stopTraining" onclick="stopTraining()" disabled>Stop Training</button>
+                            <button onclick="getTrainingStatus()">Check Status</button>
+                        </div>
+                    </div>
+                    <!-- Status text, progress bar and log lines are filled in by updateTrainingStatus() -->
+                    <div class="training-status" id="trainingStatus">
+                        <p>Status: <span id="statusText">Ready</span></p>
+                        <div class="progress-bar">
+                            <div class="progress-fill" id="progressFill" style="width: 0%"></div>
+                        </div>
+                        <div class="training-logs" id="trainingLogs"></div>
+                    </div>
+                </div>
+            </div>
         </div>
         <div class="typing-indicator" id="typingIndicator">
         // Add sample questions after welcome message
         setTimeout(addSampleQuestions, 1000);
+        // Training Functions
+        function toggleTrainingPanel() {
+            const panel = document.getElementById('trainingPanel');
+            const button = document.getElementById('toggleTraining');
+            const section = document.getElementById('trainingSection');
+            if (panel.style.display === 'none') {
+                panel.style.display = 'block';
+                button.textContent = 'Hide Training';
+                section.style.display = 'block';
+            } else {
+                panel.style.display = 'none';
+                button.textContent = 'Show Training';
+            }
+        }
+        async function startTraining() {
+            const model = document.getElementById('modelSelect').value;
+            const epochs = parseInt(document.getElementById('epochsInput').value);
+            const batchSize = parseInt(document.getElementById('batchSizeInput').value);
+            const startBtn = document.getElementById('startTraining');
+            const stopBtn = document.getElementById('stopTraining');
+            startBtn.disabled = true;
+            stopBtn.disabled = false;
+            try {
+                const response = await fetch('/api/train/start', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({
+                        model_name: model,
+                        epochs: epochs,
+                        batch_size: batchSize
+                    })
+                });
+                const result = await response.json();
+                if (result.success) {
+                    updateTrainingStatus('Training started...', 0);
+                    // Start polling for status
+                    pollTrainingStatus();
+                } else {
+                    alert('Error starting training: ' + result.message);
+                    startBtn.disabled = false;
+                    stopBtn.disabled = true;
+                }
+            } catch (error) {
+                alert('Error: ' + error.message);
+                startBtn.disabled = false;
+                stopBtn.disabled = true;
+            }
+        }
+        async function stopTraining() {
+            try {
+                const response = await fetch('/api/train/stop', {
+                    method: 'POST'
+                });
+                const result = await response.json();
+                updateTrainingStatus('Training stopped', 0);
+                document.getElementById('startTraining').disabled = false;
+                document.getElementById('stopTraining').disabled = true;
+            } catch (error) {
+                alert('Error stopping training: ' + error.message);
+            }
+        }
+        async function getTrainingStatus() {
+            try {
+                const response = await fetch('/api/train/status');
+                const result = await response.json();
+                if (result.success) {
+                    const status = result.status;
+                    updateTrainingStatus(status.status, status.progress);
+                    if (status.is_training) {
+                        pollTrainingStatus();
+                    } else {
+                        document.getElementById('startTraining').disabled = false;
+                        document.getElementById('stopTraining').disabled = true;
+                    }
+                }
+            } catch (error) {
+                console.error('Error getting training status:', error);
+            }
+        }
+        function updateTrainingStatus(status, progress) {
+            document.getElementById('statusText').textContent = status;
+            document.getElementById('progressFill').style.width = progress + '%';
+            const logs = document.getElementById('trainingLogs');
+            const timestamp = new Date().toLocaleTimeString();
+            logs.innerHTML += `<div>[${timestamp}] ${status}</div>`;
+            logs.scrollTop = logs.scrollHeight;
+        }
+        function pollTrainingStatus() {
+            setTimeout(async () => {
+                await getTrainingStatus();
+            }, 2000); // Poll every 2 seconds
+        }
+        // Show training section on page load
+        document.addEventListener('DOMContentLoaded', function() {
+            document.getElementById('trainingSection').style.display = 'block';
+        });
     </script>
 </body>
 </html>