Michael Hu committed on
Commit
a5e7aac
·
1 Parent(s): 5b02cb3
DEVELOPER_GUIDE.md DELETED
@@ -1,701 +0,0 @@
1
- # Developer Guide
2
-
3
- This guide provides comprehensive instructions for extending the Audio Translation System with new providers and contributing to the codebase.
4
-
5
- ## Table of Contents
6
-
7
- - [Architecture Overview](#architecture-overview)
8
- - [Adding New TTS Providers](#adding-new-tts-providers)
9
- - [Adding New STT Providers](#adding-new-stt-providers)
10
- - [Adding New Translation Providers](#adding-new-translation-providers)
11
- - [Testing Guidelines](#testing-guidelines)
12
- - [Code Style and Standards](#code-style-and-standards)
13
- - [Debugging and Troubleshooting](#debugging-and-troubleshooting)
14
- - [Performance Considerations](#performance-considerations)
15
-
16
- ## Architecture Overview
17
-
18
- The system follows Domain-Driven Design (DDD) principles with clear separation of concerns:
19
-
20
- ```
21
- src/
22
- ├── domain/ # Core business logic
23
- │   ├── interfaces/ # Service contracts (ports)
24
- │   ├── models/ # Domain entities and value objects
25
- │   ├── services/ # Domain services
26
- │   └── exceptions.py # Domain-specific exceptions
27
- ├── application/ # Use case orchestration
28
- │   ├── services/ # Application services
29
- │   ├── dtos/ # Data transfer objects
30
- │   └── error_handling/ # Application error handling
31
- ├── infrastructure/ # External service implementations
32
- │   ├── tts/ # TTS provider implementations
33
- │   ├── stt/ # STT provider implementations
34
- │   ├── translation/ # Translation service implementations
35
- │   ├── base/ # Provider base classes
36
- │   └── config/ # Configuration and DI container
37
- └── presentation/ # UI layer (app.py)
38
- ```
39
-
40
- ### Key Design Patterns
41
-
42
- 1. **Provider Pattern**: Pluggable implementations for different services
43
- 2. **Factory Pattern**: Provider creation with fallback logic
44
- 3. **Dependency Injection**: Loose coupling between components
45
- 4. **Repository Pattern**: Data access abstraction
46
- 5. **Strategy Pattern**: Runtime algorithm selection
47
-
48
- ## Adding New TTS Providers
49
-
50
- ### Step 1: Implement the Provider Class
51
-
52
- Create a new provider class that inherits from `TTSProviderBase`:
53
-
54
- ```python
55
- # src/infrastructure/tts/my_tts_provider.py
56
-
57
- import logging
58
- from typing import Iterator, List
59
- from ..base.tts_provider_base import TTSProviderBase
60
- from ...domain.models.speech_synthesis_request import SpeechSynthesisRequest
61
- from ...domain.exceptions import SpeechSynthesisException
62
-
63
- logger = logging.getLogger(__name__)
64
-
65
-
66
- class MyTTSProvider(TTSProviderBase):
67
- """Custom TTS provider implementation."""
68
-
69
- def __init__(self, api_key: str = None, **kwargs):
70
- """Initialize the TTS provider.
71
-
72
- Args:
73
- api_key: Optional API key for cloud-based services
74
- **kwargs: Additional provider-specific configuration
75
- """
76
- super().__init__(
77
- provider_name="my_tts",
78
- supported_languages=["en", "zh", "es", "fr"]
79
- )
80
- self.api_key = api_key
81
- self._initialize_provider()
82
-
83
- def _initialize_provider(self):
84
- """Initialize provider-specific resources."""
85
- try:
86
- # Initialize your TTS engine/model here
87
- # Example: self.engine = MyTTSEngine(api_key=self.api_key)
88
- pass
89
- except Exception as e:
90
- logger.error(f"Failed to initialize {self.provider_name}: {e}")
91
- raise SpeechSynthesisException(f"Provider initialization failed: {e}")
92
-
93
- def is_available(self) -> bool:
94
- """Check if the provider is available and ready to use."""
95
- try:
96
- # Check if dependencies are installed
97
- # Check if models are loaded
98
- # Check if API is accessible (for cloud services)
99
- return True # Replace with actual availability check
100
- except Exception:
101
- return False
102
-
103
- def get_available_voices(self) -> List[str]:
104
- """Get list of available voices for this provider."""
105
- # Return actual voice IDs supported by your provider
106
- return ["voice1", "voice2", "voice3"]
107
-
108
- def _generate_audio(self, request: SpeechSynthesisRequest) -> tuple[bytes, int]:
109
- """Generate audio data from synthesis request.
110
-
111
- Args:
112
- request: The speech synthesis request
113
-
114
- Returns:
115
- tuple: (audio_data_bytes, sample_rate)
116
- """
117
- try:
118
- text = request.text_content.text
119
- voice_id = request.voice_settings.voice_id
120
- speed = request.voice_settings.speed
121
-
122
- # Implement your TTS synthesis logic here
123
- # Example:
124
- # audio_data = self.engine.synthesize(
125
- # text=text,
126
- # voice=voice_id,
127
- # speed=speed
128
- # )
129
-
130
- # Return audio data and sample rate
131
- audio_data = b"dummy_audio_data" # Replace with actual synthesis
132
- sample_rate = 22050 # Replace with actual sample rate
133
-
134
- return audio_data, sample_rate
135
-
136
- except Exception as e:
137
- self._handle_provider_error(e, "audio generation")
138
-
139
- def _generate_audio_stream(self, request: SpeechSynthesisRequest) -> Iterator[tuple[bytes, int, bool]]:
140
- """Generate audio data stream from synthesis request.
141
-
142
- Args:
143
- request: The speech synthesis request
144
-
145
- Yields:
146
- tuple: (audio_data_bytes, sample_rate, is_final)
147
- """
148
- try:
149
- # Implement streaming synthesis if supported
150
- # For non-streaming providers, you can yield the complete audio as a single chunk
151
-
152
- audio_data, sample_rate = self._generate_audio(request)
153
- yield audio_data, sample_rate, True
154
-
155
- except Exception as e:
156
- self._handle_provider_error(e, "streaming audio generation")
157
- ```
158
-
159
- ### Step 2: Register the Provider
160
-
161
- Add your provider to the factory registration:
162
-
163
- ```python
164
- # src/infrastructure/tts/provider_factory.py
165
-
166
- def _register_default_providers(self):
167
- """Register all available TTS providers."""
168
- # ... existing providers ...
169
-
170
- # Try to register your custom provider
171
- try:
172
- from .my_tts_provider import MyTTSProvider
173
- self._providers['my_tts'] = MyTTSProvider
174
- logger.info("Registered MyTTS provider")
175
- except ImportError as e:
176
- logger.info(f"MyTTS provider not available: {e}")
177
- ```
178
-
179
- ### Step 3: Add Configuration Support
180
-
181
- Update the configuration to include your provider:
182
-
183
- ```python
184
- # src/infrastructure/config/app_config.py
185
-
186
- class AppConfig:
187
- # ... existing configuration ...
188
-
189
- # TTS Provider Configuration
190
- TTS_PROVIDERS = os.getenv('TTS_PROVIDERS', 'chatterbox,my_tts').split(',')
191
-
192
- # Provider-specific settings
193
- MY_TTS_API_KEY = os.getenv('MY_TTS_API_KEY')
194
- MY_TTS_MODEL = os.getenv('MY_TTS_MODEL', 'default')
195
- ```
196
-
197
- ### Step 4: Add Tests
198
-
199
- Create comprehensive tests for your provider:
200
-
201
- ```python
202
- # tests/unit/infrastructure/tts/test_my_tts_provider.py
203
-
204
- import pytest
205
- from unittest.mock import Mock, patch
206
- from src.infrastructure.tts.my_tts_provider import MyTTSProvider
207
- from src.domain.models.speech_synthesis_request import SpeechSynthesisRequest
208
- from src.domain.models.text_content import TextContent
209
- from src.domain.models.voice_settings import VoiceSettings
210
- from src.domain.exceptions import SpeechSynthesisException
211
-
212
-
213
- class TestMyTTSProvider:
214
- """Test suite for MyTTS provider."""
215
-
216
- @pytest.fixture
217
- def provider(self):
218
- """Create a test provider instance."""
219
- return MyTTSProvider(api_key="test_key")
220
-
221
- @pytest.fixture
222
- def synthesis_request(self):
223
- """Create a test synthesis request."""
224
- text_content = TextContent(text="Hello world", language="en")
225
- voice_settings = VoiceSettings(voice_id="voice1", speed=1.0)
226
- return SpeechSynthesisRequest(
227
- text_content=text_content,
228
- voice_settings=voice_settings
229
- )
230
-
231
- def test_provider_initialization(self, provider):
232
- """Test provider initializes correctly."""
233
- assert provider.provider_name == "my_tts"
234
- assert "en" in provider.supported_languages
235
- assert provider.is_available()
236
-
237
- def test_get_available_voices(self, provider):
238
- """Test voice listing."""
239
- voices = provider.get_available_voices()
240
- assert isinstance(voices, list)
241
- assert len(voices) > 0
242
- assert "voice1" in voices
243
-
244
- def test_synthesize_success(self, provider, synthesis_request):
245
- """Test successful synthesis."""
246
- with patch.object(provider, '_generate_audio') as mock_generate:
247
- mock_generate.return_value = (b"audio_data", 22050)
248
-
249
- result = provider.synthesize(synthesis_request)
250
-
251
- assert result.data == b"audio_data"
252
- assert result.format == "wav"
253
- assert result.sample_rate == 22050
254
- mock_generate.assert_called_once_with(synthesis_request)
255
-
256
- def test_synthesize_failure(self, provider, synthesis_request):
257
- """Test synthesis failure handling."""
258
- with patch.object(provider, '_generate_audio') as mock_generate:
259
- mock_generate.side_effect = Exception("Synthesis failed")
260
-
261
- with pytest.raises(SpeechSynthesisException):
262
- provider.synthesize(synthesis_request)
263
-
264
- def test_synthesize_stream(self, provider, synthesis_request):
265
- """Test streaming synthesis."""
266
- chunks = list(provider.synthesize_stream(synthesis_request))
267
-
268
- assert len(chunks) > 0
269
- assert chunks[-1].is_final # Last chunk should be marked as final
270
-
271
- # Verify chunk structure
272
- for chunk in chunks:
273
- assert hasattr(chunk, 'data')
274
- assert hasattr(chunk, 'sample_rate')
275
- assert hasattr(chunk, 'is_final')
276
- ```
277
-
278
- ### Step 5: Add Integration Tests
279
-
280
- ```python
281
- # tests/integration/test_my_tts_integration.py
282
-
283
- import pytest
284
- from src.infrastructure.config.container_setup import initialize_global_container
285
- from src.infrastructure.tts.provider_factory import TTSProviderFactory
286
- from src.domain.models.speech_synthesis_request import SpeechSynthesisRequest
287
- from src.domain.models.text_content import TextContent
288
- from src.domain.models.voice_settings import VoiceSettings
289
-
290
-
291
- @pytest.mark.integration
292
- class TestMyTTSIntegration:
293
- """Integration tests for MyTTS provider."""
294
-
295
- def test_provider_factory_integration(self):
296
- """Test provider works with factory."""
297
- factory = TTSProviderFactory()
298
-
299
- if 'my_tts' in factory.get_available_providers():
300
- provider = factory.create_provider('my_tts')
301
- assert provider.is_available()
302
- assert len(provider.get_available_voices()) > 0
303
-
304
- def test_end_to_end_synthesis(self):
305
- """Test complete synthesis workflow."""
306
- container = initialize_global_container()
307
- factory = container.resolve(TTSProviderFactory)
308
-
309
- if 'my_tts' in factory.get_available_providers():
310
- provider = factory.create_provider('my_tts')
311
-
312
- # Create synthesis request
313
- text_content = TextContent(text="Integration test", language="en")
314
- voice_settings = VoiceSettings(voice_id="voice1", speed=1.0)
315
- request = SpeechSynthesisRequest(
316
- text_content=text_content,
317
- voice_settings=voice_settings
318
- )
319
-
320
- # Synthesize audio
321
- result = provider.synthesize(request)
322
-
323
- assert result.data is not None
324
- assert result.duration > 0
325
- assert result.sample_rate > 0
326
- ```
327
-
328
- ## Adding New STT Providers
329
-
330
- ### Step 1: Implement the Provider Class
331
-
332
- ```python
333
- # src/infrastructure/stt/my_stt_provider.py
334
-
335
- import logging
336
- from typing import List
337
- from ..base.stt_provider_base import STTProviderBase
338
- from ...domain.models.audio_content import AudioContent
339
- from ...domain.models.text_content import TextContent
340
- from ...domain.exceptions import SpeechRecognitionException
341
-
342
- logger = logging.getLogger(__name__)
343
-
344
-
345
- class MySTTProvider(STTProviderBase):
346
- """Custom STT provider implementation."""
347
-
348
- def __init__(self, model_path: str = None, **kwargs):
349
- """Initialize the STT provider.
350
-
351
- Args:
352
- model_path: Path to the STT model
353
- **kwargs: Additional provider-specific configuration
354
- """
355
- super().__init__(
356
- provider_name="my_stt",
357
- supported_languages=["en", "zh", "es", "fr"],
358
- supported_models=["my_stt_small", "my_stt_large"]
359
- )
360
- self.model_path = model_path
361
- self._initialize_provider()
362
-
363
- def _initialize_provider(self):
364
- """Initialize provider-specific resources."""
365
- try:
366
- # Initialize your STT engine/model here
367
- # Example: self.model = MySTTModel.load(self.model_path)
368
- pass
369
- except Exception as e:
370
- logger.error(f"Failed to initialize {self.provider_name}: {e}")
371
- raise SpeechRecognitionException(f"Provider initialization failed: {e}")
372
-
373
- def is_available(self) -> bool:
374
- """Check if the provider is available."""
375
- try:
376
- # Check dependencies, model availability, etc.
377
- return True # Replace with actual check
378
- except Exception:
379
- return False
380
-
381
- def get_supported_models(self) -> List[str]:
382
- """Get list of supported models."""
383
- return self.supported_models
384
-
385
- def _transcribe_audio(self, audio: AudioContent, model: str) -> tuple[str, float, dict]:
386
- """Transcribe audio using the specified model.
387
-
388
- Args:
389
- audio: Audio content to transcribe
390
- model: Model identifier to use
391
-
392
- Returns:
393
- tuple: (transcribed_text, confidence_score, metadata)
394
- """
395
- try:
396
- # Implement your STT logic here
397
- # Example:
398
- # result = self.model.transcribe(
399
- # audio_data=audio.data,
400
- # sample_rate=audio.sample_rate,
401
- # model=model
402
- # )
403
-
404
- # Return transcription results
405
- text = "Transcribed text" # Replace with actual transcription
406
- confidence = 0.95 # Replace with actual confidence
407
- metadata = {
408
- "model_used": model,
409
- "processing_time": 1.5,
410
- "language_detected": "en"
411
- }
412
-
413
- return text, confidence, metadata
414
-
415
- except Exception as e:
416
- self._handle_provider_error(e, "transcription")
417
- ```
418
-
419
- ### Step 2: Register and Test
420
-
421
- Follow similar steps as TTS providers for registration, configuration, and testing.
422
-
423
- ## Adding New Translation Providers
424
-
425
- ### Step 1: Implement the Provider Class
426
-
427
- ```python
428
- # src/infrastructure/translation/my_translation_provider.py
429
-
430
- import logging
431
- from typing import List, Dict
432
- from ..base.translation_provider_base import TranslationProviderBase
433
- from ...domain.models.translation_request import TranslationRequest
434
- from ...domain.models.text_content import TextContent
435
- from ...domain.exceptions import TranslationFailedException
436
-
437
- logger = logging.getLogger(__name__)
438
-
439
-
440
- class MyTranslationProvider(TranslationProviderBase):
441
- """Custom translation provider implementation."""
442
-
443
- def __init__(self, api_key: str = None, **kwargs):
444
- """Initialize the translation provider."""
445
- super().__init__(
446
- provider_name="my_translation",
447
- supported_languages=["en", "zh", "es", "fr", "de", "ja"]
448
- )
449
- self.api_key = api_key
450
- self._initialize_provider()
451
-
452
- def _initialize_provider(self):
453
- """Initialize provider-specific resources."""
454
- try:
455
- # Initialize your translation engine/model here
456
- pass
457
- except Exception as e:
458
- logger.error(f"Failed to initialize {self.provider_name}: {e}")
459
- raise TranslationFailedException(f"Provider initialization failed: {e}")
460
-
461
- def is_available(self) -> bool:
462
- """Check if the provider is available."""
463
- try:
464
- # Check dependencies, API connectivity, etc.
465
- return True # Replace with actual check
466
- except Exception:
467
- return False
468
-
469
- def get_supported_language_pairs(self) -> List[tuple[str, str]]:
470
- """Get supported language pairs."""
471
- # Return list of (source_lang, target_lang) tuples
472
- pairs = []
473
- for source in self.supported_languages:
474
- for target in self.supported_languages:
475
- if source != target:
476
- pairs.append((source, target))
477
- return pairs
478
-
479
- def _translate_text(self, request: TranslationRequest) -> tuple[str, float, dict]:
480
- """Translate text using the provider.
481
-
482
- Args:
483
- request: Translation request
484
-
485
- Returns:
486
- tuple: (translated_text, confidence_score, metadata)
487
- """
488
- try:
489
- source_text = request.text_content.text
490
- source_lang = request.source_language or request.text_content.language
491
- target_lang = request.target_language
492
-
493
- # Implement your translation logic here
494
- # Example:
495
- # result = self.translator.translate(
496
- # text=source_text,
497
- # source_lang=source_lang,
498
- # target_lang=target_lang
499
- # )
500
-
501
- # Return translation results
502
- translated_text = f"Translated: {source_text}" # Replace with actual translation
503
- confidence = 0.92 # Replace with actual confidence
504
- metadata = {
505
- "source_language_detected": source_lang,
506
- "target_language": target_lang,
507
- "processing_time": 0.5,
508
- "model_used": "my_translation_model"
509
- }
510
-
511
- return translated_text, confidence, metadata
512
-
513
- except Exception as e:
514
- self._handle_provider_error(e, "translation")
515
- ```
516
-
517
- ## Testing Guidelines
518
-
519
- ### Unit Testing
520
-
521
- - Test each provider in isolation using mocks
522
- - Cover success and failure scenarios
523
- - Test edge cases (empty input, invalid parameters)
524
- - Verify error handling and exception propagation
525
-
526
- ### Integration Testing
527
-
528
- - Test provider integration with factories
529
- - Test complete pipeline workflows
530
- - Test fallback mechanisms
531
- - Test with real external services (when available)
532
-
533
- ### Performance Testing
534
-
535
- - Measure processing times for different input sizes
536
- - Test memory usage and resource cleanup
537
- - Test concurrent processing capabilities
538
- - Benchmark against existing providers
539
-
540
- ### Test Structure
541
-
542
- ```
543
- tests/
544
- ├── unit/
545
- │   ├── domain/
546
- │   ├── application/
547
- │   └── infrastructure/
548
- │       ├── tts/
549
- │       ├── stt/
550
- │       └── translation/
551
- ├── integration/
552
- │   ├── test_complete_pipeline.py
553
- │   ├── test_provider_fallback.py
554
- │   └── test_error_recovery.py
555
- └── performance/
556
-     ├── test_processing_speed.py
557
-     ├── test_memory_usage.py
558
-     └── test_concurrent_processing.py
559
- ```
560
-
561
- ## Code Style and Standards
562
-
563
- ### Python Style Guide
564
-
565
- - Follow PEP 8 for code formatting
566
- - Use type hints for all public methods
567
- - Write comprehensive docstrings (Google style)
568
- - Use meaningful variable and function names
569
- - Keep functions focused and small (< 50 lines)
570
-
571
- ### Documentation Standards
572
-
573
- - Document all public interfaces
574
- - Include usage examples in docstrings
575
- - Explain complex algorithms and business logic
576
- - Keep documentation up-to-date with code changes
577
-
578
- ### Error Handling
579
-
580
- - Use domain-specific exceptions
581
- - Provide detailed error messages
582
- - Log errors with appropriate levels
583
- - Implement graceful degradation where possible
584
-
585
- ### Logging
586
-
587
- ```python
588
- import logging
589
-
590
- logger = logging.getLogger(__name__)
591
-
592
- # Use appropriate log levels
593
- logger.debug("Detailed debugging information")
594
- logger.info("General information about program execution")
595
- logger.warning("Something unexpected happened")
596
- logger.error("A serious error occurred")
597
- logger.critical("A very serious error occurred")
598
- ```
599
-
600
- ## Debugging and Troubleshooting
601
-
602
- ### Common Issues
603
-
604
- 1. **Provider Not Available**
605
- - Check dependencies are installed
606
- - Verify configuration settings
607
- - Check logs for initialization errors
608
-
609
- 2. **Poor Quality Output**
610
- - Verify input audio quality
611
- - Check model parameters
612
- - Review provider-specific settings
613
-
614
- 3. **Performance Issues**
615
- - Profile code execution
616
- - Check memory usage
617
- - Optimize audio processing pipeline
618
-
619
- ### Debugging Tools
620
-
621
- - Use Python debugger (pdb) for step-through debugging
622
- - Enable detailed logging for troubleshooting
623
- - Use profiling tools (cProfile, memory_profiler)
624
- - Monitor system resources during processing
625
-
626
- ### Logging Configuration
627
-
628
- ```python
629
- # Enable debug logging for development
630
- import logging
631
- logging.basicConfig(
632
- level=logging.DEBUG,
633
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
634
- handlers=[
635
- logging.FileHandler("debug.log"),
636
- logging.StreamHandler()
637
- ]
638
- )
639
- ```
640
-
641
- ## Performance Considerations
642
-
643
- ### Optimization Strategies
644
-
645
- 1. **Audio Processing**
646
- - Use appropriate sample rates
647
- - Implement streaming where possible
648
- - Cache processed results
649
- - Optimize memory usage
650
-
651
- 2. **Model Loading**
652
- - Load models once and reuse
653
- - Use lazy loading for optional providers
654
- - Implement model caching strategies
655
-
656
- 3. **Concurrent Processing**
657
- - Use async/await for I/O operations
658
- - Implement thread-safe providers
659
- - Consider multiprocessing for CPU-intensive tasks
660
-
661
- ### Memory Management
662
-
663
- - Clean up temporary files
664
- - Release model resources when not needed
665
- - Monitor memory usage in long-running processes
666
- - Implement resource pooling for expensive operations
667
-
668
- ### Monitoring and Metrics
669
-
670
- - Track processing times
671
- - Monitor error rates
672
- - Measure resource utilization
673
- - Implement health checks
674
-
675
- ## Contributing Guidelines
676
-
677
- ### Development Workflow
678
-
679
- 1. Fork the repository
680
- 2. Create a feature branch
681
- 3. Implement changes with tests
682
- 4. Run the full test suite
683
- 5. Submit a pull request
684
-
685
- ### Code Review Process
686
-
687
- - All changes require code review
688
- - Tests must pass before merging
689
- - Documentation must be updated
690
- - Performance impact should be assessed
691
-
692
- ### Release Process
693
-
694
- - Follow semantic versioning
695
- - Update changelog
696
- - Tag releases appropriately
697
- - Deploy to staging before production
698
-
699
- ---
700
-
701
- For questions or support, please refer to the project documentation or open an issue in the repository.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2025 Michael
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pre-requirements.txt DELETED
@@ -1 +0,0 @@
1
- Cython
 
 
test_parakeet_update.py DELETED
@@ -1,77 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Test script to verify the updated Parakeet provider works correctly."""
3
-
4
- import sys
5
- import os
6
- from pathlib import Path
7
-
8
- # Set up the path to work with the package structure
9
- current_dir = Path(__file__).parent
10
- sys.path.insert(0, str(current_dir))
11
- os.chdir(current_dir)
12
-
13
- def test_parakeet_provider():
14
- """Test the updated Parakeet STT provider."""
15
- try:
16
- # Import with absolute imports from the project root
17
- from src.infrastructure.stt.parakeet_provider import ParakeetSTTProvider
18
-
19
- print("✓ Successfully imported ParakeetSTTProvider")
20
-
21
- # Initialize the provider
22
- provider = ParakeetSTTProvider()
23
- print("✓ Successfully initialized ParakeetSTTProvider")
24
-
25
- # Test availability check
26
- is_available = provider.is_available()
27
- print(f"✓ Provider availability: {is_available}")
28
-
29
- if not is_available:
30
- print("⚠ Provider not available - missing dependencies")
31
- return False
32
-
33
- # Test model listing
34
- available_models = provider.get_available_models()
35
- print(f"✓ Available models: {available_models}")
36
-
37
- # Test default model
38
- default_model = provider.get_default_model()
39
- print(f"✓ Default model: {default_model}")
40
-
41
- # Test basic model loading (without actual transcription)
42
- print("✓ Testing model loading...")
43
- try:
44
- provider._load_model(default_model)
45
- print("✓ Model loaded successfully")
46
- except Exception as e:
47
- print(f"⚠ Model loading failed (expected on first run): {e}")
48
- print(" This is normal if model needs to be downloaded from Hugging Face")
49
-
50
- return True
51
-
52
- except ImportError as e:
53
- print(f"✗ Import error: {e}")
54
- return False
55
- except Exception as e:
56
- print(f"✗ Unexpected error: {e}")
57
- return False
58
-
59
- if __name__ == "__main__":
60
- print("Testing updated Parakeet STT provider...")
61
- print("=" * 50)
62
-
63
- success = test_parakeet_provider()
64
-
65
- print("=" * 50)
66
- if success:
67
- print("✓ All basic tests passed!")
68
- print("\nThe Parakeet provider has been successfully updated to use:")
69
- print("- Hugging Face Transformers instead of NeMo Toolkit")
70
- print("- AutoProcessor and AutoModelForCTC")
71
- print("- nvidia/parakeet-ctc-0.6b model")
72
- else:
73
- print("✗ Some tests failed!")
74
-
75
- print("\nNext steps:")
76
- print("1. Install dependencies: uv sync")
77
- print("2. Test with actual audio file for full validation")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test_simple_parakeet.py DELETED
@@ -1,128 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Simple test to validate Parakeet provider structure without full dependencies."""
3
-
4
- import sys
5
- import ast
6
-
7
- def test_parakeet_syntax():
8
- """Test that the Parakeet provider has valid Python syntax."""
9
- try:
10
- with open("src/infrastructure/stt/parakeet_provider.py", "r") as f:
11
- content = f.read()
12
-
13
- # Parse the AST to check syntax
14
- tree = ast.parse(content)
15
- print("✓ Parakeet provider has valid Python syntax")
16
-
17
- # Check for key components
18
- imports_found = []
19
- classes_found = []
20
- methods_found = []
21
-
22
- for node in ast.walk(tree):
23
- if isinstance(node, ast.Import):
24
- for alias in node.names:
25
- imports_found.append(alias.name)
26
- elif isinstance(node, ast.ImportFrom):
27
- if node.module:
28
- imports_found.append(node.module)
29
- elif isinstance(node, ast.ClassDef):
30
- classes_found.append(node.name)
31
- for item in node.body:
32
- if isinstance(item, ast.FunctionDef):
33
- methods_found.append(f"{node.name}.{item.name}")
34
-
35
- print(f"✓ Found class: {classes_found}")
36
-
37
- # Check for required transformers imports
38
- required_imports = ['torch', 'librosa', 'transformers']
39
- transformers_import_found = any('transformers' in imp for imp in imports_found)
40
-
41
- if transformers_import_found:
42
- print("✓ Transformers import found")
43
- else:
44
- print("⚠ Transformers import not found in imports")
45
-
46
- # Check for key methods
47
- required_methods = [
48
- 'ParakeetSTTProvider._perform_transcription',
49
- 'ParakeetSTTProvider._load_model',
50
- 'ParakeetSTTProvider.is_available',
51
- 'ParakeetSTTProvider.get_available_models',
52
- 'ParakeetSTTProvider.get_default_model'
53
- ]
54
-
55
- for method in required_methods:
56
- if method in methods_found:
57
- print(f"✓ Found method: {method}")
58
- else:
59
- print(f"✗ Missing method: {method}")
60
-
61
- # Check for transformers-specific code patterns
62
- torch_found = 'torch' in content
63
- autoprocessor_found = 'AutoProcessor' in content
64
- automodelctc_found = 'AutoModelForCTC' in content
65
- librosa_found = 'librosa' in content
66
-
67
- print(f"✓ Uses torch: {torch_found}")
68
- print(f"✓ Uses AutoProcessor: {autoprocessor_found}")
69
- print(f"✓ Uses AutoModelForCTC: {automodelctc_found}")
70
- print(f"✓ Uses librosa: {librosa_found}")
71
-
72
- return True
73
-
74
- except SyntaxError as e:
75
- print(f"✗ Syntax error: {e}")
76
- return False
77
- except Exception as e:
78
- print(f"✗ Error: {e}")
79
- return False
80
-
81
- def test_model_mapping():
82
- """Test that the model mapping is correct."""
83
- try:
84
- with open("src/infrastructure/stt/parakeet_provider.py", "r") as f:
85
- content = f.read()
86
-
87
- # Check for the correct model mapping
88
- if 'nvidia/parakeet-ctc-0.6b' in content:
89
- print("✓ Correct Hugging Face model path found")
90
- else:
91
- print("✗ Missing correct model path")
92
-
93
- # Check that old NeMo references are removed
94
- if 'nemo' in content.lower() and 'nemo_asr' not in content:
95
- print("✗ Still contains NeMo references")
96
- elif 'nemo' not in content.lower():
97
- print("✓ NeMo references removed")
98
- else:
99
- print("⚠ Some NeMo references may remain")
100
-
101
- return True
102
-
103
- except Exception as e:
104
- print(f"✗ Error checking model mapping: {e}")
105
- return False
106
-
107
- if __name__ == "__main__":
108
- print("Testing Parakeet STT Provider Update...")
109
- print("=" * 50)
110
-
111
- syntax_ok = test_parakeet_syntax()
112
- mapping_ok = test_model_mapping()
113
-
114
- print("=" * 50)
115
- if syntax_ok and mapping_ok:
116
- print("✓ Parakeet provider successfully updated!")
117
- print("\nKey Changes Made:")
118
- print("- ✓ Switched from NeMo Toolkit to Hugging Face Transformers")
119
- print("- ✓ Using AutoProcessor and AutoModelForCTC")
120
- print("- ✓ Updated to use nvidia/parakeet-ctc-0.6b model")
121
- print("- ✓ Proper audio loading with librosa")
122
- print("- ✓ CTC decoding for transcription")
123
- print("\nNext Steps:")
124
- print("1. Install dependencies: uv sync (when dependency issues are resolved)")
125
- print("2. Test with actual audio files")
126
- print("3. Verify transcription quality")
127
- else:
128
- print("✗ Some issues found - review above messages")