Spaces:

DroolingPanda
/

teachingAssistant

Sleeping

teachingAssistant / test_simple_parakeet.py

Michael Hu

chore: update dependencies and replace NeMo with HF transformers for Parakeet STT provider

c762284 3 months ago

4.8 kB

	#!/usr/bin/env python3
	"""Simple test to validate Parakeet provider structure without full dependencies."""

	import sys
	import ast

	def test_parakeet_syntax(provider_path="src/infrastructure/stt/parakeet_provider.py"):
	    """Statically validate the Parakeet provider source without importing it.

	    The file is parsed with ``ast`` rather than imported, so the provider's
	    own dependencies do not have to be installed. Progress and findings are
	    reported with ``print``.

	    Args:
	        provider_path: Path of the provider source file to inspect. Defaults
	            to the project's Parakeet provider module.

	    Returns:
	        True when the file parses and every required ``ParakeetSTTProvider``
	        method is defined. False when the file cannot be read or parsed, or
	        when at least one required method is missing.
	    """
	    try:
	        # Explicit encoding: the platform default is not guaranteed to be UTF-8.
	        with open(provider_path, "r", encoding="utf-8") as f:
	            content = f.read()
	        tree = ast.parse(content)
	    except SyntaxError as e:
	        print(f"✗ Syntax error: {e}")
	        return False
	    except Exception as e:
	        # Best-effort script: report any read/parse failure instead of crashing.
	        print(f"✗ Error: {e}")
	        return False

	    print("✓ Parakeet provider has valid Python syntax")

	    # Collect imports, classes and "Class.method" names from the whole tree.
	    imports_found = []
	    classes_found = []
	    methods_found = set()
	    for node in ast.walk(tree):
	        if isinstance(node, ast.Import):
	            imports_found.extend(alias.name for alias in node.names)
	        elif isinstance(node, ast.ImportFrom):
	            if node.module:
	                imports_found.append(node.module)
	        elif isinstance(node, ast.ClassDef):
	            classes_found.append(node.name)
	            for item in node.body:
	                # ``async def`` methods are a distinct node type and must count too.
	                if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
	                    methods_found.add(f"{node.name}.{item.name}")

	    print(f"✓ Found class: {classes_found}")

	    # A missing transformers import is only a warning; it does not fail the check.
	    if any('transformers' in imp for imp in imports_found):
	        print("✓ Transformers import found")
	    else:
	        print("⚠ Transformers import not found in imports")

	    required_methods = [
	        'ParakeetSTTProvider._perform_transcription',
	        'ParakeetSTTProvider._load_model',
	        'ParakeetSTTProvider.is_available',
	        'ParakeetSTTProvider.get_available_models',
	        'ParakeetSTTProvider.get_default_model',
	    ]
	    all_methods_present = True
	    for method in required_methods:
	        if method in methods_found:
	            print(f"✓ Found method: {method}")
	        else:
	            print(f"✗ Missing method: {method}")
	            all_methods_present = False

	    # Informational substring checks; the mark reflects the actual result
	    # instead of always printing a success tick.
	    for pattern in ('torch', 'AutoProcessor', 'AutoModelForCTC', 'librosa'):
	        found = pattern in content
	        mark = "✓" if found else "⚠"
	        print(f"{mark} Uses {pattern}: {found}")

	    # A missing required method must not be reported as overall success.
	    return all_methods_present

	def test_model_mapping(provider_path="src/infrastructure/stt/parakeet_provider.py"):
	    """Check the provider source for the expected model id and leftover NeMo usage.

	    This is a plain text scan of the file; nothing is imported or executed.
	    Findings are reported with ``print``.

	    Args:
	        provider_path: Path of the provider source file to inspect. Defaults
	            to the project's Parakeet provider module.

	    Returns:
	        True when the ``nvidia/parakeet-ctc-0.6b`` model path is present and
	        no ``nemo_asr`` reference remains. False otherwise, including when
	        the file cannot be read.
	    """
	    try:
	        # Explicit encoding: the platform default is not guaranteed to be UTF-8.
	        with open(provider_path, "r", encoding="utf-8") as f:
	            content = f.read()
	    except Exception as e:
	        # Best-effort script: report the read failure instead of crashing.
	        print(f"✗ Error checking model mapping: {e}")
	        return False

	    ok = True

	    if 'nvidia/parakeet-ctc-0.6b' in content:
	        print("✓ Correct Hugging Face model path found")
	    else:
	        print("✗ Missing correct model path")
	        ok = False

	    # ``nemo_asr`` is a definite leftover of the old toolkit and fails the
	    # check; any other mention of "nemo" (e.g. in a comment) is only a warning.
	    lowered = content.lower()
	    if 'nemo_asr' in lowered:
	        print("✗ Still contains NeMo references")
	        ok = False
	    elif 'nemo' in lowered:
	        print("⚠ Some NeMo references may remain")
	    else:
	        print("✓ NeMo references removed")

	    return ok

	def main():
	    """Run both provider checks, print a summary, and return an exit code.

	    Both checks always run, so the output of each is shown even when the
	    first one fails.

	    Returns:
	        0 when both checks pass, 1 otherwise.
	    """
	    separator = "=" * 50

	    print("Testing Parakeet STT Provider Update...")
	    print(separator)

	    syntax_ok = test_parakeet_syntax()
	    mapping_ok = test_model_mapping()

	    print(separator)
	    if not (syntax_ok and mapping_ok):
	        print("✗ Some issues found - review above messages")
	        return 1

	    print("✓ Parakeet provider successfully updated!")
	    print("\nKey Changes Made:")
	    print("- ✓ Switched from NeMo Toolkit to Hugging Face Transformers")
	    print("- ✓ Using AutoProcessor and AutoModelForCTC")
	    print("- ✓ Updated to use nvidia/parakeet-ctc-0.6b model")
	    print("- ✓ Proper audio loading with librosa")
	    print("- ✓ CTC decoding for transcription")
	    print("\nNext Steps:")
	    print("1. Install dependencies: uv sync (when dependency issues are resolved)")
	    print("2. Test with actual audio files")
	    print("3. Verify transcription quality")
	    return 0


	if __name__ == "__main__":
	    # Propagate failure through the exit status so shells and CI can detect it.
	    sys.exit(main())