Spaces: Running
Commit: Added WiNGPT-Babel-2.1
Browse files
app.py (CHANGED)
|
@@ -13,7 +13,6 @@ df = pl.read_parquet("isolanguages.parquet")
|
|
| 13 |
non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
|
| 14 |
# all_langs = languagecodes.iso_languages_byname
|
| 15 |
all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
|
| 16 |
-
# iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
|
| 17 |
# iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
|
| 18 |
iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
|
| 19 |
langs = list(favourite_langs.keys())
|
|
@@ -33,7 +32,8 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "HPLT-OPUS",
|
|
| 33 |
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 34 |
"google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
|
| 35 |
"NiuTrans/LMT-60-0.6B", "NiuTrans/LMT-60-1.7B", "NiuTrans/LMT-60-4B",
|
| 36 |
-
"Lego-MT/Lego-MT", "HuggingFaceTB/SmolLM3-3B",
|
|
|
|
| 37 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
| 38 |
"Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
|
| 39 |
]
|
|
@@ -449,7 +449,6 @@ class Translators:
|
|
| 449 |
generated_ids = [
|
| 450 |
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
| 451 |
]
|
| 452 |
-
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))
|
| 453 |
output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 454 |
result = output.split('\n')[-1].strip() if '\n' in output else output.strip()
|
| 455 |
return result
|
|
@@ -624,7 +623,7 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
|
|
| 624 |
elif model_name == "HuggingFaceTB/SmolLM3-3B":
|
| 625 |
translated_text = Translators(model_name, s_language, t_language, input_text).smollm()
|
| 626 |
|
| 627 |
-
elif model_name == "winninghealth/WiNGPT-Babel-2":
|
| 628 |
translated_text = Translators(model_name, s_language, t_language, input_text).wingpt()
|
| 629 |
|
| 630 |
elif "HPLT" in model_name:
|
|
|
|
| 13 |
non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
|
| 14 |
# all_langs = languagecodes.iso_languages_byname
|
| 15 |
all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
|
|
|
|
| 16 |
# iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
|
| 17 |
iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
|
| 18 |
langs = list(favourite_langs.keys())
|
|
|
|
| 32 |
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 33 |
"google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
|
| 34 |
"NiuTrans/LMT-60-0.6B", "NiuTrans/LMT-60-1.7B", "NiuTrans/LMT-60-4B",
|
| 35 |
+
"Lego-MT/Lego-MT", "HuggingFaceTB/SmolLM3-3B",
|
| 36 |
+
"winninghealth/WiNGPT-Babel-2", "winninghealth/WiNGPT-Babel-2.1",
|
| 37 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
| 38 |
"Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
|
| 39 |
]
|
|
|
|
| 449 |
generated_ids = [
|
| 450 |
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
| 451 |
]
|
|
|
|
| 452 |
output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
| 453 |
result = output.split('\n')[-1].strip() if '\n' in output else output.strip()
|
| 454 |
return result
|
|
|
|
| 623 |
elif model_name == "HuggingFaceTB/SmolLM3-3B":
|
| 624 |
translated_text = Translators(model_name, s_language, t_language, input_text).smollm()
|
| 625 |
|
| 626 |
+
elif "winninghealth/WiNGPT" in model_name:
|
| 627 |
translated_text = Translators(model_name, s_language, t_language, input_text).wingpt()
|
| 628 |
|
| 629 |
elif "HPLT" in model_name:
|