Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,6 +18,7 @@ models = ["Helsinki-NLP",
|
|
| 18 |
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 19 |
"facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B",
|
| 20 |
"facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
|
|
|
|
| 21 |
"bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
|
| 22 |
"bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
|
| 23 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
|
@@ -64,6 +65,15 @@ def google(sl, tl, input_text):
|
|
| 64 |
response = requests.get(url)
|
| 65 |
return response.json()[0][0][0]
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
def HelsinkiNLPAutoTokenizer(sl, tl, input_text):
|
| 68 |
if model_name == "Helsinki-NLP":
|
| 69 |
message_text = f'Translated from {sl} to {tl} with {model_name}.'
|
|
@@ -269,67 +279,56 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
|
|
| 269 |
|
| 270 |
if model_name.startswith("Helsinki-NLP"):
|
| 271 |
translated_text, message_text = HelsinkiNLP(sl, tl, input_text)
|
| 272 |
-
return translated_text, message_text
|
| 273 |
|
| 274 |
elif model_name == 'Argos':
|
| 275 |
translated_text = argos(sl, tl, input_text)
|
| 276 |
-
return translated_text, message_text
|
| 277 |
|
| 278 |
elif model_name == 'Google':
|
| 279 |
translated_text = google(sl, tl, input_text)
|
| 280 |
-
|
|
|
|
|
|
|
| 281 |
|
| 282 |
elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
|
| 283 |
translated_text = eurollm_instruct(model_name, sselected_language, tselected_language, input_text)
|
| 284 |
-
return translated_text, message_text
|
| 285 |
|
| 286 |
elif model_name == "utter-project/EuroLLM-1.7B":
|
| 287 |
translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
|
| 288 |
-
return translated_text, message_text
|
| 289 |
|
| 290 |
elif 'flan' in model_name.lower():
|
| 291 |
translated_text = flan(model_name, sselected_language, tselected_language, input_text)
|
| 292 |
-
return translated_text, message_text
|
| 293 |
|
| 294 |
elif 'teuken' in model_name.lower():
|
| 295 |
translated_text = teuken(model_name, sselected_language, tselected_language, input_text)
|
| 296 |
-
return translated_text, message_text
|
| 297 |
|
| 298 |
elif 'mt0' in model_name.lower():
|
| 299 |
translated_text = bigscience(model_name, sselected_language, tselected_language, input_text)
|
| 300 |
-
return translated_text, message_text
|
| 301 |
|
| 302 |
elif 'bloomz' in model_name.lower():
|
| 303 |
translated_text = bloomz(model_name, sselected_language, tselected_language, input_text)
|
| 304 |
-
return translated_text, message_text
|
| 305 |
|
| 306 |
elif 'nllb' in model_name.lower():
|
| 307 |
nnlbsl, nnlbtl = languagecodes.nllb_language_codes[sselected_language], languagecodes.nllb_language_codes[tselected_language]
|
| 308 |
translated_text = nllb(model_name, nnlbsl, nnlbtl, input_text)
|
| 309 |
-
return translated_text, message_text
|
| 310 |
|
| 311 |
elif model_name == "facebook/mbart-large-50-many-to-many-mmt":
|
| 312 |
translated_text = mbart_many_to_many(model_name, sselected_language, tselected_language, input_text)
|
| 313 |
-
return translated_text, message_text
|
| 314 |
|
| 315 |
elif model_name == "facebook/mbart-large-50-one-to-many-mmt":
|
| 316 |
translated_text = mbart_one_to_many(model_name, sselected_language, tselected_language, input_text)
|
| 317 |
-
return translated_text, message_text
|
| 318 |
|
| 319 |
elif model_name == "facebook/mbart-large-50-many-to-one-mmt":
|
| 320 |
translated_text = mbart_many_to_one(model_name, sselected_language, tselected_language, input_text)
|
| 321 |
-
return translated_text, message_text
|
| 322 |
|
| 323 |
elif 'Unbabel' in model_name:
|
| 324 |
translated_text = unbabel(model_name, sselected_language, tselected_language, input_text)
|
| 325 |
-
return translated_text, message_text
|
| 326 |
|
| 327 |
elif model_name.startswith('t5'):
|
| 328 |
translated_text = tfive(model_name, sselected_language, tselected_language, input_text)
|
| 329 |
-
return translated_text, message_text
|
| 330 |
|
| 331 |
else:
|
| 332 |
-
|
|
|
|
| 333 |
|
| 334 |
# Define a function to swap dropdown values
|
| 335 |
def swap_languages(src_lang, tgt_lang):
|
|
|
|
| 18 |
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 19 |
"facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B",
|
| 20 |
"facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
|
| 21 |
+
"facebook/m2m100_418M", "facebook/m2m100_1.2B",
|
| 22 |
"bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
|
| 23 |
"bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
|
| 24 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
|
|
|
| 65 |
response = requests.get(url)
|
| 66 |
return response.json()[0][0][0]
|
| 67 |
|
| 68 |
+
def mtom(model_name, sl, tl, input_text):
    """Translate ``input_text`` from ``sl`` to ``tl`` with an M2M100 checkpoint.

    Args:
        model_name: Hub id of the checkpoint passed to ``from_pretrained``,
            e.g. "facebook/m2m100_418M" or "facebook/m2m100_1.2B".
        sl: Source language code; assigned to ``tokenizer.src_lang``.
        tl: Target language code; resolved to a token id through
            ``tokenizer.get_lang_id`` and forced as the first generated token.
        input_text: Text to translate.

    Returns:
        The translated text as a single string (empty string if the model
        produced no sequences).
    """
    # Function-scope import, kept local to this backend as in the original.
    from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

    # NOTE(review): model and tokenizer are re-created on every call;
    # consider caching per model_name if latency matters.
    model = M2M100ForConditionalGeneration.from_pretrained(model_name)
    tokenizer = M2M100Tokenizer.from_pretrained(model_name)

    tokenizer.src_lang = sl
    encoded = tokenizer(input_text, return_tensors="pt")
    generated_tokens = model.generate(
        **encoded,
        forced_bos_token_id=tokenizer.get_lang_id(tl),
    )

    # batch_decode returns a list with one string per generated sequence.
    # The caller stores the result as `translated_text` alongside the other
    # backends, which return a plain string, so unwrap the single result
    # instead of handing back the list.
    decoded = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return decoded[0] if decoded else ""
|
| 76 |
+
|
| 77 |
def HelsinkiNLPAutoTokenizer(sl, tl, input_text):
|
| 78 |
if model_name == "Helsinki-NLP":
|
| 79 |
message_text = f'Translated from {sl} to {tl} with {model_name}.'
|
|
|
|
| 279 |
|
| 280 |
if model_name.startswith("Helsinki-NLP"):
|
| 281 |
translated_text, message_text = HelsinkiNLP(sl, tl, input_text)
|
|
|
|
| 282 |
|
| 283 |
elif model_name == 'Argos':
|
| 284 |
translated_text = argos(sl, tl, input_text)
|
|
|
|
| 285 |
|
| 286 |
elif model_name == 'Google':
|
| 287 |
translated_text = google(sl, tl, input_text)
|
| 288 |
+
|
| 289 |
+
elif "m2m" in model_name:
|
| 290 |
+
translated_text = mtom(model_name, sl, tl, input_text)
|
| 291 |
|
| 292 |
elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
|
| 293 |
translated_text = eurollm_instruct(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 294 |
|
| 295 |
elif model_name == "utter-project/EuroLLM-1.7B":
|
| 296 |
translated_text = eurollm(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 297 |
|
| 298 |
elif 'flan' in model_name.lower():
|
| 299 |
translated_text = flan(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 300 |
|
| 301 |
elif 'teuken' in model_name.lower():
|
| 302 |
translated_text = teuken(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 303 |
|
| 304 |
elif 'mt0' in model_name.lower():
|
| 305 |
translated_text = bigscience(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 306 |
|
| 307 |
elif 'bloomz' in model_name.lower():
|
| 308 |
translated_text = bloomz(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 309 |
|
| 310 |
elif 'nllb' in model_name.lower():
|
| 311 |
nnlbsl, nnlbtl = languagecodes.nllb_language_codes[sselected_language], languagecodes.nllb_language_codes[tselected_language]
|
| 312 |
translated_text = nllb(model_name, nnlbsl, nnlbtl, input_text)
|
|
|
|
| 313 |
|
| 314 |
elif model_name == "facebook/mbart-large-50-many-to-many-mmt":
|
| 315 |
translated_text = mbart_many_to_many(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 316 |
|
| 317 |
elif model_name == "facebook/mbart-large-50-one-to-many-mmt":
|
| 318 |
translated_text = mbart_one_to_many(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 319 |
|
| 320 |
elif model_name == "facebook/mbart-large-50-many-to-one-mmt":
|
| 321 |
translated_text = mbart_many_to_one(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 322 |
|
| 323 |
elif 'Unbabel' in model_name:
|
| 324 |
translated_text = unbabel(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 325 |
|
| 326 |
elif model_name.startswith('t5'):
|
| 327 |
translated_text = tfive(model_name, sselected_language, tselected_language, input_text)
|
|
|
|
| 328 |
|
| 329 |
else:
|
| 330 |
+
translated_text "No model selected or error in application", message_text
|
| 331 |
+
return translated_text, message_text
|
| 332 |
|
| 333 |
# Define a function to swap dropdown values
|
| 334 |
def swap_languages(src_lang, tgt_lang):
|