Add mitre
app.py CHANGED
@@ -29,6 +29,7 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "HPLT-OPUS",
     "bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
     "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
     "NiuTrans/LMT-60-0.6B", "NiuTrans/LMT-60-1.7B", "NiuTrans/LMT-60-4B",
+    "naist-nlp/mitre_466m", "naist-nlp/mitre_913m",
     "Lego-MT/Lego-MT", "BSC-LT/salamandraTA-2b-instruct",
     "winninghealth/WiNGPT-Babel", "winninghealth/WiNGPT-Babel-2", "winninghealth/WiNGPT-Babel-2.1",
     "Unbabel/Tower-Plus-2B", "HuggingFaceTB/SmolLM3-3B", "Unbabel/TowerInstruct-7B-v0.2",
@@ -40,7 +41,7 @@ DEFAULTS = [langs[0], langs[1], models[0]]
 
 def timer(func):
     from time import time
-    def …
+    def translate(input_text, s_language, t_language, model_name) -> tuple[str, str]:
         start_time = time()
         translated_text, message_text = func(input_text, s_language, t_language, model_name)
         end_time = time()
@@ -48,7 +49,7 @@ def timer(func):
         # print(f"Function {func.__name__!r} executed in {execution_time:.2f} seconds.")
         message_text = f'Executed in {execution_time:.2f} seconds! {message_text}'
         return translated_text, message_text
-    return …
+    return translate
 
 def model_to_cuda(model):
     # Move the model to GPU if available
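Aside on the two `timer` hunks above: `timer` is a decorator whose inner `translate` closure forwards its four arguments to the wrapped function and prefixes the returned message with the elapsed time; this commit touches only the closure's signature line and the final `return`. A minimal runnable sketch of the decorator in use; `translate_fn` is a hypothetical stand-in, since the actual decoration site is outside this diff:

from time import sleep

# Hypothetical stand-in for a real translation function; only the
# four-argument signature matters, because timer's inner closure
# forwards exactly these arguments.
def translate_fn(input_text, s_language, t_language, model_name):
    sleep(0.5)  # pretend to run a model
    return input_text.upper(), f"{model_name}: {s_language} -> {t_language}"

timed = timer(translate_fn)  # same effect as decorating with @timer
text, message = timed("hello", "English", "German", "dummy-model")
print(message)  # e.g. "Executed in 0.50 seconds! dummy-model: English -> German"

Because the closure hard-codes the signature rather than taking *args and **kwargs, `timer` only fits functions with exactly these four parameters.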
@@ -101,6 +102,18 @@ class Translators:
         except Exception as error:
             return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error
 
+    def mitre(self):
+        from transformers import AutoModel, AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.model_name, trust_remote_code=True, use_fast=False)
+        model = AutoModel.from_pretrained(self.model_name, trust_remote_code=True).to(self.device)
+        model.eval()
+        # Translate one or more source sentences into a single target language
+        src_tokens = tokenizer.encode_source_tokens_to_input_ids(self.input_text, target_language=self.tl)
+        with torch.inference_mode():
+            generated_tokens = model.generate(src_tokens.to(self.device))
+        result = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+        return result
+
     def niutrans(self):
         tokenizer = AutoTokenizer.from_pretrained(self.model_name, padding_side='left')
         model = AutoModelForCausalLM.from_pretrained(self.model_name)
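The new `mitre` method wraps MITRE's custom tokenizer and model code (loaded via `trust_remote_code=True`): the tokenizer encodes the source text together with the target language in one step, and the model generates from those ids. A standalone sketch of the same call sequence, kept on CPU for simplicity; the example sentence and target code "de" are illustrative assumptions:

import torch
from transformers import AutoModel, AutoTokenizer

model_id = "naist-nlp/mitre_466m"  # or "naist-nlp/mitre_913m"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, use_fast=False)
model = AutoModel.from_pretrained(model_id, trust_remote_code=True)
model.eval()

# encode_source_tokens_to_input_ids comes from the model's custom
# tokenizer code; it takes one or more source sentences plus a single
# target language and returns input ids ready for generate().
src_tokens = tokenizer.encode_source_tokens_to_input_ids(
    ["I have a red apple."], target_language="de"
)
with torch.inference_mode():
    generated = model.generate(src_tokens)
print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])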
@@ -583,6 +596,9 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
     elif model_name == "Helsinki-NLP/opus-mt-tc-bible-big-roa-en":
         translated_text, message_text = Translators(model_name, sl, tl, input_text).simplepipe()
 
+    elif 'mitre' in model_name.lower():
+        translated_text = Translators(model_name, sl, tl, input_text).mitre()
+
     elif "m2m" in model_name.lower():
         translated_text = Translators(model_name, sl, tl, input_text).mtom()
 
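As in the `m2m` branch, the new branch assigns only `translated_text`, so `message_text` presumably comes from whatever default `translate_text` uses for its single-value branches. A hypothetical end-to-end call through the dispatcher; the language label strings are placeholders for whatever `langs` actually contains:

# Hypothetical call; assumes translate_text is wrapped by timer, so it
# returns (translated_text, message_text).
text, message = translate_text(
    "I have a red apple.", "English", "German", "naist-nlp/mitre_466m"
)
print(message)  # "Executed in X.XX seconds! ..."
print(text)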