Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,6 +30,7 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "HPLT-OPUS",
|
|
| 30 |
"google-t5/t5-small", "google-t5/t5-base", "google-t5/t5-large",
|
| 31 |
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 32 |
"google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
|
|
|
|
| 33 |
"HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
|
| 34 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
| 35 |
"Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
|
|
@@ -84,6 +85,23 @@ class Translators:
|
|
| 84 |
return translation[0]['translation_text'], message
|
| 85 |
except Exception as error:
|
| 86 |
return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
def hplt(self, opus = False):
|
| 89 |
# langs = ['ar', 'bs', 'ca', 'en', 'et', 'eu', 'fi', 'ga', 'gl', 'hi', 'hr', 'is', 'mt', 'nn', 'sq', 'sw', 'zh_hant']
|
|
@@ -106,12 +124,10 @@ class Translators:
|
|
| 106 |
return translated_text, message_text
|
| 107 |
|
| 108 |
@staticmethod
|
| 109 |
-
def download_argos_model(from_code, to_code):
|
| 110 |
import argostranslate.package
|
| 111 |
print('Downloading model', from_code, to_code)
|
| 112 |
# Download and install Argos Translate package
|
| 113 |
-
argostranslate.package.update_package_index()
|
| 114 |
-
available_packages = argostranslate.package.get_available_packages()
|
| 115 |
package_to_install = next(
|
| 116 |
filter(lambda x: x.from_code == from_code and x.to_code == to_code, available_packages)
|
| 117 |
)
|
|
@@ -119,12 +135,19 @@ class Translators:
|
|
| 119 |
|
| 120 |
def argos(self):
|
| 121 |
import argostranslate.translate, argostranslate.package
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
try:
|
| 123 |
-
Translators.download_argos_model(self.sl, self.tl) # Download model
|
| 124 |
translated_text = argostranslate.translate.translate(self.input_text, self.sl, self.tl) # Translate
|
| 125 |
except StopIteration:
|
| 126 |
-
# packages_info = ', '.join(f"{pkg.get_description()}->{str(pkg.links)} {str(pkg.source_languages)}" for pkg in
|
| 127 |
-
packages_info = ', '.join(f"{pkg.from_name} ({pkg.from_code}) -> {pkg.to_name} ({pkg.to_code})" for pkg in argostranslate.package.get_available_packages())
|
| 128 |
translated_text = f"No Argos model for {self.sl} to {self.tl}. Try other model or languages combination from the available Argos models: {packages_info}."
|
| 129 |
except Exception as error:
|
| 130 |
translated_text = error
|
|
@@ -159,15 +182,19 @@ class Translators:
|
|
| 159 |
# from quickmt.hub import hf_list
|
| 160 |
# quickmt_models = [i.split("/quickmt-")[1] for i in hf_list()]
|
| 161 |
# quickmt_models.sort()
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
|
|
|
|
|
|
| 167 |
# available_languages = list(set([lang for model in quickmt_models for lang in model.split('-')]))
|
| 168 |
# available_languages.sort()
|
| 169 |
-
available_languages = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fr', 'he',
|
| 170 |
-
'id', 'it', 'ja', 'ko', 'lv', 'pl', 'pt', 'ro', 'ru',
|
|
|
|
|
|
|
| 171 |
# Direct translation model
|
| 172 |
if f"{self.sl}-{self.tl}" in quickmt_models:
|
| 173 |
model_path = Translators.quickmtdownload(model_name)
|
|
@@ -497,7 +524,10 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
|
|
| 497 |
|
| 498 |
elif "lego" in model_name.lower():
|
| 499 |
translated_text = Translators(model_name, sl, tl, input_text).LegoMT()
|
| 500 |
-
|
|
|
|
|
|
|
|
|
|
| 501 |
elif model_name.startswith('google-t5'):
|
| 502 |
translated_text = Translators(model_name, s_language, t_language, input_text).tfive()
|
| 503 |
|
|
|
|
| 30 |
"google-t5/t5-small", "google-t5/t5-base", "google-t5/t5-large",
|
| 31 |
"google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
|
| 32 |
"google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
|
| 33 |
+
"NiuTrans/LMT-60-0.6B", "NiuTrans/LMT-60-1.7B", "NiuTrans/LMT-60-4B",
|
| 34 |
"HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
|
| 35 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
| 36 |
"Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
|
|
|
|
| 85 |
return translation[0]['translation_text'], message
|
| 86 |
except Exception as error:
|
| 87 |
return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error
|
| 88 |
+
|
| 89 |
+
def niutrans(self):
    """Translate ``self.input_text`` with a NiuTrans causal-LM checkpoint.

    Loads the tokenizer and model named by ``self.model_name``, wraps a
    "translate from ``self.sl`` into ``self.tl``" instruction in the
    tokenizer's chat template, and generates with beam search
    (5 beams, at most 512 new tokens, no sampling).

    Returns:
        str: the decoded continuation only (prompt tokens are sliced off),
        with special tokens removed.
    """
    tokenizer = AutoTokenizer.from_pretrained(self.model_name, padding_side='left')
    model = AutoModelForCausalLM.from_pretrained(self.model_name)
    prompt = f"Translate the following text from {self.sl} into {self.tl}.\n{self.sl}: {self.input_text}.\n{self.tl}: "
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=512, num_beams=5, do_sample=False)
    # Keep only the tokens produced after the prompt for the single input sequence.
    prompt_length = len(model_inputs.input_ids[0])
    output_ids = generated_ids[0][prompt_length:].tolist()
    # Decode the whole continuation in one call. Passing a flat list of ids to
    # batch_decode decodes every token on its own, which can split a multi-byte
    # character across tokens and yield replacement characters once re-joined.
    return tokenizer.decode(output_ids, skip_special_tokens=True)
|
| 105 |
|
| 106 |
def hplt(self, opus = False):
|
| 107 |
# langs = ['ar', 'bs', 'ca', 'en', 'et', 'eu', 'fi', 'ga', 'gl', 'hi', 'hr', 'is', 'mt', 'nn', 'sq', 'sw', 'zh_hant']
|
|
|
|
| 124 |
return translated_text, message_text
|
| 125 |
|
| 126 |
@staticmethod
|
| 127 |
+
def download_argos_model(available_packages, from_code, to_code):
|
| 128 |
import argostranslate.package
|
| 129 |
print('Downloading model', from_code, to_code)
|
| 130 |
# Download and install Argos Translate package
|
|
|
|
|
|
|
| 131 |
package_to_install = next(
|
| 132 |
filter(lambda x: x.from_code == from_code and x.to_code == to_code, available_packages)
|
| 133 |
)
|
|
|
|
| 135 |
|
| 136 |
def argos(self):
|
| 137 |
import argostranslate.translate, argostranslate.package
|
| 138 |
+
argostranslate.package.update_package_index()
|
| 139 |
+
available_packages = argostranslate.package.get_available_packages()
|
| 140 |
+
available_slanguages = [lang.from_code for lang in available_packages]
|
| 141 |
+
available_tlanguages = [lang.to_code for lang in available_packages]
|
| 142 |
+
available_languages = sorted(list(set(available_slanguages + available_tlanguages)))
|
| 143 |
+
zipped = sorted(list(zip(available_slanguages, available_tlanguages)))
|
| 144 |
+
packages_info = ', '.join(f"{pkg.from_name} ({pkg.from_code}) -> {pkg.to_name} ({pkg.to_code})" for pkg in available_packages)
|
| 145 |
+
print(available_languages, zipped)
|
| 146 |
try:
|
| 147 |
+
Translators.download_argos_model(available_packages, self.sl, self.tl) # Download model
|
| 148 |
translated_text = argostranslate.translate.translate(self.input_text, self.sl, self.tl) # Translate
|
| 149 |
except StopIteration:
|
| 150 |
+
# packages_info = ', '.join(f"{pkg.get_description()}->{str(pkg.links)} {str(pkg.source_languages)}" for pkg in available_packages)
|
|
|
|
| 151 |
translated_text = f"No Argos model for {self.sl} to {self.tl}. Try other model or languages combination from the available Argos models: {packages_info}."
|
| 152 |
except Exception as error:
|
| 153 |
translated_text = error
|
|
|
|
| 182 |
# from quickmt.hub import hf_list
|
| 183 |
# quickmt_models = [i.split("/quickmt-")[1] for i in hf_list()]
|
| 184 |
# quickmt_models.sort()
|
| 185 |
+
quickmt_models = ['ar-en', 'bn-en', 'cs-en', 'da-en', 'de-en', 'el-en', 'en-ar', 'en-bn',
|
| 186 |
+
'en-cs', 'en-da', 'en-de', 'en-el', 'en-es', 'en-fa', 'en-fr', 'en-he',
|
| 187 |
+
'en-hi', 'en-hu', 'en-id', 'en-it', 'en-ja', 'en-ko', 'en-lv', 'en-pl',
|
| 188 |
+
'en-pt', 'en-ro', 'en-ru', 'en-sv', 'en-th', 'en-tr', 'en-ur', 'en-vi',
|
| 189 |
+
'en-zh', 'es-en', 'fa-en', 'fr-en', 'he-en', 'hi-en', 'hu-en', 'id-en',
|
| 190 |
+
'it-en', 'ja-en', 'ko-en', 'lv-en', 'pl-en', 'pt-en', 'ro-en', 'ru-en',
|
| 191 |
+
'th-en', 'tr-en', 'ur-en', 'vi-en', 'zh-en']
|
| 192 |
# available_languages = list(set([lang for model in quickmt_models for lang in model.split('-')]))
|
| 193 |
# available_languages.sort()
|
| 194 |
+
available_languages = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fr', 'he',
|
| 195 |
+
'hi', 'hu', 'id', 'it', 'ja', 'ko', 'lv', 'pl', 'pt', 'ro', 'ru',
|
| 196 |
+
'sv', 'th', 'tr', 'ur', 'vi', 'zh']
|
| 197 |
+
# print(quickmt_models, available_languages)
|
| 198 |
# Direct translation model
|
| 199 |
if f"{self.sl}-{self.tl}" in quickmt_models:
|
| 200 |
model_path = Translators.quickmtdownload(model_name)
|
|
|
|
| 524 |
|
| 525 |
elif "lego" in model_name.lower():
|
| 526 |
translated_text = Translators(model_name, sl, tl, input_text).LegoMT()
|
| 527 |
+
|
| 528 |
+
elif "niutrans" in model_name.lower():
|
| 529 |
+
translated_text = Translators(model_name, sl, tl, input_text).niutrans()
|
| 530 |
+
|
| 531 |
elif model_name.startswith('google-t5'):
|
| 532 |
translated_text = Translators(model_name, s_language, t_language, input_text).tfive()
|
| 533 |
|