Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
|
|
| 14 |
# all_langs = languagecodes.iso_languages_byname
|
| 15 |
all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
|
| 16 |
# iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
|
| 17 |
-
iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
|
| 18 |
iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
|
| 19 |
langs = list(favourite_langs.keys())
|
| 20 |
langs.extend(list(all_langs.keys())) # Language options as list, add favourite languages first
|
|
@@ -83,7 +83,7 @@ class Translators:
|
|
| 83 |
try:
|
| 84 |
pipe = pipeline("translation", model=self.model_name, device=self.device)
|
| 85 |
translation = pipe(self.input_text)
|
| 86 |
-
message = f'Translated from {
|
| 87 |
return translation[0]['translation_text'], message
|
| 88 |
except Exception as error:
|
| 89 |
return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error
|
|
@@ -132,9 +132,9 @@ class Translators:
|
|
| 132 |
pipe = pipeline("translation", model=hplt_model, device=self.device)
|
| 133 |
translation = pipe(self.input_text)
|
| 134 |
translated_text = translation[0]['translation_text']
|
| 135 |
-
message_text = f'Translated from {
|
| 136 |
else:
|
| 137 |
-
translated_text = f'HPLT model from {
|
| 138 |
message_text = f"Available models: {', '.join(hplt_models)}"
|
| 139 |
return translated_text, message_text
|
| 140 |
|
|
@@ -159,7 +159,7 @@ class Translators:
|
|
| 159 |
packages_info = ', '.join(f"{pkg.from_name} ({pkg.from_code}) -> {pkg.to_name} ({pkg.to_code})" for pkg in available_packages)
|
| 160 |
# print(available_languages, combos, packages_info)
|
| 161 |
if self.sl not in available_languages and self.tl not in available_languages:
|
| 162 |
-
translated_text = f'''No supported Argos model available from {
|
| 163 |
Try other model or languages combination from the available Argos models: {', '.join(available_languages)}.'''
|
| 164 |
else:
|
| 165 |
try:
|
|
@@ -171,12 +171,12 @@ class Translators:
|
|
| 171 |
translated_pivottext = argostranslate.translate.translate(self.input_text, self.sl, 'en') # Translate to pivot language English
|
| 172 |
self.__class__.download_argos_model(available_packages, 'en', self.tl) # Download model
|
| 173 |
translated_text = argostranslate.translate.translate(translated_pivottext, 'en', self.tl) # Translate from pivot language English
|
| 174 |
-
message = f'Translated from {
|
| 175 |
else:
|
| 176 |
-
translated_text = f"No Argos model for {
|
| 177 |
except StopIteration as IterationError:
|
| 178 |
# packages_info = ', '.join(f"{pkg.get_description()}->{str(pkg.links)} {str(pkg.source_languages)}" for pkg in available_packages)
|
| 179 |
-
translated_text = f"No Argos model for {
|
| 180 |
except Exception as generalerror:
|
| 181 |
translated_text = f"General error: {generalerror}"
|
| 182 |
return translated_text
|
|
@@ -227,7 +227,7 @@ class Translators:
|
|
| 227 |
if f"{self.sl}-{self.tl}" in quickmt_models:
|
| 228 |
model_path = Translators.quickmtdownload(model_name)
|
| 229 |
translated_text = Translators.quickmttranslate(model_path, self.input_text)
|
| 230 |
-
message = f'Translated from {
|
| 231 |
# Pivot language English
|
| 232 |
elif self.sl in available_languages and self.tl in available_languages:
|
| 233 |
model_name = f"quickmt-{self.sl}-en"
|
|
@@ -236,9 +236,9 @@ class Translators:
|
|
| 236 |
model_name = f"quickmt-en-{self.tl}"
|
| 237 |
model_path = Translators.quickmtdownload(model_name)
|
| 238 |
translated_text = Translators.quickmttranslate(model_path, entranslation)
|
| 239 |
-
message = f'Translated from {
|
| 240 |
else:
|
| 241 |
-
translated_text = f'No Quickmt model available for translation from {
|
| 242 |
message = f"Available models: {', '.join(quickmt_models)}"
|
| 243 |
return translated_text, message
|
| 244 |
|
|
@@ -256,13 +256,13 @@ class Translators:
|
|
| 256 |
model_name = f"Helsinki-NLP/opus-mt-{self.sl}-{self.tl}"
|
| 257 |
pipe = pipeline("translation", model=model_name, device=self.device)
|
| 258 |
translation = pipe(self.input_text)
|
| 259 |
-
return translation[0]['translation_text'], f'Translated from {
|
| 260 |
except EnvironmentError:
|
| 261 |
try: # Tatoeba models
|
| 262 |
model_name = f"Helsinki-NLP/opus-tatoeba-{self.sl}-{self.tl}"
|
| 263 |
pipe = pipeline("translation", model=model_name, device=self.device)
|
| 264 |
translation = pipe(self.input_text)
|
| 265 |
-
return translation[0]['translation_text'], f'Translated from {
|
| 266 |
except EnvironmentError as error:
|
| 267 |
self.model_name = "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul" # Last resort: try multi to multi
|
| 268 |
return self.HelsinkiNLP_mulroa()
|
|
@@ -402,18 +402,18 @@ class Translators:
|
|
| 402 |
|
| 403 |
def seamlessm4t1(self):
|
| 404 |
from transformers import AutoProcessor, SeamlessM4TModel
|
| 405 |
-
processor = AutoProcessor.from_pretrained(self.
|
| 406 |
-
model = SeamlessM4TModel.from_pretrained(self.
|
| 407 |
src_lang = iso1toall.get(self.sl)[2] # 'deu', 'ron', 'eng', 'fra'
|
| 408 |
tgt_lang = iso1toall.get(self.tl)[2]
|
| 409 |
text_inputs = processor(text = self.input_text, src_lang=src_lang, return_tensors="pt")
|
| 410 |
output_tokens = model.generate(**text_inputs, tgt_lang=tgt_lang, generate_speech=False)
|
| 411 |
return processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
|
| 412 |
-
|
| 413 |
def seamlessm4t2(self):
|
| 414 |
from transformers import AutoProcessor, SeamlessM4Tv2ForTextToText
|
| 415 |
-
processor = AutoProcessor.from_pretrained(self.
|
| 416 |
-
model = SeamlessM4Tv2ForTextToText.from_pretrained(self.
|
| 417 |
src_lang = iso1toall.get(self.sl)[2] # 'deu', 'ron', 'eng', 'fra'
|
| 418 |
tgt_lang = iso1toall.get(self.tl)[2]
|
| 419 |
text_inputs = processor(text=self.input_text, src_lang=src_lang, return_tensors="pt")
|
|
|
|
| 14 |
# all_langs = languagecodes.iso_languages_byname
|
| 15 |
all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
|
| 16 |
# iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
|
| 17 |
+
# iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
|
| 18 |
iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
|
| 19 |
langs = list(favourite_langs.keys())
|
| 20 |
langs.extend(list(all_langs.keys())) # Language options as list, add favourite languages first
|
|
|
|
| 83 |
try:
|
| 84 |
pipe = pipeline("translation", model=self.model_name, device=self.device)
|
| 85 |
translation = pipe(self.input_text)
|
| 86 |
+
message = f'Translated from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]} with {self.model_name}.'
|
| 87 |
return translation[0]['translation_text'], message
|
| 88 |
except Exception as error:
|
| 89 |
return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error
|
|
|
|
| 132 |
pipe = pipeline("translation", model=hplt_model, device=self.device)
|
| 133 |
translation = pipe(self.input_text)
|
| 134 |
translated_text = translation[0]['translation_text']
|
| 135 |
+
message_text = f'Translated from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]} with {hplt_model}.'
|
| 136 |
else:
|
| 137 |
+
translated_text = f'HPLT model from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]} not available!'
|
| 138 |
message_text = f"Available models: {', '.join(hplt_models)}"
|
| 139 |
return translated_text, message_text
|
| 140 |
|
|
|
|
| 159 |
packages_info = ', '.join(f"{pkg.from_name} ({pkg.from_code}) -> {pkg.to_name} ({pkg.to_code})" for pkg in available_packages)
|
| 160 |
# print(available_languages, combos, packages_info)
|
| 161 |
if self.sl not in available_languages and self.tl not in available_languages:
|
| 162 |
+
translated_text = f'''No supported Argos model available from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]}!
|
| 163 |
Try other model or languages combination from the available Argos models: {', '.join(available_languages)}.'''
|
| 164 |
else:
|
| 165 |
try:
|
|
|
|
| 171 |
translated_pivottext = argostranslate.translate.translate(self.input_text, self.sl, 'en') # Translate to pivot language English
|
| 172 |
self.__class__.download_argos_model(available_packages, 'en', self.tl) # Download model
|
| 173 |
translated_text = argostranslate.translate.translate(translated_pivottext, 'en', self.tl) # Translate from pivot language English
|
| 174 |
+
message = f'Translated from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]} with Argos using pivot language English.'
|
| 175 |
else:
|
| 176 |
+
translated_text = f"No Argos model for {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]}. Try other model or languages combination from the available Argos models: {packages_info}."
|
| 177 |
except StopIteration as IterationError:
|
| 178 |
# packages_info = ', '.join(f"{pkg.get_description()}->{str(pkg.links)} {str(pkg.source_languages)}" for pkg in available_packages)
|
| 179 |
+
translated_text = f"No Argos model for {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]}. Error: {IterationError}. Try other model or languages combination from the available Argos models: {packages_info}."
|
| 180 |
except Exception as generalerror:
|
| 181 |
translated_text = f"General error: {generalerror}"
|
| 182 |
return translated_text
|
|
|
|
| 227 |
if f"{self.sl}-{self.tl}" in quickmt_models:
|
| 228 |
model_path = Translators.quickmtdownload(model_name)
|
| 229 |
translated_text = Translators.quickmttranslate(model_path, self.input_text)
|
| 230 |
+
message = f'Translated from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]} with {model_name}.'
|
| 231 |
# Pivot language English
|
| 232 |
elif self.sl in available_languages and self.tl in available_languages:
|
| 233 |
model_name = f"quickmt-{self.sl}-en"
|
|
|
|
| 236 |
model_name = f"quickmt-en-{self.tl}"
|
| 237 |
model_path = Translators.quickmtdownload(model_name)
|
| 238 |
translated_text = Translators.quickmttranslate(model_path, entranslation)
|
| 239 |
+
message = f'Translated from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]} with Quickmt using pivot language English.'
|
| 240 |
else:
|
| 241 |
+
translated_text = f'No Quickmt model available for translation from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]}!'
|
| 242 |
message = f"Available models: {', '.join(quickmt_models)}"
|
| 243 |
return translated_text, message
|
| 244 |
|
|
|
|
| 256 |
model_name = f"Helsinki-NLP/opus-mt-{self.sl}-{self.tl}"
|
| 257 |
pipe = pipeline("translation", model=model_name, device=self.device)
|
| 258 |
translation = pipe(self.input_text)
|
| 259 |
+
return translation[0]['translation_text'], f'Translated from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]} with {model_name}.'
|
| 260 |
except EnvironmentError:
|
| 261 |
try: # Tatoeba models
|
| 262 |
model_name = f"Helsinki-NLP/opus-tatoeba-{self.sl}-{self.tl}"
|
| 263 |
pipe = pipeline("translation", model=model_name, device=self.device)
|
| 264 |
translation = pipe(self.input_text)
|
| 265 |
+
return translation[0]['translation_text'], f'Translated from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]} with {model_name}.'
|
| 266 |
except EnvironmentError as error:
|
| 267 |
self.model_name = "Helsinki-NLP/opus-mt-tc-bible-big-mul-mul" # Last resort: try multi to multi
|
| 268 |
return self.HelsinkiNLP_mulroa()
|
|
|
|
| 402 |
|
| 403 |
def seamlessm4t1(self):
|
| 404 |
from transformers import AutoProcessor, SeamlessM4TModel
|
| 405 |
+
processor = AutoProcessor.from_pretrained(self.model_name)
|
| 406 |
+
model = SeamlessM4TModel.from_pretrained(self.model_name)
|
| 407 |
src_lang = iso1toall.get(self.sl)[2] # 'deu', 'ron', 'eng', 'fra'
|
| 408 |
tgt_lang = iso1toall.get(self.tl)[2]
|
| 409 |
text_inputs = processor(text = self.input_text, src_lang=src_lang, return_tensors="pt")
|
| 410 |
output_tokens = model.generate(**text_inputs, tgt_lang=tgt_lang, generate_speech=False)
|
| 411 |
return processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
|
| 412 |
+
|
| 413 |
def seamlessm4t2(self):
|
| 414 |
from transformers import AutoProcessor, SeamlessM4Tv2ForTextToText
|
| 415 |
+
processor = AutoProcessor.from_pretrained(self.model_name)
|
| 416 |
+
model = SeamlessM4Tv2ForTextToText.from_pretrained(self.model_name)
|
| 417 |
src_lang = iso1toall.get(self.sl)[2] # 'deu', 'ron', 'eng', 'fra'
|
| 418 |
tgt_lang = iso1toall.get(self.tl)[2]
|
| 419 |
text_inputs = processor(text=self.input_text, src_lang=src_lang, return_tensors="pt")
|