Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -43,6 +43,42 @@ def model_to_cuda(model):
|
|
| 43 |
print("CUDA not available! Using CPU.")
|
| 44 |
return model
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def flan(model_name, sl, tl, input_text):
|
| 47 |
tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
|
| 48 |
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
|
@@ -174,21 +210,12 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
|
|
| 174 |
|
| 175 |
sl = all_langs[sselected_language]
|
| 176 |
tl = all_langs[tselected_language]
|
| 177 |
-
message_text = f'
|
| 178 |
print(message_text)
|
| 179 |
-
if model_name == "Helsinki-NLP":
|
| 180 |
-
try:
|
| 181 |
-
model_name = f"Helsinki-NLP/opus-mt-{sl}-{tl}"
|
| 182 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 183 |
-
model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
|
| 184 |
-
except EnvironmentError:
|
| 185 |
-
try:
|
| 186 |
-
model_name = f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}"
|
| 187 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 188 |
-
model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
|
| 189 |
-
except EnvironmentError as error:
|
| 190 |
-
return f"Error finding model: {model_name}! Try other available language combination.", error
|
| 191 |
|
|
|
|
|
|
|
|
|
|
| 192 |
elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
|
| 193 |
translated_text = eurollm_instruct(model_name, sselected_language, tselected_language, input_text)
|
| 194 |
return translated_text, message_text
|
|
@@ -226,22 +253,18 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
|
|
| 226 |
translated_text = unbabel(model_name, sselected_language, tselected_language, input_text)
|
| 227 |
return translated_text, message_text
|
| 228 |
|
| 229 |
-
|
| 230 |
tokenizer = T5Tokenizer.from_pretrained(model_name)
|
| 231 |
model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")
|
| 232 |
-
|
| 233 |
-
if model_name.startswith("Helsinki-NLP"):
|
| 234 |
-
prompt = input_text
|
| 235 |
-
else:
|
| 236 |
prompt = f"translate {sselected_language} to {tselected_language}: {input_text}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 241 |
-
|
| 242 |
-
message_text = f'Translated from {sselected_language} to {tselected_language} with {model_name}.'
|
| 243 |
-
print(f'Translated from {sselected_language} to {tselected_language} with {model_name}:', f'{input_text} = {translated_text}', sep='\n')
|
| 244 |
-
return translated_text, message_text
|
| 245 |
|
| 246 |
# Define a function to swap dropdown values
|
| 247 |
def swap_languages(src_lang, tgt_lang):
|
|
|
|
| 43 |
print("CUDA not available! Using CPU.")
|
| 44 |
return model
|
| 45 |
|
| 46 |
+
def HelsinkiNLPAutoTokenizer(sl, tl, input_text):
    """Translate ``input_text`` from ``sl`` to ``tl`` with a Helsinki-NLP model.

    The ``opus-mt`` checkpoint for the language pair is tried first and the
    ``opus-tatoeba`` checkpoint second. The first one that loads is used to
    tokenize, generate and decode the translation.

    Args:
        sl: Source language code used to build the model name.
        tl: Target language code used to build the model name.
        input_text: Text to translate.

    Returns:
        A 2-tuple. On success ``(translated_text, message_text)``; when
        neither checkpoint can be loaded ``(error_message, exception)``.
    """
    candidates = (
        f"Helsinki-NLP/opus-mt-{sl}-{tl}",
        f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}",
    )
    last_error = None
    for model_name in candidates:
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
        except EnvironmentError as error:
            # Loading failed for this checkpoint; fall through to the next one.
            last_error = error
            continue

        # These models take the raw text; no instruction prefix is needed.
        input_ids = tokenizer.encode(input_text, return_tensors="pt")
        # model_to_cuda may have moved the model to the GPU, so the inputs
        # have to live on the same device as the model.
        input_ids = input_ids.to(model.device)
        output_ids = model.generate(input_ids, max_length=512)
        translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        # Build the message only now, so it names the checkpoint actually used.
        message_text = f'Translated from {sl} to {tl} with {model_name}.'
        return translated_text, message_text

    # model_name still holds the last candidate that was tried.
    return f"Error finding model: {model_name}! Try other available language combination.", last_error
|
| 64 |
+
|
| 65 |
+
def HelsinkiNLP(sl, tl, input_text):
    """Translate ``input_text`` from ``sl`` to ``tl`` via a translation pipeline.

    The ``opus-mt`` checkpoint for the language pair is tried first and the
    ``opus-tatoeba`` checkpoint second. The first pipeline that can be built
    performs the translation.

    Args:
        sl: Source language code used to build the model name.
        tl: Target language code used to build the model name.
        input_text: Text to translate.

    Returns:
        A 2-tuple. On success ``(translated_text, message_text)``; on failure
        ``(error_message, exception)``.
    """
    candidates = (
        f"Helsinki-NLP/opus-mt-{sl}-{tl}",
        f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}",
    )
    last_error = None
    try:
        for model_name in candidates:
            try:
                pipe = pipeline("translation", model=model_name)
            except EnvironmentError as error:
                # Pipeline could not be built for this checkpoint; try the next.
                last_error = error
                continue
            translation = pipe(input_text)
            return translation[0]['translation_text'], f'Translated from {sl} to {tl} with {model_name}.'
    except KeyError as error:
        # Report with the names that are actually in scope (sl / tl).
        return f"Error: Translation direction {sl} to {tl} is not supported by Helsinki Translation Models", error

    # model_name still holds the last candidate that was tried.
    return f"Error finding model: {model_name}! Try other available language combination.", last_error
|
| 81 |
+
|
| 82 |
def flan(model_name, sl, tl, input_text):
|
| 83 |
tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
|
| 84 |
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
|
|
|
| 210 |
|
| 211 |
sl = all_langs[sselected_language]
|
| 212 |
tl = all_langs[tselected_language]
|
| 213 |
+
message_text = f'Translated from {sselected_language} to {tselected_language} with {model_name}'
|
| 214 |
print(message_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
+
if model_name.startswith("Helsinki-NLP"):
|
| 217 |
+
return translated_text, message_text = HelsinkiNLP(sl, tl, input_text)
|
| 218 |
+
|
| 219 |
elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
|
| 220 |
translated_text = eurollm_instruct(model_name, sselected_language, tselected_language, input_text)
|
| 221 |
return translated_text, message_text
|
|
|
|
| 253 |
translated_text = unbabel(model_name, sselected_language, tselected_language, input_text)
|
| 254 |
return translated_text, message_text
|
| 255 |
|
| 256 |
+
elif model_name.startswith('t5'):
|
| 257 |
tokenizer = T5Tokenizer.from_pretrained(model_name)
|
| 258 |
model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
prompt = f"translate {sselected_language} to {tselected_language}: {input_text}"
|
| 260 |
+
input_ids = tokenizer.encode(prompt, return_tensors="pt")
|
| 261 |
+
output_ids = model.generate(input_ids, max_length=512)
|
| 262 |
+
translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 263 |
+
# print(f'Translated from {sselected_language} to {tselected_language} with {model_name}:', f'{input_text} = {translated_text}', sep='\n')
|
| 264 |
+
return translated_text, message_text
|
| 265 |
|
| 266 |
+
else:
|
| 267 |
+
return "No model selected or error in application" message_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
|
| 269 |
# Define a function to swap dropdown values
|
| 270 |
def swap_languages(src_lang, tgt_lang):
|