TiberiuCristianLeon committed on
Commit
3b73549
·
verified ·
1 Parent(s): 69ec00b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -25
app.py CHANGED
@@ -43,6 +43,42 @@ def model_to_cuda(model):
43
  print("CUDA not available! Using CPU.")
44
  return model
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def flan(model_name, sl, tl, input_text):
47
  tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
48
  model = T5ForConditionalGeneration.from_pretrained(model_name)
@@ -174,21 +210,12 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
174
 
175
  sl = all_langs[sselected_language]
176
  tl = all_langs[tselected_language]
177
- message_text = f'Translating from {sselected_language} to {tselected_language} with {model_name}'
178
  print(message_text)
179
- if model_name == "Helsinki-NLP":
180
- try:
181
- model_name = f"Helsinki-NLP/opus-mt-{sl}-{tl}"
182
- tokenizer = AutoTokenizer.from_pretrained(model_name)
183
- model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
184
- except EnvironmentError:
185
- try:
186
- model_name = f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}"
187
- tokenizer = AutoTokenizer.from_pretrained(model_name)
188
- model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
189
- except EnvironmentError as error:
190
- return f"Error finding model: {model_name}! Try other available language combination.", error
191
 
 
 
 
192
  elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
193
  translated_text = eurollm_instruct(model_name, sselected_language, tselected_language, input_text)
194
  return translated_text, message_text
@@ -226,22 +253,18 @@ def translate_text(input_text: str, sselected_language: str, tselected_language:
226
  translated_text = unbabel(model_name, sselected_language, tselected_language, input_text)
227
  return translated_text, message_text
228
 
229
- if model_name.startswith('t5'):
230
  tokenizer = T5Tokenizer.from_pretrained(model_name)
231
  model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")
232
-
233
- if model_name.startswith("Helsinki-NLP"):
234
- prompt = input_text
235
- else:
236
  prompt = f"translate {sselected_language} to {tselected_language}: {input_text}"
 
 
 
 
 
237
 
238
- input_ids = tokenizer.encode(prompt, return_tensors="pt")
239
- output_ids = model.generate(input_ids, max_length=512)
240
- translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
241
-
242
- message_text = f'Translated from {sselected_language} to {tselected_language} with {model_name}.'
243
- print(f'Translated from {sselected_language} to {tselected_language} with {model_name}:', f'{input_text} = {translated_text}', sep='\n')
244
- return translated_text, message_text
245
 
246
  # Define a function to swap dropdown values
247
  def swap_languages(src_lang, tgt_lang):
 
43
  print("CUDA not available! Using CPU.")
44
  return model
45
 
46
def HelsinkiNLPAutoTokenizer(sl, tl, input_text):
    """Translate *input_text* from language code *sl* to *tl* using
    Helsinki-NLP MarianMT checkpoints loaded via AutoTokenizer /
    AutoModelForSeq2SeqLM.

    Tries ``Helsinki-NLP/opus-mt-{sl}-{tl}`` first and falls back to
    ``Helsinki-NLP/opus-tatoeba-{sl}-{tl}`` if the first checkpoint does
    not exist on the Hub.

    Returns:
        (translated_text, status_message) on success, or
        (error_message, error) when neither checkpoint can be loaded.
    """

    def _translate_with(model_name):
        # Load tokenizer + model (moved to CUDA when available) and run
        # greedy generation capped at 512 tokens, matching the t5 path.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = model_to_cuda(AutoModelForSeq2SeqLM.from_pretrained(model_name))
        input_ids = tokenizer.encode(input_text, return_tensors="pt")
        output_ids = model.generate(input_ids, max_length=512)
        return tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Primary checkpoint; fall back to the tatoeba-trained variant on a
    # missing-model error (EnvironmentError is what from_pretrained raises).
    model_name = f"Helsinki-NLP/opus-mt-{sl}-{tl}"
    try:
        translated_text = _translate_with(model_name)
    except EnvironmentError:
        model_name = f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}"
        try:
            translated_text = _translate_with(model_name)
        except EnvironmentError as error:
            return f"Error finding model: {model_name}! Try other available language combination.", error

    # Build the message only after we know which checkpoint actually worked.
    message_text = f'Translated from {sl} to {tl} with {model_name}.'
    return translated_text, message_text
64
+
65
def HelsinkiNLP(sl, tl, input_text):
    """Translate *input_text* from language code *sl* to *tl* with a
    Helsinki-NLP translation pipeline.

    Tries ``Helsinki-NLP/opus-mt-{sl}-{tl}`` first and falls back to
    ``Helsinki-NLP/opus-tatoeba-{sl}-{tl}`` when the first checkpoint is
    missing from the Hub.

    Returns:
        (translated_text, status_message) on success, or
        (error_message, error) when no checkpoint exists for the pair /
        the direction is unsupported.
    """
    try:
        model_name = f"Helsinki-NLP/opus-mt-{sl}-{tl}"
        pipe = pipeline("translation", model=model_name)
        # Fix: the success path was commented out, so this branch
        # implicitly returned None even when the model loaded fine.
        translation = pipe(input_text)
        return translation[0]['translation_text'], f'Translated from {sl} to {tl} with {model_name}.'
    except EnvironmentError:
        try:
            model_name = f"Helsinki-NLP/opus-tatoeba-{sl}-{tl}"
            pipe = pipeline("translation", model=model_name)
            translation = pipe(input_text)
            return translation[0]['translation_text'], f'Translated from {sl} to {tl} with {model_name}.'
        except EnvironmentError as error:
            return f"Error finding model: {model_name}! Try other available language combination.", error
    except KeyError as error:
        # Fix: original interpolated undefined names (source_readable,
        # target), which raised NameError inside the handler.
        return f"Error: Translation direction {sl} to {tl} is not supported by Helsinki Translation Models", error
81
+
82
  def flan(model_name, sl, tl, input_text):
83
  tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
84
  model = T5ForConditionalGeneration.from_pretrained(model_name)
 
210
 
211
  sl = all_langs[sselected_language]
212
  tl = all_langs[tselected_language]
213
+ message_text = f'Translated from {sselected_language} to {tselected_language} with {model_name}'
214
  print(message_text)
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
+ if model_name.startswith("Helsinki-NLP"):
217
+ return HelsinkiNLP(sl, tl, input_text)
218
+
219
  elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
220
  translated_text = eurollm_instruct(model_name, sselected_language, tselected_language, input_text)
221
  return translated_text, message_text
 
253
  translated_text = unbabel(model_name, sselected_language, tselected_language, input_text)
254
  return translated_text, message_text
255
 
256
+ elif model_name.startswith('t5'):
257
  tokenizer = T5Tokenizer.from_pretrained(model_name)
258
  model = T5ForConditionalGeneration.from_pretrained(model_name, device_map="auto")
 
 
 
 
259
  prompt = f"translate {sselected_language} to {tselected_language}: {input_text}"
260
+ input_ids = tokenizer.encode(prompt, return_tensors="pt")
261
+ output_ids = model.generate(input_ids, max_length=512)
262
+ translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
263
+ # print(f'Translated from {sselected_language} to {tselected_language} with {model_name}:', f'{input_text} = {translated_text}', sep='\n')
264
+ return translated_text, message_text
265
 
266
+ else:
267
+ return "No model selected or error in application", message_text
 
 
 
 
 
268
 
269
  # Define a function to swap dropdown values
270
  def swap_languages(src_lang, tgt_lang):