TiberiuCristianLeon committed on
Commit
0e657e3
·
verified ·
1 Parent(s): ad52b14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -6
app.py CHANGED
@@ -15,6 +15,7 @@ non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
15
  all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
16
  # iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
17
  iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
 
18
  langs = list(favourite_langs.keys())
19
  langs.extend(list(all_langs.keys())) # Language options as list, add favourite languages first
20
 
@@ -24,14 +25,15 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "HPLT-OPUS",
24
  "Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa", "Helsinki-NLP/opus-mt-tc-bible-big-roa-en",
25
  "facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
26
  "facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
27
- "facebook/m2m100_418M", "facebook/m2m100_1.2B", "alirezamsh/small100", "Lego-MT/Lego-MT",
 
28
  "bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
29
  "bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
30
  "google-t5/t5-small", "google-t5/t5-base", "google-t5/t5-large",
31
  "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
32
  "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
33
  "NiuTrans/LMT-60-0.6B", "NiuTrans/LMT-60-1.7B", "NiuTrans/LMT-60-4B",
34
- "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
35
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
36
  "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
37
  ]
@@ -243,10 +245,9 @@ class Translators:
243
  def HelsinkiNLP_mulroa(self):
244
  try:
245
  pipe = pipeline("translation", model=self.model_name, device=self.device)
246
- iso1to3 = {iso[1]: iso[3] for iso in non_empty_isos} # {'ro': 'ron'}
247
- iso3tl = iso1to3.get(self.tl) # 'deu', 'ron', 'eng', 'fra'
248
- translation = pipe(f'>>{iso3tl}<< {self.input_text}')
249
- return translation[0]['translation_text'], f'Translated from {iso1_to_name[self.sl]} to {iso1_to_name[self.tl]} with {self.model_name}.'
250
  except Exception as error:
251
  return f"Error translating with model: {self.model_name}! Try other available language combination.", error
252
 
@@ -398,6 +399,26 @@ class Translators:
398
  translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=self.sl, tgt_lang=self.tl)
399
  translated_text = translator(self.input_text, max_length=512)
400
  return translated_text[0]['translation_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
 
402
  def wingpt(self):
403
  model = AutoModelForCausalLM.from_pretrained(
@@ -585,6 +606,12 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
585
  elif model_name == "facebook/mbart-large-50-many-to-one-mmt":
586
  translated_text = Translators(model_name, s_language, t_language, input_text).mbart_many_to_one()
587
 
 
 
 
 
 
 
588
  elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
589
  translated_text = Translators(model_name, s_language, t_language, input_text).eurollm_instruct()
590
 
 
15
  all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
16
  # iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
17
  iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
18
+ iso1toall = {iso[1]: (iso[0], iso[2], iso[3] for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
19
  langs = list(favourite_langs.keys())
20
  langs.extend(list(all_langs.keys())) # Language options as list, add favourite languages first
21
 
 
25
  "Helsinki-NLP/opus-mt-tc-bible-big-roa-deu_eng_fra_por_spa", "Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-roa", "Helsinki-NLP/opus-mt-tc-bible-big-roa-en",
26
  "facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B",
27
  "facebook/mbart-large-50-many-to-many-mmt", "facebook/mbart-large-50-one-to-many-mmt", "facebook/mbart-large-50-many-to-one-mmt",
28
+ "facebook/m2m100_418M", "facebook/m2m100_1.2B", "alirezamsh/small100",
29
+ "facebook/hf-seamless-m4t-medium", "facebook/seamless-m4t-large", "facebook/seamless-m4t-v2-large",
30
  "bigscience/mt0-small", "bigscience/mt0-base", "bigscience/mt0-large", "bigscience/mt0-xl",
31
  "bigscience/bloomz-560m", "bigscience/bloomz-1b1", "bigscience/bloomz-1b7", "bigscience/bloomz-3b",
32
  "google-t5/t5-small", "google-t5/t5-base", "google-t5/t5-large",
33
  "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
34
  "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
35
  "NiuTrans/LMT-60-0.6B", "NiuTrans/LMT-60-1.7B", "NiuTrans/LMT-60-4B",
36
+ "Lego-MT/Lego-MT", "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
37
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
38
  "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
39
  ]
 
245
  def HelsinkiNLP_mulroa(self):
246
  try:
247
  pipe = pipeline("translation", model=self.model_name, device=self.device)
248
+ tgt_lang = iso1toall.get(self.tl)[2] # 'deu', 'ron', 'eng', 'fra'
249
+ translation = pipe(f'>>{tgt_lang}<< {self.input_text}')
250
+ return translation[0]['translation_text'], f'Translated from {iso1toall[self.sl][0]} to {iso1toall[self.tl][0]} with {self.model_name}.'
 
251
  except Exception as error:
252
  return f"Error translating with model: {self.model_name}! Try other available language combination.", error
253
 
 
399
  translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=self.sl, tgt_lang=self.tl)
400
  translated_text = translator(self.input_text, max_length=512)
401
  return translated_text[0]['translation_text']
402
+
403
+ def seamlessm4t1(self):
404
+ from transformers import AutoProcessor, SeamlessM4TModel
405
+ processor = AutoProcessor.from_pretrained(self.model)
406
+ model = SeamlessM4TModel.from_pretrained(self.model)
407
+ src_lang = iso1toall.get(self.sl)[2] # 'deu', 'ron', 'eng', 'fra'
408
+ tgt_lang = iso1toall.get(self.tl)[2]
409
+ text_inputs = processor(text = self.input_text, src_lang=src_lang, return_tensors="pt")
410
+ output_tokens = model.generate(**text_inputs, tgt_lang=tgt_lang, generate_speech=False)
411
+ return processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
412
+
413
+ def seamlessm4t2(self):
414
+ from transformers import AutoProcessor, SeamlessM4Tv2ForTextToText
415
+ processor = AutoProcessor.from_pretrained(self.model)
416
+ model = SeamlessM4Tv2ForTextToText.from_pretrained(self.model)
417
+ src_lang = iso1toall.get(self.sl)[2] # 'deu', 'ron', 'eng', 'fra'
418
+ tgt_lang = iso1toall.get(self.tl)[2]
419
+ text_inputs = processor(text=self.input_text, src_lang=src_lang, return_tensors="pt")
420
+ decoder_input_ids = model.generate(**text_inputs, tgt_lang=tgt_lang)[0].tolist()
421
+ return processor.decode(decoder_input_ids, skip_special_tokens=True)
422
 
423
  def wingpt(self):
424
  model = AutoModelForCausalLM.from_pretrained(
 
606
  elif model_name == "facebook/mbart-large-50-many-to-one-mmt":
607
  translated_text = Translators(model_name, s_language, t_language, input_text).mbart_many_to_one()
608
 
609
+ elif model_name == "facebook/seamless-m4t-v2-large":
610
+ translated_text = Translators(model_name, s_language, t_language, input_text).seamlessm4t2()
611
+
612
+ elif "m4t-medium" in model_name or "m4t-large" in model_name:
613
+ translated_text = Translators(model_name, s_language, t_language, input_text).seamlessm4t1()
614
+
615
  elif model_name == "utter-project/EuroLLM-1.7B-Instruct":
616
  translated_text = Translators(model_name, s_language, t_language, input_text).eurollm_instruct()
617