TiberiuCristianLeon committed on
Commit
306697e
·
verified ·
1 Parent(s): c5ab29b

Added WiNGPT-Babel-2.1

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -13,7 +13,6 @@ df = pl.read_parquet("isolanguages.parquet")
13
  non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
14
  # all_langs = languagecodes.iso_languages_byname
15
  all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
16
- # iso1_to_name = {codes[0]: lang for entry in all_langs for lang, codes in entry.items()} # {'ro': 'Romanian', 'de': 'German'}
17
  # iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
18
  iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
19
  langs = list(favourite_langs.keys())
@@ -33,7 +32,8 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "HPLT-OPUS",
33
  "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
34
  "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
35
  "NiuTrans/LMT-60-0.6B", "NiuTrans/LMT-60-1.7B", "NiuTrans/LMT-60-4B",
36
- "Lego-MT/Lego-MT", "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
 
37
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
38
  "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
39
  ]
@@ -449,7 +449,6 @@ class Translators:
449
  generated_ids = [
450
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
451
  ]
452
- print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))
453
  output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
454
  result = output.split('\n')[-1].strip() if '\n' in output else output.strip()
455
  return result
@@ -624,7 +623,7 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
624
  elif model_name == "HuggingFaceTB/SmolLM3-3B":
625
  translated_text = Translators(model_name, s_language, t_language, input_text).smollm()
626
 
627
- elif model_name == "winninghealth/WiNGPT-Babel-2":
628
  translated_text = Translators(model_name, s_language, t_language, input_text).wingpt()
629
 
630
  elif "HPLT" in model_name:
 
13
  non_empty_isos = df.slice(1).filter(pl.col("ISO639-1") != "").rows()
14
  # all_langs = languagecodes.iso_languages_byname
15
  all_langs = {iso[0]: (iso[1], iso[2], iso[3]) for iso in non_empty_isos} # {'Romanian': ('ro', 'rum', 'ron')}
 
16
  # iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', 'de': 'German'}
17
  iso1toall = {iso[1]: (iso[0], iso[2], iso[3]) for iso in non_empty_isos} # {'ro': ('Romanian', 'rum', 'ron')}
18
  langs = list(favourite_langs.keys())
 
32
  "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
33
  "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
34
  "NiuTrans/LMT-60-0.6B", "NiuTrans/LMT-60-1.7B", "NiuTrans/LMT-60-4B",
35
+ "Lego-MT/Lego-MT", "HuggingFaceTB/SmolLM3-3B",
36
+ "winninghealth/WiNGPT-Babel-2", "winninghealth/WiNGPT-Babel-2.1",
37
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
38
  "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
39
  ]
 
449
  generated_ids = [
450
  output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
451
  ]
 
452
  output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
453
  result = output.split('\n')[-1].strip() if '\n' in output else output.strip()
454
  return result
 
623
  elif model_name == "HuggingFaceTB/SmolLM3-3B":
624
  translated_text = Translators(model_name, s_language, t_language, input_text).smollm()
625
 
626
+ elif "winninghealth/WiNGPT" in model_name:
627
  translated_text = Translators(model_name, s_language, t_language, input_text).wingpt()
628
 
629
  elif "HPLT" in model_name: