TiberiuCristianLeon committed on
Commit
67ad314
·
verified ·
1 Parent(s): 90c0000

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -14
app.py CHANGED
@@ -30,6 +30,7 @@ models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "HPLT-OPUS",
30
  "google-t5/t5-small", "google-t5/t5-base", "google-t5/t5-large",
31
  "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
32
  "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
 
33
  "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
34
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
35
  "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
@@ -84,6 +85,23 @@ class Translators:
84
  return translation[0]['translation_text'], message
85
  except Exception as error:
86
  return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  def hplt(self, opus = False):
89
  # langs = ['ar', 'bs', 'ca', 'en', 'et', 'eu', 'fi', 'ga', 'gl', 'hi', 'hr', 'is', 'mt', 'nn', 'sq', 'sw', 'zh_hant']
@@ -106,12 +124,10 @@ class Translators:
106
  return translated_text, message_text
107
 
108
  @staticmethod
109
- def download_argos_model(from_code, to_code):
110
  import argostranslate.package
111
  print('Downloading model', from_code, to_code)
112
  # Download and install Argos Translate package
113
- argostranslate.package.update_package_index()
114
- available_packages = argostranslate.package.get_available_packages()
115
  package_to_install = next(
116
  filter(lambda x: x.from_code == from_code and x.to_code == to_code, available_packages)
117
  )
@@ -119,12 +135,19 @@ class Translators:
119
 
120
  def argos(self):
121
  import argostranslate.translate, argostranslate.package
 
 
 
 
 
 
 
 
122
  try:
123
- Translators.download_argos_model(self.sl, self.tl) # Download model
124
  translated_text = argostranslate.translate.translate(self.input_text, self.sl, self.tl) # Translate
125
  except StopIteration:
126
- # packages_info = ', '.join(f"{pkg.get_description()}->{str(pkg.links)} {str(pkg.source_languages)}" for pkg in argostranslate.package.get_available_packages())
127
- packages_info = ', '.join(f"{pkg.from_name} ({pkg.from_code}) -> {pkg.to_name} ({pkg.to_code})" for pkg in argostranslate.package.get_available_packages())
128
  translated_text = f"No Argos model for {self.sl} to {self.tl}. Try other model or languages combination from the available Argos models: {packages_info}."
129
  except Exception as error:
130
  translated_text = error
@@ -159,15 +182,19 @@ class Translators:
159
  # from quickmt.hub import hf_list
160
  # quickmt_models = [i.split("/quickmt-")[1] for i in hf_list()]
161
  # quickmt_models.sort()
162
- # print(quickmt_models)
163
- quickmt_models = ['ar-en', 'bn-en', 'cs-en', 'da-en', 'de-en', 'el-en', 'en-ar', 'en-bn', 'en-cs', 'en-de', 'en-el', 'en-es',
164
- 'en-fa', 'en-fr', 'en-he', 'en-hi', 'en-hu', 'en-id', 'en-it', 'en-ja', 'en-ko', 'en-lv', 'en-pl', 'en-pt',
165
- 'en-ro', 'en-ru', 'en-th', 'en-tr', 'en-ur', 'en-vi', 'en-zh', 'es-en', 'fa-en', 'fr-en', 'he-en', 'hi-en',
166
- 'hu-en', 'id-en', 'it-en', 'ja-en', 'ko-en', 'lv-en', 'pl-en', 'pt-en', 'ro-en', 'ru-en', 'th-en', 'tr-en', 'ur-en', 'vi-en', 'zh-en']
 
 
167
  # available_languages = list(set([lang for model in quickmt_models for lang in model.split('-')]))
168
  # available_languages.sort()
169
- available_languages = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fr', 'he', 'hi', 'hu',
170
- 'id', 'it', 'ja', 'ko', 'lv', 'pl', 'pt', 'ro', 'ru', 'th', 'tr', 'ur', 'vi', 'zh']
 
 
171
  # Direct translation model
172
  if f"{self.sl}-{self.tl}" in quickmt_models:
173
  model_path = Translators.quickmtdownload(model_name)
@@ -497,7 +524,10 @@ def translate_text(input_text: str, s_language: str, t_language: str, model_name
497
 
498
  elif "lego" in model_name.lower():
499
  translated_text = Translators(model_name, sl, tl, input_text).LegoMT()
500
-
 
 
 
501
  elif model_name.startswith('google-t5'):
502
  translated_text = Translators(model_name, s_language, t_language, input_text).tfive()
503
 
 
30
  "google-t5/t5-small", "google-t5/t5-base", "google-t5/t5-large",
31
  "google/flan-t5-small", "google/flan-t5-base", "google/flan-t5-large", "google/flan-t5-xl",
32
  "google/madlad400-3b-mt", "jbochi/madlad400-3b-mt",
33
+ "NiuTrans/LMT-60-0.6B", "NiuTrans/LMT-60-1.7B", "NiuTrans/LMT-60-4B",
34
  "HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2",
35
  "utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
36
  "Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2"
 
85
  return translation[0]['translation_text'], message
86
  except Exception as error:
87
  return f"Error translating with model: {self.model_name}! Try other available language combination or model.", error
88
+
89
def niutrans(self):
    """Translate ``self.input_text`` with a NiuTrans LMT causal language model.

    Loads the tokenizer and model named by ``self.model_name``, wraps a
    translation instruction built from ``self.sl`` / ``self.tl`` in the
    tokenizer's chat template, generates with beam search and returns only
    the newly generated text.

    Returns:
        str: The decoded model output with special tokens removed.
    """
    tokenizer = AutoTokenizer.from_pretrained(self.model_name, padding_side='left')
    model = AutoModelForCausalLM.from_pretrained(self.model_name)
    # NOTE(review): a "." is always appended after the input text, so input
    # that already ends with punctuation gets a doubled mark -- confirm this
    # matches the prompt format the model expects.
    prompt = f"Translate the following text from {self.sl} into {self.tl}.\n{self.sl}: {self.input_text}.\n{self.tl}: "
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=512, num_beams=5, do_sample=False)
    # The generated sequence starts with the prompt tokens; drop them so only
    # the continuation is decoded.
    prompt_length = len(model_inputs.input_ids[0])
    output_ids = generated_ids[0][prompt_length:].tolist()
    # Decode the whole sequence in one call. Decoding token by token (which is
    # what batch_decode does when handed a flat list of ids) breaks characters
    # whose bytes are split across several tokens.
    return tokenizer.decode(output_ids, skip_special_tokens=True)
105
 
106
  def hplt(self, opus = False):
107
  # langs = ['ar', 'bs', 'ca', 'en', 'et', 'eu', 'fi', 'ga', 'gl', 'hi', 'hr', 'is', 'mt', 'nn', 'sq', 'sw', 'zh_hant']
 
124
  return translated_text, message_text
125
 
126
  @staticmethod
127
+ def download_argos_model(available_packages, from_code, to_code):
128
  import argostranslate.package
129
  print('Downloading model', from_code, to_code)
130
  # Download and install Argos Translate package
 
 
131
  package_to_install = next(
132
  filter(lambda x: x.from_code == from_code and x.to_code == to_code, available_packages)
133
  )
 
135
 
136
  def argos(self):
137
  import argostranslate.translate, argostranslate.package
138
+ argostranslate.package.update_package_index()
139
+ available_packages = argostranslate.package.get_available_packages()
140
+ available_slanguages = [lang.from_code for lang in available_packages]
141
+ available_tlanguages = [lang.to_code for lang in available_packages]
142
+ available_languages = sorted(list(set(available_slanguages + available_tlanguages)))
143
+ zipped = sorted(list(zip(available_slanguages, available_tlanguages)))
144
+ packages_info = ', '.join(f"{pkg.from_name} ({pkg.from_code}) -> {pkg.to_name} ({pkg.to_code})" for pkg in available_packages)
145
+ print(available_languages, zipped)
146
  try:
147
+ Translators.download_argos_model(available_packages, self.sl, self.tl) # Download model
148
  translated_text = argostranslate.translate.translate(self.input_text, self.sl, self.tl) # Translate
149
  except StopIteration:
150
+ # packages_info = ', '.join(f"{pkg.get_description()}->{str(pkg.links)} {str(pkg.source_languages)}" for pkg in available_packages)
 
151
  translated_text = f"No Argos model for {self.sl} to {self.tl}. Try other model or languages combination from the available Argos models: {packages_info}."
152
  except Exception as error:
153
  translated_text = error
 
182
  # from quickmt.hub import hf_list
183
  # quickmt_models = [i.split("/quickmt-")[1] for i in hf_list()]
184
  # quickmt_models.sort()
185
+ quickmt_models = ['ar-en', 'bn-en', 'cs-en', 'da-en', 'de-en', 'el-en', 'en-ar', 'en-bn',
186
+ 'en-cs', 'en-da', 'en-de', 'en-el', 'en-es', 'en-fa', 'en-fr', 'en-he',
187
+ 'en-hi', 'en-hu', 'en-id', 'en-it', 'en-ja', 'en-ko', 'en-lv', 'en-pl',
188
+ 'en-pt', 'en-ro', 'en-ru', 'en-sv', 'en-th', 'en-tr', 'en-ur', 'en-vi',
189
+ 'en-zh', 'es-en', 'fa-en', 'fr-en', 'he-en', 'hi-en', 'hu-en', 'id-en',
190
+ 'it-en', 'ja-en', 'ko-en', 'lv-en', 'pl-en', 'pt-en', 'ro-en', 'ru-en',
191
+ 'th-en', 'tr-en', 'ur-en', 'vi-en', 'zh-en']
192
  # available_languages = list(set([lang for model in quickmt_models for lang in model.split('-')]))
193
  # available_languages.sort()
194
+ available_languages = ['ar', 'bn', 'cs', 'da', 'de', 'el', 'en', 'es', 'fa', 'fr', 'he',
195
+ 'hi', 'hu', 'id', 'it', 'ja', 'ko', 'lv', 'pl', 'pt', 'ro', 'ru',
196
+ 'sv', 'th', 'tr', 'ur', 'vi', 'zh']
197
+ # print(quickmt_models, available_languages)
198
  # Direct translation model
199
  if f"{self.sl}-{self.tl}" in quickmt_models:
200
  model_path = Translators.quickmtdownload(model_name)
 
524
 
525
  elif "lego" in model_name.lower():
526
  translated_text = Translators(model_name, sl, tl, input_text).LegoMT()
527
+
528
+ elif "niutrans" in model_name.lower():
529
+ translated_text = Translators(model_name, sl, tl, input_text).niutrans()
530
+
531
  elif model_name.startswith('google-t5'):
532
  translated_text = Translators(model_name, s_language, t_language, input_text).tfive()
533