|
|
import gradio as gr |
|
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
import torch |
|
|
import sys |
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face checkpoint that is downloaded/loaded for translation.
model_name = "facebook/nllb-200-distilled-600M"

print(f"모델({model_name})을 로드하는 중입니다... 잠시만 기다려주세요.")

# Pre-declare both names so they exist at module level even if loading
# fails part-way; translate_text checks them for None before use.
tokenizer = None
model = None

try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    print("모델 로드 완료!")
except Exception as e:
    # Top-level boundary: the app is useless without the model, so report
    # the failure and stop the process with a non-zero exit status.
    print(f"❌ 모델 로드 중 치명적인 오류 발생: {e}")
    sys.exit(1)
|
|
|
|
|
|
|
|
# Source languages offered in the UI: display name -> language code that is
# assigned to the tokenizer's ``src_lang`` before encoding.
LANG_CODES = {
    "영어 (English)": "eng_Latn",
    "일본어 (Japanese)": "jpn_Jpan",
    "중국어 (Chinese Simplified)": "zho_Hans",
}

# Language code of the output; its token id is forced as the first
# generated token so every translation comes out in Korean.
TARGET_LANG_CODE = "kor_Hang"
|
|
|
|
|
def translate_text(text: str, source_lang_name: str) -> str:
    """Translate ``text`` from the selected source language into Korean.

    Args:
        text: The text to translate.
        source_lang_name: Display name of the source language; must be one
            of the keys of ``LANG_CODES``.

    Returns:
        The Korean translation. When the input is blank, the model is not
        loaded, the source language is unknown, or translation raises, a
        user-facing Korean message is returned instead. Nothing is raised,
        because the return value is shown directly in the output textbox.
    """
    # None, "" and whitespace-only input carry nothing to translate, so
    # answer immediately instead of running the model on it.
    if not text or not text.strip():
        return "번역할 내용을 입력해주세요."

    if model is None or tokenizer is None:
        return "모델이 로드되지 않았습니다. 서버 로그를 확인해주세요."

    # Reject unknown language names explicitly rather than silently
    # assigning ``None`` to ``tokenizer.src_lang``.
    src_code = LANG_CODES.get(source_lang_name)
    if src_code is None:
        return f"번역 에러: 지원하지 않는 입력 언어입니다: {source_lang_name}"

    try:
        # Tell the tokenizer which language the input is written in.
        tokenizer.src_lang = src_code
        inputs = tokenizer(text, return_tensors="pt")

        # Token id of the target-language tag; forcing it as the first
        # generated token selects Korean as the output language.
        target_token_id = tokenizer.convert_tokens_to_ids(TARGET_LANG_CODE)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            generated_tokens = model.generate(
                **inputs,
                forced_bos_token_id=target_token_id,
                max_length=500,
                num_beams=1,
            )

        # A single input was encoded, so the batch has exactly one entry.
        return tokenizer.batch_decode(
            generated_tokens, skip_special_tokens=True
        )[0]
    except Exception as e:
        # UI boundary: surface the failure to the user as text.
        return f"번역 에러: {str(e)}"
|
|
|
|
|
|
|
|
with gr.Blocks(title="ํ๊ธ๋ก (Hangullo) - ๋ค๊ตญ์ด ๋ฒ์ญ๊ธฐ") as demo: |
|
|
gr.Markdown( |
|
|
""" |
|
|
# ๐ฐ๐ท ํ๊ธ๋ก (Hangullo) |
|
|
**์์ด, ์ผ๋ณธ์ด, ์ค๊ตญ์ด**๋ฅผ ์
๋ ฅํ๋ฉด ์์ฐ์ค๋ฌ์ด **ํ๊ตญ์ด**๋ก ๋ฒ์ญํด ๋๋ฆฝ๋๋ค. |
|
|
*(Powered by Meta NLLB-200)* |
|
|
""" |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
src_lang = gr.Dropdown( |
|
|
choices=list(LANG_CODES.keys()), |
|
|
value="์์ด (English)", |
|
|
label="์
๋ ฅ ์ธ์ด" |
|
|
) |
|
|
input_text = gr.Textbox( |
|
|
lines=5, |
|
|
placeholder="๋ฒ์ญํ ๋ฌธ์ฅ์ ์
๋ ฅํ์ธ์...", |
|
|
label="์
๋ ฅ (Source)" |
|
|
) |
|
|
translate_btn = gr.Button("ํ๊ตญ์ด๋ก ๋ณํ", variant="primary") |
|
|
|
|
|
with gr.Column(): |
|
|
output_text = gr.Textbox( |
|
|
lines=5, |
|
|
label="ํ๊ตญ์ด ๊ฒฐ๊ณผ (Korean)", |
|
|
interactive=False |
|
|
) |
|
|
|
|
|
|
|
|
gr.Examples( |
|
|
examples=[ |
|
|
["The quick brown fox jumps over the lazy dog.", "์์ด (English)"], |
|
|
["AIใฎ็บๅฑใซใใฃใฆใ็งใใกใฎ็ๆดปใฏๅคงใใๅคๅใใฆใใพใใ", "์ผ๋ณธ์ด (Japanese)"], |
|
|
["ไปๅคฉๅคฉๆฐ็ๅฅฝ๏ผๆไปฌๅปๅ
ฌๅญๆฃๆญฅๅงใ", "์ค๊ตญ์ด (Chinese Simplified)"] |
|
|
], |
|
|
inputs=[input_text, src_lang] |
|
|
) |
|
|
|
|
|
translate_btn.click( |
|
|
fn=translate_text, |
|
|
inputs=[input_text, src_lang], |
|
|
outputs=output_text |
|
|
) |
|
|
|
|
|
|
|
|
# Start the web app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    # Turn on Gradio's request queue first, then launch the server.
    queued_demo = demo.queue()
    queued_demo.launch()