Update app.py
app.py
CHANGED
@@ -1,23 +1,29 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import torch
+import sys

 # 1. Model and tokenizer setup
-# Hugging Face's free CPU
-# The model is downloaded on the first run, so it may take a little while.
+# Use the 600M model to fit the free CPU environment (16GB RAM) on Hugging Face Spaces
 model_name = "facebook/nllb-200-distilled-600M"

-print("
+print(f"Loading the model ({model_name})... please wait.")
+
+# Declared as global variables
+tokenizer = None
+model = None
+
 try:
+    # Load the tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-    translator = pipeline("translation", model=model, tokenizer=tokenizer, src_lang="eng_Latn", tgt_lang="kor_Hang", max_length=400)
     print("Model loaded!")
 except Exception as e:
-
+    # If the model fails to load, exit the app so the exact cause is visible in the Logs tab
+    print(f"❌ Fatal error while loading the model: {e}")
+    sys.exit(1)

-# 2. Language code mapping
-# The NLLB model uses its own codes for its 200 languages instead of the usual 'en' or 'ja'.
+# 2. Language code mapping
 LANG_CODES = {
     "English": "eng_Latn",
     "Japanese": "jpn_Jpan",
@@ -28,29 +34,31 @@ TARGET_LANG_CODE = "kor_Hang"  # Korean

 def translate_text(text, source_lang_name):
     """
-
+    Translate the input text into Korean
     """
     if not text:
         return "Please enter text to translate."

-
-
-
-    # Perform the translation
-    # The NLLB model is more accurate when the source language code is specified explicitly.
-    # When using pipeline, src_lang must be set when the pipeline is created or when it is called.
-    # Here the tokenizer settings are adjusted at call time to do the translation.
-
+    if model is None or tokenizer is None:
+        return "The model is not loaded. Please check the server logs."
+
     try:
-        # Input
+        # Get the source language code
+        src_code = LANG_CODES.get(source_lang_name)
+
+        # Set translation options: specify the source language
         tokenizer.src_lang = src_code
+
+        # Tokenize the input text
         inputs = tokenizer(text, return_tensors="pt")

-        #
+        # Run model inference (force Korean output)
         generated_tokens = model.generate(
             **inputs,
             forced_bos_token_id=tokenizer.lang_code_to_id[TARGET_LANG_CODE],
-            max_length=500
+            max_length=500,
+            num_beams=4,  # beam search for better quality (may be a bit slower)
+            early_stopping=True
         )

         # Decode the result
@@ -58,53 +66,50 @@
         return result

     except Exception as e:
-        return f"Translation
+        return f"Translation error: {str(e)}"

-# 3. Gradio interface
-
-with gr.Blocks(title="Multilingual Korean Translator") as demo:
+# 3. Gradio interface
+with gr.Blocks(title="한글로 (Hangullo) - Multilingual Translator") as demo:
     gr.Markdown(
         """
-        #
-        Enter **English, Japanese, or Chinese** and it will be translated into **Korean**
-        (Meta
+        # 🇰🇷 한글로 (Hangullo)
+        Enter **English, Japanese, or Chinese** and get a natural **Korean** translation.
+        *(Powered by Meta NLLB-200)*
         """
     )

     with gr.Row():
         with gr.Column():
-            # Input settings
             src_lang = gr.Dropdown(
                 choices=list(LANG_CODES.keys()),
                 value="English",
-                label="Input language
+                label="Input language"
             )
             input_text = gr.Textbox(
                 lines=5,
-                placeholder="Text to translate
-                label="Input (
+                placeholder="Enter a sentence to translate...",
+                label="Input (Source)"
             )
-            translate_btn = gr.Button("To Korean
+            translate_btn = gr.Button("Translate to Korean", variant="primary")

         with gr.Column():
-            # Output settings
             output_text = gr.Textbox(
                 lines=5,
-                label="Korean
-                interactive=False
+                label="Output (Korean)",
+                interactive=False,
+                show_copy_button=True  # add a copy button
             )

-    # Examples
+    # Example data
     gr.Examples(
         examples=[
-            ["
-            ["
-            ["
+            ["The quick brown fox jumps over the lazy dog.", "English"],
+            ["AIの発展によって、私たちの生活は大きく変化しています。", "Japanese"],
+            ["今天天气真好，我们去公园散步吧。", "Chinese (Simplified)"]
         ],
         inputs=[input_text, src_lang]
     )

-    # Connect the button click event
     translate_btn.click(
         fn=translate_text,
         inputs=[input_text, src_lang],