# NOTE: the next three lines are file-viewer residue (file size, per-line blame
# hashes, line-number gutter), kept only as comments so the module can be parsed.
# File size: 4,135 Bytes
# d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 66af3db 80b48ed 99d8712 66af3db 99d8712 66af3db 80b48ed 66af3db d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 d369415 06a30c8 ec5f7ac d369415 06a30c8 d369415 06a30c8 d369415 66af3db |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import sys
# 1. Model and tokenizer setup
# Use the distilled 600M checkpoint so it fits the free Hugging Face Spaces
# CPU environment (16 GB RAM).
model_name = "facebook/nllb-200-distilled-600M"
print(f"모델({model_name})을 로드하는 중입니다... 잠시만 기다려주세요.")

# Module-level handles; translate_text() reads these globals.
tokenizer = None
model = None
try:
    # Load the tokenizer and the seq2seq model.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    print("모델 로드 완료!")
except Exception as e:
    # Deliberately broad: on any load failure, terminate the app so the exact
    # cause can be read in the Space's Logs tab instead of serving a broken UI.
    print(f"❌ 모델 로드 중 치명적인 오류 발생: {e}")
    sys.exit(1)
# 2. Language code mapping
# NLLB language code that every translation is forced into (Korean).
TARGET_LANG_CODE = "kor_Hang"

# Dropdown label -> NLLB source-language code.
# NOTE(review): the non-ASCII part of each key looks like UTF-8 Korean that was
# decoded with a Thai code page. The keys are left exactly as found because the
# UI passes the selected label back verbatim; repair them together with every
# other literal use of these labels in the file.
LANG_CODES = {
    "์์ด (English)": "eng_Latn",
    "์ผ๋ณธ์ด (Japanese)": "jpn_Jpan",
    "์ค๊ตญ์ด (Chinese Simplified)": "zho_Hans",
}
def translate_text(text, source_lang_name):
    """Translate ``text`` from the selected source language into Korean.

    Args:
        text: Raw user input from the UI. ``None``, empty, or whitespace-only
            input is rejected with a prompt message.
        source_lang_name: Dropdown label; must be a key of ``LANG_CODES``.

    Returns:
        The Korean translation as a string. When the input is empty, the model
        is not loaded, the language label is unknown, or the translation step
        raises, a user-facing (Korean) message is returned instead so the UI
        always has text to display.
    """
    # Whitespace-only input carries nothing to translate; treat it like "".
    if not text or not str(text).strip():
        return "번역할 내용을 입력해주세요."
    if model is None or tokenizer is None:
        return "모델이 로드되지 않았습니다. 서버 로그를 확인해주세요."

    # Resolve the source-language code before touching the tokenizer: an
    # unknown label would otherwise set ``tokenizer.src_lang`` to None.
    src_code = LANG_CODES.get(source_lang_name)
    if src_code is None:
        return f"지원하지 않는 입력 언어입니다: {source_lang_name}"

    try:
        # Tell the tokenizer which language the input is written in.
        tokenizer.src_lang = src_code
        inputs = tokenizer(text, return_tensors="pt")
        # The decoder is forced to start with the target-language token.
        target_token_id = tokenizer.convert_tokens_to_ids(TARGET_LANG_CODE)
        # Inference only: skip gradient tracking to save memory.
        with torch.no_grad():
            generated_tokens = model.generate(
                **inputs,
                forced_bos_token_id=target_token_id,
                max_length=500,
                # Greedy search instead of beam search, for stability on CPU.
                num_beams=1,
            )
        # One input sequence in, so take the single decoded result.
        return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
    except Exception as e:
        # UI boundary: surface the failure as text rather than crash the handler.
        return f"번역 에러: {e}"
# 3. Gradio ์ธํฐํ์ด์ค
with gr.Blocks(title="ํ๊ธ๋ก (Hangullo) - ๋ค๊ตญ์ด ๋ฒ์ญ๊ธฐ") as demo:
gr.Markdown(
"""
# ๐ฐ๐ท ํ๊ธ๋ก (Hangullo)
**์์ด, ์ผ๋ณธ์ด, ์ค๊ตญ์ด**๋ฅผ ์
๋ ฅํ๋ฉด ์์ฐ์ค๋ฌ์ด **ํ๊ตญ์ด**๋ก ๋ฒ์ญํด ๋๋ฆฝ๋๋ค.
*(Powered by Meta NLLB-200)*
"""
)
with gr.Row():
with gr.Column():
src_lang = gr.Dropdown(
choices=list(LANG_CODES.keys()),
value="์์ด (English)",
label="์
๋ ฅ ์ธ์ด"
)
input_text = gr.Textbox(
lines=5,
placeholder="๋ฒ์ญํ ๋ฌธ์ฅ์ ์
๋ ฅํ์ธ์...",
label="์
๋ ฅ (Source)"
)
translate_btn = gr.Button("ํ๊ตญ์ด๋ก ๋ณํ", variant="primary")
with gr.Column():
output_text = gr.Textbox(
lines=5,
label="ํ๊ตญ์ด ๊ฒฐ๊ณผ (Korean)",
interactive=False
)
# ์์ ๋ฐ์ดํฐ
gr.Examples(
examples=[
["The quick brown fox jumps over the lazy dog.", "์์ด (English)"],
["AIใฎ็บๅฑใซใใฃใฆใ็งใใกใฎ็ๆดปใฏๅคงใใๅคๅใใฆใใพใใ", "์ผ๋ณธ์ด (Japanese)"],
["ไปๅคฉๅคฉๆฐ็ๅฅฝ๏ผๆไปฌๅปๅ
ฌๅญๆฃๆญฅๅงใ", "์ค๊ตญ์ด (Chinese Simplified)"]
],
inputs=[input_text, src_lang]
)
translate_btn.click(
fn=translate_text,
inputs=[input_text, src_lang],
outputs=output_text
)
# 4. Launch the app
if __name__ == "__main__":
    # Important (original author's note): enable the request queue to prevent
    # request collisions.
    demo.queue().launch()