import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import sys
# 1. Model and tokenizer setup
# Use the 600M model so it fits the free CPU tier (16 GB RAM) on Hugging Face Spaces
model_name = "facebook/nllb-200-distilled-600M"
print(f"Loading model ({model_name})... please wait a moment.")
# Declare as global variables
tokenizer = None
model = None
try:
    # Load the tokenizer and model
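    # from_pretrained downloads the checkpoint from the Hugging Face Hub on the first
    # run and reuses the local cache afterwards (the 600M checkpoint is roughly 2.5 GB)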
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
print("๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ!")
except Exception as e:
    # If the model fails to load, exit so the exact cause shows up in the Spaces Logs tab
    print(f"❌ Fatal error while loading the model: {e}")
sys.exit(1)
# 2. Language code mapping
LANG_CODES = {
    "English": "eng_Latn",
    "Japanese": "jpn_Jpan",
    "Chinese (Simplified)": "zho_Hans"
}
TARGET_LANG_CODE = "kor_Hang"  # Korean (Hangul script)
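# NLLB-200 identifies languages with FLORES-200 codes of the form <lang>_<Script>,
# e.g. eng_Latn (English, Latin script) and kor_Hang (Korean, Hangul script).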
def translate_text(text, source_lang_name):
"""
์ž…๋ ฅ ํ…์ŠคํŠธ๋ฅผ ํ•œ๊ตญ์–ด๋กœ ๋ฒˆ์—ญ
"""
if not text:
return "๋ฒˆ์—ญํ•  ๋‚ด์šฉ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
if model is None or tokenizer is None:
return "๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„œ๋ฒ„ ๋กœ๊ทธ๋ฅผ ํ™•์ธํ•ด์ฃผ์„ธ์š”."
try:
        # Look up the language code for the selected source language
src_code = LANG_CODES.get(source_lang_name)
        # Translation option: specify the source language
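        # For NLLB, src_lang controls which language token the tokenizer adds to the
        # encoded input, so the model knows what language it is translating from.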
tokenizer.src_lang = src_code
        # Tokenize the input text
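        # return_tensors="pt" returns PyTorch tensors (input_ids and attention_mask)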
inputs = tokenizer(text, return_tensors="pt")
        # [Important] Use no_grad() so no gradients are tracked, reducing memory use
with torch.no_grad():
target_token_id = tokenizer.convert_tokens_to_ids(TARGET_LANG_CODE)
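            # forced_bos_token_id forces the first generated token to be the target
            # language token (kor_Hang); this is how NLLB selects the output language.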
generated_tokens = model.generate(
**inputs,
forced_bos_token_id=target_token_id,
max_length=500,
                # [Important] Use greedy search instead of beam search for stability on CPU
num_beams=1
)
        # Decode the result
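        # skip_special_tokens drops the language and end-of-sequence tokens from the output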
result = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
return result
except Exception as e:
return f"๋ฒˆ์—ญ ์—๋Ÿฌ: {str(e)}"
# 3. Gradio interface
with gr.Blocks(title="한글로 (Hangullo) - Multilingual Translator") as demo:
gr.Markdown(
"""
        # 🇰🇷 한글로 (Hangullo)
        Enter **English, Japanese, or Chinese** text and get a natural **Korean** translation.
*(Powered by Meta NLLB-200)*
"""
)
with gr.Row():
with gr.Column():
src_lang = gr.Dropdown(
choices=list(LANG_CODES.keys()),
value="์˜์–ด (English)",
label="์ž…๋ ฅ ์–ธ์–ด"
)
input_text = gr.Textbox(
lines=5,
                placeholder="Enter the sentence you want to translate...",
                label="Input (Source)"
)
            translate_btn = gr.Button("Translate to Korean", variant="primary")
with gr.Column():
output_text = gr.Textbox(
lines=5,
label="ํ•œ๊ตญ์–ด ๊ฒฐ๊ณผ (Korean)",
interactive=False
)
    # Example data
gr.Examples(
examples=[
["The quick brown fox jumps over the lazy dog.", "์˜์–ด (English)"],
["AIใฎ็™บๅฑ•ใซใ‚ˆใฃใฆใ€็งใŸใกใฎ็”Ÿๆดปใฏๅคงใใๅค‰ๅŒ–ใ—ใฆใ„ใพใ™ใ€‚", "์ผ๋ณธ์–ด (Japanese)"],
["ไปŠๅคฉๅคฉๆฐ”็œŸๅฅฝ๏ผŒๆˆ‘ไปฌๅŽปๅ…ฌๅ›ญๆ•ฃๆญฅๅงใ€‚", "์ค‘๊ตญ์–ด (Chinese Simplified)"]
],
inputs=[input_text, src_lang]
)
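    # Clicking an example only fills the inputs; press the button to run the translation.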
translate_btn.click(
fn=translate_text,
inputs=[input_text, src_lang],
outputs=output_text
)
# 4. Launch the app
if __name__ == "__main__":
    # [Important] Enable the queue so concurrent requests are processed in order instead of colliding
demo.queue().launch()