Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -193,7 +193,101 @@ def read_uploaded_file(file):
|
|
| 193 |
return f"โ ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ: {str(e)}", "error"
|
| 194 |
|
| 195 |
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
|
| 199 |
@spaces.GPU
|
|
|
|
| 193 |
return f"โ ํ์ผ ์ฝ๊ธฐ ์ค๋ฅ: {str(e)}", "error"
|
| 194 |
|
| 195 |
|
| 196 |
+
# Encodings tried, in order, for CSV and plain-text uploads. latin1 maps every
# byte to a character, so while it is in the list decoding cannot fail.
_FALLBACK_ENCODINGS = ('utf-8', 'cp949', 'euc-kr', 'latin1')

# NOTE(review): apart from the warning sign, the emoji prefixes in the
# user-facing strings below could not be recovered from the mis-encoded
# source; the glyphs used are a best guess -- confirm against upstream.


def _summarize_dataframe(df, detailed=False):
    """Render a 10-row preview and basic statistics of *df* as text.

    With ``detailed=True`` the per-column dtypes and the columns that
    contain missing values are appended as well.
    """
    # Column labels are not guaranteed to be strings, so convert before
    # joining; str.join raises TypeError on anything else.
    column_names = ', '.join(str(col) for col in df.columns)
    parts = [
        f"📊 데이터 미리보기:\n{df.head(10).to_markdown(index=False)}\n\n",
        "\n📈 데이터 정보:\n",
        f"- 전체 행 수: {len(df)}\n",
        f"- 전체 열 수: {len(df.columns)}\n",
        f"- 컬럼 목록: {column_names}\n",
    ]
    if detailed:
        parts.append("\n📋 컬럼 데이터 타입:\n")
        parts.extend(f"- {col}: {dtype}\n" for col, dtype in df.dtypes.items())
        null_counts = df.isnull().sum()
        if null_counts.any():
            parts.append("\n⚠️ 결측치:\n")
            parts.extend(
                f"- {col}: {null_count}개 누락\n"
                for col, null_count in null_counts[null_counts > 0].items()
            )
    return "".join(parts)


def _analyze_text(content):
    """Return a short statistics section for the text *content*.

    Content that contains one of a few programming keywords gets
    function/class/import line counts; anything else gets line, word and
    character counts.
    """
    lines = content.split('\n')
    total_lines = len(lines)

    # Cheap substring heuristic, not a parser: any hit marks the text as code.
    lowered = content.lower()
    is_code = any(
        keyword in lowered
        for keyword in ('def ', 'class ', 'import ', 'function')
    )

    if is_code:
        functions = sum(1 for line in lines if 'def ' in line)
        classes = sum(1 for line in lines if 'class ' in line)
        imports = sum(
            1 for line in lines if 'import ' in line or 'from ' in line
        )
        parts = [
            "\n📝 코드 분석:\n",
            f"- 전체 라인 수: {total_lines}\n",
            f"- 함수 수: {functions}\n",
            f"- 클래스 수: {classes}\n",
            f"- import 문 수: {imports}\n",
        ]
    else:
        non_empty_lines = sum(1 for line in lines if line.strip())
        parts = [
            "\n📝 텍스트 분석:\n",
            f"- 전체 라인 수: {total_lines}\n",
            f"- 실제 내용이 있는 라인 수: {non_empty_lines}\n",
            f"- 단어 수: {len(content.split())}\n",
            f"- 문자 수: {len(content)}\n",
        ]
    return "".join(parts)


def read_uploaded_file(file, encodings=_FALLBACK_ENCODINGS):
    """Read an uploaded file and return its content plus a short analysis.

    Args:
        file: Upload object exposing the file path as ``file.name``, or
            ``None`` when nothing has been uploaded.
        encodings: Text encodings tried in order for CSV and plain-text
            files. Defaults to utf-8, cp949, euc-kr, then latin1.

    Returns:
        A ``(content, kind)`` tuple. ``kind`` is ``"parquet"``, ``"csv"``
        or ``"text"`` on success, ``""`` (with empty content) when *file*
        is ``None``, and ``"error"`` when reading fails, in which case
        ``content`` holds the error message. No exception is propagated
        to the caller.
    """
    if file is None:
        return "", ""

    try:
        file_ext = os.path.splitext(file.name)[1].lower()

        if file_ext == '.parquet':
            df = pd.read_parquet(file.name)
            return _summarize_dataframe(df), "parquet"

        if file_ext == '.csv':
            for encoding in encodings:
                try:
                    df = pd.read_csv(file.name, encoding=encoding)
                    return _summarize_dataframe(df, detailed=True), "csv"
                except UnicodeDecodeError:
                    continue
        else:
            # Every other extension is treated as plain text.
            for encoding in encodings:
                try:
                    with open(file.name, 'r', encoding=encoding) as f:
                        content = f.read()
                except UnicodeDecodeError:
                    continue
                return content + _analyze_text(content), "text"

        # UnicodeDecodeError needs five constructor arguments, so raising it
        # with only a message would itself fail with TypeError. ValueError
        # carries the message and is handled by the same except below.
        raise ValueError(
            f"지원되는 인코딩으로 파일을 읽을 수 없습니다 ({', '.join(encodings)})"
        )

    except Exception as e:
        # Deliberate catch-all boundary: the caller gets an error tuple to
        # display instead of an exception.
        return f"파일 읽기 오류: {str(e)}", "error"
|
| 278 |
+
|
| 279 |
+
# File upload event handling (revised)
|
| 280 |
+
def init_msg():
    """Return the status text shown while an uploaded file is analysed.

    Returns:
        The fixed Korean message "파일을 분석하고 있습니다..."
        ("Analysing the file...").
    """
    return "파일을 분석하고 있습니다..."
|
| 282 |
+
|
| 283 |
+
# When the uploaded file changes, first put the status text from init_msg
# into the message box, then run stream_chat with the current chat state and
# generation settings.
# NOTE(review): this executes when the module is imported, so file_upload,
# msg, chatbot, the generation controls and stream_chat must all exist at
# this point -- confirm the placement against the rest of the file.
file_upload.change(
    fn=init_msg,
    outputs=msg,
).then(
    fn=stream_chat,
    inputs=[
        msg,
        chatbot,
        file_upload,
        temperature,
        max_new_tokens,
        top_p,
        top_k,
        penalty,
    ],
    outputs=[msg, chatbot],
)
|
| 291 |
|
| 292 |
|
| 293 |
@spaces.GPU
|