Spaces:

VIDraft
/

RAGOndevice

Running

App Files Files Community

cutechicken committed on Dec 17, 2024

Commit

6adfca3

verified ·

1 Parent(s): 1fb62e7

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -1

app.py CHANGED Viewed

@@ -193,7 +193,101 @@ def read_uploaded_file(file):
         return f"❌ 파일 읽기 오류: {str(e)}", "error"
 @spaces.GPU

         return f"❌ 파일 읽기 오류: {str(e)}", "error"
+def _describe_dataframe(df):
+    """Return the Markdown preview (first 10 rows) plus row/column summary of *df*."""
+    # Column labels are not guaranteed to be strings, so stringify before joining.
+    column_names = ', '.join(str(col) for col in df.columns)
+    return (
+        f"📊 데이터 미리보기:\n{df.head(10).to_markdown(index=False)}\n\n"
+        f"\n📈 데이터 정보:\n"
+        f"- 전체 행 수: {len(df)}\n"
+        f"- 전체 열 수: {len(df.columns)}\n"
+        f"- 컬럼 목록: {column_names}\n"
+    )
+def _analyze_text(content):
+    """Return the statistics footer appended to a plain-text or source-code upload."""
+    lines = content.split('\n')
+    total_lines = len(lines)
+    # Keyword heuristic: any of these substrings marks the upload as source code.
+    lowered = content.lower()
+    is_code = any(keyword in lowered for keyword in ['def ', 'class ', 'import ', 'function'])
+    if is_code:
+        functions = sum(1 for line in lines if 'def ' in line)
+        classes = sum(1 for line in lines if 'class ' in line)
+        imports = sum(1 for line in lines if 'import ' in line or 'from ' in line)
+        return (
+            f"\n📝 코드 분석:\n"
+            f"- 전체 라인 수: {total_lines}\n"
+            f"- 함수 수: {functions}\n"
+            f"- 클래스 수: {classes}\n"
+            f"- import 문 수: {imports}\n"
+        )
+    non_empty_lines = sum(1 for line in lines if line.strip())
+    return (
+        f"\n📝 텍스트 분석:\n"
+        f"- 전체 라인 수: {total_lines}\n"
+        f"- 실제 내용이 있는 라인 수: {non_empty_lines}\n"
+        f"- 단어 수: {len(content.split())}\n"
+        f"- 문자 수: {len(content)}\n"
+    )
+def read_uploaded_file(file):
+    """Read an uploaded file and return ``(content, kind)``.
+
+    Args:
+        file: Object whose ``name`` attribute is the path of the uploaded
+            file, or ``None`` when nothing was uploaded.
+
+    Returns:
+        A 2-tuple of strings. ``kind`` is ``"parquet"``, ``"csv"``, ``"text"``,
+        ``"error"``, or ``""`` (with empty content) when *file* is ``None``.
+        For tabular files the content is a Markdown preview plus summary; for
+        any other extension it is the file text followed by a statistics
+        footer. On failure the content is the error message.
+
+    No exception escapes: every failure is converted into the ``"error"``
+    result so the caller always receives a tuple.
+    """
+    if file is None:
+        return "", ""
+    # Tried in order; the first encoding that decodes without error wins.
+    encodings = ['utf-8', 'cp949', 'euc-kr', 'latin1']
+    try:
+        file_ext = os.path.splitext(file.name)[1].lower()
+        if file_ext == '.parquet':
+            return _describe_dataframe(pd.read_parquet(file.name)), "parquet"
+        if file_ext == '.csv':
+            for encoding in encodings:
+                try:
+                    df = pd.read_csv(file.name, encoding=encoding)
+                except UnicodeDecodeError:
+                    continue
+                content = _describe_dataframe(df)
+                content += f"\n📋 컬럼 데이터 타입:\n"
+                for col, dtype in df.dtypes.items():
+                    content += f"- {col}: {dtype}\n"
+                null_counts = df.isnull().sum()
+                if null_counts.any():
+                    content += f"\n⚠️ 결측치:\n"
+                    for col, null_count in null_counts[null_counts > 0].items():
+                        content += f"- {col}: {null_count}개 누락\n"
+                return content, "csv"
+        else:  # any other extension is treated as a text file
+            for encoding in encodings:
+                try:
+                    with open(file.name, 'r', encoding=encoding) as f:
+                        content = f.read()
+                except UnicodeDecodeError:
+                    continue
+                return content + _analyze_text(content), "text"
+        # UnicodeDecodeError requires five constructor arguments, so raising it
+        # with only a message fails with TypeError and loses the message.
+        # ValueError carries the text and is caught by the handler below.
+        raise ValueError(f"지원되는 인코딩으로 파일을 읽을 수 없습니다 ({', '.join(encodings)})")
+    except Exception as e:
+        # Deliberate catch-all: failures are reported as an "error" result
+        # instead of propagating to the caller.
+        return f"파일 읽기 오류: {str(e)}", "error"
+# Revised handling of the file-upload event
+def init_msg():
+    """Return the fixed status text announcing that the file is being analyzed."""
+    status_text = "파일을 분석하고 있습니다..."
+    return status_text
+# Wire the upload widget's change event: first call init_msg and send its
+# return value to `msg`, then chain stream_chat with the listed inputs and
+# write its results to `msg` and `chatbot`.
+# NOTE(review): presumably these are Gradio components and `.then()` runs
+# stream_chat after init_msg completes — confirm against the Gradio version in use.
+# NOTE(review): file_upload, msg, chatbot, the sampling controls and stream_chat
+# are not defined in the visible code; this statement runs at module level, so
+# verify that all of them already exist (and that an enclosing Blocks context,
+# if one is required, is active) at this point in the file.
+file_upload.change(
+    init_msg,
+    outputs=msg
+).then(
+    stream_chat,
+    inputs=[msg, chatbot, file_upload, temperature, max_new_tokens, top_p, top_k, penalty],
+    outputs=[msg, chatbot]
+)
 @spaces.GPU