Spaces:

Wplotnikow
/

TeacherChat

Sleeping

App Files Community

Wplotnikow committed on Aug 20

Commit

2b9cf4d

verified ·

1 Parent(s): bf2aee6

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -11

app.py CHANGED Viewed

@@ -13,16 +13,26 @@ def get_blocks_from_docx():
         return ["Файл .docx не найден!"]
     doc = Document(docx_list[0])
     blocks = []
-    # Абзацы
     for p in doc.paragraphs:
         txt = p.text.strip()
-        if txt and not (len(txt) <= 3 and txt.isdigit()):
             blocks.append(txt)
-    # Таблицы
     for table in doc.tables:
         for row in table.rows:
             row_text = " | ".join(cell.text.strip() for cell in row.cells if cell.text.strip())
-            if row_text:
                 blocks.append(row_text)
     seen = set()
     uniq_blocks = []
@@ -53,14 +63,18 @@ def ask_chatbot(question):
         return "Пожалуйста, введите вопрос."
     if len(blocks) < 2:
         return "Ошибка: база знаний пуста или слишком мала. Проверьте .docx."
-    # Берём ТРИ лучших блока для большего контекста (больше данных для генерации)
     user_vec = vectorizer.transform([question])
-    sims = cosine_similarity(user_vec, matrix)[0]
     top_idxs = sims.argsort()[-3:][::-1]
-    context = " ".join([blocks[i] for i in top_idxs if sims[i] > 0.08])
     answer = rut5_answer(question, context)
-    # Проверяем: выдаём только развёрнутые (2 предложения и более) или повторяем часть исходника как расширение
-    if len(answer.split('.')) < 2:
         answer += "\n\n" + context
     return answer
@@ -87,8 +101,10 @@ with gr.Blocks() as demo:
     ask_btn.click(ask_chatbot, question, answer)
     question.submit(ask_chatbot, question, answer)
     gr.Markdown("#### Примеры вопросов:")
-    for ex in EXAMPLES:
-        gr.Markdown(f"- {ex}")
     gr.Markdown("""
     ---

         return ["Файл .docx не найден!"]
     doc = Document(docx_list[0])
     blocks = []
     for p in doc.paragraphs:
         txt = p.text.strip()
+        # Убираем короткие заголовки:
+        if (
+            txt
+            and not (len(txt) <= 3 and txt.isdigit())
+            and not (
+                len(txt) < 35
+                and txt == txt.upper()
+                and txt.endswith(('.', ':', '?', '!')) is False
+            )
+            and len(txt.split()) > 3  # минимум 3 слова = явно не заголовок
+        ):
             blocks.append(txt)
+    # Таблицы:
     for table in doc.tables:
         for row in table.rows:
             row_text = " | ".join(cell.text.strip() for cell in row.cells if cell.text.strip())
+            # Аналогично — игнорируем сверхкороткие строки/возможные заголовки из таблиц:
+            if row_text and len(row_text) > 35 and len(row_text.split()) > 3:
                 blocks.append(row_text)
     seen = set()
     uniq_blocks = []
         return "Пожалуйста, введите вопрос."
     if len(blocks) < 2:
         return "Ошибка: база знаний пуста или слишком мала. Проверьте .docx."
     user_vec = vectorizer.transform([question])
+    sims = cosine_similarity(user_vec, matrix)
+    # ТОП-3 самых осмысленных блока
     top_idxs = sims.argsort()[-3:][::-1]
+    # Используем только НЕКОРОТКИЕ блоки как контекст
+    context_blocks = [
+        blocks[i] for i in top_idxs if sims[i] > 0.08 and len(blocks[i].split()) > 3 and len(blocks[i]) > 35
+    ]
+    context = " ".join(context_blocks)
     answer = rut5_answer(question, context)
+    # Подстраховка — если ответ ТОЛЬКО заголовок, просто версифицируем и дополняем контекстом:
+    if len(answer.strip().split()) < 8 or len(answer.split('.')) < 2:
         answer += "\n\n" + context
     return answer
     ask_btn.click(ask_chatbot, question, answer)
     question.submit(ask_chatbot, question, answer)
     gr.Markdown("#### Примеры вопросов:")
+    # ВОЗВРАЩАЕМ КЛИКАБЕЛЬНЫЕ примеры
+    gr.Examples(EXAMPLES, inputs=question)
     gr.Markdown("""
     ---