Spaces:

vikramvasudevan
/

sanatan_ai

Running

App Files Files Community

vikramvasudevan committed on Sep 1

Commit

5f4344d

verified ·

1 Parent(s): 3e95dda

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

db.py +4 -1
modules/db/relevance.py +43 -0
modules/nodes/init.py +43 -6

db.py CHANGED Viewed

@@ -6,6 +6,7 @@ import logging
 from pydantic import BaseModel
 from metadata import MetadataFilter, MetadataWhereClause
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -51,7 +52,9 @@ class SanatanDatabase:
                 distances=[],
             )
-        return response
     def search_for_literal(
         self, collection_name: str, literal_to_search_for: str, n_results=2

 from pydantic import BaseModel
 from metadata import MetadataFilter, MetadataWhereClause
+from modules.db.relevance import validate_relevance_queryresult
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
                 distances=[],
             )
+        validated_response = validate_relevance_queryresult(query, response)
+        return validated_response["result"]
     def search_for_literal(
         self, collection_name: str, literal_to_search_for: str, n_results=2

modules/db/relevance.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from chromadb.api.types import QueryResult
def validate_relevance_queryresult(query: str, result: "QueryResult", max_distance: float = 0.35) -> dict:
    """
    Check whether a Chroma QueryResult is relevant enough, based on its distances.

    Only the first query's hits are inspected, and of those only the first
    distance. The QueryResult itself is never modified; it is handed back
    as-is together with a status and a human-readable reason.

    Args:
        query: the user query (used for logging only)
        result: QueryResult returned from Chroma (dict-like)
        max_distance: maximum acceptable distance for relevance

    Returns:
        dict with:
          - 'status': "ok" | "not_found" | "not_relevant"
          - 'reason': string explanation
          - 'result': the original QueryResult object
    """
    # Local import keeps this block self-contained regardless of the module's
    # top-level imports.
    import logging

    logger = logging.getLogger(__name__)

    # `or []` also covers keys that are present but set to None.
    documents = result.get("documents") or []
    distances = result.get("distances") or []

    # Results may be nested per query (List[List[...]]). Unwrap the first
    # query's entries so that an empty search such as [[]] is recognised as
    # "no results" instead of being treated as one (truthy) hit.
    first_documents = (
        documents[0]
        if documents and isinstance(documents[0], (list, tuple))
        else documents
    )
    if not first_documents:
        logger.debug("No results for query %r", query)
        return {
            "status": "not_found",
            "reason": "No results",
            "result": result
        }

    first_distances = (
        distances[0]
        if distances and isinstance(distances[0], (list, tuple))
        else distances
    )
    # Missing or empty distances can never satisfy the threshold.
    best_distance = first_distances[0] if first_distances else float("inf")

    if best_distance > max_distance:
        logger.debug(
            "Query %r rejected: best distance %s > %s",
            query, best_distance, max_distance,
        )
        return {
            "status": "not_relevant",
            "reason": f"Best distance {best_distance:.4f} > {max_distance}",
            "result": result
        }

    logger.debug("Query %r accepted: best distance %s", query, best_distance)
    return {
        "status": "ok",
        "reason": "Relevant",
        "result": result
    }

modules/nodes/init.py CHANGED Viewed

@@ -27,6 +27,41 @@ def init_system_prompt_node(state: ChatState) -> ChatState:
             SystemMessage(
                 content=f"Here is the list of all scriptures along with their metadata configurations:\n{json.dumps(scriptures, indent=1)}\n"
             ),
             SystemMessage(
                 content="you must ALWAYS call one of the standardization tools (`tool_get_standardized_azhwar_names`,`tool_get_standardized_prabandham_names`,`tool_get_standardized_divya_desam_names`) available to get the correct entity name before using the `tool_search_db_by_metadata` tool."
             ),
@@ -56,15 +91,17 @@ If the answer is not directly stated in the verses but is present in explanatory
 If the answer WAS indeed found in the context, use the following response format (in Markdown) othereise clearly state **"I do not have enough information from the {collection_name} to answer this. I searched using {search_methodology}. Do you want me try to another search like {alternative_searchmethod}?"**
 ### 🧾 Answer
-- Present a brief summary of your response in concise **English**.
-### 🕉️ Scripture
 - {sanatanConfig.get_scripture_by_collection(collection_name=collection_name)["title"]}
-### 🕮 Chapter Title
 - Mention the chapter(s) from which the references were taken.  Use the field *title* here from the context if available. For example `TVM 1.8.3`
-### 🕮 Verse Number
 - Mention the *verse number* from which the references were taken.
 ### 🔗 Reference Link(s)
@@ -87,11 +124,11 @@ If the answer WAS indeed found in the context, use the following response format
 > If you are unsure about a character, leave it as it is rather than guessing.
-### 📜 English Transliteration
 - For each verse above, provide the **matching English transliteration**.
 - Maintain the **same order** as the verses listed above.
-### 📜 English Translation
 - Provide the **English meaning** for each verse listed above.
 - Again, follow the **same order**.
 - Do **not** repeat the original verse here — just the translation.

             SystemMessage(
                 content=f"Here is the list of all scriptures along with their metadata configurations:\n{json.dumps(scriptures, indent=1)}\n"
             ),
+            SystemMessage(
+                content="""
+You have access to three scripture search tools. You MUST follow these rules when choosing a tool:
+1. **tool_search_db_by_metadata** – Use this **only** when the user explicitly provides metadata criteria such as:
+   - A specific azhwar (e.g., “Thirumālirum Solai”)
+   - A prabandham or prabandham code
+   - A pasuram or verse number (e.g., “verse 34”)
+   - A decade number
+   - A divya desam name
+   Before using this tool, always call the corresponding standardization tool:
+     - `tool_get_standardized_azhwar_names` for azhwar names
+     - `tool_get_standardized_prabandham_names` for prabandham names
+     - `tool_get_standardized_divya_desam_names` for divya desam names
+   Use the **exact standardized name returned**; do not modify, translate, or simplify it in any way.
+   ⚠️ This is the **default tool** for precise verse or metadata-based requests.
+2. **tool_semantic_vector_search** – Use this when the user is asking about:
+   - Themes, stories, ideas, emotions, or meanings in the scriptures
+   - Any query that does **not** mention specific verse numbers, pasuram numbers, or metadata
+   This tool performs semantic understanding and returns contextually relevant results.
+   Do **not** use this if the user explicitly requests literal or exact matches.
+3. **tool_search_db_by_literal_text** – Use this **only** if:
+   - The user explicitly says “literal match” or “exact phrase”
+   - OR if previous attempts with metadata or semantic search fail to yield accurate results
+   ⚠️ This tool is **not the default**. Only invoke it for exact-match requirements.
+Example user queries and tool usage:
+- “Show me verse 34 of Vishnu Sahasranama” → metadata search
+- “Show me references to Garuda in Vishnu Purananm.” → semantic search
+- “Search for the exact phrase ‘Om Namo Narayanaya’” → literal text search
+            """
+            ),
             SystemMessage(
                 content="you must ALWAYS call one of the standardization tools (`tool_get_standardized_azhwar_names`,`tool_get_standardized_prabandham_names`,`tool_get_standardized_divya_desam_names`) available to get the correct entity name before using the `tool_search_db_by_metadata` tool."
             ),
 If the answer WAS indeed found in the context, use the following response format (in Markdown) othereise clearly state **"I do not have enough information from the {collection_name} to answer this. I searched using {search_methodology}. Do you want me try to another search like {alternative_searchmethod}?"**
 ### 🧾 Answer
+- Present a brief summary of your response in concise **English**. Mention only the scripture(s), chapter(s) and verse number(s) available if multiple matches are available.
+The following format should be used to show only the most relevant match. Do not show all matches at once.
+### 🕉️ Scripture(s)
 - {sanatanConfig.get_scripture_by_collection(collection_name=collection_name)["title"]}
+### 🕮 Chapter Title(s)
 - Mention the chapter(s) from which the references were taken.  Use the field *title* here from the context if available. For example `TVM 1.8.3`
+### 🕮 Verse Number(s)
 - Mention the *verse number* from which the references were taken.
 ### 🔗 Reference Link(s)
 > If you are unsure about a character, leave it as it is rather than guessing.
+### 📜 English Transliteration(s)
 - For each verse above, provide the **matching English transliteration**.
 - Maintain the **same order** as the verses listed above.
+### 📜 English Translation(s)
 - Provide the **English meaning** for each verse listed above.
 - Again, follow the **same order**.
 - Do **not** repeat the original verse here — just the translation.