Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- db.py +4 -1
- modules/db/relevance.py +43 -0
- modules/nodes/init.py +43 -6
db.py
CHANGED
|
@@ -6,6 +6,7 @@ import logging
|
|
| 6 |
from pydantic import BaseModel
|
| 7 |
|
| 8 |
from metadata import MetadataFilter, MetadataWhereClause
|
|
|
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
logger.setLevel(logging.INFO)
|
|
@@ -51,7 +52,9 @@ class SanatanDatabase:
|
|
| 51 |
distances=[],
|
| 52 |
)
|
| 53 |
|
| 54 |
-
|
|
|
|
|
|
|
| 55 |
|
| 56 |
def search_for_literal(
|
| 57 |
self, collection_name: str, literal_to_search_for: str, n_results=2
|
|
|
|
| 6 |
from pydantic import BaseModel
|
| 7 |
|
| 8 |
from metadata import MetadataFilter, MetadataWhereClause
|
| 9 |
+
from modules.db.relevance import validate_relevance_queryresult
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
logger.setLevel(logging.INFO)
|
|
|
|
| 52 |
distances=[],
|
| 53 |
)
|
| 54 |
|
| 55 |
+
validated_response = validate_relevance_queryresult(query, response)
|
| 56 |
+
|
| 57 |
+
return validated_response["result"]
|
| 58 |
|
| 59 |
def search_for_literal(
|
| 60 |
self, collection_name: str, literal_to_search_for: str, n_results=2
|
modules/db/relevance.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from chromadb.api.types import QueryResult
|
| 2 |
+
|
| 3 |
+
def _first_query_row(values):
    """Return the hits belonging to the first query of a Chroma result field.

    Chroma nests result fields per query (e.g. ``[["doc a", "doc b"]]``), but
    an already-flat sequence (``["doc a", "doc b"]``) is tolerated as well.
    ``None`` or an empty container yields an empty list.
    """
    if values is None or len(values) == 0:
        return []
    head = values[0]
    # A string or an unsized scalar as the first element means the sequence
    # is already flat; anything sized (list, tuple, array) is the per-query row.
    if isinstance(head, (str, bytes)) or not hasattr(head, "__len__"):
        return values
    return head


def validate_relevance_queryresult(query: str, result: QueryResult, max_distance: float = 0.35) -> dict:
    """
    Checks if the QueryResult from Chroma is relevant enough using distances.
    Returns the original QueryResult unchanged, along with a status and reason.

    Only the first query's hits are inspected, and the first distance of that
    row is taken as the best match.

    Args:
        query: the user query; accepted for caller context, not used by the check itself
        result: QueryResult returned from Chroma (dict-like)
        max_distance: maximum acceptable distance for relevance

    Returns:
        dict with:
            - 'status': "ok" | "not_found" | "not_relevant"
            - 'reason': string explanation
            - 'result': the original QueryResult object
    """

    def _verdict(status: str, reason: str) -> dict:
        return {"status": status, "reason": reason, "result": result}

    # A query with no hits comes back as [[]], which is truthy, so the emptiness
    # test has to look at the per-query row rather than the outer container.
    hits = _first_query_row(result.get("documents"))
    if len(hits) == 0:
        return _verdict("not_found", "No results")

    distance_row = _first_query_row(result.get("distances"))
    if len(distance_row) > 0 and distance_row[0] is not None:
        best_distance = float(distance_row[0])
    else:
        # Distances missing or not included in the result: treat as infinitely far.
        best_distance = float("inf")

    # Written as "not <=" so that a NaN distance is rejected instead of passing.
    if not best_distance <= max_distance:
        return _verdict(
            "not_relevant",
            f"Best distance {best_distance:.4f} > {max_distance}",
        )

    return _verdict("ok", "Relevant")
|
modules/nodes/init.py
CHANGED
|
@@ -27,6 +27,41 @@ def init_system_prompt_node(state: ChatState) -> ChatState:
|
|
| 27 |
SystemMessage(
|
| 28 |
content=f"Here is the list of all scriptures along with their metadata configurations:\n{json.dumps(scriptures, indent=1)}\n"
|
| 29 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
SystemMessage(
|
| 31 |
content="you must ALWAYS call one of the standardization tools (`tool_get_standardized_azhwar_names`,`tool_get_standardized_prabandham_names`,`tool_get_standardized_divya_desam_names`) available to get the correct entity name before using the `tool_search_db_by_metadata` tool."
|
| 32 |
),
|
|
@@ -56,15 +91,17 @@ If the answer is not directly stated in the verses but is present in explanatory
|
|
| 56 |
If the answer WAS indeed found in the context, use the following response format (in Markdown) othereise clearly state **"I do not have enough information from the {collection_name} to answer this. I searched using {search_methodology}. Do you want me try to another search like {alternative_searchmethod}?"**
|
| 57 |
|
| 58 |
### 🧾 Answer
|
| 59 |
-
- Present a brief summary of your response in concise **English**.
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
### 🕉️ Scripture
|
| 62 |
- {sanatanConfig.get_scripture_by_collection(collection_name=collection_name)["title"]}
|
| 63 |
|
| 64 |
-
### 🕮 Chapter Title
|
| 65 |
- Mention the chapter(s) from which the references were taken. Use the field *title* here from the context if available. For example `TVM 1.8.3`
|
| 66 |
|
| 67 |
-
### 🕮 Verse Number
|
| 68 |
- Mention the *verse number* from which the references were taken.
|
| 69 |
|
| 70 |
### 🔗 Reference Link(s)
|
|
@@ -87,11 +124,11 @@ If the answer WAS indeed found in the context, use the following response format
|
|
| 87 |
> If you are unsure about a character, leave it as it is rather than guessing.
|
| 88 |
|
| 89 |
|
| 90 |
-
### 📜 English Transliteration
|
| 91 |
- For each verse above, provide the **matching English transliteration**.
|
| 92 |
- Maintain the **same order** as the verses listed above.
|
| 93 |
|
| 94 |
-
### 📜 English Translation
|
| 95 |
- Provide the **English meaning** for each verse listed above.
|
| 96 |
- Again, follow the **same order**.
|
| 97 |
- Do **not** repeat the original verse here — just the translation.
|
|
|
|
| 27 |
SystemMessage(
|
| 28 |
content=f"Here is the list of all scriptures along with their metadata configurations:\n{json.dumps(scriptures, indent=1)}\n"
|
| 29 |
),
|
| 30 |
+
SystemMessage(
|
| 31 |
+
content="""
|
| 32 |
+
You have access to three scripture search tools. You MUST follow these rules when choosing a tool:
|
| 33 |
+
|
| 34 |
+
1. **tool_search_db_by_metadata** – Use this **only** when the user explicitly provides metadata criteria such as:
|
| 35 |
+
- A specific azhwar (e.g., “Thirumālirum Solai”)
|
| 36 |
+
- A prabandham or prabandham code
|
| 37 |
+
- A pasuram or verse number (e.g., “verse 34”)
|
| 38 |
+
- A decade number
|
| 39 |
+
- A divya desam name
|
| 40 |
+
Before using this tool, always call the corresponding standardization tool:
|
| 41 |
+
- `tool_get_standardized_azhwar_names` for azhwar names
|
| 42 |
+
- `tool_get_standardized_prabandham_names` for prabandham names
|
| 43 |
+
- `tool_get_standardized_divya_desam_names` for divya desam names
|
| 44 |
+
Use the **exact standardized name returned**; do not modify, translate, or simplify it in any way.
|
| 45 |
+
⚠️ This is the **default tool** for precise verse or metadata-based requests.
|
| 46 |
+
|
| 47 |
+
2. **tool_semantic_vector_search** – Use this when the user is asking about:
|
| 48 |
+
- Themes, stories, ideas, emotions, or meanings in the scriptures
|
| 49 |
+
- Any query that does **not** mention specific verse numbers, pasuram numbers, or metadata
|
| 50 |
+
This tool performs semantic understanding and returns contextually relevant results.
|
| 51 |
+
Do **not** use this if the user explicitly requests literal or exact matches.
|
| 52 |
+
|
| 53 |
+
3. **tool_search_db_by_literal_text** – Use this **only** if:
|
| 54 |
+
- The user explicitly says “literal match” or “exact phrase”
|
| 55 |
+
- OR if previous attempts with metadata or semantic search fail to yield accurate results
|
| 56 |
+
⚠️ This tool is **not the default**. Only invoke it for exact-match requirements.
|
| 57 |
+
|
| 58 |
+
Example user queries and tool usage:
|
| 59 |
+
|
| 60 |
+
- “Show me verse 34 of Vishnu Sahasranama” → metadata search
|
| 61 |
+
- “Show me references to Garuda in Vishnu Purananm.” → semantic search
|
| 62 |
+
- “Search for the exact phrase ‘Om Namo Narayanaya’” → literal text search
|
| 63 |
+
"""
|
| 64 |
+
),
|
| 65 |
SystemMessage(
|
| 66 |
content="you must ALWAYS call one of the standardization tools (`tool_get_standardized_azhwar_names`,`tool_get_standardized_prabandham_names`,`tool_get_standardized_divya_desam_names`) available to get the correct entity name before using the `tool_search_db_by_metadata` tool."
|
| 67 |
),
|
|
|
|
| 91 |
If the answer WAS indeed found in the context, use the following response format (in Markdown) othereise clearly state **"I do not have enough information from the {collection_name} to answer this. I searched using {search_methodology}. Do you want me try to another search like {alternative_searchmethod}?"**
|
| 92 |
|
| 93 |
### 🧾 Answer
|
| 94 |
+
- Present a brief summary of your response in concise **English**. Mention only the scripture(s), chapter(s) and verse number(s) available if multiple matches are available.
|
| 95 |
+
|
| 96 |
+
The following format should be used to show only the most relevant match. Do not show all matches at once.
|
| 97 |
|
| 98 |
+
### 🕉️ Scripture(s)
|
| 99 |
- {sanatanConfig.get_scripture_by_collection(collection_name=collection_name)["title"]}
|
| 100 |
|
| 101 |
+
### 🕮 Chapter Title(s)
|
| 102 |
- Mention the chapter(s) from which the references were taken. Use the field *title* here from the context if available. For example `TVM 1.8.3`
|
| 103 |
|
| 104 |
+
### 🕮 Verse Number(s)
|
| 105 |
- Mention the *verse number* from which the references were taken.
|
| 106 |
|
| 107 |
### 🔗 Reference Link(s)
|
|
|
|
| 124 |
> If you are unsure about a character, leave it as it is rather than guessing.
|
| 125 |
|
| 126 |
|
| 127 |
+
### 📜 English Transliteration(s)
|
| 128 |
- For each verse above, provide the **matching English transliteration**.
|
| 129 |
- Maintain the **same order** as the verses listed above.
|
| 130 |
|
| 131 |
+
### 📜 English Translation(s)
|
| 132 |
- Provide the **English meaning** for each verse listed above.
|
| 133 |
- Again, follow the **same order**.
|
| 134 |
- Do **not** repeat the original verse here — just the translation.
|