vikramvasudevan committed on
Commit
5f4344d
·
verified ·
1 Parent(s): 3e95dda

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. db.py +4 -1
  2. modules/db/relevance.py +43 -0
  3. modules/nodes/init.py +43 -6
db.py CHANGED
@@ -6,6 +6,7 @@ import logging
6
  from pydantic import BaseModel
7
 
8
  from metadata import MetadataFilter, MetadataWhereClause
 
9
 
10
  logger = logging.getLogger(__name__)
11
  logger.setLevel(logging.INFO)
@@ -51,7 +52,9 @@ class SanatanDatabase:
51
  distances=[],
52
  )
53
 
54
- return response
 
 
55
 
56
  def search_for_literal(
57
  self, collection_name: str, literal_to_search_for: str, n_results=2
 
6
  from pydantic import BaseModel
7
 
8
  from metadata import MetadataFilter, MetadataWhereClause
9
+ from modules.db.relevance import validate_relevance_queryresult
10
 
11
  logger = logging.getLogger(__name__)
12
  logger.setLevel(logging.INFO)
 
52
  distances=[],
53
  )
54
 
55
+ validated_response = validate_relevance_queryresult(query, response)
56
+
57
+ return validated_response["result"]
58
 
59
  def search_for_literal(
60
  self, collection_name: str, literal_to_search_for: str, n_results=2
modules/db/relevance.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from chromadb.api.types import QueryResult
2
+
3
def validate_relevance_queryresult(query: str, result: "QueryResult", max_distance: float = 0.35):
    """
    Check whether a Chroma QueryResult is relevant enough, judged by distance.

    The QueryResult itself is never modified; it is handed back unchanged
    together with a status and a human-readable reason.

    Args:
        query: the user query. Currently unused; kept so callers can pass it
            and so it is available if logging is added later.
        result: dict-like QueryResult; only its "documents" and "distances"
            entries are read. Either entry may be missing or None.
        max_distance: maximum acceptable distance for the best match.

    Returns:
        dict with:
        - 'status': "ok" | "not_found" | "not_relevant"
        - 'reason': string explanation
        - 'result': the original QueryResult object
    """
    documents = result.get("documents") or []
    distances = result.get("distances") or []

    # Results may be nested per query (e.g. [[...]]), so a plain truthiness
    # test would treat [[]] as "has documents". Look inside nested groups.
    has_documents = any(
        bool(entry) if isinstance(entry, (list, tuple)) else entry is not None
        for entry in documents
    )
    if not has_documents:
        return {
            "status": "not_found",
            "reason": "No results",
            "result": result,
        }

    # distances can be List[List[float]] or a flat list; take the first
    # distance of the first result. An absent or empty distance list is
    # treated as infinitely far instead of raising IndexError.
    first_entry = distances[0] if distances else None
    if isinstance(first_entry, (list, tuple)):
        best_distance = first_entry[0] if first_entry else float("inf")
    elif first_entry is None:
        best_distance = float("inf")
    else:
        best_distance = first_entry

    if best_distance > max_distance:
        return {
            "status": "not_relevant",
            "reason": f"Best distance {best_distance:.4f} > {max_distance}",
            "result": result,
        }

    return {
        "status": "ok",
        "reason": "Relevant",
        "result": result,
    }
modules/nodes/init.py CHANGED
@@ -27,6 +27,41 @@ def init_system_prompt_node(state: ChatState) -> ChatState:
27
  SystemMessage(
28
  content=f"Here is the list of all scriptures along with their metadata configurations:\n{json.dumps(scriptures, indent=1)}\n"
29
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  SystemMessage(
31
  content="you must ALWAYS call one of the standardization tools (`tool_get_standardized_azhwar_names`,`tool_get_standardized_prabandham_names`,`tool_get_standardized_divya_desam_names`) available to get the correct entity name before using the `tool_search_db_by_metadata` tool."
32
  ),
@@ -56,15 +91,17 @@ If the answer is not directly stated in the verses but is present in explanatory
56
  If the answer WAS indeed found in the context, use the following response format (in Markdown) othereise clearly state **"I do not have enough information from the {collection_name} to answer this. I searched using {search_methodology}. Do you want me try to another search like {alternative_searchmethod}?"**
57
 
58
  ### 🧾 Answer
59
- - Present a brief summary of your response in concise **English**.
 
 
60
 
61
- ### 🕉️ Scripture
62
  - {sanatanConfig.get_scripture_by_collection(collection_name=collection_name)["title"]}
63
 
64
- ### 🕮 Chapter Title
65
  - Mention the chapter(s) from which the references were taken. Use the field *title* here from the context if available. For example `TVM 1.8.3`
66
 
67
- ### 🕮 Verse Number
68
  - Mention the *verse number* from which the references were taken.
69
 
70
  ### 🔗 Reference Link(s)
@@ -87,11 +124,11 @@ If the answer WAS indeed found in the context, use the following response format
87
  > If you are unsure about a character, leave it as it is rather than guessing.
88
 
89
 
90
- ### 📜 English Transliteration
91
  - For each verse above, provide the **matching English transliteration**.
92
  - Maintain the **same order** as the verses listed above.
93
 
94
- ### 📜 English Translation
95
  - Provide the **English meaning** for each verse listed above.
96
  - Again, follow the **same order**.
97
  - Do **not** repeat the original verse here — just the translation.
 
27
  SystemMessage(
28
  content=f"Here is the list of all scriptures along with their metadata configurations:\n{json.dumps(scriptures, indent=1)}\n"
29
  ),
30
+ SystemMessage(
31
+ content="""
32
+ You have access to three scripture search tools. You MUST follow these rules when choosing a tool:
33
+
34
+ 1. **tool_search_db_by_metadata** – Use this **only** when the user explicitly provides metadata criteria such as:
35
+ - A specific azhwar (e.g., “Thirumālirum Solai”)
36
+ - A prabandham or prabandham code
37
+ - A pasuram or verse number (e.g., “verse 34”)
38
+ - A decade number
39
+ - A divya desam name
40
+ Before using this tool, always call the corresponding standardization tool:
41
+ - `tool_get_standardized_azhwar_names` for azhwar names
42
+ - `tool_get_standardized_prabandham_names` for prabandham names
43
+ - `tool_get_standardized_divya_desam_names` for divya desam names
44
+ Use the **exact standardized name returned**; do not modify, translate, or simplify it in any way.
45
+ ⚠️ This is the **default tool** for precise verse or metadata-based requests.
46
+
47
+ 2. **tool_semantic_vector_search** – Use this when the user is asking about:
48
+ - Themes, stories, ideas, emotions, or meanings in the scriptures
49
+ - Any query that does **not** mention specific verse numbers, pasuram numbers, or metadata
50
+ This tool performs semantic understanding and returns contextually relevant results.
51
+ Do **not** use this if the user explicitly requests literal or exact matches.
52
+
53
+ 3. **tool_search_db_by_literal_text** – Use this **only** if:
54
+ - The user explicitly says “literal match” or “exact phrase”
55
+ - OR if previous attempts with metadata or semantic search fail to yield accurate results
56
+ ⚠️ This tool is **not the default**. Only invoke it for exact-match requirements.
57
+
58
+ Example user queries and tool usage:
59
+
60
+ - “Show me verse 34 of Vishnu Sahasranama” → metadata search
61
+ - “Show me references to Garuda in Vishnu Purananm.” → semantic search
62
+ - “Search for the exact phrase ‘Om Namo Narayanaya’” → literal text search
63
+ """
64
+ ),
65
  SystemMessage(
66
  content="you must ALWAYS call one of the standardization tools (`tool_get_standardized_azhwar_names`,`tool_get_standardized_prabandham_names`,`tool_get_standardized_divya_desam_names`) available to get the correct entity name before using the `tool_search_db_by_metadata` tool."
67
  ),
 
91
  If the answer WAS indeed found in the context, use the following response format (in Markdown) othereise clearly state **"I do not have enough information from the {collection_name} to answer this. I searched using {search_methodology}. Do you want me try to another search like {alternative_searchmethod}?"**
92
 
93
  ### 🧾 Answer
94
+ - Present a brief summary of your response in concise **English**. Mention only the scripture(s), chapter(s) and verse number(s) available if multiple matches are available.
95
+
96
+ The following format should be used to show only the most relevant match. Do not show all matches at once.
97
 
98
+ ### 🕉️ Scripture(s)
99
  - {sanatanConfig.get_scripture_by_collection(collection_name=collection_name)["title"]}
100
 
101
+ ### 🕮 Chapter Title(s)
102
  - Mention the chapter(s) from which the references were taken. Use the field *title* here from the context if available. For example `TVM 1.8.3`
103
 
104
+ ### 🕮 Verse Number(s)
105
  - Mention the *verse number* from which the references were taken.
106
 
107
  ### 🔗 Reference Link(s)
 
124
  > If you are unsure about a character, leave it as it is rather than guessing.
125
 
126
 
127
+ ### 📜 English Transliteration(s)
128
  - For each verse above, provide the **matching English transliteration**.
129
  - Maintain the **same order** as the verses listed above.
130
 
131
+ ### 📜 English Translation(s)
132
  - Provide the **English meaning** for each verse listed above.
133
  - Again, follow the **same order**.
134
  - Do **not** repeat the original verse here — just the translation.