Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| from metadata import MetadataWhereClause | |
| from typing import List, Dict | |
| from modules.kamba_ramayanam_helper import ( | |
| get_list_of_kandams, | |
| get_list_of_padalams, | |
| get_list_of_padalams_in_tamil, | |
| ) | |
| import nalayiram_helper | |
class SanatanConfig:
    """Static registry of the scripture collections plus lookup helpers.

    ``scriptures`` is a list of dicts, one per collection. Each entry carries:

    * ``name`` / ``title`` / ``collection_name`` - identifiers used by the
      lookup methods below.
    * ``collection_embedding_fn`` (optional) - embedding backend name; when
      absent ``get_embedding_for_collection`` falls back to ``"hf"``.
    * ``unit`` and optional ``unit_field`` - the metadata key that
      ``canonicalize_document`` reads to fill the canonical ``verse`` number.
    * ``field_mapping`` (optional) - canonical key -> metadata key (str) or a
      callable taking the metadata dict.
    * ``metadata_fields`` - the fields that may be used in metadata filters
      (see ``is_metadata_field_allowed``). ``lov`` values are zero-argument
      callables so the helper modules are only touched when invoked.
    * ``pdf_path``, ``source``, ``language``, ``example_labels``,
      ``examples``, ``llm_hints`` - descriptive data; ``example_labels`` and
      ``examples`` are parallel lists.
    """

    # NOTE(review): presumably the ChromaDB persistence directory - confirm
    # against the code that opens the store.
    dbStorePath: str = "./chromadb-store"

    scriptures = [
        {
            "name": "vishnu_puranam",
            "title": "Sri Vishnu Puranam",
            "output_dir": "./output/vishnu_puranam",
            "collection_name": "vishnu_puranam_openai",
            "collection_embedding_fn": "openai",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "label": "File Name",
                    "datatype": "str",
                    "description": "name of the file from which the information was extracted",
                },
                {
                    "name": "page",
                    "datatype": "int",
                    "label": "Page Number",
                    "description": "Page number from the source",
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/vishnu_puranam.pdf",
            "source": "https://dn720005.ca.archive.org/0/items/vishnu-purana-sanskrit-english-ocr/VISHNU-PURANA-Sanskrit-English-OCR.pdf",
            "language": "san+eng",
            "example_labels": [
                "Vishnu's form",
                "About the five elements",
                "About Garuda",
                "Weapons of Vishnu",
                "Vishnu's form (all scriptures)",
            ],
            "examples": [
                "describe Vishnu's form as defined in vishnu puranam",
                "five elements and their significance as per vishnu puranam",
                "What is the significance of Garuda? Show some verses from vishnu puranam that describe him.",
                "What weapons does Vishnu hold as mentioned in vishnu puranam?",
                "How is the form of Vishnu described across the scriptures?",
            ],
            "llm_hints": [],
        },
        {
            "name": "shukla_yajur_vedam",
            "title": "Shukla Yajur Vedam",
            "output_dir": "./output/shukla_yajur_vedam",
            "collection_name": "shukla_yajur_vedam",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "label": "File Name",
                    "datatype": "str",
                    "description": "name of the file from which the information was extracted",
                },
                {
                    "name": "page",
                    "datatype": "int",
                    "label": "Page Number",
                    "description": "Page number from the source",
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/shukla-yajur-veda.pdf",
            "source": "https://www.thearyasamaj.org/uploads/book/2014/04/R1sSjG_eLb_sub_406_yajurveda.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Vedam",
                "About the five elements",
                "About Brahma",
            ],
            "examples": [
                "Gist of Shukla Yajur Vedam. Give me some sanskrit verses.",
                "What is the significance of fire and water. show some sanskrit verses",
                "Brahma",
            ],
            "llm_hints": [],
        },
        {
            "name": "bhagavat_gita",
            "title": "Bhagavat Gita",
            "output_dir": "./output/bhagavat_gita",
            "collection_name": "bhagavat_gita_openai",
            "collection_embedding_fn": "openai",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "label": "File Name",
                    "datatype": "str",
                    "description": "name of the file from which the information was extracted",
                },
                {
                    "name": "page",
                    "datatype": "int",
                    "label": "Page Number",
                    "description": "Page number from the source",
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/bhagavat_gita.pdf",
            "source": "https://dn790006.ca.archive.org/0/items/in.gov.ignca.279/279_text.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Arjuna",
                "About Karma",
                "About birth and death",
                "About the battle field",
                "About Krishna's form",
                "Krishna's Teachings",
            ],
            "examples": [
                "Show some verses where Krishna advises Arjuna",
                "What does Krishna say about Karma",
                "What does Krishna say about birth and death",
                "describe the battle field",
                # The comma after this entry was missing, which silently
                # merged it with the next one and broke label/example parity.
                "How did Arjuna respond upon witnessing Krishna’s Vishwarupa?",
                "What teachings did Krishna share in the Gita?",
            ],
            "llm_hints": [],
        },
        {
            "name": "valmiki_ramayanam",
            "title": "Valmiki Ramayanam",
            "output_dir": "./output/valmiki_ramayanam",
            "collection_name": "valmiki_ramayanam_openai",
            "collection_embedding_fn": "openai",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "label": "File Name",
                    "datatype": "str",
                    "description": "name of the file from which the information was extracted",
                },
                {
                    "name": "page",
                    "datatype": "int",
                    "label": "Page Number",
                    "description": "Page number from the source",
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/valmiki_ramayanam.pdf",
            "source": "https://ia800509.us.archive.org/28/items/valmiki-ramayana-gita-press-english/Valmiki%20Ramayana%20Gita%20Press%20English.pdf",
            "language": "san+eng",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
                "A slokam by name",
                "Vibheeshana sharanagathi slokam",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
                "explain sakrudeva prapannaaya shlokam in ramayana",
                "give the shlokam in ramayanam that vibheeshana uses to perform sharanagathi to rama, give the sanskrit shlokam and its meaning",
            ],
            "llm_hints": [],
        },
        {
            "name": "vishnu_sahasranamam",
            "title": "Vishnu Sahasranamam",
            "output_dir": "./output/vishnu_sahasranamam",
            "collection_name": "vishnu_sahasranamam_openai",
            "collection_embedding_fn": "openai",
            "unit": "verse",
            "field_mapping": {
                "text": "sanskrit",
                "title": lambda doc: f"Verse {doc.get('verse','')}",
                "unit_index": "verse",
                "transliteration": "transliteration",
                "word_by_word_native": "translation",
                "author": lambda doc: "Sage Vyasa",
                # Single quotes inside the f-string keep this parseable on
                # interpreters older than Python 3.12.
                "relative_path": lambda doc: f"Vishnu Sahasranamam-{doc.get('chapter','')}-{doc.get('verse','')}",
            },
            "metadata_fields": [
                {
                    "name": "chapter",
                    "datatype": "str",
                    "label": "Chapter Name",
                    "description": "Name of the Chapter",
                },
                {
                    "name": "page_number",
                    "datatype": "int",
                    "label": "Page Number",
                    "description": "Page number from the source",
                },
                {
                    "name": "sanskrit",
                    "label": "Lyrics in sanskrit",
                    "datatype": "str",
                    "description": "The original sloka in sanskrit.",
                    "show_as_filter": True,
                },
                {
                    "name": "translation",
                    "label": "English Translation",
                    "datatype": "str",
                    "description": "The english translation.",
                    "show_as_filter": True,
                },
                {
                    "name": "transliteration",
                    "label": "English Transliteration",
                    "datatype": "str",
                    "description": "The english transliteration.",
                    "show_as_filter": True,
                },
                {
                    "name": "verse",
                    "label": "Verse Number",
                    "datatype": "int",
                    "description": "The verse number of the sloka.",
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/vishnu_sahasranamam.pdf",
            "source": "https://www.swami-krishnananda.org/vishnu/Sri_Vishnu_Sahasranama_Stotram.pdf",
            "language": "san+eng",
            "example_labels": ["Vanamali", "1000 names", "Sanskrit text search"],
            "examples": [
                "Vanamali",
                "Show some of the 1000 names of Vishnu along with their meaning",
                "show the verse that begins with शुक्लाम्बरधरं",
            ],
            "llm_hints": [],
        },
        {
            "name": "divya_prabandham",
            "title": "4000 Divya Prabandham",
            "output_dir": "./output/divya_prabandham",
            "collection_name": "divya_prabandham",
            "collection_embedding_fn": "openai",
            "unit": "verse",
            "credits": "We owe deep gratitude to uveda.org for providing such beautifully curated data. 🙏 This section would not have been possible without the dedicated efforts of the Uveda team.",
            "field_mapping": {
                "text": "pasuram_ta",
                "title": lambda doc: f"{doc.get('prabandham_name','')} {doc.get('chapter','')}-{doc.get('decade','')}:{doc.get('position_in_chapter','')}",
                "location": "divya_desams",
                "word_by_word_native": "wbw_ta",
                "unit_index": "verse",
                "transliteration": "pasuram_en",
                "reference_link": "html_url",
                "author": "azhwar_name",
                "chapter_name": "prabandham_name",
                # Join the prabandham name with whichever of decade / chapter /
                # position are present; -1 marks "not applicable".
                "relative_path": lambda doc: "-".join(
                    filter(
                        None,
                        [
                            doc.get("prabandham_name", ""),
                            *(
                                str(doc.get(k))
                                for k in ["decade", "chapter", "position_in_chapter"]
                                if doc.get(k, -1) != -1
                            ),
                        ],
                    )
                ),
            },
            "metadata_fields": [
                {
                    "name": "prabandham_code",
                    "label": "Prabandham Code",
                    "datatype": "str",
                    "description": "contains the short prabandham_code. e.g. `TPL` for `Thiruppallandu`",
                },
                {
                    "name": "prabandham_name",
                    "label": "Prabandham Name",
                    "datatype": "str",
                    "description": "contains the prabandham name. e.g. `Thiruppallandu`",
                    "show_as_filter": True,
                    "component": "dropdown",
                    "lov": lambda: [
                        p.prabandham_name
                        for p in nalayiram_helper.get_standardized_prabandham_names()
                    ],
                },
                {
                    "name": "azhwar_name",
                    "label": "Azhwar Name",
                    "datatype": "str",
                    "description": "contains the azhwar name. e.g. `Thirumangai Azhwar`",
                    "show_as_filter": True,
                    "component": "dropdown",
                    "lov": lambda: list(
                        nalayiram_helper.get_standardized_azhwar_names()
                    ),
                },
                {
                    "name": "divya_desams",
                    "label": "Divya Desams",
                    "datatype": "str",
                    "description": "comma separated list of divya desams. e.g. Thiruneermalai,Thiruvallikkeni.",
                    "show_as_filter": True,
                    "component": "dropdown",
                    "lov": lambda: list(
                        nalayiram_helper.get_standardized_divya_desam_names()
                    ),
                },
                {
                    "name": "title",
                    "label": "Pasuram Title",
                    "datatype": "str",
                    "description": (
                        "Exact title of a pasuram in one of the following formats:\n"
                        "1. '{prabandham_code} {decade}.{chapter}.{pasuram}' — use when the prabandham has decades.\n"
                        "2. '{prabandham_code} {chapter}.{pasuram}' — use when the prabandham does not have decades.\n\n"
                        "⚠️ Use this field ONLY when the user provides a specific prabandham and a relative verse number.\n"
                        "Examples of valid usage:\n"
                        "- User query: '3rd pasuram in the 8th Thiruvaimozhi of the 1st decade.'\n"
                        " → Convert to: '{prabandham_code} 1.8.3' and pass as `title` filter.\n"
                        "- User query: '2nd pasuram of chapter 5 in [Prabandham with no decades].'\n"
                        " → Convert to: '{prabandham_code} 5.2' and pass as `title` filter.\n"
                        "Do NOT use `title` for general queries or keyword searches — leave it empty in those cases."
                    ),
                    "show_as_filter": True,
                },
                {
                    "name": "verse",
                    "label": "Absolute Pasuram Number",
                    "datatype": "int",
                    "is_unique": True,
                    "description": (
                        "Absolute verse number or pasuram number. Each verse has a unique number. "
                        # "Use it only when a specific prabandham name is NOT mentioned in the user query."
                        "For e.g. 'Give me pasuram 1176'"
                    ),
                    "show_as_filter": True,
                },
                {
                    "name": "decade",
                    "label": "Decade Number (Pathu)",
                    "datatype": "int",
                    "description": (
                        "The decade (or `pathu` in Tamil) that this pasuram belongs to. decade is -1 when there is no associated decade."
                    ),
                    "show_as_filter": True,
                },
                {
                    "name": "chapter",
                    "label": "Chapter Number",
                    "datatype": "int",
                    "description": (
                        "chapter number of this pasuram. is -1 when there is no associated chapter number"
                    ),
                    "show_as_filter": True,
                },
                {
                    "name": "position_in_chapter",
                    "label": "Relative Pasuram Number",
                    "datatype": "int",
                    "description": (
                        "Relative verse number or pasuram number within a chapter. "
                        "Use it only when a specific prabandham name is mentioned in the user query. "
                        "For e.g. 'Give me the 5th pasuram from Thirupavai'"
                    ),
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/divya_prabandham.pdf",
            "source": "https://uveda.org",
            "language": "tamil",
            "example_labels": [
                "About the five elements",
                "About Garuda",
                "Pasuram about Krishna's Flute",
                "Andal's pasuram",
                "Specific Pasuram (absolute)",
                "Pasuram by Azhwar",
                "Specific pasuram(relative)",
                "Decade and Chapter Search",
            ],
            "examples": [
                "five elements and their significance as defined in divya_prabandham",
                "What is the significance of Garuda? Show some verses from divya prabandham that describe him.",
                "Show me a pasuram that talks about how the animals and birds enjoy Krishna's flute playing.",
                "Give me a pasuram by Andal",
                "Show me Pasuram 1187 ",
                "Show me a pasuram by Thondaradippodi azhwar",
                "Give me the 2nd pasuram in the 3rd Thiruvaimozhi from the 2nd decade",
                "Give me just a few words from the starting lines and reference links of all 11 pasurams from thiruvaimozhi 5th decade 4th chapter.",
            ],
            "llm_hints": [
                "If the user wishes to query at a decade or chapter level for a given prabandham, use the direct metadata query on the appropriate fields once instead of querying the tool multiple times for each pasuram from the chapter."
            ],
        },
        {
            "name": "bhagavata_purana",
            "title": "Bhagavatha Puranam",
            "output_dir": "./output/bhagavata_purana",
            "collection_name": "bhagavata_purana",
            "unit": "page",
            "metadata_fields": [
                {
                    "name": "file",
                    "label": "File Name",
                    "datatype": "str",
                    "description": "name of the file from which the information was extracted",
                },
                {
                    "name": "page",
                    "datatype": "int",
                    "label": "Page Number",
                    "description": "Page number from the source",
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/bhagavata_purana.pdf",
            "source": "https://dn790003.ca.archive.org/0/items/bhagavatapuranagitapress_201907/Bhagavata%20Purana%20-%20Gita%20Press_text.pdf",
            "language": "san+eng",
            "example_labels": ["Gajendra Moksham", "Prahalad"],
            "examples": [
                "State some verses that showcase the devotion of Gajendra the elephant",
                "State some verses that showcase the devotion of Prahlada",
            ],
            "llm_hints": [],
        },
        {
            "name": "kamba_ramayanam_en",
            "title": "Kamba Ramayanam (English)",
            "output_dir": "./output/kamba_ramayanam",
            "collection_name": "kamba_ramayanam_en",
            "unit": "verse",
            "unit_field": "verse_number",
            "field_mapping": {
                "chapter_name": lambda doc: f"{doc.get('kandam','')}",
                "title": lambda doc: f"{doc.get('padalam_ta','')} - {doc.get('padalam_en','')}".strip(),
                "author": lambda doc: "Kamban",
                "unit_index": "verse_number",
                "verse": lambda doc: int(doc.get("verse_number", "0")),
                "relative_path": lambda doc: f"{doc.get('padalam_ta','')} - {doc.get('padalam_en','')}".strip(),
            },
            "metadata_fields": [
                {
                    "name": "kandam",
                    "label": "Kandam",
                    "datatype": "str",
                    "description": "The name of the Kandam or the chapter.",
                    "show_as_filter": True,
                    "component": "dropdown",
                    "lov": lambda: get_list_of_kandams(),
                },
                {
                    "name": "padalam_en",
                    "label": "Chapter Name",
                    "datatype": "str",
                    "description": "The name of the Padalam (Episode) in English.",
                    "show_as_filter": True,
                    "component": "dropdown",
                    "lov": lambda: get_list_of_padalams(),
                },
                {
                    "name": "padalam_ta",
                    "label": "Padalam name in Tamil",
                    "datatype": "str",
                    "description": "The name of the Padalam (Episode) in Tamil.",
                    "show_as_filter": True,
                    "component": "dropdown",
                    "lov": lambda: get_list_of_padalams_in_tamil(),
                },
                {
                    "name": "page",
                    "datatype": "int",
                    "label": "Page Number",
                    "description": "Page number from the source",
                    "show_as_filter": True,
                },
                {
                    "name": "verse_number",
                    "datatype": "int",
                    "label": "Verse Number",
                    "description": "Verse Number",
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/kamba_ramayanam.pdf",
            "source": "https://www.hindupedia.com/images/1/13/Kamba_Ramayanam_I.pdf",
            "language": "tamil",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
            ],
            "llm_hints": [],
        },
        {
            "name": "kamba_ramayanam",
            "title": "Kamba Ramayanam (Tamil)",
            "output_dir": "./output/kamba_ramayanam",
            "collection_name": "kamba_ramayanam",
            "unit": "chunk",
            "unit_field": "chunk_index",
            "field_mapping": {
                "chapter_name": lambda doc: f"{doc.get('kandam','')}",
                "title": lambda doc: f"{doc.get('padalam_ta','')} - {doc.get('padalam_en','')}".strip(),
                "author": lambda doc: "Kamban",
                "unit_index": "chunk_index",
                "verse": lambda doc: int(doc.get("verse_number", "0")),
                "relative_path": lambda doc: f"{doc.get('padalam_ta','')} - {doc.get('padalam_en','')}".strip(),
            },
            "metadata_fields": [
                {
                    "name": "chunk_index",
                    "label": "Page Index",
                    "datatype": "int",
                    "description": "The index of the chunk",
                    "show_as_filter": True,
                },
                {
                    "name": "filename",
                    "label": "File name from the source",
                    "datatype": "str",
                    "description": "The name of the file.",
                },
            ],
            "pdf_path": "./data/kamba_ramayanam.pdf",
            "source": "https://archive.org/details/vrajeshkumar_gmail_061/01-%E0%AE%AA%E0%AE%BE%E0%AE%B2%20%E0%AE%95%E0%AE%BE%E0%AE%A3%E0%AF%8D%E0%AE%9F%E0%AE%AE%E0%AF%8D/page/n15/mode/2up",
            "language": "tamil",
            "example_labels": [
                "About Jatayu",
                "About Hanuman",
                "About Vali",
                "About Sita",
                "About Ravana",
            ],
            "examples": [
                "What is the significance of Jatayu? show some sanskrit verses to support the argument",
                "Show some verses where Hanuman is mentioned",
                "How did Rama kill Vali",
                "How was Sita abducted",
                "How did Rama kill Ravana?",
            ],
            "llm_hints": [],
        },
        {
            "name": "chathusloki",
            "title": "Chathusloki by Sri Alavandar",
            "output_dir": "./output/chathusloki",
            "collection_name": "chathusloki",
            "unit": "verse",
            "unit_field": "sloka_number",
            "field_mapping": {
                "text": "sloka_devanagari",
                "title": lambda doc: f"Verse {doc.get('verse','')}",
                "unit_index": "sloka_number",
                "transliteration": "sloka_english_transliteration",
                "word_by_word_native": "meaning",
                "author": lambda doc: "Sri Aalavandhaar",
                "relative_path": lambda doc: f"Chathusloki-{doc.get('sloka_number','')}",
            },
            "metadata_fields": [
                {
                    "name": "sloka_number",
                    "label": "Slokam Number",
                    "datatype": "int",
                    "description": "The index of the sloka or verse",
                    "show_as_filter": True,
                },
                {
                    "name": "meaning_short",
                    "label": "Short meaning",
                    "datatype": "str",
                    "description": "A short meaning of the sanskrit verse in English.",
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/chathusloki.pdf",
            "source": "https://www.sadagopan.org/ebook/pdf/Chatusloki%20-%20VS.pdf",
            "language": "san+eng",
            "example_labels": ["Recite a sloka", "Commentary", "Role of Sridevi"],
            "examples": [
                "Recite the 1st sloka from Chathusloki",
                "Show detailed commentary for sloka 2 from Chathusloki",
                "What is the role of Sri Devi in the universe according to the Chathusloki?",
            ],
            "llm_hints": [],
        },
        {
            "name": "sri_stavam",
            "title": "Sri Stavam by Sri Koorathazhwar",
            "output_dir": "./output/sri_stavam",
            "collection_name": "sri_stavam",
            "unit": "slokam",
            "unit_field": "sloka_number",
            "field_mapping": {
                "text": "sanskrit",
                "title": lambda doc: f"Verse {doc.get('verse','')}",
                "unit_index": "sloka_number",
                "transliteration": "transliteration",
                "word_by_word_native": "meaning_short",
                "author": lambda doc: "Sri Koorathazhwar",
                "relative_path": lambda doc: f"Sri Stavam-{doc.get('sloka_number','')}",
            },
            "metadata_fields": [
                {
                    "name": "sloka_number",
                    "label": "Slokam Number",
                    "datatype": "int",
                    "description": "The index of the sloka or verse",
                    "show_as_filter": True,
                },
                {
                    "name": "meaning_short",
                    "label": "Short meaning",
                    "datatype": "str",
                    "description": "A short meaning of the sanskrit verse in English.",
                    "show_as_filter": True,
                },
                {
                    "name": "sanskrit",
                    "label": "Lyrics in sanskrit",
                    "datatype": "str",
                    "description": "Verse in sanskrit",
                },
                {
                    "name": "transliteration",
                    "label": "English Transliteration",
                    "datatype": "str",
                    "description": "Verse transliterated to English",
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/sri_stavam.pdf",
            "source": "https://www.sadagopan.org/ebook/pdf/Sri%20Stavam.pdf",
            "language": "san+eng",
            "example_labels": ["Recite a sloka", "Commentary", "Role of Sridevi"],
            "examples": [
                "Recite the 1st sloka from Sri Stavam",
                "Show detailed commentary for sloka 2 from Sri Stavam",
                "What is the role of Sri Devi in the universe according to the Sri Stavam?",
            ],
            "llm_hints": [
                # This collection declares `sloka_number`, not `verse`, as its
                # filterable field; a `verse` filter is rejected by
                # is_metadata_field_allowed.
                "if the user asks for nth sloka, do a metadata search on the `sloka_number` field."
            ],
        },
        {
            "name": "yt_metadata",
            "title": "Sampradayam in YouTube",
            "output_dir": "./output/yt_metadata",
            "collection_name": "yt_metadata",
            "collection_embedding_fn": "openai",
            "unit": "video",
            "field_mapping": {
                "text": "description",
                "title": "video_title",
                "author": "channel_title",
                "reference_link": lambda doc: f"https://www.youtube.com/watch?v={doc.get('video_id','')}",
            },
            "metadata_fields": [
                {
                    "name": "video_id",
                    "label": "Video ID",
                    "datatype": "str",
                    "description": "The video id as in YouTube",
                    "show_as_filter": True,
                },
                {
                    "name": "video_title",
                    "label": "Video Title",
                    "datatype": "str",
                    "description": "The title of the video as in YouTube",
                    "show_as_filter": True,
                },
                {
                    "name": "description",
                    "label": "Video Description",
                    "datatype": "str",
                    "description": "Description as in YouTube",
                    "show_as_filter": True,
                },
                {
                    "name": "channel_url",
                    "label": "Channel URL",
                    "datatype": "str",
                    "description": "URL of the YouTube Channel",
                },
                {
                    "name": "channel_title",
                    "label": "Channel Title",
                    "datatype": "str",
                    "description": "Title of the YouTube Channel",
                    "show_as_filter": True,
                },
            ],
            "pdf_path": "./data/none.pdf",
            "source": "https://youtube.com",
            "language": "san+eng+tam",
            "example_labels": ["Srirangam", "Pasuram video"],
            "examples": [
                "Show me YouTube videos that talk about Srirangam",
                "Show me lyrics of 1st pasuram of 1st decade in the 4st Thiruvaimozhi. Also show the related youtube videos.",
            ],
            "llm_hints": [
                "if the user asks for YouTube videos, DO NOT do a web search, instead do a search on this collection."
            ],
        },
    ]

    def get_scripture_by_collection(self, collection_name: str):
        """Return the scripture config whose ``collection_name`` matches.

        Args:
            collection_name: value of the ``collection_name`` key to look up.

        Returns:
            The first matching scripture dict from ``self.scriptures``.

        Raises:
            IndexError: if no scripture uses that collection name (same
                exception type as the previous ``[...][0]`` lookup, now with
                a descriptive message).
        """
        for scripture in self.scriptures:
            if scripture["collection_name"] == collection_name:
                return scripture
        raise IndexError(f"Unknown collection: {collection_name}")

    def is_metadata_field_allowed(
        self, collection_name: str, metadata_where_clause: MetadataWhereClause
    ):
        """Check that every filter in a where-clause targets a declared field.

        The clause is walked recursively: its ``filters`` are checked first,
        then each nested clause in ``groups``.

        Args:
            collection_name: collection whose ``metadata_fields`` define the
                allowed names.
            metadata_where_clause: clause exposing ``filters`` (items with a
                ``metadata_field`` attribute) and ``groups`` (nested clauses).

        Returns:
            ``True`` when every referenced field is allowed.

        Raises:
            IndexError: if ``collection_name`` is unknown.
            Exception: on the first filter naming a field the collection does
                not declare; the message lists the allowed fields.
        """
        scripture = self.get_scripture_by_collection(collection_name=collection_name)
        allowed_fields = {field["name"] for field in scripture["metadata_fields"]}

        def validate_clause(clause: MetadataWhereClause):
            # Direct filters on this clause.
            for f in clause.filters or ():
                if f.metadata_field not in allowed_fields:
                    raise Exception(
                        f"metadata_field: [{f.metadata_field}] not allowed in collection [{collection_name}]. "
                        f"Here are the allowed fields with their descriptions: {scripture['metadata_fields']}"
                    )
            # Nested groups are validated with the same rules.
            for g in clause.groups or ():
                validate_clause(g)

        validate_clause(metadata_where_clause)
        return True

    def get_embedding_for_collection(self, collection_name: str):
        """Return the embedding backend name configured for a collection.

        Falls back to ``"hf"`` when the scripture entry has no
        ``collection_embedding_fn`` key.

        Raises:
            IndexError: if ``collection_name`` is unknown.
        """
        scripture = self.get_scripture_by_collection(collection_name)
        return scripture.get("collection_embedding_fn", "hf")

    def filter_scriptures_fields(self, fields_to_keep: List[str]) -> List[Dict]:
        """Return one dict per scripture containing only the requested keys.

        Keys a scripture does not define are skipped for that scripture; the
        key order follows ``fields_to_keep``.
        """
        return [
            {key: scripture[key] for key in fields_to_keep if key in scripture}
            for scripture in self.scriptures
        ]

    def canonicalize_document(
        self, scripture_name: str, document_text: str, metadata_doc: dict
    ):
        """Convert a scripture-specific document to a flat canonical dict.

        Each ``field_mapping`` entry is either a metadata key (str) or a
        callable taking ``metadata_doc``; only canonical keys are copied.
        Global fields from the config (name, title, source, language, unit)
        are then added, and ``verse`` is derived from the metadata value named
        by ``unit_field`` (or ``unit`` when ``unit_field`` is absent).

        Args:
            scripture_name: the ``name`` of the scripture config to use.
            document_text: raw document text; becomes ``text`` when the
                mapping yields no text, in which case ``document`` is ``"-"``.
            metadata_doc: metadata dict the mapping is resolved against.

        Returns:
            The canonical document dict.

        Raises:
            ValueError: if ``scripture_name`` is not configured.
        """
        allowed_keys = {
            "verse",
            "text",
            "title",
            "unit",
            "unit_index",
            "word_by_word_native",
            "transliteration",
            "reference_link",
            "author",
            "chapter_name",
            "relative_path",
            "location",
        }
        config = next((s for s in self.scriptures if s["name"] == scripture_name), None)
        if not config:
            raise ValueError(f"Unknown scripture: {scripture_name}")

        def resolve_field(field):
            """Resolve a mapping value: metadata key or callable."""
            if callable(field):
                try:
                    return field(metadata_doc)
                except Exception:
                    # Best effort: a mapping callable that fails on an
                    # incomplete metadata doc yields no value, not a crash.
                    return None
            if isinstance(field, str):
                return metadata_doc.get(field)
            return None

        canonical_doc = {
            key: resolve_field(field)
            for key, field in config.get("field_mapping", {}).items()
            if key in allowed_keys
        }

        # Global fields taken straight from the scripture config.
        canonical_doc["scripture_name"] = config.get("name")
        canonical_doc["scripture_title"] = config.get("title")
        canonical_doc["source"] = config.get("source")
        canonical_doc["language"] = config.get("language")
        canonical_doc["unit"] = config.get("unit")
        canonical_doc["document"] = document_text

        # With no mapped text, promote the raw document to ``text``.
        if canonical_doc.get("text") in (None, "-"):
            canonical_doc["text"] = canonical_doc["document"]
            canonical_doc["document"] = "-"

        unit_value = resolve_field(config.get("unit_field", config.get("unit")))
        try:
            canonical_doc["verse"] = int(unit_value) if unit_value else 0
        except (TypeError, ValueError):
            # Non-numeric unit values (e.g. a roman-numeral page) fall back
            # to 0 instead of aborting the whole conversion.
            canonical_doc["verse"] = 0
        return canonical_doc

    def get_collection_name(self, scripture_name):
        """Return the ``collection_name`` configured for ``scripture_name``.

        Raises:
            ValueError: if ``scripture_name`` is not configured (previously
                this surfaced as an ``AttributeError`` on ``None``).
        """
        for scripture in self.scriptures:
            if scripture["name"] == scripture_name:
                return scripture.get("collection_name")
        raise ValueError(f"Unknown scripture: {scripture_name}")