from typing import Any

from graphgen.models.splitter.recursive_character_splitter import (
    RecursiveCharacterSplitter,
)


class MarkdownTextRefSplitter(RecursiveCharacterSplitter):
    """Recursive character splitter preconfigured with Markdown separators.

    The constructor supplies a fixed separator list (headings, code-fence
    ends, horizontal rules, then generic whitespace fallbacks) and delegates
    everything else to ``RecursiveCharacterSplitter``.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Create the splitter with the Markdown separator list.

        Args:
            **kwargs: Forwarded unchanged to ``RecursiveCharacterSplitter``.
                ``separators`` is always supplied here, so callers must not
                pass it themselves.
        """
        # NOTE(review): several entries below are written as regex patterns
        # ("#{1,6}", "---+", escaped "*"). Whether the base class interprets
        # separators as regexes is not visible from this file -- confirm.

        # ATX-style headings: a newline, one to six '#', then a space.
        # The underlined (setext) heading form is not covered, e.g.:
        #     Heading level 2
        #     ---------------
        heading_breaks = ["\n#{1,6} "]

        # Closing fence of a code block.
        code_fence_breaks = ["```\n"]

        # Horizontal rules made of three or more '*', '-' or '_' on their own
        # line. Variants such as rules containing spaces are not covered.
        horizontal_rule_breaks = [
            "\n\\*\\*\\*+\n",
            "\n---+\n",
            "\n___+\n",
        ]

        # Generic fallbacks: blank line, line break, space, empty string.
        generic_breaks = ["\n\n", "\n", " ", ""]

        super().__init__(
            separators=[
                *heading_breaks,
                *code_fence_breaks,
                *horizontal_rule_breaks,
                *generic_breaks,
            ],
            **kwargs,
        )