Spaces:
Runtime error
Runtime error
File size: 2,070 Bytes
63deadc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
"""**Text Splitters** are classes for splitting text.
**Class hierarchy:**
.. code-block::
BaseDocumentTransformer --> TextSplitter --> <name>TextSplitter # Example: CharacterTextSplitter
RecursiveCharacterTextSplitter --> <name>TextSplitter
Note: **MarkdownHeaderTextSplitter** and **HTMLHeaderTextSplitter** do not derive from TextSplitter.
**Main helpers:**
.. code-block::
Document, Tokenizer, Language, LineType, HeaderType
""" # noqa: E501
from langchain_text_splitters.base import (
Language,
TextSplitter,
Tokenizer,
TokenTextSplitter,
split_text_on_tokens,
)
from langchain_text_splitters.character import (
CharacterTextSplitter,
RecursiveCharacterTextSplitter,
)
from langchain_text_splitters.html import (
ElementType,
HTMLHeaderTextSplitter,
HTMLSectionSplitter,
)
from langchain_text_splitters.json import RecursiveJsonSplitter
from langchain_text_splitters.konlpy import KonlpyTextSplitter
from langchain_text_splitters.latex import LatexTextSplitter
from langchain_text_splitters.markdown import (
HeaderType,
LineType,
MarkdownHeaderTextSplitter,
MarkdownTextSplitter,
)
from langchain_text_splitters.nltk import NLTKTextSplitter
from langchain_text_splitters.python import PythonCodeTextSplitter
from langchain_text_splitters.sentence_transformers import (
SentenceTransformersTokenTextSplitter,
)
from langchain_text_splitters.spacy import SpacyTextSplitter
# Public API of this module: the names a star-import (`from <module> import *`)
# exposes. Every entry is one of the names brought in by the import statements
# above; when a new splitter is imported, add it here as well so that it is
# re-exported.
__all__ = [
"TokenTextSplitter",
"TextSplitter",
"Tokenizer",
"Language",
"RecursiveCharacterTextSplitter",
"RecursiveJsonSplitter",
"LatexTextSplitter",
"PythonCodeTextSplitter",
"KonlpyTextSplitter",
"SpacyTextSplitter",
"NLTKTextSplitter",
"split_text_on_tokens",
"SentenceTransformersTokenTextSplitter",
"ElementType",
"HeaderType",
"LineType",
"HTMLHeaderTextSplitter",
"HTMLSectionSplitter",
"MarkdownHeaderTextSplitter",
"MarkdownTextSplitter",
"CharacterTextSplitter",
]
|