import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
import { TokenTextSplitter, TokenTextSplitterParams } from 'langchain/text_splitter'
import { TiktokenEncoding } from '@dqbd/tiktoken'

/**
 * Converts a raw node input into an integer, or `undefined` when no usable
 * value was supplied.
 *
 * Accepts both numbers and numeric strings, since the value arrives untyped
 * from `nodeData.inputs`. Missing, blank, and non-numeric values all yield
 * `undefined` so the caller can leave the corresponding option unset instead
 * of forwarding `NaN`. A legitimate `0` is preserved.
 *
 * @param value - Raw input value as found on `nodeData.inputs`.
 * @returns The parsed base-10 integer, or `undefined` if there is none.
 */
const parseOptionalInt = (value: unknown): number | undefined => {
    if (value === undefined || value === null || value === '') return undefined
    const parsed = parseInt(String(value), 10)
    return Number.isNaN(parsed) ? undefined : parsed
}

/**
 * Node definition exposing LangChain's `TokenTextSplitter` with a selectable
 * encoding and optional chunk size / chunk overlap settings.
 */
class TokenTextSplitter_TextSplitters implements INode {
    label: string
    name: string
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]

    constructor() {
        this.label = 'Token Text Splitter'
        this.name = 'tokenTextSplitter'
        this.type = 'TokenTextSplitter'
        this.icon = 'tiktoken.svg'
        this.category = 'Text Splitters'
        this.description = `Splits a raw text string by first converting the text into BPE tokens, then split these tokens into chunks and convert the tokens within a single chunk back into text.`
        this.baseClasses = [this.type, ...getBaseClasses(TokenTextSplitter)]
        this.inputs = [
            {
                label: 'Encoding Name',
                name: 'encodingName',
                type: 'options',
                options: [
                    {
                        label: 'gpt2',
                        name: 'gpt2'
                    },
                    {
                        label: 'r50k_base',
                        name: 'r50k_base'
                    },
                    {
                        label: 'p50k_base',
                        name: 'p50k_base'
                    },
                    {
                        label: 'p50k_edit',
                        name: 'p50k_edit'
                    },
                    {
                        label: 'cl100k_base',
                        name: 'cl100k_base'
                    }
                ],
                default: 'gpt2'
            },
            {
                label: 'Chunk Size',
                name: 'chunkSize',
                type: 'number',
                default: 1000,
                optional: true
            },
            {
                label: 'Chunk Overlap',
                name: 'chunkOverlap',
                type: 'number',
                optional: true
            }
        ]
    }

    /**
     * Builds a `TokenTextSplitter` from the values configured on the node.
     *
     * `encodingName` is forwarded as given. `chunkSize` and `chunkOverlap`
     * are forwarded only when they parse to an integer; otherwise they are
     * left unset so the splitter falls back to whatever it uses by default.
     *
     * @param nodeData - Node instance data; only `inputs` is read.
     * @returns The configured splitter instance.
     */
    async init(nodeData: INodeData): Promise<TokenTextSplitter> {
        const encodingName = nodeData.inputs?.encodingName as TiktokenEncoding
        const chunkSize = parseOptionalInt(nodeData.inputs?.chunkSize)
        const chunkOverlap = parseOptionalInt(nodeData.inputs?.chunkOverlap)

        const params: Partial<TokenTextSplitterParams> = { encodingName }
        // Compare against undefined rather than relying on truthiness so an
        // explicit 0 (e.g. "no overlap") is not silently discarded.
        if (chunkSize !== undefined) params.chunkSize = chunkSize
        if (chunkOverlap !== undefined) params.chunkOverlap = chunkOverlap

        return new TokenTextSplitter(params)
    }
}

module.exports = { nodeClass: TokenTextSplitter_TextSplitters }