File size: 1,489 Bytes
9705b6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
const tokenSplit = require('./tokenSplit');

/**
 * Jest suite for `tokenSplit`.
 *
 * Every case awaits `tokenSplit` with an options object and compares the
 * resolved array of string chunks against a literal expectation.
 */
describe('tokenSplit', () => {
  // Shared input sentence used by every case except the empty-text one.
  const text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam id.';

  it('returns correct text chunks with provided parameters', async () => {
    // All options spelled out explicitly, including the encoding name.
    const options = {
      text,
      encodingName: 'gpt2',
      chunkSize: 2,
      chunkOverlap: 1,
      returnSize: 5,
    };
    const expected = ['. Null', ' Nullam', 'am id', ' id.', '.'];

    const chunks = await tokenSplit(options);

    expect(chunks).toEqual(expected);
  });

  it('returns correct text chunks with default parameters', async () => {
    // Only `text` is supplied; everything else is left to tokenSplit's defaults.
    const expected = [
      'Lorem',
      ' ipsum',
      ' dolor',
      ' sit',
      ' amet',
      ',',
      ' consectetur',
      ' adipiscing',
      ' elit',
      '.',
      ' Null',
      'am',
      ' id',
      '.',
    ];

    const chunks = await tokenSplit({ text });

    expect(chunks).toEqual(expected);
  });

  it('returns correct text chunks with specific return size', async () => {
    const options = { text, returnSize: 2 };

    const chunks = await tokenSplit(options);

    // The expectation is the last two entries of the default-parameter output.
    expect(chunks).toHaveLength(2);
    expect(chunks).toEqual([' id', '.']);
  });

  it('returns correct text chunks with specified chunk size', async () => {
    const options = { text, chunkSize: 10 };
    const expected = [
      'Lorem ipsum dolor sit amet, consectetur adipiscing elit.',
      ' Nullam id.',
    ];

    const chunks = await tokenSplit(options);

    expect(chunks).toEqual(expected);
  });

  it('returns empty array with no text', async () => {
    const chunks = await tokenSplit({ text: '' });

    expect(chunks).toEqual([]);
  });
});