Spaces:
Running
Running
File size: 1,297 Bytes
48ec4db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import uuid
class _AwaitableStr(str):
    """A ``str`` that can also be awaited, resolving to its own plain value.

    ``Chunk.__str__`` used to be declared ``async``; returning this type keeps
    ``await chunk.__str__()`` working while making ``str(chunk)`` valid.
    """

    def __await__(self):
        return self._resolve().__await__()

    async def _resolve(self) -> str:
        # ``str()`` on a str subclass yields an exact ``str`` instance.
        return str(self)


class Chunk:
    """A piece of text extracted from a file, plus where it was found."""

    def __init__(
        self,
        id: uuid.UUID,
        filename: str,
        page_number: int,
        start_index: int,
        start_line: int,
        end_line: int,
        text: str,
    ):
        """Store the chunk text and its location metadata unchanged.

        Args:
            id: Unique identifier of the chunk.
            filename: Name or path of the file the chunk was taken from.
            page_number: Page number associated with the chunk.
            start_index: Start index associated with the chunk.
            start_line: First line associated with the chunk.
            end_line: Last line associated with the chunk.
            text: The chunk's text content.
        """
        self.id: uuid.UUID = id
        self.filename: str = filename
        self.page_number: int = page_number
        self.start_index: int = start_index
        self.start_line: int = start_line
        self.end_line: int = end_line
        self.text: str = text

    async def get_raw_text(self) -> str:
        """Return the chunk text exactly as stored."""
        return self.text

    async def get_splitted_text(self) -> list[str]:
        """Return the text split on single space characters.

        Splitting uses ``" "`` as an explicit separator, so consecutive spaces
        produce empty strings and newlines/tabs are not treated as separators.
        """
        return self.text.split(" ")

    async def get_metadata(self) -> dict:
        """Return the location metadata (everything except the text).

        The ``id`` is converted to its string form; other values are returned
        as stored.
        """
        return {
            "id": str(self.id),
            "filename": self.filename,
            "page_number": self.page_number,
            "start_index": self.start_index,
            "start_line": self.start_line,
            "end_line": self.end_line,
        }

    def __str__(self) -> str:
        """Return a one-line human-readable summary of the chunk.

        The summary shows the last ``/``-separated component of the filename,
        the page and line range, the first 100 characters of the text, the
        total text length, and the last 20 characters of the text.

        This method must be synchronous: ``str()``, ``print()`` and f-strings
        require ``__str__`` to return a string, and an ``async def`` returns a
        coroutine instead, which raises ``TypeError``. The result is still
        awaitable so that ``await chunk.__str__()`` keeps working.
        """
        # For texts shorter than 100 characters the head and tail previews
        # overlap, so part of the text appears twice in the summary.
        return _AwaitableStr(
            f"Chunk from {self.filename.split('/')[-1]}, "
            f"page - {self.page_number}, "
            f"start - {self.start_line}, "
            f"end - {self.end_line}, "
            f"and text - {self.text[:100]}... ({len(self.text)})...{self.text[-20:]}\n"
        )
|