Spaces:
Sleeping
Sleeping
File size: 596 Bytes
e6583bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
from langchain_community.document_loaders import SeleniumURLLoader
from shared import ResearchState
# Maximum number of characters of loaded page content kept by load_node
# (the content is sliced to this length before being returned).
# The original note estimates ~100,000 characters as ≈ 32,000 tokens max;
# the real ratio depends on the tokenizer used downstream — TODO confirm.
MAX_CHARS = 100_000
def load_node(state: ResearchState) -> dict:
    """Load the page at ``state.url`` and return its text, capped in length.

    Args:
        state: Research state; only its ``url`` attribute is read here.

    Returns:
        A one-key dict ``{"content": ...}`` whose value is one of:

        * ``"No URL to load"`` when ``state.url`` is falsy,
        * ``"No content"`` when the loader returns no documents,
        * otherwise the first document's ``page_content``, cut to at most
          ``MAX_CHARS`` characters.
    """
    # Guard clause: without a URL there is nothing to fetch.
    if not state.url:
        return {"content": "No URL to load"}

    # The URL is passed as a plain string; presumably state.url may be a
    # non-str URL type (hence the str() call) — verify against ResearchState.
    loader = SeleniumURLLoader(urls=[str(state.url)])
    docs = loader.load()

    # Only the first returned document is used; any others are ignored.
    content = docs[0].page_content if docs else "No content"

    # Truncate early to prevent overload later.
    return {"content": content[:MAX_CHARS]}