// Search API routes: Vespa-backed search, full-image lookup, query suggestions, and a similarity-map placeholder.
import { Hono } from 'hono'; | |
import { z } from 'zod'; | |
import { config } from '../config'; | |
import { cache } from '../services/cache'; | |
import { vespaRequest } from '../services/vespa-https'; | |
import { v4 as uuidv4 } from 'uuid'; | |
/** Hono router that hosts every search-related endpoint declared in this module. */
const searchApp = new Hono();

// Ranking modes a caller may request through the `ranking` query parameter.
const RANKING_MODES = ['hybrid', 'colpali', 'bm25'] as const;

// Accepted length bounds (in characters) for the free-text `query` parameter.
const QUERY_MIN_LENGTH = 1;
const QUERY_MAX_LENGTH = 500;

/**
 * Validation schema for the main search request.
 *
 * - `query`: required string, between 1 and 500 characters long.
 * - `ranking`: one of the supported ranking modes; falls back to `'hybrid'`
 *   when the parameter is absent.
 */
const searchQuerySchema = z.object({
  query: z.string().min(QUERY_MIN_LENGTH).max(QUERY_MAX_LENGTH),
  ranking: z.enum(RANKING_MODES).optional().default('hybrid'),
});
// Vespa query plan (YQL statement + rank profile name) for each ranking mode.
// NOTE(review): the bm25/hybrid statements and the 'default' profile name are
// reproduced as-is; confirm them against the deployed Vespa schema.
const RANKING_PLANS = {
  colpali: {
    yql: `select * from linqto where userQuery() limit 20`,
    rankProfile: 'colpali',
  },
  bm25: {
    yql: `select * from linqto where userQuery() order by bm25_score desc limit 20`,
    rankProfile: 'default',
  },
  hybrid: {
    yql: `select * from linqto where userQuery() | rank (reciprocal_rank_fusion(bm25_score, max_sim)) limit 20`,
    rankProfile: 'default',
  },
};

/**
 * GET / — main search endpoint, forwarded directly to Vespa.
 *
 * Query parameters:
 * - `query`: the search text (validated by `searchQuerySchema`).
 * - `ranking`: optional ranking mode; an absent or empty value means `hybrid`.
 *
 * Responses:
 * - 400 with the validation issues when the parameters are rejected.
 * - 200 with the cached payload (`X-Cache: HIT`) when one exists for the
 *   query/ranking pair.
 * - 200 with the Vespa payload (`X-Cache: MISS`), in which every hit has been
 *   given a `fields.sim_map` identifier of the form `<uuid>_<hit index>`.
 * - 500 with the error message when anything throws, including a non-OK
 *   Vespa response.
 */
searchApp.get('/', async (c) => {
  try {
    const parsed = searchQuerySchema.safeParse({
      query: c.req.query('query'),
      ranking: c.req.query('ranking') || 'hybrid',
    });
    if (!parsed.success) {
      return c.json({ error: 'Invalid request', details: parsed.error.issues }, 400);
    }
    const { query: searchText, ranking: rankingMode } = parsed.data;

    // Serve from cache when this exact query/ranking pair was seen before.
    const cacheKey = ['search', searchText, rankingMode].join(':');
    const cachedPayload = cache.get(cacheKey);
    if (cachedPayload) {
      c.header('X-Cache', 'HIT');
      return c.json(cachedPayload);
    }

    // Pick the query plan; anything unexpected is treated like 'hybrid'.
    const plan = RANKING_PLANS[rankingMode] ?? RANKING_PLANS.hybrid;

    // Assemble the Vespa search URL (parameter order: yql, query, ranking, hits).
    const vespaParams = new URLSearchParams();
    vespaParams.append('yql', plan.yql);
    vespaParams.append('query', searchText);
    vespaParams.append('ranking', plan.rankProfile);
    vespaParams.append('hits', '20');

    const vespaResponse = await vespaRequest(
      `${config.vespaAppUrl}/search/?${vespaParams.toString()}`,
    );
    if (!vespaResponse.ok) {
      const upstreamBody = await vespaResponse.text();
      console.error('Vespa error:', upstreamBody);
      throw new Error(`Vespa returned ${vespaResponse.status}: ${upstreamBody}`);
    }
    const payload = await vespaResponse.json();

    // One identifier per request; each hit gets it suffixed with its position
    // so the result stays compatible with sim_map consumers.
    const simMapPrefix = uuidv4();
    const hits = payload.root?.children;
    if (hits) {
      hits.forEach((hit: any, position: number) => {
        if (!hit.fields) {
          hit.fields = {};
        }
        hit.fields.sim_map = `${simMapPrefix}_${position}`;
      });
    }

    // Remember the transformed payload for subsequent identical requests.
    cache.set(cacheKey, payload);
    c.header('X-Cache', 'MISS');
    return c.json(payload);
  } catch (error) {
    console.error('Search error:', error);
    const message = error instanceof Error ? error.message : 'Unknown error';
    return c.json({ error: 'Search failed', message }, 500);
  }
});
/**
 * Escapes a value so it can be embedded inside a double-quoted YQL string
 * literal: backslashes and double quotes are prefixed with a backslash.
 *
 * @param value - Untrusted text destined for a `"..."` YQL literal.
 * @returns The text with `\` and `"` backslash-escaped.
 */
function escapeYqlString(value: string): string {
  return value.replace(/[\\"]/g, '\\$&');
}

/**
 * GET /full-image — returns the base64-encoded image of a single document.
 *
 * Query parameters:
 * - `docId`: required identifier matched against the document `id` field.
 *
 * Responses:
 * - 400 when `docId` is missing.
 * - 200 `{ base64_image }` from cache (`X-Cache: HIT`) or from Vespa
 *   (`X-Cache: MISS`); the value comes from the hit's `full_image` field,
 *   falling back to `image`.
 * - 404 when no hit with an image field is found.
 * - 500 with the error message when Vespa answers with a non-OK status or
 *   anything else throws.
 */
searchApp.get('/full-image', async (c) => {
  try {
    const docId = c.req.query('docId');
    if (!docId) {
      return c.json({ error: 'docId is required' }, 400);
    }
    // Check cache
    const cacheKey = `fullimage:${docId}`;
    const cachedImage = cache.get<{ base64_image: string }>(cacheKey);
    if (cachedImage) {
      c.header('X-Cache', 'HIT');
      return c.json(cachedImage);
    }
    // Query Vespa for the document. `docId` comes straight from the request,
    // so it is escaped before being placed inside the quoted YQL literal;
    // otherwise a `"` in the value could terminate the literal and alter the
    // query.
    const searchUrl = `${config.vespaAppUrl}/search/`;
    const searchParams = new URLSearchParams({
      yql: `select * from linqto where id contains "${escapeYqlString(docId)}"`,
      hits: '1'
    });
    const response = await vespaRequest(`${searchUrl}?${searchParams}`);
    if (!response.ok) {
      throw new Error(`Vespa returned ${response.status}`);
    }
    const data = await response.json();
    if (data.root?.children?.[0]?.fields) {
      const fields = data.root.children[0].fields;
      const base64Image = fields.full_image || fields.image;
      if (base64Image) {
        const result = { base64_image: base64Image };
        cache.set(cacheKey, result, 86400); // 24 hours
        c.header('X-Cache', 'MISS');
        return c.json(result);
      }
    }
    return c.json({ error: 'Image not found' }, 404);
  } catch (error) {
    console.error('Full image error:', error);
    return c.json({
      error: 'Failed to fetch image',
      message: error instanceof Error ? error.message : 'Unknown error'
    }, 500);
  }
});
// Static suggestions for now.
const STATIC_SUGGESTIONS: readonly string[] = [
  'linqto bankruptcy',
  'linqto filing date',
  'linqto creditors',
  'linqto assets',
  'linqto liabilities',
  'linqto chapter 11',
  'linqto docket',
  'linqto plan',
  'linqto disclosure statement',
  'linqto claims',
];

// Upper bound on the number of suggestions returned per request.
const MAX_SUGGESTIONS = 5;

/**
 * GET /suggestions — query suggestions endpoint.
 *
 * Without a `query` parameter (or with an empty one) the first five static
 * suggestions are returned. Otherwise the response holds up to five static
 * suggestions that contain the query text, compared case-insensitively and
 * kept in list order.
 *
 * Responds with `{ suggestions: string[] }`; on an unexpected error the status
 * is 500 and `suggestions` is an empty array.
 */
searchApp.get('/suggestions', async (c) => {
  try {
    const needle = c.req.query('query')?.toLowerCase();
    const suggestions: string[] = [];
    for (const candidate of STATIC_SUGGESTIONS) {
      if (suggestions.length === MAX_SUGGESTIONS) {
        break;
      }
      // No needle means "no filter": every candidate qualifies.
      if (!needle || candidate.toLowerCase().includes(needle)) {
        suggestions.push(candidate);
      }
    }
    return c.json({ suggestions });
  } catch (error) {
    console.error('Suggestions error:', error);
    return c.json({
      error: 'Failed to fetch suggestions',
      suggestions: []
    }, 500);
  }
});
/**
 * Escapes the characters that are significant in HTML text and attribute
 * values (`&`, `<`, `>`, `"`, `'`) so untrusted text is rendered literally.
 *
 * @param value - Untrusted text to embed in an HTML document.
 * @returns The text with HTML-significant characters replaced by entities.
 */
function escapeHtml(value: string): string {
  return value
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * GET /similarity-maps — similarity maps endpoint (placeholder).
 *
 * Query parameters (all required): `queryId`, `idx`, `token`, `tokenIdx`.
 *
 * Responses:
 * - 400 when any of the four parameters is missing or empty.
 * - 200 with a placeholder HTML fragment that echoes `token` and `idx`.
 * - 500 with the error message when anything throws.
 */
searchApp.get('/similarity-maps', async (c) => {
  try {
    const queryId = c.req.query('queryId');
    const idx = c.req.query('idx');
    const token = c.req.query('token');
    const tokenIdx = c.req.query('tokenIdx');
    if (!queryId || !idx || !token || !tokenIdx) {
      return c.json({ error: 'Missing required parameters' }, 400);
    }
    // `token` and `idx` are caller-controlled and end up in an HTML response,
    // so they are escaped to prevent markup/script injection.
    const safeToken = escapeHtml(token);
    const safeIdx = escapeHtml(idx);
    // Return placeholder HTML
    const html = `
<div style="padding: 20px; text-align: center;">
<h3>Similarity Map</h3>
<p>Query: ${safeToken}</p>
<p>Document: ${safeIdx}</p>
<p style="color: #666;">
Similarity map generation requires the ColPali model.
This is a placeholder for the demo.
</p>
</div>
`;
    return c.html(html);
  } catch (error) {
    console.error('Similarity map error:', error);
    return c.json({
      error: 'Failed to generate similarity map',
      message: error instanceof Error ? error.message : 'Unknown error'
    }, 500);
  }
});
export { searchApp }; |