File size: 3,139 Bytes
5dfbe50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import { Hono } from 'hono';
import { z } from 'zod';
import { config } from '../config';
import { cache } from '../services/cache';

/** Hono sub-application on which the search route is registered. */
const colpaliSearchApp = new Hono();

// Ranking modes accepted in the `ranking` query parameter.
const RANKING_MODES = ['hybrid', 'colpali', 'bm25'] as const;

// Search request schema: `query` must be 1-500 characters long; `ranking`
// must be one of RANKING_MODES and falls back to 'hybrid' when omitted.
const searchQuerySchema = z.object({
  query: z.string().min(1).max(500),
  ranking: z.enum(RANKING_MODES).optional().default('hybrid'),
});

/**
 * Returns the YQL statement used for the given ranking mode.
 *
 * Any value other than 'colpali' or 'bm25' (including 'hybrid') yields the
 * hybrid statement.
 *
 * NOTE(review): the `order by bm25_score` clause and the
 * `| rank (reciprocal_rank_fusion(...))` suffix are kept exactly as they were;
 * confirm against the Vespa YQL grammar that both are accepted by the target
 * application.
 */
function buildYql(ranking: string): string {
  switch (ranking) {
    case 'colpali':
      return `select * from linqto where userQuery() limit 20`;
    case 'bm25':
      return `select * from linqto where userQuery() order by bm25_score desc limit 20`;
    case 'hybrid':
    default:
      return `select * from linqto where userQuery() | rank (reciprocal_rank_fusion(bm25_score, max_sim)) limit 20`;
  }
}

/**
 * Returns a copy of a Vespa hit whose `fields.sim_map` is always set.
 *
 * An existing truthy `sim_map` is kept; otherwise a positional placeholder
 * (`sim_map_<idx>`) is used. A hit without a `fields` object no longer throws:
 * it simply receives a `fields` object that contains only the placeholder.
 *
 * The hit is typed `any` because the Vespa payload is passed through verbatim
 * and its schema is not declared in this file.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function withSimMapFallback(hit: any, idx: number): any {
  return {
    ...hit,
    fields: {
      // Spreading `undefined` is a no-op, so a missing `fields` is safe here.
      ...hit.fields,
      // Add sim_map field if not present (for compatibility)
      sim_map: hit.fields?.sim_map || `sim_map_${idx}`,
    },
  };
}

/**
 * Main search endpoint - direct to Vespa.
 *
 * Query parameters: `query` (required) and `ranking` (optional, 'hybrid' when
 * missing or empty), validated with `searchQuerySchema`.
 *
 * Responses:
 * - 400 with the zod issues when validation fails.
 * - 200 with the cached payload and `X-Cache: HIT` when the cache has an entry.
 * - 200 with the transformed Vespa payload and `X-Cache: MISS` otherwise; the
 *   payload is stored in the cache before responding.
 * - 500 with `{ error, message }` when anything throws, including a non-2xx
 *   status from Vespa.
 */
colpaliSearchApp.get('/', async (c) => {
  try {
    const query = c.req.query('query');
    const ranking = c.req.query('ranking') || 'hybrid';

    const validation = searchQuerySchema.safeParse({ query, ranking });

    if (!validation.success) {
      return c.json({ error: 'Invalid request', details: validation.error.issues }, 400);
    }

    const validatedData = validation.data;

    // Check cache
    const cacheKey = `search:${validatedData.query}:${validatedData.ranking}`;
    const cachedResult = cache.get(cacheKey);

    if (cachedResult) {
      c.header('X-Cache', 'HIT');
      return c.json(cachedResult);
    }

    // Query Vespa directly. Only the 'colpali' mode selects its own rank
    // profile; 'bm25' and 'hybrid' both use 'default'.
    const searchUrl = `${config.vespaAppUrl}/search/`;
    const searchParams = new URLSearchParams({
      yql: buildYql(validatedData.ranking),
      query: validatedData.query,
      ranking: validatedData.ranking === 'colpali' ? 'colpali' : 'default',
      'summary': 'default',
      'format': 'json'
    });

    // For now, using direct fetch without certificate authentication
    // In production, you would use a proxy or configure certificates properly
    const response = await fetch(`${searchUrl}?${searchParams}`, {
      method: 'GET',
      headers: {
        'Accept': 'application/json',
      }
    });

    if (!response.ok) {
      throw new Error(`Vespa returned ${response.status}`);
    }

    const data = await response.json();

    // Transform to match expected format: every hit gets a sim_map, and
    // `root.children` is always an array even when Vespa returned none.
    const transformedData = {
      ...data,
      root: {
        ...data.root,
        children: data.root?.children?.map(withSimMapFallback) || []
      }
    };

    // Cache the result
    cache.set(cacheKey, transformedData);
    c.header('X-Cache', 'MISS');

    return c.json(transformedData);
  } catch (error) {
    console.error('Search error:', error);
    return c.json({
      error: 'Search failed',
      message: error instanceof Error ? error.message : 'Unknown error'
    }, 500);
  }
});

export { colpaliSearchApp };