Spaces:
Running
Running
File size: 9,048 Bytes
c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f 5a63add c89f65f 5a63add c89f65f 5a63add c89f65f 5a63add c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f fa7eb7f c89f65f 5a63add fa7eb7f c89f65f 5a63add c89f65f 5a63add fa7eb7f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 |
"""Utility functions for AION Search."""
import json
import logging
import uuid
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, Optional
from src.config import CUTOUT_FOV, CUTOUT_SIZE, VCU_COST_PER_MILLION
from src.hf_logging import log_query_event, SESSION_ID
logger = logging.getLogger(__name__)
def cutout_url(ra: float, dec: float, fov: float = CUTOUT_FOV, size: int = CUTOUT_SIZE) -> str:
    """Build a CDS hips2fits cutout URL for a DESI Legacy Surveys color image.

    Args:
        ra: Right Ascension in degrees
        dec: Declination in degrees
        fov: Field of view in degrees
        size: Image size in pixels (used for both width and height)

    Returns:
        URL string for the cutout image
    """
    service = "https://alasky.cds.unistra.fr/hips-image-services/hips2fits"
    query = (
        "?hips=CDS/P/DESI-Legacy-Surveys/DR10/color"
        f"&ra={ra}&dec={dec}&fov={fov}&width={size}&height={size}&format=jpg"
    )
    return service + query
def log_zilliz_query(
    query_type: str,
    query_info: Dict[str, Any],
    result_count: int,
    query_time: float,
    cost_vcu: int = 0,
    request_id: Optional[str] = None,
    error_occurred: bool = False,
    error_message: Optional[str] = None,
    error_type: Optional[str] = None
) -> None:
    """Print Zilliz query info to terminal and log to HF dataset.

    Args:
        query_type: Type of query (e.g., "vector_search", "text_search")
        query_info: Dictionary containing query details
        result_count: Number of results returned
        query_time: Query execution time in seconds
        cost_vcu: Cost in vCU units
        request_id: Unique ID for this request
        error_occurred: Whether an error occurred
        error_message: Error message if error_occurred is True
        error_type: Type of error if error_occurred is True
    """
    timestamp = datetime.now().isoformat()
    # Translate the vCU unit cost into dollars using the configured rate.
    cost_usd = cost_vcu / 1e6 * VCU_COST_PER_MILLION

    # Terminal summary, bracketed by separator rules for visibility.
    separator = "=" * 80
    log_data = {
        "timestamp": timestamp,
        "query_type": query_type,
        "query_info": query_info,
        "result_count": result_count,
        "query_time_seconds": query_time,
        "cost_vCU": cost_vcu,
        "cost_usd": cost_usd,
    }
    print(f"\n{separator}")
    print(f"ZILLIZ QUERY: {query_type}")
    print(separator)
    print(json.dumps(log_data, indent=2))
    print(f"{separator}\n")

    # Lazy %-style args keep formatting off the hot path when INFO is disabled.
    logger.info(
        "%s results in %.3fs | %s vCU ($%.6f)",
        result_count, query_time, cost_vcu, cost_usd,
    )

    # Best-effort upload of the stats to the HF dataset; never let logging
    # failures propagate into the search path.
    try:
        payload = {
            "log_type": "zilliz_query_stats",
            "timestamp": timestamp,
            "query_type": query_type,
            "query_info": query_info,
            "result_count": result_count,
            "query_time_seconds": query_time,
            "cost_vcu": cost_vcu,
            "cost_usd": cost_usd,
            "error_occurred": error_occurred,
        }
        if request_id:
            payload["request_id"] = request_id
        if error_occurred:
            payload["error_message"] = error_message
            payload["error_type"] = error_type
        log_query_event(payload)
    except Exception as e:
        logger.error(f"Failed to send Zilliz stats to HF dataset: {e}")
def format_galaxy_count(count: int) -> str:
    """Render a galaxy count with a thousands separator.

    Args:
        count: Number of galaxies

    Returns:
        Formatted string (e.g., "259,636 galaxies")
    """
    return "{:,} galaxies".format(count)
def build_query_xml(
    text_queries: Optional[list] = None,
    text_weights: Optional[list] = None,
    image_queries: Optional[list] = None,
    image_weights: Optional[list] = None,
    rmag_min: Optional[float] = None,
    rmag_max: Optional[float] = None
) -> str:
    """Build XML representation of a query according to aql.md specification.

    Text content is XML-escaped so queries containing ``&``, ``<`` or ``>``
    still produce well-formed XML. If a weights list is omitted while its
    queries list is given, every term defaults to weight 1.0 (previously
    this raised TypeError on ``zip(..., None)``).

    Args:
        text_queries: List of text query strings
        text_weights: List of weight magnitudes for text queries (e.g., 1.0, -1.0, 2.0, -5.0)
        image_queries: List of dicts with 'ra', 'dec', 'fov' keys
        image_weights: List of weight magnitudes for image queries (e.g., 1.0, -1.0, 2.0, -5.0)
        rmag_min: Minimum r_mag filter value
        rmag_max: Maximum r_mag filter value

    Returns:
        XML string representation of the query (single line)
    """
    from xml.sax.saxutils import escape  # stdlib; local to keep module imports unchanged

    xml_parts = ['<query>']
    # Text query terms
    if text_queries:
        weights = text_weights if text_weights is not None else [1.0] * len(text_queries)
        xml_parts.append('<text>')
        for query, weight in zip(text_queries, weights):
            xml_parts.append('<term>')
            xml_parts.append(f'<weight>{weight}</weight>')
            # Escape user-provided text so the XML stays well-formed
            xml_parts.append(f'<content>{escape(str(query))}</content>')
            xml_parts.append('</term>')
        xml_parts.append('</text>')
    # Image reference queries (numeric coordinates; no escaping needed)
    if image_queries:
        weights = image_weights if image_weights is not None else [1.0] * len(image_queries)
        xml_parts.append('<image>')
        for img_query, weight in zip(image_queries, weights):
            xml_parts.append('<reference>')
            xml_parts.append(f'<ra>{img_query["ra"]}</ra>')
            xml_parts.append(f'<dec>{img_query["dec"]}</dec>')
            xml_parts.append(f'<fov>{img_query["fov"]}</fov>')
            xml_parts.append(f'<weight>{weight}</weight>')
            xml_parts.append('</reference>')
        xml_parts.append('</image>')
    # r_mag filter: "between" when both bounds given, otherwise gte/lte
    if rmag_min is not None or rmag_max is not None:
        xml_parts.append('<filters>')
        xml_parts.append('<filter>')
        xml_parts.append('<column>r_mag</column>')
        if rmag_min is not None and rmag_max is not None:
            xml_parts.append('<operator>between</operator>')
            xml_parts.append(f'<value_min>{rmag_min}</value_min>')
            xml_parts.append(f'<value_max>{rmag_max}</value_max>')
        elif rmag_min is not None:
            xml_parts.append('<operator>gte</operator>')
            xml_parts.append(f'<value>{rmag_min}</value>')
        else:
            xml_parts.append('<operator>lte</operator>')
            xml_parts.append(f'<value>{rmag_max}</value>')
        xml_parts.append('</filter>')
        xml_parts.append('</filters>')
    xml_parts.append('</query>')
    return ''.join(xml_parts)
def log_query_to_csv(
    query_xml: str,
    csv_path: str = "logs/query_log.csv",
    request_id: Optional[str] = None,
    error_occurred: bool = False,
    error_message: Optional[str] = None,
    error_type: Optional[str] = None
) -> None:
    """Print query XML to terminal and log to HF dataset.

    Args:
        query_xml: XML string representation of the query
        csv_path: Deprecated parameter (kept for backward compatibility)
        request_id: Unique ID for this request
        error_occurred: Whether an error occurred during search
        error_message: Error message if error_occurred is True
        error_type: Type of error if error_occurred is True
    """
    timestamp = datetime.now().isoformat()

    # Echo the query to the terminal between separator rules.
    separator = "=" * 80
    print(f"\n{separator}")
    print(f"QUERY EXECUTED AT: {timestamp}")
    print(separator)
    print(query_xml)
    print(f"{separator}\n")
    logger.info("Query printed to terminal")

    # Best-effort upload to the HF dataset; swallow failures so logging
    # never breaks the search path.
    try:
        payload = {
            "log_type": "aql_query",
            "timestamp": timestamp,
            "query_xml": query_xml,
            "error_occurred": error_occurred,
        }
        if request_id:
            payload["request_id"] = request_id
        if error_occurred:
            payload["error_message"] = error_message
            payload["error_type"] = error_type
        log_query_event(payload)
    except Exception as e:
        logger.error(f"Failed to send query log to HF dataset: {e}")
def log_click_event(
    request_id: Optional[str],
    rank: int,
    primary_key: str,
    ra: float,
    dec: float,
    r_mag: float,
    distance: float
) -> None:
    """Log a galaxy tile click event to HF dataset.

    Args:
        request_id: Unique ID for the search request that produced this galaxy
        rank: Position in search results (0-indexed)
        primary_key: Primary key of the clicked galaxy
        ra: Right ascension
        dec: Declination
        r_mag: r-band magnitude
        distance: Cosine similarity score
    """
    # Best-effort: a failed click log must never surface to the user.
    try:
        payload = dict(
            log_type="click_event",
            rank=rank,
            primary_key=primary_key,
            ra=ra,
            dec=dec,
            r_mag=r_mag,
            distance=distance,
        )
        if request_id:
            payload["request_id"] = request_id
        log_query_event(payload)
        logger.info(f"Logged click event: rank={rank}, primary_key={primary_key}")
    except Exception as e:
        logger.error(f"Failed to log click event: {e}")
|