import requests
import json
import re
import time
from typing import Dict, Any, List
# Ollama configuration
OLLAMA_BASE_URL = "http://localhost:11434" # Default Ollama URL
MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0" # Replace with your actual model name in Ollama
# Inference configurations
INFERENCE_CONFIGS = {
"Optimized for Speed": {
"num_predict": 512,
"temperature": 0.7,
"top_p": 0.9,
"top_k": 40,
"repeat_penalty": 1.1,
"description": "Fast responses with limited output length"
},
"Middle-ground": {
"num_predict": 2048,
"temperature": 0.7,
"top_p": 0.9,
"top_k": 40,
"repeat_penalty": 1.1,
"description": "Balanced performance and output quality"
},
"Full Capacity": {
"num_predict": 4096,
"temperature": 0.7,
"top_p": 0.9,
"top_k": 40,
"repeat_penalty": 1.1,
"description": "Maximum output length with dynamic allocation"
}
}
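# The three presets share the same sampling settings (temperature, top_p, top_k,
# repeat_penalty) and differ only in num_predict, which caps the number of
# tokens Ollama will generate per response.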
def get_inference_configs():
"""Get available inference configurations"""
return INFERENCE_CONFIGS
def check_ollama_connection():
"""Check if Ollama is running and accessible"""
try:
response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
return response.status_code == 200
except requests.RequestException:
return False
def list_ollama_models():
"""List available models in Ollama"""
try:
response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
if response.status_code == 200:
models = response.json().get("models", [])
return [model["name"] for model in models]
return []
except requests.RequestException:
return []
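# Note: /api/tags returns JSON shaped like {"models": [{"name": "...", ...}, ...]};
# only each model's "name" field is used here.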
def load_model():
"""Check Ollama connection and model availability"""
if not check_ollama_connection():
raise ConnectionError(
"Cannot connect to Ollama. Please make sure Ollama is running.\n"
"Start Ollama with: ollama serve"
)
available_models = list_ollama_models()
if MODEL_NAME not in available_models:
print(f"Warning: Model '{MODEL_NAME}' not found in Ollama.")
print(f"Available models: {available_models}")
print(f"Pull your model with: ollama pull {MODEL_NAME}")
return False
print(f"Using Ollama model: {MODEL_NAME}")
return True
# ===== TOOL DEFINITIONS =====
def calculate_numbers(operation: str, num1: float, num2: float) -> Dict[str, Any]:
"""
Sample tool to perform basic mathematical operations on two numbers.
Args:
operation: The operation to perform ('add', 'subtract', 'multiply', 'divide')
num1: First number
num2: Second number
Returns:
Dictionary with result and operation details
"""
try:
num1, num2 = float(num1), float(num2)
if operation.lower() == 'add':
result = num1 + num2
elif operation.lower() == 'subtract':
result = num1 - num2
elif operation.lower() == 'multiply':
result = num1 * num2
elif operation.lower() == 'divide':
if num2 == 0:
return {"error": "Division by zero is not allowed"}
result = num1 / num2
else:
return {"error": f"Unknown operation: {operation}"}
return {
"result": result,
"operation": operation,
"operands": [num1, num2],
"formatted": f"{num1} {operation} {num2} = {result}"
}
except ValueError as e:
return {"error": f"Invalid number format: {str(e)}"}
except Exception as e:
return {"error": f"Calculation error: {str(e)}"}
# Tool registry
AVAILABLE_TOOLS = {
"calculate_numbers": {
"function": calculate_numbers,
"description": "Perform basic mathematical operations (add, subtract, multiply, divide) on two numbers",
"parameters": {
"operation": "The mathematical operation to perform",
"num1": "First number",
"num2": "Second number"
}
}
}
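# Registering another tool only requires adding an entry to AVAILABLE_TOOLS.
# The sketch below is a hypothetical example (echo_text is not part of the
# original tool set); any callable returning a dict works.
def echo_text(text: str) -> Dict[str, Any]:
    """Hypothetical example tool: echo the input back along with its length."""
    return {"result": text, "length": len(str(text))}

AVAILABLE_TOOLS["echo_text"] = {
    "function": echo_text,
    "description": "Echo the input text back (example tool)",
    "parameters": {"text": "The text to echo"}
}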
def execute_tool_call(tool_name: str, **kwargs) -> Dict[str, Any]:
"""Execute a tool call with given parameters"""
print(f"Executing tool: {tool_name} with parameters: {kwargs}")
if tool_name not in AVAILABLE_TOOLS:
return {"error": f"Unknown tool: {tool_name}"}
try:
tool_function = AVAILABLE_TOOLS[tool_name]["function"]
result = tool_function(**kwargs)
return {
"tool_name": tool_name,
"parameters": kwargs,
"result": result
}
except Exception as e:
print(f"Tool execution failed: {str(e)}")
return {
"tool_name": tool_name,
"parameters": kwargs,
"error": f"Tool execution error: {str(e)}"
}
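# Example: execute_tool_call("calculate_numbers", operation="add", num1="2", num2="3")
# returns {"tool_name": "calculate_numbers", "parameters": {...},
#          "result": {"result": 5.0, ..., "formatted": "2.0 add 3.0 = 5.0"}}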
def parse_tool_calls(text: str) -> List[Dict[str, Any]]:
"""
Parse tool calls from model output.
Supports both formats:
- [TOOL_CALL:tool_name(param1=value1, param2=value2)]
- {"name": "tool_name", "parameters": {"param1": "value1", "param2": "value2"}}
"""
tool_calls = []
    # One alternation covering both formats: group 1 is the full matched text,
    # groups 2-3 capture the bracket format, groups 4-5 the JSON format.
    # The JSON branch assumes a flat "parameters" object (no nested braces).
    pattern = r'(\[TOOL_CALL:(\w+)\((.*?)\)\]|\s*{"name":\s*"(\w+)",\s*"parameters":\s*{([^}]*)}\s*}\s*)'
    matches = re.findall(pattern, text)
    print("Raw tool-call matches:", matches)  # Debug output
for match in matches:
full_match, old_tool_name, old_params, json_tool_name, json_params = match
# Determine which format was matched
if old_tool_name: # Old format: [TOOL_CALL:tool_name(params)]
tool_name = old_tool_name
params_str = old_params
original_call = f"[TOOL_CALL:{tool_name}({params_str})]"
try:
params = {}
if params_str.strip():
param_pairs = params_str.split(',')
for pair in param_pairs:
if '=' in pair:
key, value = pair.split('=', 1)
key = key.strip()
value = value.strip().strip('"\'') # Remove quotes
params[key] = value
tool_calls.append({
"tool_name": tool_name,
"parameters": params,
"original_call": original_call
})
except Exception as e:
print(f"Error parsing old format tool call '{tool_name}({params_str})': {e}")
continue
        elif json_tool_name:  # JSON format: {"name": ..., "parameters": {...}}
tool_name = json_tool_name
params_str = json_params
            original_call = full_match.strip()  # Drop whitespace captured by the pattern so replace() targets just the call
try:
params = {}
if params_str.strip():
                    # Naive parse of flat JSON-like parameters; values containing
                    # ':' or ',' would need a real JSON parser
param_pairs = params_str.split(',')
for pair in param_pairs:
if ':' in pair:
key, value = pair.split(':', 1)
key = key.strip().strip('"\'')
value = value.strip().strip('"\'')
params[key] = value
tool_calls.append({
"tool_name": tool_name,
"parameters": params,
"original_call": original_call
})
except Exception as e:
print(f"Error parsing JSON format tool call '{tool_name}': {e}")
continue
return tool_calls
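# Example: parse_tool_calls('[TOOL_CALL:calculate_numbers(operation="add", num1="2", num2="3")]')
# -> [{"tool_name": "calculate_numbers",
#      "parameters": {"operation": "add", "num1": "2", "num2": "3"},
#      "original_call": '[TOOL_CALL:calculate_numbers(operation="add", num1="2", num2="3")]'}]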
def process_tool_calls(text: str) -> str:
"""Process tool calls in the generated text and replace with results"""
tool_calls = parse_tool_calls(text)
if not tool_calls:
return text
processed_text = text
for tool_call in tool_calls:
tool_name = tool_call["tool_name"]
parameters = tool_call["parameters"]
original_call = tool_call["original_call"]
try:
# Validate parameters before execution
if not isinstance(parameters, dict):
raise ValueError(f"Invalid parameters for tool {tool_name}: {parameters}")
# Execute tool
result = execute_tool_call(tool_name, **parameters)
            # Create replacement text; distinguish executor errors, tool-level
            # errors (e.g. division by zero), and successful results
            if "error" in result:
                replacement = f"[TOOL_ERROR: {result['error']}]"
            elif isinstance(result["result"], dict) and "error" in result["result"]:
                replacement = f"[TOOL_ERROR: {result['result']['error']}]"
            elif isinstance(result["result"], dict) and "formatted" in result["result"]:
                replacement = f"[TOOL_RESULT: {result['result']['formatted']}]"
            else:
                replacement = f"[TOOL_RESULT: {result['result']}]"
# Replace tool call with result
processed_text = processed_text.replace(original_call, replacement)
except Exception as e:
print(f"Error processing tool call '{tool_name}': {e}")
replacement = f"[TOOL_ERROR: Failed to process tool call: {str(e)}]"
processed_text = processed_text.replace(original_call, replacement)
return processed_text
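# Example: process_tool_calls('Sum: [TOOL_CALL:calculate_numbers(operation="add", num1="2", num2="3")]')
# -> 'Sum: [TOOL_RESULT: 2.0 add 3.0 = 5.0]'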
def call_ollama_api(messages: List[Dict], config: Dict, stream: bool = False) -> str:
"""
    Make a request to Ollama's /api/generate endpoint
Args:
messages: List of message dictionaries with 'role' and 'content'
config: Configuration dictionary with inference parameters
stream: Whether to stream the response
Returns:
Generated response text
"""
    # Flatten the chat messages into a single prompt string for /api/generate.
    # Adjust this template if your model expects a different chat format.
prompt = ""
for msg in messages:
if msg["role"] == "system":
prompt += f"System: {msg['content']}\n\n"
elif msg["role"] == "user":
prompt += f"User: {msg['content']}\n\n"
elif msg["role"] == "assistant":
prompt += f"Assistant: {msg['content']}\n\n"
prompt += "Assistant: "
payload = {
"model": MODEL_NAME,
"prompt": prompt,
"stream": stream,
"options": {
"num_predict": config.get("num_predict", 2048),
"temperature": config.get("temperature", 0.7),
"top_p": config.get("top_p", 0.9),
"top_k": config.get("top_k", 40),
"repeat_penalty": config.get("repeat_penalty", 1.1),
}
}
try:
if stream:
return stream_ollama_response(payload)
else:
response = requests.post(
f"{OLLAMA_BASE_URL}/api/generate",
json=payload,
                timeout=300  # 5-minute timeout
)
response.raise_for_status()
result = response.json()
return result.get("response", "")
except requests.RequestException as e:
raise ConnectionError(f"Failed to connect to Ollama: {str(e)}")
except json.JSONDecodeError as e:
raise ValueError(f"Invalid response from Ollama: {str(e)}")
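# Ollama also exposes a native chat endpoint (/api/chat) that accepts the
# messages list directly, which avoids the manual prompt flattening above.
# A minimal non-streaming sketch, assuming the model handles chat-formatted
# input (not verified against this particular model):
def call_ollama_chat_api(messages: List[Dict], config: Dict) -> str:
    """Sketch: query Ollama's /api/chat endpoint with a message list."""
    payload = {
        "model": MODEL_NAME,
        "messages": messages,
        "stream": False,
        "options": {
            "num_predict": config.get("num_predict", 2048),
            "temperature": config.get("temperature", 0.7),
        },
    }
    response = requests.post(f"{OLLAMA_BASE_URL}/api/chat", json=payload, timeout=300)
    response.raise_for_status()
    # Non-streaming /api/chat responses carry the text under message.content
    return response.json().get("message", {}).get("content", "")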
def stream_ollama_response(payload: Dict) -> str:
"""Stream response from Ollama and return complete text"""
full_response = ""
try:
response = requests.post(
f"{OLLAMA_BASE_URL}/api/generate",
json=payload,
stream=True,
timeout=300
)
response.raise_for_status()
for line in response.iter_lines():
if line:
try:
chunk = json.loads(line.decode('utf-8'))
if 'response' in chunk:
token = chunk['response']
full_response += token
print(token, end='', flush=True) # Print tokens as they come
if chunk.get('done', False):
break
except json.JSONDecodeError:
continue
except requests.RequestException as e:
raise ConnectionError(f"Streaming failed: {str(e)}")
print() # New line after streaming
return full_response
def generate_response(system_prompt: str, user_input: str, config_name: str = "Middle-ground",
stream: bool = False) -> str:
"""
Generate response using Ollama API with the given system prompt and user input.
Args:
system_prompt: System instruction for the model
user_input: User's input message
config_name: Configuration preset to use
stream: Whether to stream the response
Returns:
Generated response text
"""
# Load/check model
if not load_model():
return "Error: Model not available in Ollama"
    # Fall back to the default preset if an unknown config name is passed
    config = INFERENCE_CONFIGS.get(config_name, INFERENCE_CONFIGS["Middle-ground"])
# Prepare messages
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_input}
]
start_time = time.time()
try:
# Generate response using Ollama
generated_response = call_ollama_api(messages, config, stream=stream)
inference_time = time.time() - start_time
print(f"Inference time: {inference_time:.2f} seconds")
# Process any tool calls in the generated response
processed_response = process_tool_calls(generated_response)
return processed_response
except Exception as e:
print(f"Error generating response: {str(e)}")
return f"Error: {str(e)}"
# Example usage and testing functions
def test_connection():
"""Test Ollama connection and model availability"""
print("Testing Ollama connection...")
if not check_ollama_connection():
print("❌ Cannot connect to Ollama")
print("Make sure Ollama is running: ollama serve")
return False
print("✅ Ollama is running")
models = list_ollama_models()
print(f"Available models: {models}")
if MODEL_NAME not in models:
print(f"❌ Model '{MODEL_NAME}' not found")
print(f"Pull the model with: ollama pull {MODEL_NAME}")
return False
print(f"✅ Model '{MODEL_NAME}' is available")
return True
def example_usage():
"""Example of how to use the system"""
if not test_connection():
return
system_prompt = """You are a helpful AI assistant with access to tools. When you need to perform mathematical calculations, use the available tools by calling them in this format: [TOOL_CALL:calculate_numbers(operation="add", num1="10", num2="5")]
Available tools:
- calculate_numbers: Perform basic math operations (add, subtract, multiply, divide)
"""
user_input = "What is 125 + 675? Please calculate this for me."
print("Generating response...")
response = generate_response(system_prompt, user_input, "Middle-ground", stream=True)
print(f"\nFinal response: {response}")
if __name__ == "__main__":
# Update MODEL_NAME to match your model in Ollama
MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0" # Change this!
example_usage()