import requests
import json
import re
import time
from typing import Dict, Any, List

# Ollama serves its HTTP API on port 11434 by default.
OLLAMA_BASE_URL = "http://localhost:11434"
MODEL_NAME = "hf.co/kshitijthakkar/loggenix-moe-0.3B-A0.1B-e3-lr7e5-b16-4090-v7-sft-v1-Q8_0-GGUF:Q8_0"

# Preset generation configurations; call_ollama_api() passes these values
# through to Ollama's "options" field.
INFERENCE_CONFIGS = {
    "Optimized for Speed": {
        "num_predict": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 40,
        "repeat_penalty": 1.1,
        "description": "Fast responses with limited output length"
    },
    "Middle-ground": {
        "num_predict": 2048,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 40,
        "repeat_penalty": 1.1,
        "description": "Balanced performance and output quality"
    },
    "Full Capacity": {
        "num_predict": 4096,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 40,
        "repeat_penalty": 1.1,
        "description": "Maximum output length with dynamic allocation"
    }
}
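
# "num_predict" caps the number of generated tokens; "temperature", "top_p",
# and "top_k" shape sampling randomness, and "repeat_penalty" discourages
# repetition. Illustrative lookup:
#   get_inference_configs()["Optimized for Speed"]["num_predict"]  # -> 512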


def get_inference_configs():
    """Get available inference configurations."""
    return INFERENCE_CONFIGS


def check_ollama_connection():
    """Check if Ollama is running and accessible."""
    try:
        response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
        return response.status_code == 200
    except requests.RequestException:
        return False


def list_ollama_models():
    """List available models in Ollama."""
    try:
        response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
        if response.status_code == 200:
            models = response.json().get("models", [])
            return [model["name"] for model in models]
        return []
    except requests.RequestException:
        return []


def load_model():
    """Check Ollama connection and model availability."""
    if not check_ollama_connection():
        raise ConnectionError(
            "Cannot connect to Ollama. Please make sure Ollama is running.\n"
            "Start Ollama with: ollama serve"
        )

    available_models = list_ollama_models()
    if MODEL_NAME not in available_models:
        print(f"Warning: Model '{MODEL_NAME}' not found in Ollama.")
        print(f"Available models: {available_models}")
        print(f"Pull your model with: ollama pull {MODEL_NAME}")
        return False

    print(f"Using Ollama model: {MODEL_NAME}")
    return True


def calculate_numbers(operation: str, num1: float, num2: float) -> Dict[str, Any]:
    """
    Sample tool to perform basic mathematical operations on two numbers.

    Args:
        operation: The operation to perform ('add', 'subtract', 'multiply', 'divide')
        num1: First number
        num2: Second number

    Returns:
        Dictionary with result and operation details
    """
    try:
        num1, num2 = float(num1), float(num2)
        op = operation.lower()

        if op == 'add':
            result = num1 + num2
        elif op == 'subtract':
            result = num1 - num2
        elif op == 'multiply':
            result = num1 * num2
        elif op == 'divide':
            if num2 == 0:
                return {"error": "Division by zero is not allowed"}
            result = num1 / num2
        else:
            return {"error": f"Unknown operation: {operation}"}

        return {
            "result": result,
            "operation": operation,
            "operands": [num1, num2],
            "formatted": f"{num1} {operation} {num2} = {result}"
        }
    except ValueError as e:
        return {"error": f"Invalid number format: {str(e)}"}
    except Exception as e:
        return {"error": f"Calculation error: {str(e)}"}
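
# Illustrative call:
#   calculate_numbers("divide", 10, 4)
#   -> {"result": 2.5, "operation": "divide", "operands": [10.0, 4.0],
#       "formatted": "10.0 divide 4.0 = 2.5"}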


# Registry mapping tool names to their callables and human-readable metadata.
AVAILABLE_TOOLS = {
    "calculate_numbers": {
        "function": calculate_numbers,
        "description": "Perform basic mathematical operations (add, subtract, multiply, divide) on two numbers",
        "parameters": {
            "operation": "The mathematical operation to perform",
            "num1": "First number",
            "num2": "Second number"
        }
    }
}
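
# Additional tools can be registered the same way. A hypothetical example
# (not part of the original registry):
#   AVAILABLE_TOOLS["current_unix_time"] = {
#       "function": lambda: {"result": time.time()},
#       "description": "Return the current Unix timestamp",
#       "parameters": {}
#   }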


def execute_tool_call(tool_name: str, **kwargs) -> Dict[str, Any]:
    """Execute a tool call with given parameters."""
    print(f"Executing tool: {tool_name} with parameters: {kwargs}")
    if tool_name not in AVAILABLE_TOOLS:
        return {"error": f"Unknown tool: {tool_name}"}

    try:
        tool_function = AVAILABLE_TOOLS[tool_name]["function"]
        result = tool_function(**kwargs)
        return {
            "tool_name": tool_name,
            "parameters": kwargs,
            "result": result
        }
    except Exception as e:
        print(f"Tool execution failed: {str(e)}")
        return {
            "tool_name": tool_name,
            "parameters": kwargs,
            "error": f"Tool execution error: {str(e)}"
        }
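
# Illustrative call (string arguments, as produced by parse_tool_calls below):
#   execute_tool_call("calculate_numbers", operation="multiply", num1="6", num2="7")
#   -> {"tool_name": "calculate_numbers",
#       "parameters": {"operation": "multiply", "num1": "6", "num2": "7"},
#       "result": {"result": 42.0, "operation": "multiply",
#                  "operands": [6.0, 7.0], "formatted": "6.0 multiply 7.0 = 42.0"}}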


def parse_tool_calls(text: str) -> list:
    """
    Parse tool calls from model output.
    Supports both formats:
    - [TOOL_CALL:tool_name(param1=value1, param2=value2)]
    - <tool_call>{"name": "tool_name", "parameters": {"param1": "value1", "param2": "value2"}}</tool_call>
    """
    tool_calls = []

    # Group 1 captures the full match; groups 2-3 capture the legacy bracket
    # format, groups 4-5 the JSON format. The JSON branch assumes a flat
    # (non-nested) "parameters" object.
    pattern = r'(\[TOOL_CALL:(\w+)\((.*?)\)\]|<tool_call>\s*{"name":\s*"(\w+)",\s*"parameters":\s*{([^}]*)}\s*}\s*</tool_call>)'
    matches = re.findall(pattern, text)
    print("Raw matches:", matches)

    for match in matches:
        full_match, old_tool_name, old_params, json_tool_name, json_params = match

        if old_tool_name:
            tool_name = old_tool_name
            params_str = old_params
            original_call = f"[TOOL_CALL:{tool_name}({params_str})]"

            try:
                # Naive key=value parsing; assumes values contain no commas.
                params = {}
                if params_str.strip():
                    param_pairs = params_str.split(',')
                    for pair in param_pairs:
                        if '=' in pair:
                            key, value = pair.split('=', 1)
                            key = key.strip()
                            value = value.strip().strip('"\'')
                            params[key] = value

                tool_calls.append({
                    "tool_name": tool_name,
                    "parameters": params,
                    "original_call": original_call
                })
            except Exception as e:
                print(f"Error parsing old format tool call '{tool_name}({params_str})': {e}")
                continue

        elif json_tool_name:
            tool_name = json_tool_name
            params_str = json_params
            original_call = full_match

            try:
                # Naive "key": "value" parsing; assumes a flat object whose
                # values contain no commas or colons.
                params = {}
                if params_str.strip():
                    param_pairs = params_str.split(',')
                    for pair in param_pairs:
                        if ':' in pair:
                            key, value = pair.split(':', 1)
                            key = key.strip().strip('"\'')
                            value = value.strip().strip('"\'')
                            params[key] = value

                tool_calls.append({
                    "tool_name": tool_name,
                    "parameters": params,
                    "original_call": original_call
                })
            except Exception as e:
                print(f"Error parsing JSON format tool call '{tool_name}': {e}")
                continue

    return tool_calls
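
# Illustrative parse of the legacy bracket format:
#   parse_tool_calls('[TOOL_CALL:calculate_numbers(operation="add", num1="2", num2="3")]')
#   -> [{"tool_name": "calculate_numbers",
#        "parameters": {"operation": "add", "num1": "2", "num2": "3"},
#        "original_call": '[TOOL_CALL:calculate_numbers(operation="add", num1="2", num2="3")]'}]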


def process_tool_calls(text: str) -> str:
    """Process tool calls in the generated text and replace them with results."""
    tool_calls = parse_tool_calls(text)

    if not tool_calls:
        return text

    processed_text = text

    for tool_call in tool_calls:
        tool_name = tool_call["tool_name"]
        parameters = tool_call["parameters"]
        original_call = tool_call["original_call"]

        try:
            if not isinstance(parameters, dict):
                raise ValueError(f"Invalid parameters for tool {tool_name}: {parameters}")

            result = execute_tool_call(tool_name, **parameters)
            tool_output = result.get("result")

            # Surface both executor-level errors and errors returned by the
            # tool itself (which are nested under "result").
            if "error" in result:
                replacement = f"[TOOL_ERROR: {result['error']}]"
            elif isinstance(tool_output, dict) and "error" in tool_output:
                replacement = f"[TOOL_ERROR: {tool_output['error']}]"
            elif isinstance(tool_output, dict) and "formatted" in tool_output:
                replacement = f"[TOOL_RESULT: {tool_output['formatted']}]"
            else:
                replacement = f"[TOOL_RESULT: {tool_output}]"

            processed_text = processed_text.replace(original_call, replacement)

        except Exception as e:
            print(f"Error processing tool call '{tool_name}': {e}")
            replacement = f"[TOOL_ERROR: Failed to process tool call: {str(e)}]"
            processed_text = processed_text.replace(original_call, replacement)

    return processed_text
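
# Illustrative end-to-end substitution: given model output containing
#   'The answer is [TOOL_CALL:calculate_numbers(operation="add", num1="125", num2="675")].'
# process_tool_calls executes the call and returns
#   'The answer is [TOOL_RESULT: 125.0 add 675.0 = 800.0].'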


def call_ollama_api(messages: List[Dict], config: Dict, stream: bool = False) -> str:
    """
    Make a request to the Ollama API.

    Args:
        messages: List of message dictionaries with 'role' and 'content'
        config: Configuration dictionary with inference parameters
        stream: Whether to stream the response

    Returns:
        Generated response text
    """
    # Flatten the chat messages into a single plain-text prompt for
    # /api/generate. (Ollama's /api/chat accepts structured messages
    # directly, but this module formats the transcript itself.)
    prompt = ""
    for msg in messages:
        if msg["role"] == "system":
            prompt += f"System: {msg['content']}\n\n"
        elif msg["role"] == "user":
            prompt += f"User: {msg['content']}\n\n"
        elif msg["role"] == "assistant":
            prompt += f"Assistant: {msg['content']}\n\n"

    prompt += "Assistant: "

    payload = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": stream,
        "options": {
            "num_predict": config.get("num_predict", 2048),
            "temperature": config.get("temperature", 0.7),
            "top_p": config.get("top_p", 0.9),
            "top_k": config.get("top_k", 40),
            "repeat_penalty": config.get("repeat_penalty", 1.1),
        }
    }

    try:
        if stream:
            return stream_ollama_response(payload)
        else:
            response = requests.post(
                f"{OLLAMA_BASE_URL}/api/generate",
                json=payload,
                timeout=300
            )
            response.raise_for_status()

            result = response.json()
            return result.get("response", "")
    except requests.RequestException as e:
        raise ConnectionError(f"Failed to connect to Ollama: {str(e)}")
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid response from Ollama: {str(e)}")
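
# Minimal usage sketch (assumes Ollama is running locally and MODEL_NAME has
# been pulled):
#   text = call_ollama_api(
#       [{"role": "user", "content": "Say hello."}],
#       INFERENCE_CONFIGS["Optimized for Speed"],
#   )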


def stream_ollama_response(payload: Dict) -> str:
    """Stream response from Ollama and return the complete text."""
    full_response = ""

    try:
        response = requests.post(
            f"{OLLAMA_BASE_URL}/api/generate",
            json=payload,
            stream=True,
            timeout=300
        )
        response.raise_for_status()

        # Ollama streams newline-delimited JSON: each chunk carries a
        # 'response' token, and the final chunk sets 'done' to true.
        for line in response.iter_lines():
            if line:
                try:
                    chunk = json.loads(line.decode('utf-8'))
                    if 'response' in chunk:
                        token = chunk['response']
                        full_response += token
                        print(token, end='', flush=True)

                    if chunk.get('done', False):
                        break
                except json.JSONDecodeError:
                    continue
    except requests.RequestException as e:
        raise ConnectionError(f"Streaming failed: {str(e)}")

    print()  # newline after the streamed output
    return full_response


def generate_response(system_prompt: str, user_input: str, config_name: str = "Middle-ground",
                      stream: bool = False) -> str:
    """
    Generate a response using the Ollama API with the given system prompt and user input.

    Args:
        system_prompt: System instruction for the model
        user_input: User's input message
        config_name: Configuration preset to use
        stream: Whether to stream the response

    Returns:
        Generated response text
    """
    if not load_model():
        return "Error: Model not available in Ollama"

    config = INFERENCE_CONFIGS[config_name]

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input}
    ]

    start_time = time.time()

    try:
        generated_response = call_ollama_api(messages, config, stream=stream)

        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.2f} seconds")

        # Execute any tool calls the model emitted and splice in their results.
        processed_response = process_tool_calls(generated_response)
        return processed_response
    except Exception as e:
        print(f"Error generating response: {str(e)}")
        return f"Error: {str(e)}"


def test_connection():
    """Test Ollama connection and model availability."""
    print("Testing Ollama connection...")

    if not check_ollama_connection():
        print("❌ Cannot connect to Ollama")
        print("Make sure Ollama is running: ollama serve")
        return False

    print("✅ Ollama is running")

    models = list_ollama_models()
    print(f"Available models: {models}")

    if MODEL_NAME not in models:
        print(f"❌ Model '{MODEL_NAME}' not found")
        print(f"Pull the model with: ollama pull {MODEL_NAME}")
        return False

    print(f"✅ Model '{MODEL_NAME}' is available")
    return True


def example_usage():
    """Example of how to use the system."""
    if not test_connection():
        return

    system_prompt = """You are a helpful AI assistant with access to tools. When you need to perform mathematical calculations, use the available tools by calling them in this format: [TOOL_CALL:calculate_numbers(operation="add", num1="10", num2="5")]

Available tools:
- calculate_numbers: Perform basic math operations (add, subtract, multiply, divide)
"""

    user_input = "What is 125 + 675? Please calculate this for me."

    print("Generating response...")
    response = generate_response(system_prompt, user_input, "Middle-ground", stream=True)
    print(f"\nFinal response: {response}")


if __name__ == "__main__":
    example_usage()