In [None]:
!pip install ai71 python-dotenv

In [None]:
import os
import time
from ai71 import AI71
from concurrent.futures import ThreadPoolExecutor, as_completed

# Optional, but a nice way to load environment variables from a .env file
from dotenv import load_dotenv

# Pull variables from a local .env file (if any) into the process
# environment before they are read below.
load_dotenv()

# Both settings come from the environment; either one is None when unset.
AI71_BASE_URL = os.environ.get("AI71_BASE_URL")
AI71_API_KEY = os.environ.get("AI71_API_KEY")

# Single shared client used by every request in this notebook.
client = AI71(base_url=AI71_BASE_URL, api_key=AI71_API_KEY)

def complete(
    client: AI71,
    messages: list[dict],
    model: str = "tiiuae/falcon3-10b-instruct",
    max_tokens: int = 100,
    n_retries: int = 5,
):
    """Send one chat-completion request, retrying when it fails.

    The request is always attempted at least once. Each time it raises,
    the failure is counted; while the count does not exceed ``n_retries``
    a message is printed, the function sleeps for as many seconds as there
    have been failures so far (1, 2, 3, ...), and the request is sent again.

    Args:
        client (AI71): The AI71 client.
        messages (list[dict]): The conversation to send (a list of messages).
        model (str): Model to use for completion.
        max_tokens (int): Maximum number of tokens to generate.
        n_retries (int): How many times to retry after a failed attempt.

    Returns:
        Whatever ``client.chat.completions.create`` returns, unchanged.

    Raises:
        Exception: The error from the last attempt, once more than
            ``n_retries`` attempts have failed.
    """
    request_kwargs = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
    }
    failures = 0
    while True:
        try:
            response = client.chat.completions.create(**request_kwargs)
        except Exception as error:
            failures += 1
            if failures > n_retries:
                # Retry budget exhausted: let the caller see the last error.
                raise
            print(f"Retrying for the {failures} time(s)... (error: {error})")
            # Wait a little longer after every additional failure.
            time.sleep(failures)
        else:
            return response

def batch_complete(
    client: AI71,
    list_of_messages: list[list[dict]],
    model: str = "tiiuae/falcon3-10b-instruct",
    max_tokens: int = 100,
    n_retries: int = 5,
    n_parallel: int = 10,
):
    """Run a batch of completions in parallel, preserving input order.

    Each conversation is submitted to a thread pool as one ``complete``
    call. Requests may finish in any order, but every result is stored at
    the index of the conversation that produced it, so ``results[i]``
    always belongs to ``list_of_messages[i]``.

    Args:
        client (AI71): The AI71 client.
        list_of_messages (list[list[dict]]): One list of messages per
            request (a list of conversations).
        model (str): Model to use for completion.
        max_tokens (int): Maximum number of tokens to generate.
        n_retries (int): Number of retries on failure, per request.
        n_parallel (int): Number of parallel requests (worker threads).

    Returns:
        list: One entry per input conversation, in input order. An entry is
        ``None`` when that request still raised after its retries; the
        error is printed rather than propagated.
    """
    # Materialise once so the input can be sized, even if an arbitrary
    # iterable was passed instead of a list.
    conversations = list(list_of_messages)

    # Pre-fill with None: slots of failed requests simply stay None.
    results = [None] * len(conversations)

    with ThreadPoolExecutor(max_workers=n_parallel) as executor:
        # Remember which input position each future belongs to, because
        # as_completed() yields futures in completion order, not submission
        # order.
        future_to_index = {
            executor.submit(
                complete, client, messages, model, max_tokens, n_retries
            ): index
            for index, messages in enumerate(conversations)
        }

        # Handle results as they complete so failures are reported promptly.
        for future in as_completed(future_to_index):
            try:
                results[future_to_index[future]] = future.result()
            except Exception as e:
                print(f"Request failed: {e}")

    return results

# Simple single request: one system prompt followed by one user question.
result = complete(
    client,
    messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "What is artificial intelligence?"},
    ],
)
print(result)

# Run a batch of requests: the same conversation 20 times, at most 10 in
# flight at once.
results = batch_complete(
    client=client,
    list_of_messages=20 * [
        [
            {"role": "system", "content": "You are a helpful assistant"},
            {"role": "user", "content": "What is artificial intelligence?"},
        ]
    ],
    n_parallel=10,
)
# Bare expression so the notebook displays the list as the cell output.
results