# Spaces: Sleeping  (page-status text captured with the file; not part of the program)
#!/usr/bin/env python3
"""
AI Web Agent using Julia Browser with Direct Groq Integration
No CrewAI - Pure implementation with function calling
"""
import gradio as gr | |
import os | |
import json | |
from typing import Dict, List, Any, Optional | |
import traceback | |
from groq import Groq | |
from julia_browser import AgentSDK | |
# Module-level singletons: the browser SDK handle used by every browser
# action, and the Groq API client used by the agent's chat loop.
browser: AgentSDK = AgentSDK()
groq_client: Groq = Groq(api_key=os.environ.get("GROQ_API_KEY"))
class BrowserActions:
    """Direct browser action implementations.

    Each action forwards to the module-level ``browser`` object and returns a
    dict with ``success`` (bool) and ``message`` (str).  On success the raw
    browser result is also included under ``data``.  Any exception raised by
    the browser call (or by reading its result) is caught and reported as
    ``success=False`` so callers never have to handle exceptions themselves.

    All actions are static methods, so they can be called either on the class
    (``BrowserActions.open_website(url)``) or on an instance.
    """

    @staticmethod
    def open_website(url: str) -> Dict[str, Any]:
        """Open a website and report the title of the loaded page."""
        try:
            result = browser.open_website(url)
            return {
                "success": True,
                "message": f"Opened: {result['title']} at {url}",
                "data": result,
            }
        except Exception as e:
            return {"success": False, "message": f"Error opening {url}: {e}"}

    @staticmethod
    def list_elements() -> Dict[str, Any]:
        """List all interactive elements as ``[id] type: text`` lines."""
        try:
            elements = browser.list_elements()
            element_list = [
                f"[{elem['id']}] {elem['type']}: {elem.get('text', 'N/A')}"
                for elem in elements.get("elements", [])
            ]
            message = (
                f"Found {elements['total_clickable']} clickable, "
                f"{elements['total_inputs']} inputs:\n" + "\n".join(element_list)
            )
            return {"success": True, "message": message, "data": elements}
        except Exception as e:
            return {"success": False, "message": f"Error listing elements: {e}"}

    @staticmethod
    def click_element(element_id: int) -> Dict[str, Any]:
        """Click an element by ID."""
        try:
            result = browser.click_element(element_id)
            return {
                "success": True,
                "message": f"Clicked: {result.get('element', 'Unknown')} - {result['status']}",
                "data": result,
            }
        except Exception as e:
            return {
                "success": False,
                "message": f"Error clicking element {element_id}: {e}",
            }

    @staticmethod
    def type_text(field_id: int, text: str) -> Dict[str, Any]:
        """Type text into the input field with the given ID."""
        try:
            result = browser.type_text(field_id, text)
            return {
                "success": True,
                "message": f"Typed '{text}' into field {field_id} - {result['status']}",
                "data": result,
            }
        except Exception as e:
            return {
                "success": False,
                "message": f"Error typing into field {field_id}: {e}",
            }

    @staticmethod
    def submit_form() -> Dict[str, Any]:
        """Submit the current form and report the resulting page title."""
        try:
            result = browser.submit_form()
            return {
                "success": True,
                "message": f"Form submitted - New page: {result.get('title', 'Unknown')}",
                "data": result,
            }
        except Exception as e:
            return {"success": False, "message": f"Error submitting form: {e}"}

    @staticmethod
    def get_page_info() -> Dict[str, Any]:
        """Get the current page title, URL and a 200-character content preview."""
        try:
            info = browser.get_page_info()
            message = (
                f"Title: {info['title']}\n"
                f"URL: {info['url']}\n"
                f"Content preview: {info['content'][:200]}..."
            )
            return {"success": True, "message": message, "data": info}
        except Exception as e:
            return {"success": False, "message": f"Error getting page info: {e}"}

    @staticmethod
    def scroll_down(chunks: int = 1) -> Dict[str, Any]:
        """Scroll down the page by ``chunks`` chunks."""
        try:
            result = browser.scroll_down(chunks)
            return {
                "success": True,
                "message": f"Scrolled down {chunks} chunks - Position: {result['position']}",
                "data": result,
            }
        except Exception as e:
            return {"success": False, "message": f"Error scrolling down: {e}"}

    @staticmethod
    def search_page(term: str) -> Dict[str, Any]:
        """Search for text on the current page and report the match count."""
        try:
            result = browser.search_page(term)
            return {
                "success": True,
                "message": f"Found {result.get('matches', 0)} matches for '{term}'",
                "data": result,
            }
        except Exception as e:
            return {"success": False, "message": f"Error searching page: {e}"}
# Registry of the browser tools the AI may call.
def _object_schema(properties=None, required=None):
    """Return a JSON-schema ``object`` description for a tool's arguments.

    The ``required`` key is only present when at least one name is given.
    """
    schema = {"type": "object", "properties": properties or {}}
    if required:
        schema["required"] = required
    return schema


def _tool(function, description, parameters):
    """Bundle a callable with its description and argument schema."""
    return {
        "function": function,
        "description": description,
        "parameters": parameters,
    }


# Maps tool name -> callable, human-readable description and argument schema.
AVAILABLE_FUNCTIONS = {
    "open_website": _tool(
        BrowserActions.open_website,
        "Open a website",
        _object_schema(
            {"url": {"type": "string", "description": "The URL to open"}},
            ["url"],
        ),
    ),
    "list_elements": _tool(
        BrowserActions.list_elements,
        "List all clickable elements and input fields on current page",
        _object_schema(),
    ),
    "click_element": _tool(
        BrowserActions.click_element,
        "Click an element by its ID number",
        _object_schema(
            {
                "element_id": {
                    "type": "integer",
                    "description": "The ID number of the element to click",
                }
            },
            ["element_id"],
        ),
    ),
    "type_text": _tool(
        BrowserActions.type_text,
        "Type text into an input field",
        _object_schema(
            {
                "field_id": {"type": "integer", "description": "The ID of the input field"},
                "text": {"type": "string", "description": "The text to type"},
            },
            ["field_id", "text"],
        ),
    ),
    "submit_form": _tool(
        BrowserActions.submit_form,
        "Submit the current form",
        _object_schema(),
    ),
    "get_page_info": _tool(
        BrowserActions.get_page_info,
        "Get current page title, URL and content",
        _object_schema(),
    ),
    "scroll_down": _tool(
        BrowserActions.scroll_down,
        "Scroll down the page",
        _object_schema(
            {
                "chunks": {
                    "type": "integer",
                    "description": "Number of chunks to scroll",
                    "default": 1,
                }
            }
        ),
    ),
    "search_page": _tool(
        BrowserActions.search_page,
        "Search for text within the current page",
        _object_schema(
            {"term": {"type": "string", "description": "Text to search for"}},
            ["term"],
        ),
    ),
}
class WebAutomationAgent:
    """AI Web Automation Agent with direct Groq integration.

    The agent runs a bounded chat loop against the module-level
    ``groq_client``: the model may request tools from ``AVAILABLE_FUNCTIONS``,
    each request is executed and its result is fed back, until the model
    answers without requesting a tool or the iteration cap is reached.
    """

    def __init__(self):
        """Fail fast when no API key is configured.

        Raises:
            ValueError: if ``GROQ_API_KEY`` is missing or empty.
        """
        if not os.getenv("GROQ_API_KEY"):
            raise ValueError("GROQ_API_KEY environment variable is required")

    @staticmethod
    def _parse_arguments(raw_arguments):
        """Decode a tool call's JSON argument string into keyword arguments.

        An empty string or JSON ``null`` is treated as "no arguments", which
        is what argument-less tools receive from some models.

        Raises:
            ValueError: if the text is not valid JSON or not a JSON object.
        """
        if not raw_arguments or not raw_arguments.strip():
            return {}
        parsed = json.loads(raw_arguments)  # JSONDecodeError is a ValueError
        if parsed is None:
            return {}
        if not isinstance(parsed, dict):
            raise ValueError("tool arguments must be a JSON object")
        return parsed

    def _run_tool_call(self, tool_call):
        """Execute one requested tool call.

        Returns:
            ``(log_lines, result)`` where ``log_lines`` are the entries to add
            to the execution log and ``result`` is the dict to send back to
            the model as the tool's output.  A result is produced for every
            call, including failures, because each tool call in the
            conversation must be answered.
        """
        function_name = tool_call.function.name
        try:
            function_args = self._parse_arguments(tool_call.function.arguments)
        except ValueError as e:
            error_msg = f"Invalid arguments for {function_name}: {e}"
            return (
                [f"❌ **Error**: {error_msg}"],
                {"success": False, "message": error_msg},
            )

        log_lines = [f"🔧 **Executing**: {function_name}({function_args})"]
        tool = AVAILABLE_FUNCTIONS.get(function_name)
        if tool is None:
            error_msg = f"Unknown function: {function_name}"
            log_lines.append(f"❌ **Error**: {error_msg}")
            return log_lines, {"success": False, "message": error_msg}

        try:
            result = tool["function"](**function_args)
            log_lines.append(f"✅ **Result**: {result['message']}")
        except Exception as e:
            # Covers wrong/missing keyword arguments as well as tool failures.
            error_msg = f"Error executing {function_name}: {e}"
            log_lines.append(f"❌ **Error**: {error_msg}")
            result = {"success": False, "message": error_msg}
        return log_lines, result

    def execute_task(self, user_instruction: str) -> str:
        """Execute a web automation task using function calling.

        Args:
            user_instruction: Natural-language description of the task.

        Returns:
            A Markdown log of the model's messages, the tools it executed and
            their results, joined by blank lines.  Any exception raised while
            talking to the API is returned as an error message with a
            traceback instead of being raised.
        """
        # Tool definitions in the shape the chat-completions API expects.
        functions = [
            {
                "type": "function",
                "function": {
                    "name": name,
                    "description": func_info["description"],
                    "parameters": func_info["parameters"],
                },
            }
            for name, func_info in AVAILABLE_FUNCTIONS.items()
        ]

        system_prompt = (
            "You are a web automation expert. Execute the user's web automation"
            " task step by step using the available browser functions.\n"
            "Available functions:\n"
            "- open_website(url) - Open any website\n"
            "- list_elements() - See all clickable elements and inputs on page\n"
            "- click_element(element_id) - Click buttons, links by their ID number\n"
            "- type_text(field_id, text) - Type into input fields by ID\n"
            "- submit_form() - Submit forms\n"
            "- get_page_info() - Get page details\n"
            "- scroll_down(chunks) - Scroll to see more content\n"
            "- search_page(term) - Find text on current page\n"
            "Work step by step:\n"
            "1. First understand what the user wants\n"
            "2. Open the website if needed\n"
            "3. List elements to see what's available\n"
            "4. Interact with elements as needed\n"
            "5. Provide clear feedback on each step\n"
            "Always explain what you're doing and why."
        )
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_instruction},
        ]
        execution_log = []
        max_iterations = 10
        try:
            for _ in range(max_iterations):
                response = groq_client.chat.completions.create(
                    model="moonshotai/kimi-k2-instruct",
                    messages=messages,
                    tools=functions,
                    tool_choice="auto",
                    max_tokens=1000,
                    temperature=0.1,
                )
                message = response.choices[0].message
                tool_calls = message.tool_calls or []

                # Echo the assistant turn back as plain dicts, and only include
                # "tool_calls" when there are some, so the history stays
                # JSON-serialisable and never carries a null tool_calls field.
                assistant_message = {"role": "assistant", "content": message.content}
                if tool_calls:
                    assistant_message["tool_calls"] = [
                        {
                            "id": tool_call.id,
                            "type": "function",
                            "function": {
                                "name": tool_call.function.name,
                                "arguments": tool_call.function.arguments,
                            },
                        }
                        for tool_call in tool_calls
                    ]
                messages.append(assistant_message)

                if message.content:
                    execution_log.append(f"🤖 **AI**: {message.content}")

                if not tool_calls:
                    # No more function calls, task completed
                    break

                for tool_call in tool_calls:
                    log_lines, result = self._run_tool_call(tool_call)
                    execution_log.extend(log_lines)
                    messages.append({
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        # The browser's raw "data" payload is opaque; stringify
                        # anything json cannot encode instead of failing.
                        "content": json.dumps(result, default=str),
                    })
            else:
                execution_log.append(
                    f"⚠️ **Stopped**: reached the limit of {max_iterations} steps "
                    "before the task finished"
                )
            return "\n\n".join(execution_log)
        except Exception as e:
            return f"❌ **Error**: {e}\n\n{traceback.format_exc()}"
# Single agent instance used by the UI callbacks. Construction raises
# ValueError when GROQ_API_KEY is not set.
agent: WebAutomationAgent = WebAutomationAgent()
def execute_user_task(message: str, history: List[List[str]]) -> tuple:
    """Process a user message and run it through the agent.

    Args:
        message: Text from the input box; ``None`` or blank text is ignored.
        history: Chat history as ``[user, bot]`` pairs.  ``None`` is treated
            as an empty history.  A provided list is extended in place.

    Returns:
        ``(history, "")`` - the updated history and an empty string that
        clears the input box.
    """
    if history is None:
        history = []
    if not message or not message.strip():
        return history, ""
    try:
        reply = agent.execute_task(message)
    except Exception as e:
        reply = f"❌ **Error**: {e}"
    history.append([message, reply])
    return history, ""
def clear_history():
    """Return an empty chat history and an empty input-box value."""
    empty_chat, empty_input = [], ""
    return empty_chat, empty_input
# Example instructions offered as one-click buttons in the UI (one per line).
sample_tasks = """\
Open google.com and search for 'web automation'
Go to example.com and list all elements on the page
Navigate to github.com and find the login button
Open a news website and get the page information
Visit stackoverflow.com and scroll down to see more content
""".splitlines()
# Create Gradio Interface
with gr.Blocks(title="AI Web Agent", theme=gr.themes.Soft()) as demo:
    # Page header. The model name matches the one requested in
    # WebAutomationAgent.execute_task.
    gr.HTML("""
    <div style="text-align: center; margin: 20px;">
    <h1>🤖 AI Web Automation Agent</h1>
    <p><strong>Julia Browser + Direct Groq Integration (Kimi K2)</strong></p>
    <p>Pure implementation without CrewAI - Function calling with Groq!</p>
    </div>
    """)

    # Main chat interface
    chatbot = gr.Chatbot(
        label="Agent Execution",
        height=600,
        show_copy_button=True,
    )

    # Centered input section (empty side columns act as spacers)
    with gr.Row():
        with gr.Column(scale=1):
            pass  # Left spacer
        with gr.Column(scale=3):
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="Tell me what to do on the web...",
                    container=False,
                    scale=4,
                )
                send_btn = gr.Button("🚀 Execute", variant="primary", scale=1)
                clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
        with gr.Column(scale=1):
            pass  # Right spacer

    # Sample tasks section
    with gr.Row():
        with gr.Column(scale=1):
            pass  # Left spacer
        with gr.Column(scale=2):
            gr.HTML("<h3 style='text-align: center;'>📋 Sample Tasks</h3>")
            for i, task in enumerate(sample_tasks):
                sample_btn = gr.Button(
                    f"Sample {i+1}: {task[:35]}...",
                    # "secondary" is the documented subdued style; "outline"
                    # is not a Button variant.
                    variant="secondary",
                    size="sm",
                )
                # Bind the task as a default argument so each button fills in
                # its own text rather than the loop's last value.
                sample_btn.click(
                    lambda t=task: t,
                    outputs=user_input,
                )
            with gr.Row():
                with gr.Column():
                    gr.HTML("""
                    <div style="padding: 15px; background: #f8f9fa; border-radius: 8px;">
                    <h4>💡 Features:</h4>
                    <ul style="font-size: 12px;">
                    <li>Direct Groq function calling</li>
                    <li>No CrewAI dependencies</li>
                    <li>Step-by-step execution</li>
                    <li>Real browser automation</li>
                    </ul>
                    </div>
                    """)
                with gr.Column():
                    gr.HTML("""
                    <div style="padding: 15px; background: #e3f2fd; border-radius: 8px;">
                    <h4>⚙️ Setup:</h4>
                    <p style="font-size: 12px;">
                    Set GROQ_API_KEY:<br>
                    <code>export GROQ_API_KEY="gsk_..."</code>
                    </p>
                    </div>
                    """)
        with gr.Column(scale=1):
            pass  # Right spacer

    # Event handlers: the button and Enter both run the task and clear the box.
    send_btn.click(
        execute_user_task,
        inputs=[user_input, chatbot],
        outputs=[chatbot, user_input],
    )
    user_input.submit(
        execute_user_task,
        inputs=[user_input, chatbot],
        outputs=[chatbot, user_input],
    )
    clear_btn.click(
        clear_history,
        outputs=[chatbot, user_input],
    )
if __name__ == "__main__":
    # Check for API key
    if not os.getenv("GROQ_API_KEY"):
        print("⚠️ Warning: GROQ_API_KEY not found in environment variables")
        print("Set it with: export GROQ_API_KEY='your_api_key_here'")

    print("🚀 Starting AI Web Automation Agent (Direct Implementation)...")
    print("📋 Available browser functions:")
    for name, info in AVAILABLE_FUNCTIONS.items():
        print(f" - {name}: {info['description']}")

    # For Hugging Face Spaces: honour PORT when set, otherwise use 7860.
    port = int(os.getenv("PORT", "7860"))
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces
        server_port=port,
        share=False,
        show_error=True,
    )