Initial Submission
Browse files- README.md +14 -14
- app.py +259 -196
- graph/__init__.py +0 -0
- graph/__pycache__/__init__.cpython-313.pyc +0 -0
- graph/__pycache__/graph_builder.cpython-313.pyc +0 -0
- graph/graph_builder.py +24 -0
- nodes/__init__.py +0 -0
- nodes/__pycache__/__init__.cpython-313.pyc +0 -0
- nodes/__pycache__/core.cpython-313.pyc +0 -0
- nodes/core.py +90 -0
- requirements.txt +13 -2
- states/__init__.py +0 -0
- states/__pycache__/__init__.cpython-313.pyc +0 -0
- states/__pycache__/state.cpython-313.pyc +0 -0
- states/state.py +7 -0
- tools/__init__.py +46 -0
- tools/__pycache__/__init__.cpython-313.pyc +0 -0
- tools/__pycache__/langchain_tools.cpython-313.pyc +0 -0
- tools/__pycache__/math_tools.cpython-313.pyc +0 -0
- tools/__pycache__/multimodal_tools.cpython-313.pyc +0 -0
- tools/__pycache__/search_tools.cpython-313.pyc +0 -0
- tools/__pycache__/utils.cpython-313.pyc +0 -0
- tools/__pycache__/youtube_tools.cpython-313.pyc +0 -0
- tools/langchain_tools.py +128 -0
- tools/math_tools.py +206 -0
- tools/multimodal_tools.py +166 -0
- tools/search_tools.py +223 -0
- tools/utils.py +36 -0
- tools/youtube_tools.py +315 -0
README.md
CHANGED
@@ -1,15 +1,15 @@
|
|
1 |
-
---
|
2 |
-
title: Template Final Assignment
|
3 |
-
emoji: 🕵🏻♂️
|
4 |
-
colorFrom: indigo
|
5 |
-
colorTo: indigo
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 5.25.2
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
hf_oauth: true
|
11 |
-
# optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
|
12 |
-
hf_oauth_expiration_minutes: 480
|
13 |
-
---
|
14 |
-
|
15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
---
|
2 |
+
title: Template Final Assignment
|
3 |
+
emoji: 🕵🏻♂️
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: indigo
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 5.25.2
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
hf_oauth: true
|
11 |
+
# optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
|
12 |
+
hf_oauth_expiration_minutes: 480
|
13 |
+
---
|
14 |
+
|
15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -1,196 +1,259 @@
|
|
1 |
-
import os
|
2 |
-
import gradio as gr
|
3 |
-
import requests
|
4 |
-
import inspect
|
5 |
-
import pandas as pd
|
6 |
-
|
7 |
-
|
8 |
-
#
|
9 |
-
|
10 |
-
|
11 |
-
#
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
def
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
print(
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
return
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
print(
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
import requests
|
4 |
+
import inspect
|
5 |
+
import pandas as pd
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
# Load environment variables
|
9 |
+
load_dotenv()
|
10 |
+
|
11 |
+
# Import your LangGraph agent
|
12 |
+
from graph.graph_builder import graph
|
13 |
+
from langchain_core.messages import HumanMessage
|
14 |
+
|
15 |
+
# (Keep Constants as is)
|
16 |
+
# --- Constants ---
|
17 |
+
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
18 |
+
|
19 |
+
# --- Your LangGraph Agent Definition ---
|
20 |
+
# ----- THIS IS WHERE YOU BUILD YOUR AGENT ------
|
21 |
+
class BasicAgent:
    """Callable wrapper that answers one question with the compiled LangGraph agent."""

    # Lead-in phrases that are stripped from the start of the model's reply.
    # Matched case-insensitively and without requiring a space after the colon.
    _ANSWER_PREFIXES = (
        "the final answer is:",
        "the answer is:",
        "the result is:",
        "answer:",
        "result:",
    )

    def __init__(self):
        """Initialize the LangGraph agent.

        Raises:
            ValueError: if OPENROUTER_API_KEY is not set in the environment.
        """
        print("LangGraph Agent initialized with multimodal, search, math, and YouTube tools.")

        # Verify environment variables
        if not os.getenv("OPENROUTER_API_KEY"):
            raise ValueError("OPENROUTER_API_KEY not found in environment variables")

        # The graph is already compiled and ready to use
        self.graph = graph
        print("✅ Agent ready with tools: multimodal, search, math, YouTube")

    @staticmethod
    def _content_to_text(content) -> str:
        """Flatten message content (a string or a list of content blocks) into text."""
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = []
            for block in content:
                if isinstance(block, str):
                    pieces.append(block)
                elif isinstance(block, dict) and isinstance(block.get("text"), str):
                    pieces.append(block["text"])
            return "".join(pieces)
        return str(content)

    @staticmethod
    def _clean_answer(content) -> str:
        """Return the bare answer: text-only, trimmed, without a lead-in phrase."""
        answer = BasicAgent._content_to_text(content).strip()
        for prefix in BasicAgent._ANSWER_PREFIXES:
            # Compare a same-length slice so the cut position always matches
            # the original (un-lowered) string.
            if answer[:len(prefix)].lower() == prefix:
                return answer[len(prefix):].strip()
        return answer

    def __call__(self, question: str) -> str:
        """
        Process a question using the LangGraph agent and return just the answer

        Args:
            question: The question to answer

        Returns:
            str: The final answer (formatted for evaluation). If anything
            raises while the graph runs, the returned string is an
            "Error processing question: ..." message instead.
        """
        print(f"🤖 Processing question: {question[:50]}...")

        try:
            # Create initial state with the question
            initial_state = {"messages": [HumanMessage(content=question)]}

            # Run the LangGraph agent
            result = self.graph.invoke(initial_state)

            # The last message in the returned state carries the final reply.
            # Its content is always normalized to a plain string so the
            # declared return type holds even for list-of-blocks content.
            answer = self._clean_answer(result["messages"][-1].content)

            print(f"✅ Agent answer: {answer}")
            return answer

        except Exception as e:
            error_msg = f"Error processing question: {str(e)}"
            print(f"❌ {error_msg}")
            return error_msg
|
83 |
+
|
84 |
+
# Keep the rest of the file unchanged (run_and_submit_all function and Gradio interface)
|
85 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Run the agent over every evaluation question and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in.

    Returns:
        tuple: (status text, results). Results is a pandas DataFrame built
        from the per-question log, or None when the run stops before any
        question is processed (no login, agent init failure, fetch failure).
    """
    # SPACE_ID is used below to build the link to this Space's code
    space_id = os.getenv("SPACE_ID")

    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate the agent
    try:
        agent = BasicAgent()
    except Exception as init_error:
        print(f"Error instantiating agent: {init_error}")
        return f"Error initializing agent: {init_error}", None

    # Link to the codebase of the Space; sent along with the submission
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch the questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.RequestException as fetch_error:
        print(f"Error fetching questions: {fetch_error}")
        return f"Error fetching questions: {fetch_error}", None
    except Exception as fetch_error:
        print(f"An unexpected error occurred fetching questions: {fetch_error}")
        return f"An unexpected error occurred fetching questions: {fetch_error}", None

    # 3. Run the agent on each question
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for entry in questions_data:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {entry}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
        except Exception as run_error:
            # Failed tasks are logged for display but not submitted
            print(f"Error running agent on task {task_id}: {run_error}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {run_error}"})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare the submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    def _report_failure(message):
        """Print a failure status and pair it with the results table."""
        print(message)
        return message, pd.DataFrame(results_log)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as http_error:
        failed_response = http_error.response
        error_detail = f"Server responded with status {failed_response.status_code}."
        try:
            error_json = failed_response.json()
            error_detail += f" Detail: {error_json.get('detail', failed_response.text)}"
        except requests.exceptions.JSONDecodeError:
            # Body was not JSON; show the start of the raw text instead
            error_detail += f" Response: {failed_response.text[:500]}"
        return _report_failure(f"Submission Failed: {error_detail}")
    except requests.exceptions.Timeout:
        return _report_failure("Submission Failed: The request timed out.")
    except requests.exceptions.RequestException as network_error:
        return _report_failure(f"Submission Failed: Network error - {network_error}")
    except Exception as unexpected_error:
        return _report_failure(f"An unexpected error occurred during submission: {unexpected_error}")
|
201 |
+
|
202 |
+
# --- Build Gradio Interface using Blocks ---
|
203 |
+
with gr.Blocks() as demo:
    # Page heading followed by the usage instructions
    gr.Markdown("# LangGraph Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        This space uses a LangGraph agent with multimodal, search, math, and YouTube tools powered by OpenRouter.

        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        **Agent Capabilities:**
        - 🎨 **Multimodal**: Analyze images, extract text (OCR), process audio transcripts
        - 🔍 **Search**: Web search using multiple providers (DuckDuckGo, Tavily, SerpAPI)
        - 🧮 **Math**: Basic arithmetic, complex calculations, percentages, factorials
        - 📺 **YouTube**: Extract captions, get video information

        ---
        **Note:** Processing all questions may take some time as the agent carefully analyzes each question and uses appropriate tools.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Output widgets: run status text and the per-question results table
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs are wired; the handler's two return values fill the outputs
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
|
236 |
+
|
237 |
+
if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)

    # SPACE_HOST / SPACE_ID are read only to print startup information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if not space_host_startup:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
    else:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")

    if not space_id_startup:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
    else:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")

    # Closing rule is as wide as the opening banner (30 + title + 30)
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for LangGraph Agent Evaluation...")
    demo.launch(debug=True, share=False)
|
graph/__init__.py
ADDED
File without changes
|
graph/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (180 Bytes). View file
|
|
graph/__pycache__/graph_builder.cpython-313.pyc
ADDED
Binary file (892 Bytes). View file
|
|
graph/graph_builder.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# graph/graph_builder.py (unchanged)
|
2 |
+
from langgraph.graph import START, StateGraph
|
3 |
+
from langgraph.prebuilt import tools_condition
|
4 |
+
from langgraph.prebuilt import ToolNode
|
5 |
+
from nodes.core import assistant, tools
|
6 |
+
from states.state import AgentState
|
7 |
+
|
8 |
+
## The graph
|
9 |
+
builder = StateGraph(AgentState)

# Nodes: "assistant" calls the model, "tools" executes requested tool calls
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

# Edges: every run starts at the assistant
builder.add_edge(START, "assistant")
# tools_condition decides where to go after the assistant: to "tools" when
# the latest message requires a tool, otherwise a direct response
builder.add_conditional_edges("assistant", tools_condition)
# Tool output always goes back to the assistant
builder.add_edge("tools", "assistant")

# Compiled graph imported by the application
graph = builder.compile()
|
nodes/__init__.py
ADDED
File without changes
|
nodes/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (180 Bytes). View file
|
|
nodes/__pycache__/core.cpython-313.pyc
ADDED
Binary file (2.33 kB). View file
|
|
nodes/core.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# nodes/core.py
|
2 |
+
from states.state import AgentState
|
3 |
+
import os
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from langchain_openai import ChatOpenAI # Using OpenAI-compatible API for OpenRouter
|
6 |
+
from tools.langchain_tools import (
|
7 |
+
extract_text,
|
8 |
+
analyze_image_tool,
|
9 |
+
analyze_audio_tool,
|
10 |
+
add,
|
11 |
+
subtract,
|
12 |
+
multiply,
|
13 |
+
divide,
|
14 |
+
search_tool,
|
15 |
+
extract_youtube_transcript,
|
16 |
+
get_youtube_info,
|
17 |
+
calculate_expression,
|
18 |
+
factorial,
|
19 |
+
square_root,
|
20 |
+
percentage,
|
21 |
+
average
|
22 |
+
)
|
23 |
+
|
24 |
+
load_dotenv()
|
25 |
+
|
26 |
+
# Read your API key from the environment variable
|
27 |
+
openrouter_api_key = os.getenv("OPENROUTER_API_KEY")

# Fail at import time when the key is missing
if not openrouter_api_key:
    raise ValueError("OPENROUTER_API_KEY not found in environment variables")

# Optional headers; the original author notes them as "for analytics"
_openrouter_headers = {
    "HTTP-Referer": "https://your-app.com",
    "X-Title": "LangGraph Agent",
}

# OpenRouter is reached through the OpenAI-compatible client by overriding base_url.
# Alternative models listed by the original author:
#   "mistralai/mistral-7b-instruct:free"   - fast, free text model
#   "google/gemma-2-9b-it:free"            - Google's free model
#   "qwen/qwen-2.5-72b-instruct:free"      - high-quality free model
chat = ChatOpenAI(
    model="google/gemini-2.5-flash-preview-05-20",  # noted by the author as a free multimodal model
    temperature=0,
    max_retries=2,
    base_url="https://openrouter.ai/api/v1",
    api_key=openrouter_api_key,
    default_headers=_openrouter_headers,
)

# Core tools list (matching original structure)
tools = [
    extract_text,
    analyze_image_tool,
    analyze_audio_tool,
    extract_youtube_transcript,
    add,
    subtract,
    multiply,
    divide,
    search_tool,
]

# Extended tools list: the core tools plus extra YouTube/math helpers.
# Defined for convenience; it is not bound to the model below.
extended_tools = [
    *tools,
    get_youtube_info,
    calculate_expression,
    factorial,
    square_root,
    percentage,
    average,
]

# Only the core tools are bound by default; switch to extended_tools if needed
chat_with_tools = chat.bind_tools(tools)
|
75 |
+
|
76 |
+
def assistant(state: AgentState):
    """
    Assistant node: run the tool-enabled chat model over the conversation so far.

    The system prompt text is kept exactly as-is for evaluation compatibility.

    Args:
        state: Graph state; ``state["messages"]`` is the message history.

    Returns:
        dict: ``{"messages": [reply]}`` holding the model's single new message.
    """
    sys_msg = (
        "You are a helpful assistant with access to tools. Understand user requests accurately. "
        "Use your tools when needed to answer effectively. Strictly follow all user instructions and constraints. "
        "Pay attention: your output needs to contain only the final answer without any reasoning since it will be "
        "strictly evaluated against a dataset which contains only the specific response. "
        "Your final output needs to be just the string or integer containing the answer, not an array or technical stuff."
    )

    # A bare string inside a message list is interpreted by LangChain as a
    # *human* message. The ("system", text) tuple form gives the prompt the
    # system role without needing an extra import.
    prompt = [("system", sys_msg)] + state["messages"]

    return {
        "messages": [chat_with_tools.invoke(prompt)]
    }
|
requirements.txt
CHANGED
@@ -1,2 +1,13 @@
|
|
1 |
-
|
2 |
-
requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
python-dotenv
|
2 |
+
requests
|
3 |
+
pytubefix
|
4 |
+
pillow
|
5 |
+
langgraph
|
6 |
+
langchain
|
7 |
+
langchain-openai
|
8 |
+
langchain-core
|
9 |
+
langchain-community
|
10 |
+
gradio
|
11 |
+
pandas
|
12 |
+
gradio[oauth]
|
13 |
+
|
states/__init__.py
ADDED
File without changes
|
states/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (181 Bytes). View file
|
|
states/__pycache__/state.cpython-313.pyc
ADDED
Binary file (692 Bytes). View file
|
|
states/state.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# states/state.py (unchanged)
|
2 |
+
from typing import TypedDict, Annotated
|
3 |
+
from langchain_core.messages import AnyMessage
|
4 |
+
from langgraph.graph.message import add_messages
|
5 |
+
|
6 |
+
class AgentState(TypedDict):
    """State dictionary passed between the nodes of the agent graph."""

    # Conversation history; the add_messages annotation is the reducer
    # LangGraph uses when a node returns new entries under "messages".
    messages: Annotated[list[AnyMessage], add_messages]
|
tools/__init__.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools/__init__.py
|
2 |
+
"""
|
3 |
+
Tools package for AI agents
|
4 |
+
Provides multimodal, search, math, and YouTube capabilities
|
5 |
+
"""
|
6 |
+
|
7 |
+
from .multimodal_tools import MultimodalTools, analyze_image, extract_text, analyze_transcript
|
8 |
+
from .search_tools import SearchTools, search_web, search_news
|
9 |
+
from .math_tools import MathTools, add, subtract, multiply, divide, power, factorial, square_root, percentage, average, calculate_expression
|
10 |
+
from .youtube_tools import YouTubeTools, get_video_info, download_video, download_audio, get_captions, get_playlist_info
|
11 |
+
|
12 |
+
__all__ = [
|
13 |
+
# Multimodal tools
|
14 |
+
'MultimodalTools',
|
15 |
+
'analyze_image',
|
16 |
+
'extract_text',
|
17 |
+
'analyze_transcript',
|
18 |
+
|
19 |
+
# Search tools
|
20 |
+
'SearchTools',
|
21 |
+
'search_web',
|
22 |
+
'search_news',
|
23 |
+
|
24 |
+
# Math tools
|
25 |
+
'MathTools',
|
26 |
+
'add',
|
27 |
+
'subtract',
|
28 |
+
'multiply',
|
29 |
+
'divide',
|
30 |
+
'power',
|
31 |
+
'factorial',
|
32 |
+
'square_root',
|
33 |
+
'percentage',
|
34 |
+
'average',
|
35 |
+
'calculate_expression',
|
36 |
+
|
37 |
+
# YouTube tools
|
38 |
+
'YouTubeTools',
|
39 |
+
'get_video_info',
|
40 |
+
'download_video',
|
41 |
+
'download_audio',
|
42 |
+
'get_captions',
|
43 |
+
'get_playlist_info'
|
44 |
+
]
|
45 |
+
|
46 |
+
__version__ = "1.0.0"
|
tools/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (1.15 kB). View file
|
|
tools/__pycache__/langchain_tools.cpython-313.pyc
ADDED
Binary file (6.91 kB). View file
|
|
tools/__pycache__/math_tools.cpython-313.pyc
ADDED
Binary file (10.5 kB). View file
|
|
tools/__pycache__/multimodal_tools.cpython-313.pyc
ADDED
Binary file (7.46 kB). View file
|
|
tools/__pycache__/search_tools.cpython-313.pyc
ADDED
Binary file (8.96 kB). View file
|
|
tools/__pycache__/utils.cpython-313.pyc
ADDED
Binary file (2.44 kB). View file
|
|
tools/__pycache__/youtube_tools.cpython-313.pyc
ADDED
Binary file (17.1 kB). View file
|
|
tools/langchain_tools.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools/langchain_tools.py (Updated)
|
2 |
+
"""
|
3 |
+
LangChain-compatible tool wrappers for our existing tools
|
4 |
+
"""
|
5 |
+
|
6 |
+
from langchain_core.tools import tool
|
7 |
+
from typing import Optional
|
8 |
+
import os
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
|
11 |
+
# Load environment variables FIRST, before any tool imports
|
12 |
+
load_dotenv()
|
13 |
+
|
14 |
+
from .multimodal_tools import MultimodalTools, analyze_transcript as _analyze_transcript
|
15 |
+
from .search_tools import SearchTools
|
16 |
+
from .math_tools import MathTools
|
17 |
+
from .youtube_tools import YouTubeTools
|
18 |
+
|
19 |
+
# Initialize tool instances (now env vars are available)
|
20 |
+
multimodal_tools = MultimodalTools()
|
21 |
+
search_tools = SearchTools()
|
22 |
+
youtube_tools = YouTubeTools()
|
23 |
+
|
24 |
+
# Rest of the file remains the same...
|
25 |
+
@tool
|
26 |
+
def extract_text(image_path: str) -> str:
    """Extract text from an image using OCR"""
    # Docstring wording is kept verbatim: with @tool it is the description
    # the model sees for this tool.
    ocr_text = multimodal_tools.extract_text_from_image(image_path)
    return ocr_text
|
29 |
+
|
30 |
+
@tool
|
31 |
+
def analyze_image_tool(image_path: str, question: str = "Describe this image in detail") -> str:
    """Analyze an image and answer questions about it"""
    # Delegates to the shared MultimodalTools instance
    analysis = multimodal_tools.analyze_image(image_path, question)
    return analysis
|
34 |
+
|
35 |
+
@tool
|
36 |
+
def analyze_audio_tool(transcript: str, question: str = "Summarize this audio content") -> str:
    """Analyze audio content via transcript"""
    # Works on a transcript string, not on an audio file
    analysis = multimodal_tools.analyze_audio_transcript(transcript, question)
    return analysis
|
39 |
+
|
40 |
+
@tool
|
41 |
+
def search_tool(query: str, max_results: int = 5) -> str:
    """Search the web for information"""
    hits = search_tools.search(query, max_results)
    if not hits:
        return "No search results found"

    # Longest content excerpt included per result
    snippet_limit = 200

    # Format results for the LLM: numbered title, excerpt, source URL
    formatted_results = []
    for rank, hit in enumerate(hits, 1):
        # `or` also covers fields that are present but None/empty, which
        # would otherwise break the slice below or print "None"
        title = hit.get('title') or 'No title'
        content = str(hit.get('content') or 'No content')
        url = hit.get('url') or 'No URL'

        snippet = content[:snippet_limit]
        if len(content) > snippet_limit:
            # Mark the excerpt as cut only when text was actually dropped
            snippet += "..."

        formatted_results.append(f"{rank}. {title}\n{snippet}\nSource: {url}\n")

    return "\n".join(formatted_results)
|
56 |
+
|
57 |
+
@tool
|
58 |
+
def extract_youtube_transcript(url: str, language_code: str = 'en') -> str:
    """Extract transcript/captions from a YouTube video"""
    transcript = youtube_tools.get_captions(url, language_code)
    # Any falsy result (None, empty string) is reported as "no captions"
    if not transcript:
        return "No captions available for this video"
    return transcript
|
65 |
+
|
66 |
+
@tool
|
67 |
+
def add(a: float, b: float) -> float:
    """Add two numbers"""
    # Thin tool wrapper around MathTools.add
    total = MathTools.add(a, b)
    return total
|
70 |
+
|
71 |
+
@tool
|
72 |
+
def subtract(a: float, b: float) -> float:
    """Subtract two numbers"""
    # Thin tool wrapper around MathTools.subtract
    difference = MathTools.subtract(a, b)
    return difference
|
75 |
+
|
76 |
+
@tool
|
77 |
+
def multiply(a: float, b: float) -> float:
    """Multiply two numbers"""
    # Thin tool wrapper around MathTools.multiply
    product = MathTools.multiply(a, b)
    return product
|
80 |
+
|
81 |
+
@tool
|
82 |
+
def divide(a: float, b: float) -> str:
    """Divide two numbers"""
    # The result is stringified before it is returned to the model
    return str(MathTools.divide(a, b))
|
86 |
+
|
87 |
+
@tool
|
88 |
+
def get_youtube_info(url: str) -> str:
    """Get information about a YouTube video"""
    info = youtube_tools.get_video_info(url)
    if not info:
        return "Could not retrieve video information"

    # Missing fields fall back to 'Unknown' / 0
    title = info.get('title', 'Unknown')
    author = info.get('author', 'Unknown')
    length = info.get('length', 0)
    views = info.get('views', 0)
    return f"Title: {title}\nAuthor: {author}\nDuration: {length} seconds\nViews: {views:,}"
|
95 |
+
|
96 |
+
@tool
|
97 |
+
def calculate_expression(expression: str) -> str:
    """Calculate a mathematical expression safely"""
    # Imported locally under an alias because this wrapper has the same
    # name as the function it calls
    from .math_tools import calculate_expression as _evaluate
    outcome = _evaluate(expression)
    return str(outcome)
|
101 |
+
|
102 |
+
@tool
|
103 |
+
def factorial(n: int) -> str:
    """Calculate factorial of a number"""
    # The result is stringified before it is returned to the model
    return str(MathTools.factorial(n))
|
107 |
+
|
108 |
+
@tool
|
109 |
+
def square_root(n: float) -> str:
    """Calculate square root of a number"""
    # The result is stringified before it is returned to the model
    return str(MathTools.square_root(n))
|
113 |
+
|
114 |
+
@tool
|
115 |
+
def percentage(part: float, whole: float) -> str:
    """Calculate percentage"""
    # The result is stringified before it is returned to the model
    return str(MathTools.percentage(part, whole))
|
119 |
+
|
120 |
+
@tool
|
121 |
+
def average(numbers: str) -> str:
    """Calculate average of numbers (provide as comma-separated string)"""
    try:
        # Split on commas, trim each piece, convert to float
        parsed = list(map(float, map(str.strip, numbers.split(','))))
        return str(MathTools.average(parsed))
    except Exception as e:
        # Any failure while parsing or averaging comes back as text
        return f"Error parsing numbers: {str(e)}"
|
tools/math_tools.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools/math_tools.py
|
2 |
+
import math
|
3 |
+
from typing import Union, List, Any
|
4 |
+
from .utils import logger
|
5 |
+
|
6 |
+
Number = Union[int, float]
|
7 |
+
|
8 |
+
class MathTools:
|
9 |
+
"""Simple math tools for basic calculations and utilities"""
|
10 |
+
|
11 |
+
@staticmethod
|
12 |
+
def add(a: Number, b: Number) -> Number:
    """Add ``b`` to ``a`` and return the total."""
    total = a + b
    return total
|
15 |
+
|
16 |
+
@staticmethod
|
17 |
+
def subtract(a: Number, b: Number) -> Number:
    """Subtract ``b`` from ``a`` and return the difference."""
    difference = a - b
    return difference
|
20 |
+
|
21 |
+
@staticmethod
|
22 |
+
def multiply(a: Number, b: Number) -> Number:
    """Multiply ``a`` by ``b`` and return the product."""
    product = a * b
    return product
|
25 |
+
|
26 |
+
@staticmethod
|
27 |
+
def divide(a: Number, b: Number) -> Union[Number, str]:
    """Divide ``a`` by ``b``; a zero divisor yields an error string, not an exception."""
    return 'Error: Division by zero' if b == 0 else a / b
|
32 |
+
|
33 |
+
@staticmethod
|
34 |
+
def power(base: Number, exponent: Number) -> Number:
    """Raise ``base`` to ``exponent`` and return the result."""
    return pow(base, exponent)
|
37 |
+
|
38 |
+
@staticmethod
|
39 |
+
def factorial(n: int) -> Union[int, str]:
|
40 |
+
"""Return factorial of n (non-negative integer)"""
|
41 |
+
if not isinstance(n, int) or n < 0:
|
42 |
+
return 'Error: Input must be a non-negative integer'
|
43 |
+
if n == 0 or n == 1:
|
44 |
+
return 1
|
45 |
+
result = 1
|
46 |
+
for i in range(2, n + 1):
|
47 |
+
result *= i
|
48 |
+
return result
|
49 |
+
|
50 |
+
@staticmethod
|
51 |
+
def square_root(n: Number) -> Union[float, str]:
|
52 |
+
"""Return square root of n"""
|
53 |
+
if n < 0:
|
54 |
+
return 'Error: Cannot calculate square root of negative number'
|
55 |
+
return math.sqrt(n)
|
56 |
+
|
57 |
+
@staticmethod
|
58 |
+
def percentage(part: Number, whole: Number) -> Union[float, str]:
|
59 |
+
"""Calculate percentage: (part/whole) * 100"""
|
60 |
+
if whole == 0:
|
61 |
+
return 'Error: Cannot calculate percentage with zero denominator'
|
62 |
+
return (part / whole) * 100
|
63 |
+
|
64 |
+
@staticmethod
|
65 |
+
def average(numbers: List[Number]) -> Union[float, str]:
|
66 |
+
"""Calculate average of a list of numbers"""
|
67 |
+
if not numbers:
|
68 |
+
return 'Error: Cannot calculate average of empty list'
|
69 |
+
return sum(numbers) / len(numbers)
|
70 |
+
|
71 |
+
@staticmethod
|
72 |
+
def round_number(n: Number, decimals: int = 2) -> Number:
|
73 |
+
"""Round number to specified decimal places"""
|
74 |
+
return round(n, decimals)
|
75 |
+
|
76 |
+
@staticmethod
|
77 |
+
def absolute(n: Number) -> Number:
|
78 |
+
"""Return absolute value of n"""
|
79 |
+
return abs(n)
|
80 |
+
|
81 |
+
@staticmethod
|
82 |
+
def min_value(numbers: List[Number]) -> Union[Number, str]:
|
83 |
+
"""Find minimum value in list"""
|
84 |
+
if not numbers:
|
85 |
+
return 'Error: Cannot find minimum of empty list'
|
86 |
+
return min(numbers)
|
87 |
+
|
88 |
+
@staticmethod
|
89 |
+
def max_value(numbers: List[Number]) -> Union[Number, str]:
|
90 |
+
"""Find maximum value in list"""
|
91 |
+
if not numbers:
|
92 |
+
return 'Error: Cannot find maximum of empty list'
|
93 |
+
return max(numbers)
|
94 |
+
|
95 |
+
@staticmethod
|
96 |
+
def calculate_compound_interest(principal: Number, rate: Number, time: Number, compounds_per_year: int = 1) -> float:
|
97 |
+
"""
|
98 |
+
Calculate compound interest
|
99 |
+
Formula: A = P(1 + r/n)^(nt)
|
100 |
+
"""
|
101 |
+
return principal * (1 + rate/compounds_per_year) ** (compounds_per_year * time)
|
102 |
+
|
103 |
+
@staticmethod
|
104 |
+
def solve_quadratic(a: Number, b: Number, c: Number) -> Union[tuple, str]:
|
105 |
+
"""
|
106 |
+
Solve quadratic equation ax² + bx + c = 0
|
107 |
+
Returns tuple of solutions or error message
|
108 |
+
"""
|
109 |
+
if a == 0:
|
110 |
+
return 'Error: Not a quadratic equation (a cannot be 0)'
|
111 |
+
|
112 |
+
discriminant = b**2 - 4*a*c
|
113 |
+
|
114 |
+
if discriminant < 0:
|
115 |
+
return 'Error: No real solutions (negative discriminant)'
|
116 |
+
elif discriminant == 0:
|
117 |
+
solution = -b / (2*a)
|
118 |
+
return (solution, solution)
|
119 |
+
else:
|
120 |
+
sqrt_discriminant = math.sqrt(discriminant)
|
121 |
+
solution1 = (-b + sqrt_discriminant) / (2*a)
|
122 |
+
solution2 = (-b - sqrt_discriminant) / (2*a)
|
123 |
+
return (solution1, solution2)
|
124 |
+
|
125 |
+
# Convenience functions for direct use
|
126 |
+
def add(a: Number, b: Number) -> Number:
    """Module-level shortcut for ``MathTools.add``: return ``a + b``."""
    total = MathTools.add(a, b)
    return total
|
129 |
+
|
130 |
+
def subtract(a: Number, b: Number) -> Number:
    """Module-level shortcut for ``MathTools.subtract``: return ``a - b``."""
    difference = MathTools.subtract(a, b)
    return difference
|
133 |
+
|
134 |
+
def multiply(a: Number, b: Number) -> Number:
    """Module-level shortcut for ``MathTools.multiply``: return ``a * b``."""
    product = MathTools.multiply(a, b)
    return product
|
137 |
+
|
138 |
+
def divide(a: Number, b: Number) -> Union[Number, str]:
    """Module-level shortcut for ``MathTools.divide``.

    Returns ``a / b``, or the error string produced by ``MathTools.divide``
    when ``b`` is zero.
    """
    quotient = MathTools.divide(a, b)
    return quotient
|
141 |
+
|
142 |
+
def power(base: Number, exponent: Number) -> Number:
    """Module-level shortcut for ``MathTools.power``: return ``base ** exponent``."""
    raised = MathTools.power(base, exponent)
    return raised
|
145 |
+
|
146 |
+
def factorial(n: int) -> Union[int, str]:
    """Module-level shortcut for ``MathTools.factorial``.

    Returns ``n!``, or the error string produced by ``MathTools.factorial``
    for invalid input.
    """
    outcome = MathTools.factorial(n)
    return outcome
|
149 |
+
|
150 |
+
def square_root(n: Number) -> Union[float, str]:
    """Module-level shortcut for ``MathTools.square_root``.

    Returns the square root of ``n``, or the error string produced by
    ``MathTools.square_root`` for negative input.
    """
    outcome = MathTools.square_root(n)
    return outcome
|
153 |
+
|
154 |
+
def percentage(part: Number, whole: Number) -> Union[float, str]:
    """Module-level shortcut for ``MathTools.percentage``.

    Returns ``part`` as a percentage of ``whole``, or the error string
    produced by ``MathTools.percentage`` when ``whole`` is zero.
    """
    outcome = MathTools.percentage(part, whole)
    return outcome
|
157 |
+
|
158 |
+
def average(numbers: List[Number]) -> Union[float, str]:
    """Module-level shortcut for ``MathTools.average``.

    Returns the arithmetic mean of ``numbers``, or the error string produced
    by ``MathTools.average`` for an empty list.
    """
    outcome = MathTools.average(numbers)
    return outcome
|
161 |
+
|
162 |
+
def calculate_expression(expression: str) -> Union[int, float, str]:
    """Evaluate a basic arithmetic expression without calling ``eval``.

    Supported syntax: integer and decimal literals, parentheses, unary
    ``+``/``-`` and the binary operators ``+``, ``-``, ``*``, ``/``, ``//``
    and ``**`` (``^`` is accepted as an alias for ``**``).

    Args:
        expression: The expression text, e.g. ``"(1 + 2) * 3^2"``.

    Returns:
        The numeric result, or an ``'Error: ...'`` string when the expression
        contains disallowed characters, is malformed, divides by zero, or
        would build an unreasonably large integer power.
    """
    # Local imports keep this function self-contained; the module's
    # top-level import block is left untouched.
    import ast
    import operator

    # Upper bound on the bit size of an integer power result. Without it an
    # input such as "9**9**9**9" would keep the interpreter busy forever.
    max_power_bits = 1_000_000

    def _bounded_pow(base, exponent):
        if (
            isinstance(base, int)
            and isinstance(exponent, int)
            and exponent > 0
            and base.bit_length() * exponent > max_power_bits
        ):
            raise ValueError('exponent too large')
        return operator.pow(base, exponent)

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Pow: _bounded_pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _evaluate(node):
        """Recursively evaluate a whitelisted arithmetic AST node."""
        if isinstance(node, ast.Expression):
            return _evaluate(node.body)
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = _evaluate(node.left)
            right = _evaluate(node.right)
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_evaluate(node.operand))
        raise ValueError(f'unsupported syntax: {type(node).__name__}')

    try:
        # Only allow safe mathematical operations
        allowed_chars = set('0123456789+-*/().^ ')
        if not all(c in allowed_chars for c in expression):
            return 'Error: Invalid characters in expression'

        # Replace ^ with ** for Python exponentiation. Surrounding blanks are
        # stripped because ast.parse (unlike eval) rejects leading whitespace.
        safe_expression = expression.replace('^', '**').strip()

        return _evaluate(ast.parse(safe_expression, mode='eval'))
    except ZeroDivisionError:
        return 'Error: Division by zero in expression'
    except Exception as e:
        return f'Error: Invalid expression - {str(e)}'
|
183 |
+
|
184 |
+
# Example usage and testing
|
185 |
+
if __name__ == "__main__":
    # Manual smoke test: exercise the convenience wrappers and print results.
    print("Basic Operations:")
    print(f"5 + 3 = {add(5, 3)}")
    print(f"10 - 4 = {subtract(10, 4)}")
    print(f"6 * 7 = {multiply(6, 7)}")
    print(f"15 / 3 = {divide(15, 3)}")
    print(f"2^8 = {power(2, 8)}")

    print("\nAdvanced Operations:")
    print(f"√16 = {square_root(16)}")
    print(f"5! = {factorial(5)}")
    print(f"Average of [1,2,3,4,5] = {average([1,2,3,4,5])}")

    # percentage() may return an error string; only scale a numeric result.
    percent_result = percentage(25, 100)
    shown = (
        percent_result * 200 / 100
        if isinstance(percent_result, float)
        else percent_result
    )
    print(f"25% of 200 = {shown}")

    print("\nQuadratic Equation (x² - 5x + 6 = 0):")
    roots = MathTools.solve_quadratic(1, -5, 6)
    print(f"Solutions: {roots}")
|
tools/multimodal_tools.py
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools/multimodal_tools.py
|
2 |
+
import requests
|
3 |
+
import json
|
4 |
+
from typing import Optional, Dict, Any
|
5 |
+
from .utils import encode_image_to_base64, validate_file_exists, get_env_var, logger
|
6 |
+
|
7 |
+
class MultimodalTools:
    """Image and transcript analysis helpers backed by the OpenRouter chat API.

    Every public method returns a string: either the model's answer or a
    human-readable error message. Failures are logged, never raised.
    """

    def __init__(self, openrouter_key: Optional[str] = None):
        """Store the API key, request headers and model names.

        Args:
            openrouter_key: OpenRouter API key. When omitted, it is read with
                ``get_env_var("OPENROUTER_API_KEY", None)``.
        """
        self.openrouter_key = openrouter_key or get_env_var("OPENROUTER_API_KEY", None)
        self.openrouter_url = "https://openrouter.ai/api/v1/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {self.openrouter_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://your-app.com",  # Optional: for analytics
            "X-Title": "Multimodal Tools"  # Optional: for analytics
        }

        # Available free multimodal models
        self.vision_model = "moonshotai/kimi-vl-a3b-thinking:free"
        self.text_model = "meta-llama/llama-4-maverick:free"

    @staticmethod
    def _guess_image_mime_type(image_path: str) -> str:
        """Return the image MIME type implied by the file name.

        Falls back to ``image/jpeg`` when the extension is unknown or does
        not denote an image.
        """
        import mimetypes

        guessed, _ = mimetypes.guess_type(image_path)
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/jpeg"

    def _make_openrouter_request(self, payload: Dict[str, Any]) -> str:
        """POST ``payload`` to OpenRouter and return the first choice's text.

        Returns an error string (and logs it) when the HTTP request fails or
        the response carries no ``choices``.
        """
        try:
            response = requests.post(
                self.openrouter_url,
                headers=self.headers,
                json=payload,
                timeout=30
            )
            response.raise_for_status()

            result = response.json()
            if 'choices' in result and len(result['choices']) > 0:
                return result['choices'][0]['message']['content']
            else:
                logger.error(f"Unexpected response format: {result}")
                return "Error: Invalid response format"

        except requests.exceptions.RequestException as e:
            logger.error(f"OpenRouter API request failed: {str(e)}")
            return f"Error making API request: {str(e)}"
        except Exception as e:
            logger.error(f"Unexpected error: {str(e)}")
            return f"Unexpected error: {str(e)}"

    def analyze_image(self, image_path: str, question: str = "Describe this image in detail") -> str:
        """Ask the vision model a question about a local image.

        Args:
            image_path: Path to image file
            question: Question about the image

        Returns:
            The model's answer, or an error string when the file is missing
            or the request fails.
        """
        if not validate_file_exists(image_path):
            return f"Error: Image file not found at {image_path}"

        try:
            encoded_image = encode_image_to_base64(image_path)
            # Declare the real media type of the file instead of always
            # claiming JPEG, so PNG/WebP/GIF inputs are labelled correctly.
            mime_type = self._guess_image_mime_type(image_path)

            payload = {
                "model": self.vision_model,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": question},
                            {
                                "type": "image_url",
                                "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"}
                            }
                        ]
                    }
                ],
                "temperature": 0,
                "max_tokens": 1024
            }

            return self._make_openrouter_request(payload)

        except Exception as e:
            error_msg = f"Error analyzing image: {str(e)}"
            logger.error(error_msg)
            return error_msg

    def extract_text_from_image(self, image_path: str) -> str:
        """Extract visible text from an image by prompting the vision model.

        Args:
            image_path: Path to image file

        Returns:
            Whatever ``analyze_image`` returns for the OCR prompt.
        """
        ocr_prompt = (
            "Extract all visible text from this image.\n"
            "Return only the text content without any additional commentary or formatting.\n"
            "If no text is visible, return 'No text found'."
        )

        return self.analyze_image(image_path, ocr_prompt)

    def analyze_audio_transcript(self, transcript: str, question: str = "Summarize this audio content") -> str:
        """Ask the text model a question about an audio transcript.

        Args:
            transcript: Audio transcript text
            question: Question about the audio content

        Returns:
            The model's answer, or an error string when the transcript is
            blank or the request fails.
        """
        if not transcript.strip():
            return "Error: Empty transcript provided"

        try:
            payload = {
                "model": self.text_model,
                "messages": [
                    {
                        "role": "user",
                        "content": f"Audio transcript: {transcript}\n\nQuestion: {question}"
                    }
                ],
                "temperature": 0,
                "max_tokens": 1024
            }

            return self._make_openrouter_request(payload)

        except Exception as e:
            error_msg = f"Error analyzing audio transcript: {str(e)}"
            logger.error(error_msg)
            return error_msg

    def describe_image(self, image_path: str) -> str:
        """Return a detailed description of an image via ``analyze_image``."""
        return self.analyze_image(
            image_path,
            "Provide a detailed, objective description of this image including objects, people, colors, setting, and any notable details."
        )

    def answer_visual_question(self, image_path: str, question: str) -> str:
        """Answer a specific question about an image via ``analyze_image``."""
        return self.analyze_image(image_path, question)
|
151 |
+
|
152 |
+
# Convenience functions for direct use
|
153 |
+
def analyze_image(image_path: str, question: str = "Describe this image in detail") -> str:
    """Analyze an image with a freshly constructed ``MultimodalTools``."""
    return MultimodalTools().analyze_image(image_path, question)
|
157 |
+
|
158 |
+
def extract_text(image_path: str) -> str:
    """Extract text from an image with a freshly constructed ``MultimodalTools``."""
    return MultimodalTools().extract_text_from_image(image_path)
|
162 |
+
|
163 |
+
def analyze_transcript(transcript: str, question: str = "Summarize this content") -> str:
    """Analyze a transcript with a freshly constructed ``MultimodalTools``."""
    return MultimodalTools().analyze_audio_transcript(transcript, question)
|
tools/search_tools.py
ADDED
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools/search_tools.py
|
2 |
+
import requests
|
3 |
+
import os
|
4 |
+
from typing import List, Dict, Any, Optional
|
5 |
+
from .utils import get_env_var, logger
|
6 |
+
|
7 |
+
class SearchTools:
    """Web search helper that tries providers from free to most expensive.

    Providers: DuckDuckGo Instant Answer (no key), Tavily (``TAVILY_API_KEY``)
    and SerpAPI (``SERPAPI_KEY``). Every provider method returns a list of
    dicts with the keys ``title``, ``url``, ``content`` and ``source``;
    failures are logged and reported as an empty list.
    """

    def __init__(self):
        # Primary: Free alternatives
        # NOTE(review): this flag is set here but not consulted by any
        # method in this class.
        self.duckduckgo_enabled = True

        # Secondary: Tavily (cost-effective)
        self.tavily_api_key = os.getenv("TAVILY_API_KEY")

        # Tertiary: SerpAPI (expensive, fallback only)
        self.serpapi_key = os.getenv("SERPAPI_KEY")

    @staticmethod
    def _parse_duckduckgo_response(data: Dict[str, Any], max_results: int) -> List[Dict[str, Any]]:
        """Convert a DuckDuckGo Instant Answer payload into result dicts.

        The abstract (when present) comes first, followed by related topics.
        Topic groups (entries holding a nested ``Topics`` list) are flattened
        so they contribute results instead of silently using up slots.

        Args:
            data: Decoded JSON response.
            max_results: Upper bound on the number of results returned.

        Returns:
            At most ``max_results`` result dicts; empty for ``max_results <= 0``.
        """
        if max_results <= 0:
            return []

        results: List[Dict[str, Any]] = []

        # Process abstract
        if data.get('Abstract'):
            results.append({
                'title': data.get('Heading', 'DuckDuckGo Result'),
                'url': data.get('AbstractURL', ''),
                'content': data.get('Abstract', ''),
                'source': 'DuckDuckGo'
            })

        def _iter_topics(topics):
            """Yield topics that carry text, descending into topic groups."""
            for topic in topics:
                if not isinstance(topic, dict):
                    continue
                if 'Text' in topic:
                    yield topic
                elif isinstance(topic.get('Topics'), list):
                    yield from _iter_topics(topic['Topics'])

        # Process related topics, filtering before counting against the limit
        for topic in _iter_topics(data.get('RelatedTopics') or []):
            if len(results) >= max_results:
                break
            text = topic.get('Text') or ''
            results.append({
                'title': text[:100],
                'url': topic.get('FirstURL', ''),
                'content': text,
                'source': 'DuckDuckGo'
            })

        return results[:max_results]

    def search_duckduckgo(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """Free search using the DuckDuckGo Instant Answer API.

        Args:
            query: Search query
            max_results: Maximum number of results

        Returns:
            List of search results; empty on any failure (which is logged).
        """
        try:
            # DuckDuckGo Instant Answer API (free)
            url = "https://api.duckduckgo.com/"
            params = {
                'q': query,
                'format': 'json',
                'no_html': '1',
                'skip_disambig': '1'
            }

            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()

            return self._parse_duckduckgo_response(response.json(), max_results)

        except Exception as e:
            logger.error(f"DuckDuckGo search failed: {str(e)}")
            return []

    def search_tavily(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """Search using the Tavily API (cost-effective).

        Args:
            query: Search query
            max_results: Maximum number of results

        Returns:
            List of search results; empty when no API key is configured or
            the request fails.
        """
        if not self.tavily_api_key:
            logger.warning("Tavily API key not provided")
            return []

        try:
            url = "https://api.tavily.com/search"
            payload = {
                "api_key": self.tavily_api_key,
                "query": query,
                "search_depth": "basic",
                "include_answer": False,
                "include_images": False,
                "include_raw_content": False,
                "max_results": max_results
            }

            response = requests.post(url, json=payload, timeout=15)
            response.raise_for_status()

            data = response.json()
            return [
                {
                    'title': result.get('title', ''),
                    'url': result.get('url', ''),
                    'content': result.get('content', ''),
                    'source': 'Tavily'
                }
                for result in data.get('results', [])
            ]

        except Exception as e:
            logger.error(f"Tavily search failed: {str(e)}")
            return []

    def search_serpapi(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """Search using SerpAPI (expensive, fallback only).

        Args:
            query: Search query
            max_results: Maximum number of results

        Returns:
            List of search results; empty when no API key is configured or
            the request fails.
        """
        if not self.serpapi_key:
            logger.warning("SerpAPI key not provided")
            return []

        try:
            url = "https://serpapi.com/search"
            params = {
                'api_key': self.serpapi_key,
                'engine': 'google',
                'q': query,
                'num': max_results,
                'gl': 'us',  # Geolocation
                'hl': 'en'   # Language
            }

            response = requests.get(url, params=params, timeout=15)
            response.raise_for_status()

            data = response.json()
            return [
                {
                    'title': result.get('title', ''),
                    'url': result.get('link', ''),
                    'content': result.get('snippet', ''),
                    'source': 'Google (SerpAPI)'
                }
                for result in data.get('organic_results', [])
            ]

        except Exception as e:
            logger.error(f"SerpAPI search failed: {str(e)}")
            return []

    def search(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """Search with the first provider that yields results.

        Args:
            query: Search query
            max_results: Maximum number of results

        Returns:
            Results from the first provider that returns any; an empty list
            for a blank query or when every provider comes back empty.
        """
        if not query.strip():
            return []

        # Try providers in order of preference (free -> cheap -> expensive)
        providers = [
            ("DuckDuckGo", self.search_duckduckgo),
            ("Tavily", self.search_tavily),
            ("SerpAPI", self.search_serpapi)
        ]

        for provider_name, search_func in providers:
            try:
                logger.info(f"Attempting search with {provider_name}")
                results = search_func(query, max_results)

                if results:
                    logger.info(f"Successfully retrieved {len(results)} results from {provider_name}")
                    return results
                else:
                    logger.warning(f"No results from {provider_name}")

            except Exception as e:
                logger.error(f"Error with {provider_name}: {str(e)}")
                continue

        logger.error("All search providers failed")
        return []

    def search_news(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """Search for news articles by prefixing the query with ``news``."""
        news_query = f"news {query}"
        return self.search(news_query, max_results)

    def search_academic(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """Search for academic content, restricted to scholarly sites."""
        academic_query = f"academic research {query} site:scholar.google.com OR site:arxiv.org OR site:researchgate.net"
        return self.search(academic_query, max_results)
|
213 |
+
|
214 |
+
# Convenience functions
|
215 |
+
def search_web(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
    """Run a web search with a freshly constructed ``SearchTools``."""
    return SearchTools().search(query, max_results)
|
219 |
+
|
220 |
+
def search_news(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
    """Run a news search with a freshly constructed ``SearchTools``."""
    return SearchTools().search_news(query, max_results)
|
tools/utils.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools/utils.py
|
2 |
+
import base64
|
3 |
+
import os
|
4 |
+
import mimetypes
|
5 |
+
from typing import Optional, Dict, Any
|
6 |
+
import logging
|
7 |
+
|
8 |
+
# Configure logging
|
9 |
+
logging.basicConfig(level=logging.INFO)
|
10 |
+
logger = logging.getLogger(__name__)
|
11 |
+
|
12 |
+
def encode_image_to_base64(image_path: str) -> str:
    """Read a file in binary mode and return its contents as base64 text.

    Any exception raised while reading or encoding is logged and re-raised.
    """
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
            return base64.b64encode(raw_bytes).decode('utf-8')
    except Exception as e:
        logger.error(f"Error encoding image {image_path}: {str(e)}")
        raise
|
21 |
+
|
22 |
+
def get_mime_type(file_path: str) -> str:
    """Guess the MIME type from the file name.

    Returns ``'application/octet-stream'`` when no type can be guessed.
    """
    guessed = mimetypes.guess_type(file_path)[0]
    if guessed:
        return guessed
    return 'application/octet-stream'
|
26 |
+
|
27 |
+
def validate_file_exists(file_path: str) -> bool:
    """Return True when ``file_path`` refers to an existing regular file.

    Directories and missing paths yield False. Read permission is not checked.
    """
    # os.path.isfile is already False for paths that do not exist.
    return os.path.isfile(file_path)
|
30 |
+
|
31 |
+
def get_env_var(var_name: str, default: Optional[str] = None) -> str:
    """Return the value of an environment variable.

    Args:
        var_name: Name of the variable to read.
        default: Value used when the variable is unset.

    Raises:
        ValueError: If the variable is unset and ``default`` is None.
    """
    resolved = os.getenv(var_name, default)
    if resolved is not None:
        return resolved
    raise ValueError(f"Environment variable {var_name} is required")
|
tools/youtube_tools.py
ADDED
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# tools/youtube_tools.py (Updated with fixes)
|
2 |
+
"""
|
3 |
+
YouTube Tools Module - Fixed version using pytubefix
|
4 |
+
Addresses network issues, deprecation warnings, and playlist errors
|
5 |
+
"""
|
6 |
+
|
7 |
+
from pytubefix import YouTube, Playlist
|
8 |
+
from pytubefix.cli import on_progress
|
9 |
+
from typing import Optional, Dict, Any, List
|
10 |
+
import os
|
11 |
+
import time
|
12 |
+
import logging
|
13 |
+
from .utils import logger, validate_file_exists
|
14 |
+
|
15 |
+
class YouTubeTools:
|
16 |
+
"""YouTube tools with improved error handling and network resilience"""
|
17 |
+
|
18 |
+
def __init__(self, max_retries: int = 3, retry_delay: float = 1.0):
|
19 |
+
self.supported_formats = ['mp4', '3gp', 'webm']
|
20 |
+
self.supported_audio_formats = ['mp3', 'mp4', 'webm']
|
21 |
+
self.max_retries = max_retries
|
22 |
+
self.retry_delay = retry_delay
|
23 |
+
|
24 |
+
def _retry_operation(self, operation, *args, **kwargs):
|
25 |
+
"""Retry operation with exponential backoff for network issues"""
|
26 |
+
for attempt in range(self.max_retries):
|
27 |
+
try:
|
28 |
+
return operation(*args, **kwargs)
|
29 |
+
except Exception as e:
|
30 |
+
if attempt == self.max_retries - 1:
|
31 |
+
raise e
|
32 |
+
|
33 |
+
error_msg = str(e).lower()
|
34 |
+
if any(term in error_msg for term in ['network', 'socket', 'timeout', 'connection']):
|
35 |
+
wait_time = self.retry_delay * (2 ** attempt)
|
36 |
+
logger.warning(f"Network error (attempt {attempt + 1}/{self.max_retries}): {e}")
|
37 |
+
logger.info(f"Retrying in {wait_time} seconds...")
|
38 |
+
time.sleep(wait_time)
|
39 |
+
else:
|
40 |
+
raise e
|
41 |
+
|
42 |
+
def get_video_info(self, url: str) -> Optional[Dict[str, Any]]:
    """Collect metadata for a YouTube video via pytubefix.

    Returns:
        A dict with title, author, length, views, stream details, caption
        codes and related fields, or None when retrieval fails (the failure
        is logged).
    """
    # (attribute name, fallback) pairs recorded for every stream
    stream_fields = (
        ('resolution', 'unknown'),
        ('fps', 'unknown'),
        ('video_codec', 'unknown'),
        ('audio_codec', 'unknown'),
        ('filesize', None),
        ('mime_type', 'unknown'),
    )

    def _collect_metadata():
        yt = YouTube(url, on_progress_callback=on_progress)

        # Progressive MP4 streams; a failure here only costs the stream list.
        stream_details = []
        try:
            for candidate in yt.streams.filter(progressive=True, file_extension='mp4'):
                try:
                    stream_details.append({
                        field: getattr(candidate, field, fallback)
                        for field, fallback in stream_fields
                    })
                except Exception as stream_error:
                    logger.debug(f"Error processing stream: {stream_error}")
                    continue
        except Exception as e:
            logger.warning(f"Could not retrieve stream details: {e}")

        # Caption language codes; a failure here leaves the list empty.
        caption_codes = []
        try:
            if yt.captions:
                caption_codes = list(yt.captions.keys())
        except Exception as e:
            logger.warning(f"Could not retrieve captions list: {e}")

        return {
            'title': getattr(yt, 'title', 'Unknown'),
            'author': getattr(yt, 'author', 'Unknown'),
            'channel_url': getattr(yt, 'channel_url', 'Unknown'),
            'length': getattr(yt, 'length', 0),
            'views': getattr(yt, 'views', 0),
            'description': getattr(yt, 'description', ''),
            'thumbnail_url': getattr(yt, 'thumbnail_url', ''),
            'publish_date': yt.publish_date.isoformat() if getattr(yt, 'publish_date', None) else None,
            'keywords': getattr(yt, 'keywords', []),
            'video_id': getattr(yt, 'video_id', ''),
            'watch_url': getattr(yt, 'watch_url', url),
            'available_streams': stream_details,
            'captions_available': caption_codes
        }

    try:
        metadata = self._retry_operation(_collect_metadata)
        if metadata is None:
            return metadata
        logger.info(f"Retrieved info for video: {metadata.get('title', 'Unknown')}")
        return metadata
    except Exception as e:
        logger.error(f"Failed to get video info for {url}: {e}")
        return None
|
104 |
+
|
105 |
+
def download_video(self, url: str, output_path: str = './downloads',
                   resolution: str = 'highest', filename: Optional[str] = None) -> Optional[str]:
    """Download a YouTube video, retrying through ``_retry_operation``.

    Args:
        url: Video URL.
        output_path: Directory to download into; created when missing.
        resolution: ``'highest'``, ``'lowest'`` or a resolution string such as
            ``'720p'``. An unavailable resolution falls back to the highest.
        filename: Optional base name without extension. Characters other than
            alphanumerics, space, ``-`` and ``_`` are dropped; when nothing
            usable remains, the library's default file name is used.

    Returns:
        Path of the downloaded file, or None on failure (which is logged).
    """
    try:
        def _download():
            os.makedirs(output_path, exist_ok=True)

            yt = YouTube(url, on_progress_callback=on_progress)

            # Select stream based on resolution preference
            if resolution == 'highest':
                stream = yt.streams.get_highest_resolution()
            elif resolution == 'lowest':
                stream = yt.streams.get_lowest_resolution()
            else:
                stream = yt.streams.filter(res=resolution, progressive=True, file_extension='mp4').first()
                if not stream:
                    logger.warning(f"Resolution {resolution} not found, downloading highest instead")
                    stream = yt.streams.get_highest_resolution()

            if not stream:
                raise Exception("No suitable stream found for download")

            # Sanitise the custom filename. A name made only of rejected
            # characters would otherwise produce a file called ".<ext>".
            safe_filename = ""
            if filename:
                safe_filename = "".join(c for c in filename if c.isalnum() or c in (' ', '-', '_')).rstrip()
                if not safe_filename:
                    logger.warning(f"Filename {filename!r} has no usable characters, using default name")

            if safe_filename:
                return stream.download(output_path=output_path, filename=f"{safe_filename}.{stream.subtype}")
            return stream.download(output_path=output_path)

        file_path = self._retry_operation(_download)
        logger.info(f"Downloaded video to {file_path}")
        return file_path

    except Exception as e:
        logger.error(f"Failed to download video from {url}: {e}")
        return None
|
144 |
+
|
145 |
+
def download_audio(self, url: str, output_path: str = './downloads',
                   filename: Optional[str] = None) -> Optional[str]:
    """Download only the audio of a YouTube video, with retries.

    Args:
        url: Video URL.
        output_path: Directory to download into; created when missing.
        filename: Optional base name without extension. Characters other than
            alphanumerics, space, ``-`` and ``_`` are dropped; when nothing
            usable remains, the library's default file name is used.

    Returns:
        Path of the downloaded file, or None on failure (which is logged).
    """
    try:
        def _download_audio():
            os.makedirs(output_path, exist_ok=True)

            yt = YouTube(url, on_progress_callback=on_progress)
            audio_stream = yt.streams.get_audio_only()

            if not audio_stream:
                raise Exception("No audio stream found")

            # Sanitise the custom filename. A name made only of rejected
            # characters would otherwise produce a file called ".<ext>".
            safe_filename = ""
            if filename:
                safe_filename = "".join(c for c in filename if c.isalnum() or c in (' ', '-', '_')).rstrip()
                if not safe_filename:
                    logger.warning(f"Filename {filename!r} has no usable characters, using default name")

            if safe_filename:
                return audio_stream.download(output_path=output_path, filename=f"{safe_filename}.{audio_stream.subtype}")
            return audio_stream.download(output_path=output_path)

        file_path = self._retry_operation(_download_audio)
        logger.info(f"Downloaded audio to {file_path}")
        return file_path

    except Exception as e:
        logger.error(f"Failed to download audio from {url}: {e}")
        return None
|
173 |
+
|
174 |
+
def get_captions(self, url: str, language_code: str = 'en') -> Optional[str]:
    """Fetch a video's captions in SRT format for one language.

    Args:
        url: Video URL.
        language_code: Caption track code to look up in ``yt.captions``.

    Returns:
        The SRT text, or None when the video has no captions, the language
        is unavailable, or retrieval fails (each case is logged).
    """
    def _fetch_srt():
        yt = YouTube(url, on_progress_callback=on_progress)

        if not yt.captions:
            logger.warning("No captions available for this video")
            return None

        # Dictionary-style lookup on the captions collection
        if language_code not in yt.captions:
            available_langs = list(yt.captions.keys())
            logger.warning(f"Captions not found for language {language_code}. Available: {available_langs}")
            return None

        return yt.captions[language_code].generate_srt_captions()

    try:
        srt_text = self._retry_operation(_fetch_srt)
        if srt_text:
            logger.info(f"Retrieved captions in {language_code}")
        return srt_text
    except Exception as e:
        logger.error(f"Failed to get captions from {url}: {e}")
        return None
|
204 |
+
|
205 |
+
def get_playlist_info(self, playlist_url: str) -> Optional[Dict[str, Any]]:
    """Collect basic information about a YouTube playlist.

    The lookup is executed through ``self._retry_operation``.

    Args:
        playlist_url: URL of the playlist.

    Returns:
        A dict with the keys ``video_count``, ``video_urls`` (at most the
        first 10 URLs), ``total_videos``, ``title``, ``description`` and
        ``owner``; or ``None`` if the playlist could not be read (the error
        is logged). ``video_count`` and ``total_videos`` hold the same value.
    """
    # (attribute / result key, default when the attribute is missing,
    #  value used when reading the attribute raises)
    optional_fields = (
        ('title', 'Unknown Playlist', 'Private/Unavailable Playlist'),
        ('description', '', 'Description unavailable'),
        ('owner', 'Unknown', 'Owner unavailable'),
    )

    try:
        def _get_playlist_info():
            playlist = Playlist(playlist_url)

            # Materialise the URLs first (this triggers the playlist loading).
            video_urls = list(playlist.video_urls)

            info = {
                'video_count': len(video_urls),
                'video_urls': video_urls[:10],  # Limit to first 10 for performance
                'total_videos': len(video_urls),
            }

            # Best-effort metadata: a failure here must not fail the whole
            # lookup. Catch Exception rather than using a bare ``except`` so
            # KeyboardInterrupt / SystemExit still propagate.
            for field, default, fallback in optional_fields:
                try:
                    info[field] = getattr(playlist, field, default)
                except Exception:
                    info[field] = fallback

            return info

        info = self._retry_operation(_get_playlist_info)
        if info is not None:
            logger.info(f"Retrieved playlist info: {info['title']} ({info['video_count']} videos)")
        return info

    except Exception as e:
        logger.error(f"Failed to get playlist info from {playlist_url}: {e}")
        return None
|
249 |
+
|
250 |
+
def get_available_qualities(self, url: str) -> Optional[List[Dict[str, Any]]]:
    """List the progressive (video + audio) streams available for a video.

    The lookup is executed through ``self._retry_operation``.

    Args:
        url: URL of the YouTube video.

    Returns:
        One dict per stream with the keys ``resolution``, ``fps``,
        ``filesize_mb``, ``mime_type``, ``video_codec`` and ``audio_codec``,
        ordered from highest to lowest resolution; or ``None`` on failure
        (the error is logged). Streams that raise while being inspected
        are skipped.
    """
    bytes_per_mb = 1024 * 1024

    def _resolution_rank(entry):
        # Numeric part of labels such as "720p"; anything else ranks as 0.
        label = entry['resolution']
        if not label or label == 'unknown':
            return 0
        digits = label[:-1]
        return int(digits) if digits.isdigit() else 0

    def _describe(stream):
        # Build the summary in the same key order the callers receive.
        entry = {
            'resolution': getattr(stream, 'resolution', 'unknown'),
            'fps': getattr(stream, 'fps', 'unknown'),
        }
        if getattr(stream, 'filesize', None):
            entry['filesize_mb'] = round(stream.filesize / bytes_per_mb, 2)
        else:
            entry['filesize_mb'] = None
        entry['mime_type'] = getattr(stream, 'mime_type', 'unknown')
        entry['video_codec'] = getattr(stream, 'video_codec', 'unknown')
        entry['audio_codec'] = getattr(stream, 'audio_codec', 'unknown')
        return entry

    try:
        def _collect():
            video = YouTube(url, on_progress_callback=on_progress)
            found = []

            for candidate in video.streams.filter(progressive=True):
                try:
                    found.append(_describe(candidate))
                except Exception as stream_error:
                    logger.debug(f"Error processing stream: {stream_error}")
                    continue

            return sorted(found, key=_resolution_rank, reverse=True)

        return self._retry_operation(_collect)

    except Exception as e:
        logger.error(f"Failed to get qualities for {url}: {e}")
        return None
|
288 |
+
|
289 |
+
# Convenience functions: standalone wrappers that create a YouTubeTools instance per call
|
290 |
+
def get_video_info(url: str) -> Optional[Dict[str, Any]]:
    """Return video information for ``url`` using a fresh ``YouTubeTools``."""
    return YouTubeTools().get_video_info(url)
|
294 |
+
|
295 |
+
def download_video(url: str, output_path: str = './downloads',
                   resolution: str = 'highest', filename: Optional[str] = None) -> Optional[str]:
    """Download a video by delegating to a fresh ``YouTubeTools`` instance."""
    return YouTubeTools().download_video(url, output_path, resolution, filename)
|
300 |
+
|
301 |
+
def download_audio(url: str, output_path: str = './downloads',
                   filename: Optional[str] = None) -> Optional[str]:
    """Download audio only by delegating to a fresh ``YouTubeTools`` instance."""
    return YouTubeTools().download_audio(url, output_path, filename)
|
306 |
+
|
307 |
+
def get_captions(url: str, language_code: str = 'en') -> Optional[str]:
    """Return video captions by delegating to a fresh ``YouTubeTools`` instance."""
    return YouTubeTools().get_captions(url, language_code)
|
311 |
+
|
312 |
+
def get_playlist_info(playlist_url: str) -> Optional[Dict[str, Any]]:
    """Return playlist information by delegating to a fresh ``YouTubeTools`` instance."""
    return YouTubeTools().get_playlist_info(playlist_url)
|