philincloud committed on
Commit
830a3ef
·
verified ·
1 Parent(s): 66b821b

Update langgraph_agent.py

Browse files
Files changed (1) hide show
  1. langgraph_agent.py +24 -6
langgraph_agent.py CHANGED
@@ -91,7 +91,7 @@ HF_INFERENCE_CLIENT = None
91
  if HF_API_TOKEN:
92
  HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
93
  else:
94
- print("WARNING: HF_API_TOKEN not set. Image tools will not function.")
95
 
96
  @tool
97
  def read_file_content(file_path: str) -> Dict[str, str]:
@@ -115,8 +115,8 @@ def read_file_content(file_path: str) -> Dict[str, str]:
115
  # Indicate that it's an image and needs to be described by a specific tool
116
  return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. Use 'describe_image' tool to get a textual description."}
117
  elif file_extension == ".mp3":
118
- # Indicate that it's an audio file and the LLM should process it natively
119
- return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. The LLM should process this natively."}
120
  else:
121
  return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3 files are recognized."}
122
  except FileNotFoundError:
@@ -159,6 +159,24 @@ def describe_image(image_path: str) -> Dict[str, str]:
159
  except Exception as e:
160
  return {"error": f"Error describing image {image_path}: {str(e)}"}
161
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  API_KEY = os.getenv("GEMINI_API_KEY")
164
  HF_SPACE_TOKEN = os.getenv("HF_SPACE_TOKEN")
@@ -170,8 +188,8 @@ tools = [
170
  wiki_search, web_search, arvix_search,
171
  read_file_content,
172
  python_interpreter,
173
- describe_image, # Added new tool
174
- # transcribe_audio, # Removed as per user request
175
  ]
176
 
177
 
@@ -184,7 +202,7 @@ def build_graph(provider: str = "gemini"):
184
  """Build the LangGraph agent with chosen LLM (default: Gemini)."""
185
  if provider == "gemini":
186
  llm = ChatGoogleGenerativeAI(
187
- model= "gemini-1.5-flash-preview-05-20", # This model is capable of native audio processing
188
  temperature=1.0,
189
  max_retries=2,
190
  api_key=GEMINI_API_KEY,
 
91
  if HF_API_TOKEN:
92
  HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
93
  else:
94
+ print("WARNING: HF_API_TOKEN not set. Image and Audio tools will not function.")
95
 
96
  @tool
97
  def read_file_content(file_path: str) -> Dict[str, str]:
 
115
  # Indicate that it's an image and needs to be described by a specific tool
116
  return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. Use 'describe_image' tool to get a textual description."}
117
  elif file_extension == ".mp3":
118
+ # Indicate that it's an audio file and needs to be transcribed by a specific tool
119
+ return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. Use 'transcribe_audio' tool to get the text transcription."}
120
  else:
121
  return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3 files are recognized."}
122
  except FileNotFoundError:
 
159
  except Exception as e:
160
  return {"error": f"Error describing image {image_path}: {str(e)}"}
161
 
162
+ @tool
163
+ def transcribe_audio(audio_path: str) -> Dict[str, str]:
164
+ """
165
+ Transcribes an audio file (e.g., MP3) to text using an automatic speech recognition model
166
+ from the Hugging Face Inference API. Requires HF_API_TOKEN environment variable to be set.
167
+ """
168
+ if not HF_INFERENCE_CLIENT:
169
+ return {"error": "Hugging Face API token not configured for audio transcription. Cannot use this tool."}
170
+ try:
171
+ with open(audio_path, "rb") as f:
172
+ audio_bytes = f.read()
173
+ transcription = HF_INFERENCE_CLIENT.automatic_speech_recognition(audio_bytes)
174
+ return {"audio_transcription": transcription, "audio_path": audio_path}
175
+ except FileNotFoundError:
176
+ return {"error": f"Audio file not found: {audio_path}. Please ensure the file exists."}
177
+ except Exception as e:
178
+ return {"error": f"Error transcribing audio {audio_path}: {str(e)}"}
179
+
180
 
181
  API_KEY = os.getenv("GEMINI_API_KEY")
182
  HF_SPACE_TOKEN = os.getenv("HF_SPACE_TOKEN")
 
188
  wiki_search, web_search, arvix_search,
189
  read_file_content,
190
  python_interpreter,
191
+ describe_image,
192
+ transcribe_audio, # Re-added tool
193
  ]
194
 
195
 
 
202
  """Build the LangGraph agent with chosen LLM (default: Gemini)."""
203
  if provider == "gemini":
204
  llm = ChatGoogleGenerativeAI(
205
+ model= "gemini-1.5-flash-preview-05-20",
206
  temperature=1.0,
207
  max_retries=2,
208
  api_key=GEMINI_API_KEY,