from openai import OpenAI
# from dotenv import load_dotenv
import yt_dlp
import modal
import torch
class Summarizer:
    def __init__(self):
        # System prompt framing the model as a recruiting assistant that analyzes interview transcripts
        self.system = '''You are an intelligent assistant working for a company that builds advanced tools to automate real hiring decisions. Your task is to analyze speech-to-text transcripts from candidate interviews. For each candidate’s response, you will:
Provide a clear and concise summary of what the candidate said.
Identify the candidate’s motivation and key skills expressed in their speech.
Highlight any additional insights or relevant observations that could assist recruiters in making informed hiring decisions.
Focus on delivering objective, actionable, and relevant information that captures the candidate’s potential skills.'''
        self.transcriber = "gpt-4o-transcribe"
        self.summarizer = "gpt-4.1-mini"
        # Handle to the remote Modal class that runs the accent classifier
        self.Accentizer = modal.Cls.lookup("ClassifierAudio", "Accentizer")
        self.client = OpenAI()
    def download_audio(self, url, output_path='audio'):
        # Download the best audio stream and convert it to mp3 with ffmpeg
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': output_path,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',  # or 'wav'
                'preferredquality': '192',
            }],
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    def classify(self, url):
        # Call the remote accent classifier and normalize its output to plain Python types
        raw = self.Accentizer.classify.remote(url)
        accent = raw["label"][0] if isinstance(raw["label"], list) else str(raw["label"])
        score = raw["score"].item() if isinstance(raw["score"], torch.Tensor) else float(raw["score"])
        return {
            "label": accent,
            "score": round(score, 4)
        }
    def summarize(self, url):
        self.download_audio(url=url)
        # download_audio writes the converted file as audio.mp3 (output_path plus the mp3 postprocessor)
        with open("./audio.mp3", "rb") as audio_file:
            transcription = self.client.audio.transcriptions.create(
                model=self.transcriber,
                file=audio_file
            )
        user_prompt = "Here is the transcription of the audio:\n"
        user_prompt += transcription.text
        prompts = [
            {"role": "system", "content": self.system},
            {"role": "user", "content": user_prompt}
        ]
        chat = self.client.chat.completions.create(
            model=self.summarizer,
            messages=prompts
        )
        return chat.choices[0].message.content
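

# Usage sketch (assumptions, not part of the original file): OPENAI_API_KEY is set in the
# environment, ffmpeg is installed for yt-dlp's mp3 conversion, and a Modal app named
# "ClassifierAudio" with a class "Accentizer" is already deployed. The URL is a placeholder.
if __name__ == "__main__":
    summarizer = Summarizer()
    url = "https://www.youtube.com/watch?v=EXAMPLE"  # hypothetical interview video URL
    print(summarizer.classify(url))   # e.g. {"label": "...", "score": 0.1234}
    print(summarizer.summarize(url))  # recruiter-oriented summary of the transcript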