DhirajN committed on
Commit 3fe0f9e · verified · 1 Parent(s): d6811db

Update app.py

Files changed (1)
  1. app.py +33 -19
app.py CHANGED
@@ -6,50 +6,64 @@ from transformers import pipeline
 import torch
 from huggingface_hub import InferenceClient
 import os
+import librosa

 # Initialize the InferenceClient for PHI 3
 client = InferenceClient(
     "microsoft/Phi-3.5-mini-instruct",  # Update this to the correct model name for PHI 3
     token=os.getenv("HF_API_TOKEN", "")
-    # You can configure this API token through the Hugging Face Secrets
 )

 # Check if a GPU is available and use it if possible
 device = 'cuda' if torch.cuda.is_available() else 'cpu'

 # Initialize the Whisper pipeline
-whisper = pipeline('automatic-speech-recognition', model='openai/whisper-tiny', device=0 if device == 'cuda' else -1)
+whisper = pipeline('automatic-speech-recognition', model='openai/whisper-tiny', device=device)

 # Instructions (can be set through Hugging Face Secrets or hardcoded)
 instructions = os.getenv("INST", "Your default instructions here.")

 def query_phi(prompt):
-    response = ""  # Initialize an empty string to store the response
-    for message in client.chat_completion(
-        messages=[{"role": "user", "content": f"{instructions}\n{prompt}"}],
-        max_tokens=500,
-        stream=True,
-    ):
-        response += message.choices[0].delta.content  # Append each message to the response
-    return response  # Return the accumulated response after the loop
+    print("Sending request to PHI 3 API...")
+    response = ""
+    try:
+        for message in client.chat_completion(
+            messages=[{"role": "user", "content": f"{instructions}\n{prompt}"}],
+            max_tokens=500,
+            stream=True,
+        ):
+            response += message.choices[0].delta.content
+    except Exception as e:
+        print("Error in PHI 3 API:", e)
+        return "PHI 3 API Error: " + str(e)
+
+    return response

 def transcribe_and_query(audio):
-    # Transcribe the audio file
-    transcription = whisper(audio)["text"]
-    transcription = "Prompt : " + transcription
-    # Query Microsoft PHI 3 with the transcribed text
-    phi_response = query_phi(transcription)
+    try:
+        # Load the audio file as waveform
+        audio_data, sr = librosa.load(audio, sr=16000)

-    return transcription, phi_response
+        # Transcribe using Whisper
+        transcription = whisper(audio_data)["text"]
+        transcription = "Prompt : " + transcription
+
+        # Query Microsoft PHI 3 with the transcribed text
+        phi_response = query_phi(transcription)
+
+        return transcription, phi_response
+
+    except Exception as e:
+        return f"Error processing audio: {str(e)}", "No response from PHI 3"

 # Create Gradio interface
 iface = gr.Interface(
     fn=transcribe_and_query,
     inputs=gr.Audio(type="filepath"),
     outputs=["text", "text"],
-    title="Scam Call detector with BEEP",
-    description="Upload your recorded call to see if it is a scam or not. /n Stay Safe, Stay Secure."
+    title="Scam Call Detector with BEEP",
+    description="Upload your recorded call to see if it is a scam or not.\n Stay Safe, Stay Secure."
 )

 # Launch the interface
-iface.launch(share=True)  # share=True is optional, it provides a public link
+iface.launch(share=True)
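
A minimal sketch (not part of the commit) of how the updated transcribe_and_query flow could be smoke-tested outside the Gradio UI. It assumes app.py sits in the working directory with HF_API_TOKEN exported, that the final iface.launch(...) line is temporarily commented out so the import does not block, and that "sample_call.wav" is a placeholder recording, not a file shipped with this Space.

# Local smoke test for the updated app.py (assumptions noted above).
from app import transcribe_and_query

# "sample_call.wav" is a hypothetical placeholder path.
transcription, verdict = transcribe_and_query("sample_call.wav")
print(transcription)  # "Prompt : <Whisper transcript of the call>"
print(verdict)        # PHI 3's assessment, or an error string if a step failed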