|
import cohere |
|
import gradio as gr |
|
from pypdf import PdfReader |
|
from gtts import gTTS |
|
from io import BytesIO |
|
import os |
|
from loguru import logger |
|
import tempfile |
|
from dotenv import load_dotenv |
|
|
|
|
|
# Populate the process environment from a .env file, if one is found.
load_dotenv()

# Fail fast at import time when the Cohere credentials are missing.
COHERE_API_KEY = os.environ.get('COHERE_API_KEY')
if not COHERE_API_KEY:
    raise ValueError("Cohere API key not found. Please set the COHERE_API_KEY environment variable.")

# Shared client used for every generation request in this module.
cohere_client = cohere.Client(COHERE_API_KEY)
|
|
|
|
|
# (display label, language code) pairs offered in the language dropdown;
# the selected code is what ends up being passed to gTTS as ``lang``.
language_options = [
    ("English", "en"),
    ("Spanish", "es"),
    ("French", "fr"),
    ("German", "de"),
    ("Italian", "it"),
    ("Chinese", "zh-CN"),
    ("Japanese", "ja"),
    ("Hindi", "hi"),
]
|
|
|
|
|
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file: The PDF to read; passed straight to ``PdfReader``.

    Returns:
        The text of all pages that yielded any text, in page order and
        separated by a newline. An empty string when no page yields text.
    """
    reader = PdfReader(pdf_file)
    extracted = (page.extract_text() for page in reader.pages)
    # Pages that yield nothing are skipped. The newline separator keeps the
    # last word of one page from fusing with the first word of the next,
    # and join() avoids rebuilding the string once per page.
    return "\n".join(chunk for chunk in extracted if chunk)
|
|
|
|
|
def text_to_speech(text, language_code):
    """Synthesize *text* with gTTS into a temporary MP3 file.

    Args:
        text: Text to speak. Must be a non-empty string.
        language_code: Language code forwarded to gTTS as ``lang``.

    Returns:
        Path of the generated ``.mp3`` file, or ``None`` when *text* is not a
        non-empty string or when synthesis/writing fails. The file is created
        with ``delete=False``, so it outlives this call; removing it is left
        to the caller.
    """
    if not text or not isinstance(text, str):
        logger.error("No valid text available for speech conversion.")
        return None

    temp_audio_path = None
    try:
        tts = gTTS(text, lang=language_code)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_path = temp_audio_file.name
            # Stream straight into the file instead of buffering the whole
            # clip in memory first and copying it over.
            tts.write_to_fp(temp_audio_file)
        return temp_audio_path
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Error during text-to-speech conversion: {e}")
        # The file was created with delete=False; do not leave a partial
        # file behind when synthesis or writing failed.
        if temp_audio_path is not None:
            try:
                os.remove(temp_audio_path)
            except OSError:
                pass
        return None
|
|
|
|
|
def pdf_to_audio(pdf_file, language_code):
    """Turn a PDF into Cohere-generated text plus an audio rendering of it.

    The text extracted from the PDF is sent unchanged as the prompt to the
    Cohere ``generate`` call; the first generation is then converted to
    speech with ``text_to_speech``.

    Args:
        pdf_file: The uploaded PDF, handed to ``extract_text_from_pdf``.
            ``None`` (nothing uploaded) is reported back to the user.
        language_code: Language code handed to ``text_to_speech``.

    Returns:
        A ``(message, audio_path)`` tuple. On success ``message`` is the
        generated text and ``audio_path`` is the path of the audio file. On
        failure ``message`` is a user-facing error string and ``audio_path``
        is ``None``; exceptions raised while processing are caught, logged
        and reported through ``message``.
    """
    # Nothing uploaded: give an actionable message instead of falling through
    # to the generic failure path below.
    if pdf_file is None:
        return "Please upload a PDF file.", None

    try:
        text = extract_text_from_pdf(pdf_file)

        if not text.strip():
            logger.error("The PDF contains no extractable text.")
            return "The PDF contains no extractable text. Please try a different file.", None

        response = cohere_client.generate(
            model='c4ai-aya-23',
            prompt=text,
            max_tokens=500,
        )

        if not response or not response.generations:
            logger.error("Cohere API did not return a valid response.")
            return "Error: Cohere API did not return a valid response.", None

        # Only the first generation is used.
        processed_text = response.generations[0].text.strip()

        if not processed_text:
            logger.error("Cohere generated an empty response.")
            return "Error: Cohere generated an empty response.", None

        audio_file_path = text_to_speech(processed_text, language_code)

        # text_to_speech logs its own failure; just surface it to the user.
        if audio_file_path is None:
            return "Error: Failed to generate speech from the provided text.", None

        return processed_text, audio_file_path
    except Exception as e:
        # Top-level boundary for the UI callback: keep the traceback in the
        # log and show the user a generic message.
        logger.exception(f"Error during PDF to audio conversion: {e}")
        return "An error occurred while processing the PDF.", None
|
|
|
|
|
def gradio_interface(pdf_file, language_code):
    """Gradio callback: hand the upload and chosen language to ``pdf_to_audio``.

    Returns:
        Exactly what ``pdf_to_audio`` returns for the same arguments.
    """
    result = pdf_to_audio(pdf_file, language_code)
    return result
|
|
|
|
|
# Web UI: a file upload and a language dropdown in, generated text and its
# audio out. The server is started as soon as this module is executed.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=[
        "file",
        gr.Dropdown(choices=language_options, label="Select Language"),
    ],
    outputs=["text", "audio"],
    title="PDF to Audio using Cohere (Multi-language)",
)
demo.launch(debug=True)
|
|