Spaces:

jorgegn29
/

ia_resume_bot

Sleeping

App Files Files Community

ia_resume_bot / resumen_utils.py

jorgegn29

Upload resumen_utils.py

a3a7e8b verified about 1 month ago

raw

history blame contribute delete

3.71 kB

	import re
	from langdetect import detect
	from deep_translator import GoogleTranslator
	from fpdf import FPDF
	from transformers import pipeline
	import youtube_transcript_api

	def detectar_idioma(texto):
	    """Detect the language of ``texto`` and return its language code.

	    Delegates to ``langdetect.detect``. Spanish (``"es"``) is the fallback
	    whenever the input is blank, is not a string, or detection fails.

	    Args:
	        texto: Text whose language should be detected.

	    Returns:
	        The code reported by ``detect``, or ``"es"`` as the fallback.
	    """
	    # Blank or non-text input carries nothing to detect; skip the library call.
	    if not isinstance(texto, str) or not texto.strip():
	        return "es"
	    try:
	        return detect(texto)
	    except Exception:
	        # Previously a bare ``except:``, which also swallowed
	        # KeyboardInterrupt and SystemExit.
	        return "es"

	def traducir_texto(texto, idioma_origen='auto', idioma_destino='es'):
	    """Translate ``texto`` with ``GoogleTranslator``, best effort.

	    Args:
	        texto: Text to translate.
	        idioma_origen: Source language passed to the translator
	            (``'auto'`` by default).
	        idioma_destino: Target language passed to the translator
	            (``'es'`` by default).

	    Returns:
	        The translated text. The original ``texto`` is returned unchanged
	        when it is blank, when the translator raises, or when the
	        translator yields ``None``.
	    """
	    # Nothing to translate: avoid a pointless remote call and error log.
	    if not isinstance(texto, str) or not texto.strip():
	        return texto
	    try:
	        traducido = GoogleTranslator(
	            source=idioma_origen, target=idioma_destino
	        ).translate(texto)
	    except Exception as e:
	        print(f"Error en traducción: {e}")
	        return texto
	    # Guard against a None result so callers always get text back.
	    return texto if traducido is None else traducido

	# Lazily-built summarization pipeline shared by every call.
	_SUMMARIZER = None


	def _obtener_summarizer():
	    """Return the shared summarization pipeline, creating it on first use."""
	    global _SUMMARIZER
	    if _SUMMARIZER is None:
	        _SUMMARIZER = pipeline("summarization")
	    return _SUMMARIZER


	def resumir_texto(texto):
	    """Summarize ``texto`` with the transformers summarization pipeline.

	    The pipeline is created once and reused, because constructing it on
	    every call reloads the model. Input longer than the model accepts is
	    truncated instead of raising.

	    Args:
	        texto: Text to summarize.

	    Returns:
	        The ``summary_text`` of the first pipeline result. The original
	        ``texto`` is returned unchanged when it is blank or when
	        summarization fails.
	    """
	    # Blank input has nothing to summarize.
	    if not isinstance(texto, str) or not texto.strip():
	        return texto
	    try:
	        summarizer = _obtener_summarizer()
	        resumen = summarizer(
	            texto,
	            max_length=150,
	            min_length=30,
	            do_sample=False,
	            truncation=True,  # over-long inputs used to fall into the except branch
	        )
	        return resumen[0]['summary_text']
	    except Exception as e:
	        print(f"Error en resumen: {e}")
	        return texto

	def exportar_a_pdf(texto, nombre_archivo="resumen.pdf"):
	    """Write ``texto`` to a single-column PDF file, one cell per input line.

	    Args:
	        texto: Text to export; it is split on ``'\\n'``.
	        nombre_archivo: Destination path of the PDF
	            (``"resumen.pdf"`` by default).

	    Returns:
	        None. Failures are printed, not raised.
	    """
	    try:
	        pdf = FPDF()
	        pdf.add_page()
	        pdf.set_auto_page_break(auto=True, margin=15)
	        pdf.set_font("Arial", size=12)
	        for linea in texto.split('\n'):
	            # The core "Arial" font only covers Latin-1. Replace anything
	            # outside it (curly quotes, emoji, ...) with "?" so a single
	            # character cannot abort the whole export.
	            linea_segura = linea.encode("latin-1", "replace").decode("latin-1")
	            pdf.multi_cell(0, 10, linea_segura)
	        pdf.output(nombre_archivo)
	    except Exception as e:
	        print(f"Error al exportar PDF: {e}")

	# Matches the video id in watch URLs (``?v=`` / ``&v=`` or a bare ``v=``),
	# ``youtu.be/<id>`` short links and ``/shorts/``, ``/embed/``, ``/live/`` paths.
	# The id stops at ``&``, ``#``, ``?``, ``/`` or whitespace so fragments and
	# extra parameters are never captured as part of it.
	_PATRON_ID_YOUTUBE = re.compile(
	    r"(?:(?:^|[?&])v=|youtu\.be/|/(?:shorts|embed|live)/)([^&#?/\s]+)"
	)


	def transcribir_youtube(url):
	    """Fetch the transcript of a YouTube video as plain text.

	    Args:
	        url: YouTube URL (watch, ``youtu.be``, shorts, embed or live form).

	    Returns:
	        A ``(texto, error)`` tuple. On success ``texto`` holds the joined
	        transcript and ``error`` is ``None``. On failure ``texto`` is
	        ``""`` and ``error`` holds a message.
	    """
	    try:
	        coincidencia = _PATRON_ID_YOUTUBE.search(url)
	        if not coincidencia:
	            return "", "URL de YouTube no válida."
	        video_id = coincidencia.group(1)
	        # NOTE(review): newer youtube-transcript-api releases appear to change
	        # this API (instance methods, snippet objects) — confirm against the
	        # pinned version.
	        transcript_list = youtube_transcript_api.YouTubeTranscriptApi.list_transcripts(video_id)
	        # Prefer Spanish, then English.
	        transcript = transcript_list.find_transcript(['es', 'en'])
	        texto = " ".join(t['text'] for t in transcript.fetch())
	        return texto, None
	    except Exception as e:
	        return "", f"Error al obtener transcripción: {e}"

	def generar_preguntas_clave(texto):
	    """Generate key questions about ``texto`` through an ``"e2e-qg"`` pipeline.

	    Args:
	        texto: Source text handed to the question-generation pipeline.

	    Returns:
	        A list of question strings. Dict items contribute their
	        ``'question'`` value, any other item is stringified, and a non-list
	        result becomes a single stringified entry. An empty list is returned
	        when the pipeline cannot be loaded or fails while running.

	    NOTE(review): ``"e2e-qg"`` does not look like a built-in transformers
	    task name — confirm a custom pipeline is registered, otherwise this
	    always takes the load-error path.
	    """
	    # Loading and running are guarded separately so each failure logs its
	    # own message.
	    try:
	        generador = pipeline("e2e-qg")
	    except Exception as e:
	        print(f"Error al cargar pipeline de preguntas: {e}")
	        return []

	    try:
	        salida = generador(texto)
	    except Exception as e:
	        print(f"Error al generar preguntas: {e}")
	        return []

	    # A non-list result is wrapped as a single stringified entry.
	    if not isinstance(salida, list):
	        return [str(salida)]

	    return [
	        elemento['question']
	        if isinstance(elemento, dict) and 'question' in elemento
	        else str(elemento)
	        for elemento in salida
	    ]

	# Lazily-built NER pipeline shared by every call.
	_NER_PIPELINE = None


	def _obtener_ner():
	    """Return the shared NER pipeline, creating it on first use."""
	    global _NER_PIPELINE
	    if _NER_PIPELINE is None:
	        _NER_PIPELINE = pipeline("ner", grouped_entities=True)
	    return _NER_PIPELINE


	def extraer_entidades(texto):
	    """Extract named entities from ``texto`` grouped by entity label.

	    The pipeline is created once and reused, because constructing it on
	    every call reloads the model.

	    Args:
	        texto: Text to analyse.

	    Returns:
	        A dict mapping each label (``'entity_group'``, falling back to
	        ``'entity'``) to a sorted list of the distinct words found for it.
	        An empty dict is returned when ``texto`` is blank or extraction fails.
	    """
	    # Blank input cannot contain entities.
	    if not isinstance(texto, str) or not texto.strip():
	        return {}
	    try:
	        resultados = _obtener_ner()(texto)
	        entidades = {}
	        for entidad in resultados:
	            label = entidad.get('entity_group', entidad.get('entity'))
	            # A set per label drops repeated words.
	            entidades.setdefault(label, set()).add(entidad['word'])
	        # Sort so the output order is deterministic; list(set) order is not.
	        return {k: sorted(v) for k, v in entidades.items()}
	    except Exception as e:
	        print(f"Error al extraer entidades: {e}")
	        return {}

	def texto_a_voz(texto, lang="es"):
	    """Render ``texto`` as speech and return it as an HTML audio player.

	    Args:
	        texto: Text to synthesise with gTTS.
	        lang: Language code handed to gTTS (``"es"`` by default).

	    Returns:
	        An ``<audio>`` element string embedding the MP3 as a base64 data
	        URI, or ``""`` when synthesis fails.
	    """
	    try:
	        import base64
	        import io
	        from gtts import gTTS

	        # Synthesise into memory; no temporary file is written.
	        buffer_mp3 = io.BytesIO()
	        gTTS(text=texto, lang=lang).write_to_fp(buffer_mp3)
	        encoded = base64.b64encode(buffer_mp3.getvalue()).decode()
	        return f'<audio controls autoplay><source src="data:audio/mp3;base64,{encoded}" type="audio/mp3"></audio>'
	    except Exception as e:
	        print(f"Error en texto a voz: {e}")
	        return ""