Spaces:
Sleeping
Sleeping
import json | |
import pandas as pd | |
from datetime import datetime | |
import tempfile | |
class OutputGenerator:
    """Render meeting-minutes data as Markdown, a JSON file and DataFrames.

    The methods index their inputs as follows:

    * ``transcript``: sequence of dicts with keys ``speaker``, ``start``,
      ``end`` (numbers, formatted with ``:.1f``) and ``text``.
    * ``extracted_info``: dict with keys ``keywords`` (sequence of
      ``(term, score)`` pairs), ``action_items`` and ``decisions``.
    * each action item: dict with ``text``, ``speaker``, ``timestamp`` and
      ``entities`` (a dict holding ``persons`` and ``dates`` lists).
    * each decision: dict with ``text``, ``speaker`` and ``timestamp``.
    """

    def __init__(self):
        # No method of this class reads ``self.templates``; they are loaded
        # eagerly so external callers can pick them up from the instance.
        self.templates = {
            'markdown': self._load_markdown_template(),
            'html': self._load_html_template()
        }

    def generate_all_formats(self, transcript, summary, extracted_info):
        """Generate the output in every supported format.

        Args:
            transcript: Transcript segments (see the class docstring).
            summary: Summary text embedded in the Markdown and JSON outputs.
            extracted_info: Dict with ``keywords``, ``action_items`` and
                ``decisions``.

        Returns:
            dict with keys ``markdown`` (str), ``json`` (path of the written
            JSON file), ``transcript_table``, ``action_items_table`` and
            ``decisions_table`` (pandas DataFrames).

        Raises:
            KeyError: If ``extracted_info`` or any item lacks a required key.
        """
        # Take one clock reading so the date and the time can never describe
        # different moments (e.g. when the call straddles midnight).
        now = datetime.now()
        action_items = extracted_info['action_items']
        decisions = extracted_info['decisions']

        meeting_data = {
            'date': now.strftime('%d %B %Y'),
            'time': now.strftime('%H:%M'),
            'duration': self._calculate_duration(transcript),
            'participants': self._extract_participants(transcript),
            'summary': summary,
            'keywords': extracted_info['keywords'],
            'action_items': action_items,
            'decisions': decisions,
            'transcript': transcript
        }

        return {
            'markdown': self._generate_markdown(meeting_data),
            'json': self._generate_json(meeting_data),
            'transcript_table': self._generate_transcript_table(transcript),
            'action_items_table': self._generate_action_items_table(
                action_items
            ),
            'decisions_table': self._generate_decisions_table(decisions)
        }

    def _generate_markdown(self, data):
        """Build the meeting minutes as a single Markdown string.

        Args:
            data: The ``meeting_data`` dict assembled by
                ``generate_all_formats``.

        Returns:
            The Markdown document, terminated by a newline.
        """
        # NOTE(review): the glyphs after '#'/'##' are reproduced exactly as
        # found; they look like mis-decoded emoji -- confirm the intended
        # characters against the original file encoding.
        lines = [
            f"# π Notulensi Rapat - {data['date']}",
            "## π Informasi Rapat",
            f"- **Tanggal**: {data['date']}",
            f"- **Waktu**: {data['time']}",
            f"- **Durasi**: {data['duration']}",
            f"- **Peserta**: {', '.join(data['participants'])}",
            "## π Ringkasan Eksekutif",
            f"{data['summary']}",
            "## π― Topik Utama",
            self._format_keywords(data['keywords']),
            "## β Action Items",
            self._format_action_items_md(data['action_items']),
            "## π Keputusan Penting",
            self._format_decisions_md(data['decisions']),
            "## π¬ Transkrip Lengkap",
            self._format_transcript_md(data['transcript']),
            "---",
            "*Dokumen ini dihasilkan secara otomatis menggunakan AI Meeting Minutes Generator*",
            "",  # keeps the trailing newline of the document
        ]
        return '\n'.join(lines)

    def _generate_json(self, data):
        """Serialise the meeting data to a temporary ``.json`` file.

        The file is created with ``delete=False``; removing it is left to the
        caller.

        Args:
            data: The ``meeting_data`` dict assembled by
                ``generate_all_formats``.

        Returns:
            Path of the written file.
        """
        json_data = {
            'metadata': {
                'generated_at': datetime.now().isoformat(),
                'version': '1.0'
            },
            'meeting_info': {
                'date': data['date'],
                'duration': data['duration'],
                'participants': data['participants']
            },
            'content': {
                'summary': data['summary'],
                # Only the first five keyword terms are exported.
                'keywords': [kw[0] for kw in data['keywords'][:5]],
                'action_items': [
                    {
                        'description': item['text'],
                        'assigned_to': item['speaker'],
                        'timestamp': item['timestamp'],
                        'mentioned_persons': item['entities']['persons'],
                        'mentioned_dates': item['entities']['dates']
                    }
                    for item in data['action_items']
                ],
                'decisions': [
                    {
                        'description': dec['text'],
                        'made_by': dec['speaker'],
                        'timestamp': dec['timestamp']
                    }
                    for dec in data['decisions']
                ]
            },
            'full_transcript': [
                {
                    'speaker': seg['speaker'],
                    'start_time': seg['start'],
                    'end_time': seg['end'],
                    'text': seg['text']
                }
                for seg in data['transcript']
            ]
        }

        # ensure_ascii=False emits raw non-ASCII characters, so the file must
        # be opened as UTF-8 explicitly instead of with the locale default.
        # The context manager closes the handle even if json.dump raises.
        with tempfile.NamedTemporaryFile(
            mode='w',
            suffix='.json',
            delete=False,
            encoding='utf-8'
        ) as temp_file:
            json.dump(json_data, temp_file, indent=2, ensure_ascii=False)
        return temp_file.name

    def _generate_transcript_table(self, transcript):
        """Build the transcript table (columns Waktu / Pembicara / Teks).

        Returns:
            pandas DataFrame with one row per transcript segment.
        """
        rows = [
            [
                f"{seg['start']:.1f}s - {seg['end']:.1f}s",
                seg['speaker'],
                seg['text']
            ]
            for seg in transcript
        ]
        return pd.DataFrame(rows, columns=['Waktu', 'Pembicara', 'Teks'])

    def _generate_action_items_table(self, action_items):
        """Build the action-items table.

        Returns:
            pandas DataFrame with columns ``Action Item``,
            ``Penanggung Jawab`` and ``Timestamp``.
        """
        rows = [
            [
                item['text'],
                ', '.join(self._resolve_assignees(item)),
                item['timestamp']
            ]
            for item in action_items
        ]
        return pd.DataFrame(
            rows,
            columns=['Action Item', 'Penanggung Jawab', 'Timestamp']
        )

    def _generate_decisions_table(self, decisions):
        """Build the decisions table.

        Returns:
            pandas DataFrame with columns ``Keputusan``, ``Pembicara`` and
            ``Timestamp``.
        """
        rows = [
            [dec['text'], dec['speaker'], dec['timestamp']]
            for dec in decisions
        ]
        return pd.DataFrame(
            rows,
            columns=['Keputusan', 'Pembicara', 'Timestamp']
        )

    # Helper methods

    def _resolve_assignees(self, item):
        """Return the persons mentioned in an action item, else its speaker."""
        return item['entities']['persons'] or [item['speaker']]

    def _calculate_duration(self, transcript):
        """Format the latest segment end time as ``M:SS`` or ``H:MM:SS``.

        Returns:
            ``"0:00"`` for an empty transcript; otherwise the largest ``end``
            value, read as seconds, rendered as a clock string.
        """
        if not transcript:
            return "0:00"
        # Use the maximum end time rather than the last segment's: with
        # overlapping or unordered segments the last one need not end last.
        total_seconds = max(seg['end'] for seg in transcript)
        hours = int(total_seconds // 3600)
        minutes = int((total_seconds % 3600) // 60)
        seconds = int(total_seconds % 60)
        if hours > 0:
            return f"{hours}:{minutes:02d}:{seconds:02d}"
        return f"{minutes}:{seconds:02d}"

    def _extract_participants(self, transcript):
        """Return the distinct speaker labels in sorted order."""
        return sorted({seg['speaker'] for seg in transcript})

    def _format_keywords(self, keywords):
        """Render the first five ``(term, score)`` pairs as a bullet list."""
        return '\n'.join(
            f"- **{kw[0]}** (score: {kw[1]:.2f})" for kw in keywords[:5]
        )

    def _format_action_items_md(self, action_items):
        """Render action items as a numbered Markdown list.

        Returns a placeholder sentence when there are no action items.
        """
        if not action_items:
            return "*Tidak ada action items yang terdeteksi*"
        formatted = []
        for i, item in enumerate(action_items, 1):
            assignees = self._resolve_assignees(item)
            formatted.append(
                f"{i}. {item['text']}\n"
                f" - **Penanggung Jawab**: {', '.join(assignees)}\n"
                f" - **Waktu**: {item['timestamp']}"
            )
        return '\n\n'.join(formatted)

    def _format_decisions_md(self, decisions):
        """Render decisions as a numbered Markdown list.

        Returns a placeholder sentence when there are no decisions.
        """
        if not decisions:
            return "*Tidak ada keputusan yang terdeteksi*"
        formatted = [
            f"{i}. {dec['text']}\n"
            f" - **Diputuskan oleh**: {dec['speaker']}\n"
            f" - **Waktu**: {dec['timestamp']}"
            for i, dec in enumerate(decisions, 1)
        ]
        return '\n\n'.join(formatted)

    def _format_transcript_md(self, transcript):
        """Render the transcript as block quotes grouped by speaker.

        A bold speaker header (with the segment start time) is emitted each
        time the speaker changes; consecutive segments of the same speaker
        share one header.
        """
        formatted = []
        current_speaker = None
        for seg in transcript:
            if seg['speaker'] != current_speaker:
                # The leading newline puts a blank line before each header.
                formatted.append(
                    f"\n**{seg['speaker']}** ({seg['start']:.1f}s):"
                )
                current_speaker = seg['speaker']
            formatted.append(f"> {seg['text']}")
        return '\n'.join(formatted)

    def _load_markdown_template(self):
        """Return the Markdown wrapper template with a ``{content}`` slot."""
        # The template can be customised.
        return (
            "# Meeting Minutes Template\n"
            "{content}\n"
        )

    def _load_html_template(self):
        """Return the HTML wrapper template with a ``{content}`` slot."""
        # NOTE(review): the CSS rules contain literal braces, so
        # ``str.format`` raises on this template; fill the slot with
        # ``str.replace('{content}', ...)`` instead.
        return (
            "<!DOCTYPE html>\n"
            "<html>\n"
            "<head>\n"
            "<style>\n"
            "body { font-family: Arial, sans-serif; margin: 40px; }\n"
            "h1 { color: #333; }\n"
            ".metadata { background: #f0f0f0; padding: 15px; border-radius: 5px; }\n"
            ".action-item { background: #e8f5e9; padding: 10px; margin: 10px 0; border-left: 4px solid #4caf50; }\n"
            ".decision { background: #e3f2fd; padding: 10px; margin: 10px 0; border-left: 4px solid #2196f3; }\n"
            "</style>\n"
            "</head>\n"
            "<body>\n"
            "{content}\n"
            "</body>\n"
            "</html>"
        )