# Spaces: Sleeping  (status banner captured from the hosting page; not part of the program)
import io | |
import re | |
import os | |
import glob | |
import asyncio | |
import hashlib | |
import unicodedata | |
import streamlit as st | |
from PIL import Image | |
import fitz | |
import edge_tts | |
from reportlab.lib.pagesizes import A4 | |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle | |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
from reportlab.lib import colors | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.pdfbase.ttfonts import TTFont | |
from datetime import datetime | |
import pytz | |
# Configure the page before any other Streamlit call: wide layout, with the
# sidebar collapsed on first load.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
)
# Time flies when you're having function calls
def get_timestamp_prefix():
    """Return the current US/Central time as an upper-case file-name prefix.

    The layout is ``%a %m%d %I%M%p``: abbreviated weekday, month and day,
    then the 12-hour clock time followed by AM/PM. The whole string is
    upper-cased, e.g. ``MON 0304 0915AM``.
    """
    central_tz = pytz.timezone("US/Central")
    stamp = datetime.now(central_tz).strftime("%a %m%d %I%M%p")
    return stamp.upper()
# Because text needs a bath before being spoken
# Compiled once at import time. Each emoji may be followed by variation
# selectors (U+FE0F) or zero-width joiners (U+200D); they are consumed together
# with the emoji so no invisible residue is left in the spoken text.
_SPEECH_EMOJI_PATTERN = re.compile(
    r"(?:["
    r"\U0001F1E6-\U0001F1FF"  # regional indicators (flag emoji)
    r"\U0001F300-\U0001F5FF"
    r"\U0001F600-\U0001F64F"
    r"\U0001F680-\U0001F6FF"
    r"\U0001F700-\U0001F77F"
    r"\U0001F780-\U0001F7FF"
    r"\U0001F800-\U0001F8FF"
    r"\U0001F900-\U0001F9FF"
    r"\U0001FA00-\U0001FA6F"
    r"\U0001FA70-\U0001FAFF"
    r"\u2600-\u26FF"
    r"\u2700-\u27BF"
    r"][\uFE0F\u200D]*)+",
    flags=re.UNICODE,
)


def clean_for_speech(text):
    """Strip characters that should not be read aloud by the TTS engine.

    Removes every ``#`` (markdown heading marks) and every emoji, including
    the variation selectors / zero-width joiners that directly follow an
    emoji. Joiners that are not attached to an emoji are left untouched.

    Args:
        text: Raw markdown text.

    Returns:
        The cleaned text. Surrounding whitespace is not collapsed.
    """
    without_hashes = text.replace("#", "")
    return _SPEECH_EMOJI_PATTERN.sub("", without_hashes)
# Making robots talk so you don't have to
async def generate_audio(text, voice, filename):
    """Synthesize ``text`` with edge-tts and save the result to ``filename``.

    Args:
        text: Text to speak.
        voice: Voice name handed to ``edge_tts.Communicate``.
        filename: Path the audio is saved to.

    Returns:
        ``filename``, unchanged, so the caller can chain on the saved path.
    """
    await edge_tts.Communicate(text, voice).save(filename)
    return filename
# Detecting links like a digital bloodhound
# Plain URLs (http/https scheme or a bare "www." prefix), case-insensitive.
_URL_PATTERN = re.compile(
    r'(https?://|www\.)[^\s\[\]()<>{}]+(\.[^\s\[\]()<>{}]+)+(/[^\s\[\]()<>{}]*)?',
    re.IGNORECASE,
)
# Markdown links of the form [text](url).
_MD_LINK_PATTERN = re.compile(r'\[(.*?)\]\((https?://[^\s\[\]()<>{}]+)\)')
# Sentence punctuation that the URL pattern swallows but that is not part of
# the link target.
_URL_TRAILING_PUNCTUATION = '.,;:!?\'"'


def detect_and_convert_links(text):
    """Convert markdown links and bare URLs in ``text`` to ``<a href>`` tags.

    Markdown links ``[label](url)`` are converted first. Remaining bare URLs
    are then wrapped, unless they already sit inside an ``<a ...>`` element
    (either in its ``href`` attribute or in its label).

    Args:
        text: A single line (or block) of text.

    Returns:
        The text with links expressed as ``<a href="...">...</a>``. URLs that
        start with ``www.`` (any letter case) get an ``http://`` href prefix;
        trailing sentence punctuation is kept outside the link.
    """
    text = _MD_LINK_PATTERN.sub(r'<a href="\2">\1</a>', text)

    pieces = []
    cursor = 0
    # Running count of unclosed <a> tags across the WHOLE prefix. Counting
    # only since the previous match loses the open tag once the href URL has
    # been skipped, which would wrap a URL-shaped label in a nested anchor.
    open_anchors = 0
    while cursor < len(text):
        match = _URL_PATTERN.search(text, cursor)
        if not match:
            pieces.append(text[cursor:])
            break

        gap = text[cursor:match.start()]
        # The URL pattern excludes '<' and '>', so tags only occur in gaps.
        open_anchors += gap.count('<a') - gap.count('</a')

        if open_anchors > 0:
            # Already inside an anchor: copy through verbatim.
            pieces.append(text[cursor:match.end()])
        else:
            pieces.append(gap)
            raw = match.group(0)
            url = raw.rstrip(_URL_TRAILING_PUNCTUATION)
            trailing = raw[len(url):]
            # The pattern is case-insensitive, so the prefix check must be too.
            href = 'http://' + url if url.lower().startswith('www.') else url
            pieces.append(f'<a href="{href}">{url}</a>{trailing}')
        cursor = match.end()
    return ''.join(pieces)
# Making emojis wear the right font costume
def apply_emoji_font(text, emoji_font):
    """Wrap emoji runs in ``emoji_font`` and all other text in DejaVuSans.

    ``<a href>`` links and ``<b>`` spans are swapped for placeholders while
    the text is split into font segments, then restored so that the
    surrounding ``<font>`` tags stay properly closed around them.

    Args:
        text: Line of markup (may contain ``<a>`` and ``<b>`` tags).
        emoji_font: Font face name used for emoji runs.

    Returns:
        The markup string with ``<font face=...>`` wrappers applied.
    """
    saved_links = []

    def stash_link(m):
        # Remember (url, label) and leave an indexed placeholder behind.
        saved_links.append((m.group(1), m.group(2)))
        return f"###LINK_{len(saved_links) - 1}###"

    text = re.sub(r'<a\s+href="([^"]+)">(.*?)</a>', stash_link, text)
    text = re.sub(r'<b>(.*?)</b>', r'###BOLD_START###\1###BOLD_END###', text)

    emoji_runs = re.compile(
        r"([\U0001F300-\U0001F5FF"
        r"\U0001F600-\U0001F64F"
        r"\U0001F680-\U0001F6FF"
        r"\U0001F700-\U0001F77F"
        r"\U0001F780-\U0001F7FF"
        r"\U0001F800-\U0001F8FF"
        r"\U0001F900-\U0001F9FF"
        r"\U0001FAD0-\U0001FAD9"  # additional range if needed
        r"\U0001FA00-\U0001FA6F"
        r"\U0001FA70-\U0001FAFF"
        r"\u2600-\u26FF"
        r"\u2700-\u27BF]+)"
    )

    # With a single capture group, split() alternates plain text (even
    # indices) with the captured emoji runs (odd indices).
    wrapped = []
    for idx, chunk in enumerate(emoji_runs.split(text)):
        if idx % 2:
            glyphs = unicodedata.normalize('NFC', chunk)
            wrapped.append(f'<font face="{emoji_font}">{glyphs}</font>')
        elif chunk:
            wrapped.append(f'<font face="DejaVuSans">{chunk}</font>')
    markup = ''.join(wrapped)

    # Bold markers always sit inside a DejaVuSans segment: close it, emit the
    # bold tag, and reopen the font inside/after it.
    markup = markup.replace('###BOLD_START###', '</font><b><font face="DejaVuSans">')
    markup = markup.replace('###BOLD_END###', '</font></b><font face="DejaVuSans">')

    for idx, (url, label) in enumerate(saved_links):
        token = f"###LINK_{idx}###"
        # Only restore when the placeholder occurs exactly once.
        if markup.count(token) != 1:
            continue
        head, _, tail = markup.partition(token)
        anchor = f'<a href="{url}">{label}</a>'
        if head.rfind('<font') > head.rfind('</font>'):
            # Placeholder is inside an open font tag: close it around the link.
            anchor = f'</font>{anchor}<font face="DejaVuSans">'
        markup = head + anchor + tail
    return markup
# Converting markdown to PDF content, because PDFs never go out of style
def markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers):
    """Turn markdown text into a list of ReportLab-ready markup lines.

    Blank lines and level-one headings (lines starting with ``# ``) are
    dropped. Links are converted on every remaining line; ``**bold**`` becomes
    ``<b>bold</b>`` when ``render_with_bold`` is set; lines beginning with
    ``<digits>. `` are wrapped entirely in ``<b>`` when ``auto_bold_numbers``
    is set.

    Returns:
        A ``(lines, line_count)`` tuple.
    """
    numbered = re.compile(r'^\d+\.\s')
    pdf_content = []
    for raw_line in markdown_text.strip().split('\n'):
        entry = raw_line.strip()
        if entry == "" or entry.startswith('# '):
            continue

        # Links first, so later formatting never touches raw URLs.
        entry = detect_and_convert_links(entry)

        if render_with_bold:
            entry = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', entry)

        if auto_bold_numbers and numbered.match(entry):
            # A numbered line starts with a digit, so it is never already
            # wrapped in <b>...</b>; it always gets wrapped here. Complete
            # inner bold spans are flattened first to avoid nested <b> tags.
            if "<b>" in entry and "</b>" in entry:
                entry = re.sub(r'</?b>', '', entry)
            entry = f"<b>{entry}</b>"

        pdf_content.append(entry)
    return pdf_content, len(pdf_content)
# Building PDFs like it's your second job
def create_pdf(markdown_text, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns):
    """Render markdown text as a multi-column PDF on a double-width A4 page.

    Args:
        markdown_text: Markdown source to lay out.
        base_font_size: Base font size in points.
        render_with_bold: Convert ``**text**`` to bold markup.
        auto_bold_numbers: Bold lines that start with ``<digits>. ``.
        enlarge_numbered: Use ``base_font_size + 1`` for numbered bold lines.
        num_columns: Number of table columns; values below 1 are treated as 1.

    Returns:
        The PDF as ``bytes``, or ``None`` when font registration fails (the
        problem is reported with ``st.error``).
    """
    # The column logic below divides by and indexes with this value.
    num_columns = max(1, num_columns)

    # Register fonts first so a font problem aborts before any layout work.
    emoji_font = "NotoEmoji-Bold"
    try:
        available_font_files = glob.glob("*.ttf")
        if not available_font_files:
            st.error("No .ttf font files found in the current directory.")
            return None
        emoji_font_path = next(
            (path for path in available_font_files if "NotoEmoji-Bold" in path), None
        )
        if emoji_font_path:
            pdfmetrics.registerFont(TTFont("NotoEmoji-Bold", emoji_font_path))
        else:
            # No emoji font file present: fall back to the text font rather
            # than referencing a face that was never registered.
            emoji_font = "DejaVuSans"
        pdfmetrics.registerFont(TTFont("DejaVuSans", "DejaVuSans.ttf"))
    except Exception as e:
        st.error(f"Font registration error: {e}")
        return None

    buffer = io.BytesIO()
    page_width = A4[0] * 2
    page_height = A4[1]
    doc = SimpleDocTemplate(
        buffer, pagesize=(page_width, page_height),
        leftMargin=36, rightMargin=36, topMargin=36, bottomMargin=36,
    )
    styles = getSampleStyleSheet()
    spacer_height = 10
    pdf_content, total_lines = markdown_to_pdf_content(markdown_text, render_with_bold, auto_bold_numbers)

    # Paragraph styles for the three kinds of line.
    item_style = ParagraphStyle(
        'ItemStyle', parent=styles['Normal'], fontName="DejaVuSans",
        fontSize=base_font_size, leading=base_font_size * 1.15, spaceAfter=1,
        linkUnderline=True,  # underline links
    )
    numbered_size = base_font_size + 1 if enlarge_numbered else base_font_size
    numbered_bold_style = ParagraphStyle(
        'NumberedBoldStyle', parent=styles['Normal'], fontName=emoji_font,
        fontSize=numbered_size, leading=numbered_size * 1.15, spaceAfter=1,
        linkUnderline=True,  # underline links
    )
    section_style = ParagraphStyle(
        'SectionStyle', parent=styles['Heading2'], fontName="DejaVuSans",
        textColor=colors.darkblue, fontSize=base_font_size * 1.1, leading=base_font_size * 1.32, spaceAfter=2,
        linkUnderline=True,  # underline links
    )

    # Distribute lines across columns, filling each up to the average length
    # before moving on; the last column absorbs any remainder.
    columns = [[] for _ in range(num_columns)]
    lines_per_column = total_lines / num_columns
    current_line_count = 0
    current_column = 0
    for item in pdf_content:
        if current_line_count >= lines_per_column and current_column < num_columns - 1:
            current_column += 1
            current_line_count = 0
        columns[current_column].append(item)
        current_line_count += 1

    # Turn each line into a Paragraph with the matching style.
    number_pattern = re.compile(r'^\d+\.\s')
    column_cells = [[] for _ in range(num_columns)]
    for col_idx, column in enumerate(columns):
        for item in column:
            inner = item[3:-4]
            # "Fully bold" means one <b>...</b> pair around the whole line.
            # A line like "<b>a</b> x <b>b</b>" also starts and ends with the
            # tags, but stripping them would leave unbalanced markup.
            fully_bold = (
                item.startswith("<b>") and item.endswith("</b>")
                and "<b>" not in inner and "</b>" not in inner
            )
            if fully_bold:
                content = inner.strip()
                style = numbered_bold_style if number_pattern.match(content) else section_style
                column_cells[col_idx].append(Paragraph(apply_emoji_font(content, emoji_font), style))
            else:
                column_cells[col_idx].append(Paragraph(apply_emoji_font(item, "DejaVuSans"), item_style))

    # Pad shorter columns so every table row has a cell in each column. Each
    # padding cell is its own Paragraph; flowables must not be shared.
    max_cells = max(len(cells) for cells in column_cells)
    for cells in column_cells:
        cells.extend(Paragraph("", item_style) for _ in range(max_cells - len(cells)))

    story = [Spacer(1, spacer_height)]
    # ReportLab rejects a table with no rows by default, so empty input
    # produces a page containing only the spacer.
    if max_cells:
        col_width = (page_width - 72) / num_columns
        table_data = list(zip(*column_cells))
        table = Table(table_data, colWidths=[col_width] * num_columns, hAlign='CENTER')
        table.setStyle(TableStyle([
            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
            ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
            ('BACKGROUND', (0, 0), (-1, -1), colors.white),
            ('GRID', (0, 0), (-1, -1), 0, colors.white),
            ('LINEAFTER', (0, 0), (num_columns-1, -1), 0.5, colors.grey),
            ('LEFTPADDING', (0, 0), (-1, -1), 2),
            ('RIGHTPADDING', (0, 0), (-1, -1), 2),
            ('TOPPADDING', (0, 0), (-1, -1), 1),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 1),
        ]))
        story.append(table)

    doc.build(story)
    buffer.seek(0)
    return buffer.getvalue()
# Converting PDFs to images, because we can't leave well enough alone
def pdf_to_image(pdf_bytes):
    """Render every page of a PDF to a PIL image at 2x zoom.

    Args:
        pdf_bytes: The PDF document as bytes. ``None`` or empty input (for
            example when PDF generation failed upstream) yields ``None``
            without reporting a second error.

    Returns:
        A list of ``PIL.Image`` objects, one per page, or ``None`` when there
        is nothing to render or rendering fails (reported with ``st.error``).
    """
    if not pdf_bytes:
        return None
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            images = []
            for page in doc:
                pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
                images.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
        return images
    except Exception as e:
        st.error(f"Failed to render PDF preview: {e}")
        return None
# -- Markdown File Selection --
# Every *.md file in the working directory except README.md is offered,
# listed by its base name without the extension.
md_files = [f for f in glob.glob("*.md") if os.path.basename(f) != "README.md"]
md_options = [os.path.splitext(os.path.basename(f))[0] for f in md_files]

# The main Streamlit show begins here
# NOTE(review): the emoji in the download-button labels below look
# mis-encoded; they are kept byte-for-byte because the intended glyphs cannot
# be recovered from this file.
with st.sidebar:
    st.markdown("### PDF Options")
    if md_options:
        selected_md = st.selectbox("Select Markdown File", options=md_options, index=0)
        with open(f"{selected_md}.md", "r", encoding="utf-8") as f:
            st.session_state.markdown_content = f.read()
    else:
        st.warning("No markdown file found. Please add one to your folder.")
        selected_md = None
        st.session_state.markdown_content = ""

    available_font_files = {os.path.splitext(os.path.basename(f))[0]: f for f in glob.glob("*.ttf")}
    font_names = list(available_font_files)
    selected_font_name = st.selectbox(
        "Select Emoji Font", options=font_names,
        index=font_names.index("NotoEmoji-Bold") if "NotoEmoji-Bold" in available_font_files else 0)
    base_font_size = st.slider("Font Size (points)", min_value=6, max_value=16, value=8, step=1)
    render_with_bold = st.checkbox("Render with Bold Formatting (remove ** markers)", value=True, key="render_with_bold")
    auto_bold_numbers = st.checkbox("Auto Bold Numbered Lines", value=True, key="auto_bold_numbers")
    enlarge_numbered = st.checkbox("Enlarge Font Size for Numbered Lines", value=True, key="enlarge_numbered")
    num_columns = st.selectbox("Number of Columns", options=[1, 2, 3, 4, 5, 6], index=3)

    # Use the file's content for editing. The widget key includes the file,
    # font and column selection, so changing any of them creates a fresh
    # text area seeded with the current content.
    edited_markdown = st.text_area("Modify the markdown content below:", value=st.session_state.markdown_content, height=300, key=f"markdown_{selected_md}_{selected_font_name}_{num_columns}")
    if st.button("Update PDF"):
        st.session_state.markdown_content = edited_markdown
        if selected_md:
            # Persist the edit; the file is re-read at the top of every run.
            with open(f"{selected_md}.md", "w", encoding="utf-8") as f:
                f.write(edited_markdown)
        st.rerun()

    # Timestamp prefix shared by all downloadable file names in this run.
    prefix = get_timestamp_prefix()

    st.download_button(
        label="πΎπ Save Markdown",
        data=st.session_state.markdown_content,
        file_name=f"{prefix} {selected_md}.md" if selected_md else f"{prefix} default.md",
        mime="text/markdown"
    )

    st.markdown("### Text-to-Speech")
    VOICES = ["en-US-AriaNeural", "en-US-JennyNeural", "en-GB-SoniaNeural", "en-US-GuyNeural", "en-US-AnaNeural"]
    selected_voice = st.selectbox("Select Voice for TTS", options=VOICES, index=0)
    if st.button("Generate Audio"):
        # Clean markdown input for speech generation.
        cleaned_text = clean_for_speech(st.session_state.markdown_content)
        if not cleaned_text.strip():
            # Nothing speakable: skip the TTS request instead of failing in it.
            st.warning("Nothing to read aloud: the markdown content is empty.")
        else:
            # File name: timestamp, markdown name and selected voice.
            audio_filename = f"{prefix} {selected_md} {selected_voice}.mp3" if selected_md else f"{prefix} default {selected_voice}.mp3"
            audio_file = asyncio.run(generate_audio(cleaned_text, selected_voice, audio_filename))
            st.audio(audio_file)
            with open(audio_file, "rb") as f:
                audio_bytes = f.read()
            st.download_button(
                label="πΎπ Save Audio",
                data=audio_bytes,
                file_name=audio_filename,
                mime="audio/mpeg"
            )

# Generating the PDF with more complexity than a rocket launch
with st.spinner("Generating PDF..."):
    pdf_bytes = create_pdf(st.session_state.markdown_content, base_font_size, render_with_bold, auto_bold_numbers, enlarge_numbered, num_columns)

# create_pdf returns None after reporting a font problem; in that case there
# is nothing to preview or download.
if pdf_bytes:
    # Displaying the preview, because everyone loves to window shop
    with st.container():
        pdf_images = pdf_to_image(pdf_bytes)
        if pdf_images:
            for img in pdf_images:
                st.image(img, use_container_width=True)
        else:
            st.info("Download the PDF to view it locally.")

    # Last chance to save your masterpiece before it's gone forever
    with st.sidebar:
        st.download_button(
            label="πΎπ Save PDF",
            data=pdf_bytes,
            file_name=f"{prefix} {selected_md}.pdf" if selected_md else f"{prefix} output.pdf",
            mime="application/pdf"
        )