File size: 8,282 Bytes
21ed5c5 0540b53 d067fc0 bea81c7 0abaeda 26df9fd 21ed5c5 26df9fd d067fc0 0540b53 bea81c7 0abaeda 21ed5c5 0abaeda 21ed5c5 0abaeda 21ed5c5 0abaeda 21ed5c5 0abaeda 21ed5c5 0abaeda 21ed5c5 0abaeda 21ed5c5 0abaeda 21ed5c5 0abaeda 21ed5c5 0abaeda 21ed5c5 0abaeda 21ed5c5 0abaeda d067fc0 0540b53 21ed5c5 26df9fd 21ed5c5 0abaeda 21ed5c5 03ec9cb 21ed5c5 0abaeda 21ed5c5 0abaeda 26df9fd 21ed5c5 26df9fd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
import streamlit as st
from ai_config_faiss import get_ai_assistant
from ttv_web_scraper import db_load_metadata_sets
import json
from datetime import datetime
import os
import base64
# Seed st.session_state with defaults. Streamlit re-executes this script on
# every interaction, so a key is only written when it is not already present.
for state_key, initial_value in (
    ('results', None),
    ('where', {}),
    ('num_results', 3),
    ('favorites', {}),
    ('show_filters', True),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# One selection list per filter category, stored under 'selected_<category>'.
for category in ('company', 'speaker', 'subjects'):
    selection_key = f'selected_{category}'
    if selection_key not in st.session_state:
        st.session_state[selection_key] = []
@st.cache_resource
def get_assistant():
    """Return the AI assistant, created via ``get_ai_assistant``.

    Wrapped in ``st.cache_resource`` so the object is shared across reruns
    instead of being rebuilt each time.
    """
    assistant = get_ai_assistant()
    return assistant
def format_timestamp(timestamp):
    """Shorten an ``HH:MM:SS`` timestamp for display.

    Args:
        timestamp: A string in ``HH:MM:SS`` form.

    Returns:
        ``MM:SS`` when the hour component is zero, ``HH:MM:SS`` otherwise
        (so positions past the first hour are not shown as if they were in
        the first hour). Input that cannot be parsed, including non-string
        values such as ``None``, is returned unchanged.
    """
    try:
        parsed = datetime.strptime(timestamp, "%H:%M:%S")
    except (ValueError, TypeError):
        # ValueError: wrong format; TypeError: not a string at all.
        return timestamp
    if parsed.hour:
        return parsed.strftime("%H:%M:%S")
    return parsed.strftime("%M:%S")
def get_file_content(file_path):
    """Read a file and return its raw bytes.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's content as ``bytes``, or ``None`` when the path does not
        refer to a readable regular file (missing, or a directory).
    """
    # Open directly instead of checking existence first: the file could be
    # removed between the check and the open, which would raise.
    try:
        with open(file_path, "rb") as file:
            return file.read()
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        return None
    except ValueError:
        # e.g. an embedded NUL byte in the path; os.path.exists() reports
        # such paths as non-existent, so keep treating them as missing.
        return None
def create_markdown_download_link(markdown_content):
    """Build an HTML anchor that downloads ``markdown_content`` as ``favorites.md``.

    The text is embedded in the link itself as a base64 ``data:`` URI.

    Args:
        markdown_content: The Markdown text to offer for download.

    Returns:
        The ``<a>`` element as a string.
    """
    raw_bytes = markdown_content.encode()
    payload = base64.b64encode(raw_bytes).decode()
    href = f"data:text/markdown;base64,{payload}"
    return f'<a href="{href}" download="favorites.md">Download Favorites</a>'
def update_filter(filter_type, item):
    """Toggle ``item`` in the selection list of ``filter_type``.

    The item is added when absent and removed when present; afterwards the
    combined filter in ``st.session_state.where`` is rebuilt.

    Args:
        filter_type: Filter category; the list lives under
            ``selected_<filter_type>`` in the session state.
        item: The value whose selection is flipped.
    """
    selected = st.session_state[f'selected_{filter_type}']
    if item not in selected:
        selected.append(item)
    else:
        selected.remove(item)
    update_where()
def update_where():
    """Rebuild ``st.session_state.where`` from the per-category selections.

    Only categories with at least one selected item get an entry; the value
    is that category's selection list.
    """
    state = st.session_state
    state.where = {
        category: state[f'selected_{category}']
        for category in ['company', 'speaker', 'subjects']
        if state[f'selected_{category}']
    }
def toggle_show_filters():
    """Flip the ``show_filters`` flag kept in the session state."""
    currently_shown = st.session_state.show_filters
    st.session_state.show_filters = not currently_shown
def update_num_results():
    """Copy the slider widget's value (``num_results_slider``) into ``num_results``."""
    slider_value = st.session_state.num_results_slider
    st.session_state.num_results = slider_value
def submit_query():
    """Query the assistant with the active filters and store the parsed results.

    Warns and does nothing when no filter is selected. The assistant's
    response is parsed as JSON into ``st.session_state.results``; when
    parsing fails an error is shown and the previous results are left as is.
    """
    state = st.session_state
    active_filters = state.where
    if not active_filters:
        st.warning("Please select at least one filter before submitting.")
        return

    assistant = get_assistant()
    with st.spinner("Thinking..."):
        response = assistant.query("", num_results=state.num_results, filters=active_filters)

    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        st.error("Failed to parse the response. Please try again.")
    else:
        state.results = parsed
def update_favorite(result_id):
    """Toggle the favorite status of the result identified by ``result_id``.

    A result that is currently a favorite is removed from
    ``st.session_state.favorites``; otherwise it is looked up in the current
    search results and added. Membership in ``favorites`` decides the
    direction, so a favorite kept from an earlier search (no longer in the
    current results) can still be removed.

    Args:
        result_id: The ``id`` value of the result to toggle.
    """
    favorites = st.session_state.favorites
    # results is None until the first successful query.
    current_results = st.session_state.results or []
    result = next((r for r in current_results if r['id'] == result_id), None)

    if result_id in favorites:
        removed = favorites.pop(result_id)
        removed['favorite'] = False
        if result is not None:
            # Keep the entry shown on the search tab in sync as well.
            result['favorite'] = False
    elif result is not None:
        result['favorite'] = True
        favorites[result_id] = result
def clear_favorites():
    """Empty the favorites dict in the session state and show a confirmation."""
    favorites = st.session_state.favorites
    favorites.clear()
    st.success("All favorites have been cleared.")
def save_favorites():
    """Render a download link for the favorites as a Markdown document.

    Each favorite becomes a section with title, speaker, date, time range,
    transcript, video link and subjects. Shows a warning instead when there
    are no favorites.
    """
    favorites = st.session_state.favorites
    if not favorites:
        st.warning("No favorites selected.")
        return

    # Collect fragments and join once rather than growing a string with +=.
    parts = ["# Favorites\n\n"]
    for fav in favorites.values():
        meta = fav['metadata']
        start_time = format_timestamp(meta['start_timestamp'])
        end_time = format_timestamp(meta['end_timestamp'])

        parts.append(f"## {meta['title']}\n\n")
        parts.append(f"**Speaker:** {meta['speaker']} ({meta['company']})\n\n")
        parts.append(f"**Date:** {meta['date']}\n\n")
        parts.append(f"**Time:** {start_time} - {end_time}\n\n")
        parts.append(f"**Transcript:** {fav['content']}\n\n")

        play_link = meta['play']
        # Skip the link when there is no video, as display_result does,
        # instead of emitting a link built from an empty value.
        if play_link:
            # Start a query string when the URL has none yet; a bare '&'
            # appended to a URL without '?' would become part of the path.
            separator = '&' if '?' in play_link else '?'
            modified_play_link = f"{play_link}{separator}controls=1&showinfo=0&modestbranding=1"
            parts.append(f"**Video Link:** [{play_link}]({modified_play_link})\n\n")

        if meta['subjects']:
            parts.append(f"**Subjects:** {', '.join(meta['subjects'])}\n\n")
        parts.append("---\n\n")

    markdown_content = "".join(parts)
    st.markdown(create_markdown_download_link(markdown_content), unsafe_allow_html=True)
def display_result(result, favorite_tab=False):
    """Render one transcript result: text, video, download, subjects, favorite box.

    Args:
        result: Dict with ``id``, ``content``, ``favorite`` and a ``metadata``
            dict (``title``, ``speaker``, ``company``, ``date``,
            ``start_timestamp``, ``end_timestamp``, ``play``, ``subjects``
            and optionally ``download``).
        favorite_tab: True when rendering inside the Favorites tab. Used to
            keep widget keys distinct from the same result on the search tab.
    """
    # Local import: the module's import block does not provide html.
    from html import escape

    # NOTE(review): nesting below was reconstructed from a flattened source;
    # confirm the download button belongs in the right-hand column.
    metadata = result['metadata']
    st.markdown(f"### {metadata['title']}")

    col1, col2 = st.columns([3, 2])
    with col1:
        st.markdown(f"**Speaker:** {metadata['speaker']} ({metadata['company']})")
        st.markdown(f"**Date:** {metadata['date']}")
        st.markdown("**Transcript:**")
        st.markdown(result['content'])

    with col2:
        start_time = format_timestamp(metadata['start_timestamp'])
        end_time = format_timestamp(metadata['end_timestamp'])
        st.markdown(f"**Time:** {start_time} - {end_time}")

        play_url = metadata['play']
        if play_url:
            st.components.v1.iframe(src=play_url, width=300, height=169, scrolling=True)
        else:
            st.warning("No video found")

        if 'download' in metadata:
            download_path = metadata['download']
            file_name = os.path.basename(download_path)
            file_content = get_file_content(download_path)
            if file_content:
                prefix = "fav_dl_" if favorite_tab else "dl_"
                st.download_button(label="Download Clip", data=file_content, file_name=file_name, mime="video/mp4", key=f"{prefix}{result['id']}")
            else:
                st.warning(f"Clip file not found: {file_name}")

    if metadata['subjects']:
        st.markdown("**Subjects:**")
        # Subjects are rendered as raw HTML (unsafe_allow_html=True), so
        # escape them to keep markup in the data from being interpreted.
        subject_tags = ' '.join(
            f"<span style='background-color: #f0f0f0; color:black; padding: 2px 6px; margin: 2px; border-radius: 10px;'>{escape(str(subject))}</span>"
            for subject in metadata['subjects']
        )
        st.markdown(subject_tags, unsafe_allow_html=True)

    # favorite_tab is part of the key so the same result can appear in both
    # tabs without two widgets sharing a key.
    favorite_key = f"fav_{favorite_tab}_{result['id']}"
    st.checkbox("Favorite", value=result['favorite'], key=favorite_key, on_change=update_favorite, args=(result['id'],))
    st.markdown("---")
def main():
    """Build the page: a Search tab (filters, result count, results) and a Favorites tab."""
    st.title("Telecom TV Video Expert")
    st.markdown("Trained on data from [here](https://www.telecomtv.com/content/dsp-leaders-forum-videos/)")

    # Only the companies mapping and the subjects collection are used here.
    _, _, companies, _, subjects = db_load_metadata_sets()

    # NOTE(review): nesting below was reconstructed from a flattened source;
    # confirm the slider, button and results sit outside the "Show Filters" branch.
    search_tab, favorites_tab = st.tabs(["Search", "Favorites"])

    with search_tab:
        st.header("Filter Options")
        st.checkbox("Show Filters", value=st.session_state.show_filters, on_change=toggle_show_filters)

        if st.session_state.show_filters:
            # Speakers are the union of the values of the companies mapping.
            speakers = set().union(*companies.values())
            filter_groups = [
                ('company', companies.keys()),
                ('speaker', speakers),
                ('subjects', subjects),
            ]
            # Pair each filter category with its column directly rather than
            # resolving column variables by name through locals().
            for column, (filter_type, items) in zip(st.columns(3), filter_groups):
                with column:
                    st.subheader(filter_type.capitalize())
                    for item in sorted(items):
                        st.checkbox(item, key=f'{filter_type}_{item}',
                                    value=item in st.session_state[f'selected_{filter_type}'],
                                    on_change=update_filter,
                                    args=(filter_type, item))

        st.slider("Number of relevant transcript excerpts to show:", min_value=1, max_value=500, value=st.session_state.num_results, step=1, key='num_results_slider', on_change=update_num_results)
        st.button("Submit", on_click=submit_query)

        if st.session_state.results:
            for result in st.session_state.results:
                # Refresh the flag from the favorites dict before rendering.
                result['favorite'] = result['id'] in st.session_state.favorites
                display_result(result)

    with favorites_tab:
        st.header("Favorites")
        col1, col2 = st.columns(2)
        with col1:
            st.button("Save Favorites", on_click=save_favorites)
        with col2:
            st.button("Clear Favorites", on_click=clear_favorites)

        for fav in st.session_state.favorites.values():
            display_result(fav, favorite_tab=True)
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|