Spaces:
Sleeping
Sleeping
import html
import pickle
import warnings
from urllib.parse import urlparse

import cv2
import easyocr
import numpy as np
import requests
import streamlit as st
from bs4 import BeautifulSoup
from PIL import Image
# Hide UserWarning messages raised from sklearn modules (the app only wants
# to silence sklearn's version notices, not other libraries' warnings).
warnings.filterwarnings(action="ignore", category=UserWarning, module="sklearn")
# === Custom CSS for better styling === | |
def load_css():
    """Inject the app's custom stylesheet into the current Streamlit page.

    The rules are sent as one ``<style>`` element through ``st.markdown``
    with ``unsafe_allow_html=True``. They style the header banner, info and
    camera cards, allergen result rows, OCR/ingredient containers, a few
    Streamlit widgets (buttons, radio, text area, expander) and the footer.
    """
    stylesheet = """
    <style>
    /* Main app styling */
    .main-header {
        text-align: center;
        padding: 2rem 0;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
        margin-bottom: 2rem;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    .main-header h1 {
        font-size: 2.5rem;
        margin-bottom: 0.5rem;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
    }
    .main-header p {
        font-size: 1.1rem;
        opacity: 0.9;
        margin: 0;
    }
    /* Card styling */
    .info-card {
        background: white;
        padding: 1.5rem;
        border-radius: 10px;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        border-left: 4px solid #667eea;
        margin: 1rem 0;
    }
    /* Camera card styling */
    .camera-card {
        background: linear-gradient(135deg, #f8f9fa, #e9ecef);
        padding: 1.5rem;
        border-radius: 10px;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        border-left: 4px solid #28a745;
        margin: 1rem 0;
    }
    /* Results styling - matching the image design */
    .allergen-result {
        padding: 1rem 1.5rem;
        border-radius: 8px;
        margin: 0.5rem 0;
        font-size: 1rem;
        font-weight: 500;
        display: flex;
        align-items: center;
        gap: 0.5rem;
    }
    .allergen-detected {
        background-color: #f8d7da;
        color: #721c24;
        border: 1px solid #f1aeb5;
    }
    .allergen-safe {
        background-color: #d1e7dd;
        color: #0f5132;
        border: 1px solid #a3cfbb;
    }
    .allergen-summary {
        background-color: #fff3cd;
        color: #664d03;
        border: 1px solid #ffecb5;
        padding: 1rem 1.5rem;
        border-radius: 8px;
        margin: 1rem 0;
        font-weight: 600;
        text-align: center;
    }
    /* OCR result styling */
    .ocr-result {
        background: #f8f9fa;
        padding: 1rem 1.5rem;
        margin: 1rem 0;
        border-radius: 10px;
        border-left: 4px solid #17a2b8;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
        line-height: 1.6;
        font-size: 1rem;
    }
    .ocr-result strong {
        color: #495057;
        font-weight: 600;
    }
    /* Button styling */
    .stButton > button {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border: none;
        border-radius: 25px;
        padding: 0.75rem 2rem;
        font-weight: bold;
        transition: all 0.3s ease;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    .stButton > button:hover {
        transform: translateY(-2px);
        box-shadow: 0 6px 8px rgba(0, 0, 0, 0.15);
    }
    /* Camera button styling */
    .camera-button {
        background: linear-gradient(135deg, #28a745 0%, #20c997 100%) !important;
    }
    /* Radio button styling */
    .stRadio > div {
        background: white;
        padding: 1rem;
        border-radius: 10px;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
    }
    /* Text area styling */
    .stTextArea > div > div > textarea {
        border-radius: 10px;
        border: 2px solid #e0e0e0;
        transition: border-color 0.3s ease;
    }
    .stTextArea > div > div > textarea:focus {
        border-color: #667eea;
        box-shadow: 0 0 10px rgba(102, 126, 234, 0.2);
    }
    /* Expander styling */
    .streamlit-expanderHeader {
        background: linear-gradient(135deg, #f8f9fa, #e9ecef);
        border-radius: 10px;
        border: 1px solid #dee2e6;
    }
    /* Progress indicator */
    .progress-text {
        text-align: center;
        font-weight: bold;
        color: #667eea;
        margin: 1rem 0;
    }
    /* Improved ingredient list styling - single div */
    .ingredients-container {
        background: #f8f9fa;
        padding: 1rem 1.5rem;
        margin: 1rem 0;
        border-radius: 10px;
        border-left: 4px solid #667eea;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
        line-height: 1.6;
        font-size: 1rem;
    }
    .ingredients-container strong {
        color: #495057;
        font-weight: 600;
    }
    /* Footer */
    .footer {
        text-align: center;
        padding: 2rem 0;
        color: #6c757d;
        border-top: 1px solid #e9ecef;
        margin-top: 3rem;
    }
    </style>
    """
    st.markdown(stylesheet, unsafe_allow_html=True)
# === Load EasyOCR Reader === | |
@st.cache_resource(show_spinner=False)
def _build_ocr_reader():
    """Create the EasyOCR reader (Indonesian + English, CPU only).

    Wrapped in ``st.cache_resource`` so the reader is constructed once and
    reused across script reruns instead of being rebuilt on every widget
    interaction. Exceptions propagate to the caller, so only a successfully
    built reader ends up in the cache.
    """
    return easyocr.Reader(['id', 'en'], gpu=False)  # Indonesian and English


def load_ocr_reader():
    """Return the shared EasyOCR reader, or ``None`` if it cannot be created.

    Returns:
        The cached ``easyocr.Reader`` instance. On any failure an error is
        shown in the Streamlit page and ``None`` is returned so the caller
        can skip OCR.
    """
    try:
        return _build_ocr_reader()
    except Exception as e:
        st.error(f"β Gagal memuat EasyOCR: {str(e)}")
        return None
# === Load TF-IDF Vectorizer === | |
@st.cache_resource(show_spinner=False)
def _read_vectorizer_pickle():
    """Unpickle the TF-IDF vectorizer from ``saved_models/`` once.

    Cached with ``st.cache_resource`` so the file is not re-read and
    re-unpickled on every script rerun. Exceptions propagate to the caller.
    """
    # NOTE: pickle.load can execute arbitrary code; this is only acceptable
    # because the file is an artifact shipped with the app, never user input.
    with open("saved_models/tfidf_vectorizer.pkl", "rb") as f:
        return pickle.load(f)


def load_vectorizer():
    """Return the fitted TF-IDF vectorizer, or ``None`` if loading fails.

    On failure an error and a hint about reinstalling scikit-learn are shown
    in the Streamlit page, and ``None`` is returned so the caller can stop.
    """
    try:
        return _read_vectorizer_pickle()
    except Exception as e:
        st.error(f"β Gagal memuat vectorizer: {str(e)}")
        st.warning("β οΈ Jika error terkait versi sklearn, coba install ulang dengan: pip install scikit-learn==1.2.2")
        return None
# === Load XGBoost Model === | |
@st.cache_resource(show_spinner=False)
def _read_model_pickle():
    """Unpickle the trained classifier from ``saved_models/`` once.

    Cached with ``st.cache_resource`` so the model is not re-read and
    re-unpickled on every script rerun. Exceptions propagate to the caller.
    """
    # NOTE: pickle.load can execute arbitrary code; this is only acceptable
    # because the file is an artifact shipped with the app, never user input.
    with open("saved_models/XGBoost_model.pkl", "rb") as f:
        return pickle.load(f)


def load_model():
    """Return the trained allergen classifier, or ``None`` if loading fails.

    On failure an error and a hint about reinstalling scikit-learn are shown
    in the Streamlit page, and ``None`` is returned so the caller can stop.
    """
    try:
        return _read_model_pickle()
    except Exception as e:
        st.error(f"β Gagal memuat model: {str(e)}")
        st.warning("β οΈ Jika error terkait versi sklearn, coba install ulang dengan: pip install scikit-learn==1.2.2")
        return None
# === OCR Text Extraction === | |
def extract_text_from_image(image, reader):
    """Run OCR on an image and return the confidently recognised text.

    Args:
        image: A NumPy array, a PIL image, a file-like object holding an
            encoded image (for example the upload returned by
            ``st.camera_input``), or any other value ``reader.readtext``
            accepts directly.
        reader: Object exposing ``readtext(image)`` that yields
            ``(bbox, text, confidence)`` triples (an EasyOCR reader).

    Returns:
        A tuple ``(full_text, extracted_texts, avg_confidence)``: the kept
        fragments joined by spaces, the list of kept fragments, and their
        mean confidence (``0`` when nothing was kept). On failure the error
        is shown in the Streamlit page and ``("", [], 0)`` is returned.
    """
    try:
        if isinstance(image, np.ndarray):
            image_array = image
        elif isinstance(image, Image.Image):
            # RGB conversion also normalises palette / alpha images.
            image_array = np.array(image.convert("RGB"))
        elif hasattr(image, "read"):
            # File-like uploads are not a type readtext understands, so they
            # must be decoded first; previously this case failed silently.
            image_array = np.array(Image.open(image).convert("RGB"))
        else:
            image_array = image

        results = reader.readtext(image_array)

        extracted_texts = []
        confidence_scores = []
        for _bbox, text, confidence in results:
            if confidence > 0.3:  # Filter out low confidence text
                extracted_texts.append(text)
                confidence_scores.append(confidence)

        full_text = " ".join(extracted_texts)
        avg_confidence = float(np.mean(confidence_scores)) if confidence_scores else 0
        return full_text, extracted_texts, avg_confidence
    except Exception as e:
        # Report the failure instead of making it look like "no text found".
        st.error(f"Gagal mengekstrak teks dari gambar: {e}")
        return "", [], 0
# === Prediksi === | |
def _positive_class_probabilities(probabilities):
    """Reduce a ``predict_proba`` result to one value per label.

    A list is treated as one probability array per label (multi-label
    output): the positive-class column is taken when the array has two
    columns, otherwise the single available column. Any other result is
    treated as a single array whose first row is returned.
    """
    if isinstance(probabilities, list):
        return [
            proba[0][1] if proba.shape[1] == 2 else proba[0][0]
            for proba in probabilities
        ]
    return probabilities[0]


def predict_allergen(model, vectorizer, input_text):
    """Predict allergen labels and per-label scores for one ingredient text.

    Args:
        model: Fitted classifier exposing ``predict`` and, optionally,
            ``predict_proba`` or ``decision_function``.
        vectorizer: Fitted vectorizer exposing ``transform``.
        input_text: Ingredient text to classify.

    Returns:
        A tuple ``(prediction, scores)`` where ``prediction`` is the first
        row returned by ``model.predict`` and ``scores`` holds one value per
        label. Scores come from ``predict_proba`` when it works, otherwise
        from a sigmoid over ``decision_function``, otherwise from the
        predictions themselves cast to ``float`` (0.0 or 1.0).
    """
    X_input = vectorizer.transform([input_text])
    prediction = model.predict(X_input)

    try:
        probabilities = model.predict_proba(X_input)
        return prediction[0], _positive_class_probabilities(probabilities)
    except Exception:
        # predict_proba is missing or returned an unexpected shape: fall
        # back to decision scores squashed through a sigmoid.
        try:
            decision_scores = model.decision_function(X_input)
            return prediction[0], 1 / (1 + np.exp(-decision_scores[0]))
        except Exception:
            # Last resort: report the hard predictions as 0% / 100%.
            return prediction[0], [float(pred) for pred in prediction[0]]
# === Scraping bahan dari Cookpad === | |
def get_ingredients_from_cookpad(url, timeout=10):
    """Fetch a Cookpad recipe page and extract its ingredient list.

    Args:
        url: Recipe URL supplied by the user. It must use http(s) and point
            at ``cookpad.com`` or one of its subdomains; anything else is
            rejected before a request is made.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A tuple ``(ingredients, error)``. On success ``ingredients`` is a
        non-empty list of strings and ``error`` is ``None``; on failure
        ``ingredients`` is ``None`` and ``error`` is a user-facing message.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # The URL comes from user input and is fetched server-side, so
        # refuse hosts other than Cookpad instead of requesting them.
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        is_cookpad = host == "cookpad.com" or host.endswith(".cookpad.com")
        if parsed.scheme not in ("http", "https") or not is_cookpad:
            return None, "URL bukan alamat Cookpad yang valid."

        # Without a timeout a stalled server would hang the whole analysis.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return None, "Gagal mengambil halaman."

        soup = BeautifulSoup(response.text, "html.parser")
        ingredient_div = soup.find("div", class_="ingredient-list")
        if not ingredient_div:
            return None, "Tidak menemukan elemen bahan."

        ingredients = []
        for item in ingredient_div.find_all("li"):
            amount = item.find("bdi")
            name = item.find("span")
            if amount and name:
                ingredients.append(f"{amount.get_text(strip=True)} {name.get_text(strip=True)}")
            else:
                ingredients.append(item.get_text(strip=True))

        if not ingredients:
            # An empty list would otherwise be classified as if it were a recipe.
            return None, "Daftar bahan kosong."
        return ingredients, None
    except Exception as e:
        return None, f"Terjadi kesalahan: {str(e)}"
# === Display OCR Results === | |
def display_ocr_results(extracted_text, text_list, confidence):
    """Render the OCR outcome in the Streamlit page.

    Args:
        extracted_text: All recognised fragments joined into one string.
        text_list: The individual recognised fragments.
        confidence: Average OCR confidence, shown with two decimals.

    When text was found it is shown in a styled box, followed by an expander
    listing each fragment and a hint if ``confidence`` is below 0.5. When no
    text was found, a warning and troubleshooting tips are shown instead.
    """
    st.markdown("### π Hasil Ekstraksi Teks")

    if extracted_text.strip():
        # OCR output is arbitrary text; escape it so characters such as "<"
        # or "&" cannot break the surrounding raw-HTML markup.
        safe_text = html.escape(extracted_text)
        st.markdown(f'''
        <div class="ocr-result">
            <strong>π Teks yang Terdeteksi:</strong><br>
            {safe_text}
        </div>
        ''', unsafe_allow_html=True)

        # Show confidence and individual text elements
        with st.expander(f"π Detail OCR (Confidence: {confidence:.2f})", expanded=False):
            st.markdown("**Teks Individual yang Terdeteksi:**")
            for i, text in enumerate(text_list, 1):
                st.write(f"{i}. {text}")

        # Show tips for better results
        if confidence < 0.5:
            st.info("π‘ **Tips untuk hasil yang lebih baik:** Confidence rendah terdeteksi. Coba ambil foto dengan pencahayaan yang lebih baik, hindari bayangan, dan pastikan teks tidak buram.")
    else:
        st.warning("β οΈ Tidak ada teks yang dapat diekstrak dari gambar.")
        # Provide detailed troubleshooting tips
        st.markdown("""
        <div class="info-card">
            <strong>π§ Tips Troubleshooting:</strong><br>
            β’ Pastikan pencahayaan cukup terang<br>
            β’ Hindari bayangan pada teks<br>
            β’ Pastikan teks tidak buram atau kabur<br>
            β’ Coba pegang kamera lebih stabil<br>
            β’ Pastikan teks berukuran cukup besar di foto<br>
            β’ Hindari refleksi cahaya pada permukaan teks<br>
            β’ Coba ambil foto dari jarak yang berbeda
        </div>
        """, unsafe_allow_html=True)
# === Display results with custom styling matching the image === | |
def _positive_confidence_percent(probabilities, index, status):
    """Return the positive-class confidence for one label as a percentage.

    Uses ``probabilities[index] * 100`` when that entry exists; otherwise
    falls back to 100.0 for a detected label and 0.0 for an undetected one.
    """
    try:
        if index < len(probabilities):
            return probabilities[index] * 100
    except (IndexError, TypeError):
        # No usable probability container; use the prediction itself.
        pass
    return 100.0 if status == 1 else 0.0


def display_results(results, probabilities, labels):
    """Render per-allergen results and a summary in the Streamlit page.

    Args:
        results: Mapping of allergen name to predicted status (1 means
            detected). Iteration order must match ``probabilities``.
        probabilities: Indexable per-label positive-class probabilities in
            the 0-1 range, or any other value to fall back to the hard
            prediction.
        labels: Label names; accepted for interface compatibility and not
            used by the rendering.

    Detected allergens show the positive-class confidence. Undetected ones
    show the complementary confidence (``100 - positive``), i.e. how sure
    the model is that the allergen is absent.
    """
    st.markdown("### π― Hasil Analisis Alergen")

    # Emoji mapping for each allergen
    allergen_emojis = {
        'Susu': 'π₯',
        'Kacang': 'π₯',
        'Telur': 'π₯',
        'Makanan Laut': 'π¦',
        'Gandum': 'πΎ'
    }

    detected_allergens = []

    for i, (allergen, status) in enumerate(results.items()):
        emoji = allergen_emojis.get(allergen, 'π')
        confidence = _positive_confidence_percent(probabilities, i, status)

        if status == 1:  # Detected
            detected_allergens.append(allergen)
            st.markdown(f'''
            <div class="allergen-result allergen-detected">
                {emoji} {allergen}: Terdeteksi β οΈ ({confidence:.2f}%)
            </div>
            ''', unsafe_allow_html=True)
        else:  # Not detected
            # Always invert: a 10% chance of presence is 90% confidence of
            # absence. The old code only inverted values above 50%.
            negative_confidence = 100 - confidence
            st.markdown(f'''
            <div class="allergen-result allergen-safe">
                {emoji} {allergen}: Tidak Terdeteksi β ({negative_confidence:.2f}%)
            </div>
            ''', unsafe_allow_html=True)

    # Display summary
    if detected_allergens:
        allergen_list = ", ".join(detected_allergens)
        st.markdown(f'''
        <div class="allergen-summary">
            Resep ini mengandung alergen: {allergen_list}
        </div>
        ''', unsafe_allow_html=True)
    else:
        st.markdown('''
        <div class="allergen-summary">
            π Tidak ada alergen berbahaya terdeteksi dalam resep ini!
        </div>
        ''', unsafe_allow_html=True)
# === Main UI === | |
# Session-state key holding the id of the captured photo whose OCR found no
# text; it keeps the debug button visible across the rerun its click causes.
_OCR_EMPTY_IMAGE_KEY = "ocr_empty_image_id"


def _render_sidebar():
    """Render the sidebar with the supported allergens and usage tips."""
    with st.sidebar:
        st.markdown("### π Informasi Alergen")
        st.markdown("""
        **Alergen yang dapat dideteksi:**
        - π₯ Susu
        - π₯ Kacang
        - π₯ Telur
        - π¦ Makanan Laut
        - πΎ Gandum
        """)
        st.markdown("### π‘ Tips Penggunaan")
        st.markdown("""
        **Input Manual:**
        - Masukkan bahan dengan detail
        - Gunakan nama bahan dalam bahasa Indonesia

        **Kamera OCR:**
        - Pastikan teks terlihat jelas
        - Gunakan pencahayaan yang baik
        - Hindari blur atau teks terpotong

        **URL Cookpad:**
        - Pastikan link valid
        - Maksimal 20 URL per analisis
        """)


def _analyze_and_show(model, vectorizer, labels, text, spinner_text):
    """Classify ``text`` and render the allergen results."""
    with st.spinner(spinner_text):
        pred, probs = predict_allergen(model, vectorizer, text)
        results = dict(zip(labels, pred))
    st.success("β Analisis selesai!")
    display_results(results, probs, labels)


def _render_manual_mode(model, vectorizer, labels):
    """Render the free-text ingredient input and analyse it on request."""
    st.markdown("### π Masukkan Bahan Makanan")
    st.markdown("""
    <div class="info-card">
        <strong>π‘ Petunjuk:</strong> Masukkan daftar bahan makanan yang ingin dianalisis.
        Pisahkan setiap bahan dengan koma atau baris baru.
    </div>
    """, unsafe_allow_html=True)

    input_text = st.text_area(
        "Masukkan bahan makanan",
        height=150,
        placeholder="Contoh: telur, susu, tepung terigu, garam, mentega...",
        label_visibility="collapsed"
    )

    _, col_btn, _ = st.columns([2, 2, 2])
    with col_btn:
        clicked = st.button("π Analisis Alergen", use_container_width=True)
    if not clicked:
        return
    if not input_text.strip():
        st.warning("β οΈ Mohon masukkan bahan makanan terlebih dahulu.")
        return
    _analyze_and_show(model, vectorizer, labels, input_text, "π Sedang menganalisis...")


def _run_debug_ocr(img, reader, model, vectorizer, labels):
    """Retry OCR with looser grouping thresholds and analyse whatever is found."""
    st.info("π§ Mode debug: Mencoba ekstraksi dengan parameter yang lebih agresif...")
    try:
        img_array = np.array(img)
        ocr_items = reader.readtext(img_array, detail=1, paragraph=True, width_ths=0.1, height_ths=0.1)

        found = []
        for item in ocr_items:
            # With paragraph=True EasyOCR yields (bbox, text) pairs without a
            # confidence, so a fixed 3-way unpack would raise here.
            text = str(item[1]).strip()
            if text:
                found.append((text, item[2] if len(item) > 2 else None))

        if not found:
            st.warning("Bahkan dalam mode debug, tidak ada teks yang dapat diekstrak.")
            return

        st.write("π **Teks yang ditemukan dalam mode debug:**")
        for text, conf in found:
            if conf is None:
                st.write(f"β’ {text}")
            else:
                st.write(f"β’ {text} (conf: {conf:.2f})")

        debug_combined = " ".join(text for text, _ in found)
        pred, probs = predict_allergen(model, vectorizer, debug_combined)
        results = dict(zip(labels, pred))
        st.markdown("### π§ͺ Hasil Analisis Debug")
        display_results(results, probs, labels)
    except Exception as e:
        st.error(f"Error in debug mode: {str(e)}")


def _render_camera_mode(model, vectorizer, labels):
    """Render the camera capture flow: photo, OCR, then allergen analysis."""
    st.markdown("### π· Deteksi Alergen dari Gambar")
    st.markdown("""
    <div class="camera-card">
        <strong>π· Petunjuk Kamera:</strong> Ambil foto langsung dari daftar bahan, kemasan makanan,
        atau resep. Pastikan teks terlihat jelas dan pencahayaan memadai untuk hasil OCR terbaik.
    </div>
    """, unsafe_allow_html=True)

    camera_image = st.camera_input("πΈ Ambil foto dengan kamera")
    if camera_image is None:
        return

    _, col_img, _ = st.columns([1, 3, 1])
    with col_img:
        st.image(camera_image, caption="π· Gambar yang diambil", use_container_width=True)

    img = Image.open(camera_image)
    width, height = img.size
    st.info(f"π Dimensi gambar: {width} x {height} pixels")

    with st.spinner("π Memuat OCR engine..."):
        reader = load_ocr_reader()
    if reader is None:
        st.error("β Gagal memuat OCR engine. Pastikan EasyOCR telah terinstall.")
        return

    # Identifies this particular photo so a stale "no text" flag from an
    # earlier capture does not apply to a new one.
    image_id = hash(camera_image.getvalue())

    _, col_btn, _ = st.columns([2, 2, 2])
    with col_btn:
        analyze_clicked = st.button("π Ekstrak Teks & Analisis", use_container_width=True, key="ocr_analyze")

    if analyze_clicked:
        with st.spinner("π Mengekstrak teks dari gambar... (ini mungkin memakan waktu)"):
            # Pass the decoded PIL image: the raw upload object is not an
            # input type the OCR reader accepts.
            extracted_text, text_list, confidence = extract_text_from_image(img, reader)
        # Display OCR results (will show tips if no text found)
        display_ocr_results(extracted_text, text_list, confidence)
        if extracted_text.strip():
            st.session_state.pop(_OCR_EMPTY_IMAGE_KEY, None)
            _analyze_and_show(model, vectorizer, labels, extracted_text, "π Menganalisis alergen...")
        else:
            st.session_state[_OCR_EMPTY_IMAGE_KEY] = image_id

    # The debug button must live outside the `analyze_clicked` branch: a
    # button is only True on the rerun caused by its own click, so a button
    # nested inside another button's branch could never fire.
    if st.session_state.get(_OCR_EMPTY_IMAGE_KEY) == image_id:
        if st.button("π§ Coba Analisis Paksa (Debug Mode)", key="debug_mode"):
            _run_debug_ocr(img, reader, model, vectorizer, labels)


def _render_cookpad_mode(model, vectorizer, labels):
    """Render the Cookpad URL input and analyse up to 20 recipes."""
    st.markdown("### π Analisis dari URL Cookpad")
    st.markdown("""
    <div class="info-card">
        <strong>π‘ Petunjuk:</strong> Masukkan hingga 20 URL resep dari Cookpad.
        Setiap URL harus dalam baris terpisah.
    </div>
    """, unsafe_allow_html=True)

    urls_input = st.text_area(
        "Masukkan URL Cookpad",
        placeholder="https://cookpad.com/id/resep/...\nhttps://cookpad.com/id/resep/...",
        height=200,
        label_visibility="collapsed"
    )

    urls = [line.strip() for line in urls_input.splitlines() if line.strip()]
    if len(urls) > 20:
        st.warning("β οΈ Maksimal hanya bisa memproses 20 URL. Menggunakan 20 URL pertama.")
        urls = urls[:20]
    if urls:
        st.info(f"π Siap memproses {len(urls)} URL")

    if not st.button("π Analisis dari URL", use_container_width=True):
        return
    if not urls:
        st.warning("β οΈ Mohon masukkan minimal satu URL.")
        return

    progress_bar = st.progress(0)
    status_text = st.empty()

    for i, url in enumerate(urls):
        progress_bar.progress((i + 1) / len(urls))
        status_text.markdown(f'<div class="progress-text">Memproses resep {i+1} dari {len(urls)}</div>', unsafe_allow_html=True)

        ingredients, error = get_ingredients_from_cookpad(url)

        with st.expander(f"π Resep #{i+1}", expanded=False):
            st.markdown(f"**URL:** {url}")
            if error:
                st.error(f"β {error}")
                continue

            st.success("β Bahan berhasil diambil!")
            # Scraped page content is untrusted; escape it before placing it
            # inside raw HTML.
            ingredients_text = html.escape(", ".join(ingredients))
            st.markdown(f'''
            <div class="ingredients-container">
                <strong>π§Ύ Daftar Bahan:</strong><br>
                {ingredients_text}
            </div>
            ''', unsafe_allow_html=True)

            joined_ingredients = " ".join(ingredients)
            pred, probs = predict_allergen(model, vectorizer, joined_ingredients)
            results = dict(zip(labels, pred))
            st.markdown("---")
            display_results(results, probs, labels)

    # Clear progress indicators
    progress_bar.empty()
    status_text.empty()
    st.success("π Semua resep telah dianalisis!")


def main():
    """Build the Streamlit page: header, sidebar, input-mode picker, footer.

    Loads the vectorizer and model, stops the script if either is missing,
    then dispatches to the renderer for the selected input mode (manual
    text, camera OCR, or Cookpad URLs).
    """
    st.set_page_config(
        page_title="Deteksi Alergen Makanan",
        page_icon="π₯",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    load_css()

    st.markdown("""
    <div class="main-header">
        <h1>π₯ Deteksi Alergen Makanan</h1>
        <p>Analisis kandungan alergen dalam resep makanan dengan teknologi AI & OCR</p>
    </div>
    """, unsafe_allow_html=True)

    _render_sidebar()

    # Mode labels are defined once so the radio options and the dispatch
    # below can never drift apart.
    manual_mode = "π Input Manual"
    camera_mode = "π· Kamera OCR"
    cookpad_mode = "π URL Cookpad"

    col1, col2, col3 = st.columns([1, 6, 1])
    with col2:
        st.markdown("### π§ Pilih Metode Input")
        input_mode = st.radio(
            "Pilih metode input data",
            [manual_mode, camera_mode, cookpad_mode],
            horizontal=True,
            label_visibility="collapsed"
        )

        # Load model components
        try:
            vectorizer = load_vectorizer()
            model = load_model()
            if vectorizer is None or model is None:
                st.stop()
            labels = ['Susu', 'Kacang', 'Telur', 'Makanan Laut', 'Gandum']
        except Exception as e:
            st.error(f"β Gagal memuat komponen model: {str(e)}")
            st.stop()

        st.markdown("---")

        if input_mode == manual_mode:
            _render_manual_mode(model, vectorizer, labels)
        elif input_mode == camera_mode:
            _render_camera_mode(model, vectorizer, labels)
        elif input_mode == cookpad_mode:
            _render_cookpad_mode(model, vectorizer, labels)

    # Footer
    st.markdown("""
    <div class="footer">
        <p>π¬ Powered by XGBoost, TF-IDF & EasyOCR | Made with β€οΈ using Streamlit</p>
    </div>
    """, unsafe_allow_html=True)


if __name__ == "__main__":
    main()