import streamlit as st
import pickle
import requests
from bs4 import BeautifulSoup
import easyocr
import numpy as np
from PIL import Image
import cv2
import warnings
# Silence UserWarning messages originating from sklearn modules (the version
# warnings this app would otherwise emit).
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")
# === Custom CSS for better styling ===
def load_css():
    """Inject the app's custom stylesheet into the page.

    The stylesheet is sent through ``st.markdown`` with raw HTML enabled.
    The markup currently consists of a single newline, so nothing visible
    is rendered.
    """
    stylesheet = """
"""
    st.markdown(stylesheet, unsafe_allow_html=True)
# === Load EasyOCR Reader ===
@st.cache_resource
def load_ocr_reader():
    """Create the EasyOCR reader for Indonesian and English text (CPU only).

    The result is cached by ``st.cache_resource``.

    Returns:
        The ``easyocr.Reader`` instance, or ``None`` when construction fails
        (the failure is reported to the user through ``st.error``).
    """
    languages = ['id', 'en']  # Indonesian and English
    try:
        return easyocr.Reader(languages, gpu=False)
    except Exception as exc:
        st.error(f"❌ Gagal memuat EasyOCR: {exc}")
    return None
# === Load TF-IDF Vectorizer ===
@st.cache_resource
def load_vectorizer():
    """Unpickle the TF-IDF vectorizer from ``saved_models/tfidf_vectorizer.pkl``.

    The result is cached by ``st.cache_resource``.

    Returns:
        The unpickled vectorizer, or ``None`` when loading fails. On failure
        an error and a scikit-learn reinstall hint are shown to the user.
    """
    try:
        with open("saved_models/tfidf_vectorizer.pkl", "rb") as handle:
            return pickle.load(handle)
    except Exception as exc:
        st.error(f"❌ Gagal memuat vectorizer: {exc}")
        st.warning("⚠️ Jika error terkait versi sklearn, coba install ulang dengan: pip install scikit-learn==1.2.2")
    return None
# === Load XGBoost Model ===
@st.cache_resource
def load_model():
    """Unpickle the classifier stored at ``saved_models/XGBoost_model.pkl``.

    The result is cached by ``st.cache_resource``.

    Returns:
        The unpickled model, or ``None`` when loading fails. On failure an
        error and a scikit-learn reinstall hint are shown to the user.
    """
    try:
        with open("saved_models/XGBoost_model.pkl", "rb") as handle:
            return pickle.load(handle)
    except Exception as exc:
        st.error(f"❌ Gagal memuat model: {exc}")
        st.warning("⚠️ Jika error terkait versi sklearn, coba install ulang dengan: pip install scikit-learn==1.2.2")
    return None
# === OCR Text Extraction ===
def extract_text_from_image(image, reader, min_confidence=0.3):
    """Extract text from an image using EasyOCR.

    Args:
        image: A NumPy array, a PIL image, or a file-like object exposing
            ``read`` (for example the value returned by ``st.camera_input``).
            Any other value is handed to ``reader.readtext`` unchanged.
        reader: Object whose ``readtext(image)`` yields
            ``(bbox, text, confidence)`` triples.
        min_confidence: Detections with a confidence at or below this value
            are discarded. Defaults to the previously hard-coded 0.3.

    Returns:
        A ``(full_text, extracted_texts, avg_confidence)`` tuple: the kept
        fragments joined by single spaces, the list of kept fragments, and
        the mean confidence of the kept fragments (0.0 when nothing was
        kept). On failure the error is shown with ``st.error`` and
        ``("", [], 0.0)`` is returned, so callers keep their best-effort flow.
    """
    try:
        if isinstance(image, np.ndarray):
            image_array = image
        else:
            if hasattr(image, "read"):
                # EasyOCR's readtext is documented for paths, URLs, bytes and
                # arrays, so decode file-like objects with PIL first.
                image = Image.open(image)
            if isinstance(image, Image.Image):
                # Normalise palette/alpha/bilevel modes to plain RGB pixels.
                if image.mode not in ("RGB", "L"):
                    image = image.convert("RGB")
                image_array = np.array(image)
            else:
                image_array = image

        extracted_texts = []
        confidence_scores = []
        for _bbox, text, confidence in reader.readtext(image_array):
            if confidence > min_confidence:  # drop low-confidence detections
                extracted_texts.append(text)
                confidence_scores.append(confidence)

        full_text = " ".join(extracted_texts)
        avg_confidence = float(np.mean(confidence_scores)) if confidence_scores else 0.0
        return full_text, extracted_texts, avg_confidence
    except Exception as exc:
        # Surface the failure instead of making it look like an empty image.
        st.error(f"❌ Gagal mengekstrak teks dari gambar: {exc}")
        return "", [], 0.0
# === Prediction ===
def predict_allergen(model, vectorizer, input_text):
    """Predict allergen labels for one ingredient text with per-label scores.

    Args:
        model: Fitted classifier exposing ``predict`` and, optionally,
            ``predict_proba`` or ``decision_function``.
        vectorizer: Fitted transformer exposing ``transform``.
        input_text: Ingredient text to classify.

    Returns:
        ``(prediction, scores)`` where ``prediction`` is the first row of
        ``model.predict`` and ``scores`` comes from the first strategy that
        works:

        1. ``predict_proba``. A list result (one array per label) yields the
           second column of each two-column array, or the only column
           otherwise. Any other result yields its first row.
        2. ``decision_function`` passed through a sigmoid.
        3. The predictions themselves cast to float (0.0 or 1.0).
    """
    X_input = vectorizer.transform([input_text])
    prediction = model.predict(X_input)

    try:
        probabilities = model.predict_proba(X_input)
        if isinstance(probabilities, list):
            # Multi-label output: one probability array per label.
            # NOTE(review): with a single column the value belongs to the only
            # class that estimator exposes, which may not be the positive one.
            positive_probs = [
                prob_array[0][1] if prob_array.shape[1] == 2 else prob_array[0][0]
                for prob_array in probabilities
            ]
            return prediction[0], positive_probs
        # Single array output: first row holds the scores.
        return prediction[0], probabilities[0]
    except Exception:
        # predict_proba is missing or its output has an unexpected shape.
        try:
            decision_scores = model.decision_function(X_input)
            # Squash decision scores into (0, 1) with a sigmoid.
            return prediction[0], 1 / (1 + np.exp(-decision_scores[0]))
        except Exception:
            # Last resort: report the hard predictions as 0.0 / 1.0 scores.
            # atleast_1d keeps this working when the prediction is a scalar.
            confidence_scores = [float(pred) for pred in np.atleast_1d(prediction[0])]
            return prediction[0], confidence_scores
# === Scrape ingredients from Cookpad ===
def get_ingredients_from_cookpad(url, timeout=10):
    """Fetch a Cookpad recipe page and return its ingredient lines.

    Args:
        url: Recipe page URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        ``(ingredients, None)`` on success, where ``ingredients`` is a list
        of strings, or ``(None, message)`` when the page cannot be fetched
        or parsed. Exceptions are converted to the error message so the
        caller can keep processing its remaining URLs.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return None, "Gagal mengambil halaman."

        soup = BeautifulSoup(response.text, "html.parser")
        ingredient_div = soup.find("div", class_="ingredient-list")
        if not ingredient_div:
            return None, "Tidak menemukan elemen bahan."

        ingredients = []
        for item in ingredient_div.find_all("li"):
            amount = item.find("bdi")
            name = item.find("span")
            if amount and name:
                entry = f"{amount.get_text(strip=True)} {name.get_text(strip=True)}"
            else:
                entry = item.get_text(strip=True)
            if entry.strip():  # skip list items that carry no text
                ingredients.append(entry)
        return ingredients, None
    except Exception as e:
        return None, f"Terjadi kesalahan: {str(e)}"
# === Display OCR Results ===
def display_ocr_results(extracted_text, text_list, confidence):
    """Render the OCR output, or troubleshooting tips when nothing was read.

    Args:
        extracted_text: All recognised text joined into one string.
        text_list: The individual recognised fragments.
        confidence: Average OCR confidence, shown with two decimals. Values
            below 0.5 trigger an extra hint about photo quality.
    """
    import html  # function-scope import keeps this block self-contained

    st.markdown("### 📖 Hasil Ekstraksi Teks")

    if not extracted_text.strip():
        st.warning("⚠️ Tidak ada teks yang dapat diekstrak dari gambar.")
        # Provide detailed troubleshooting tips
        st.markdown("""
🔧 Tips Troubleshooting:
• Pastikan pencahayaan cukup terang
• Hindari bayangan pada teks
• Pastikan teks tidak buram atau kabur
• Coba pegang kamera lebih stabil
• Pastikan teks berukuran cukup besar di foto
• Hindari refleksi cahaya pada permukaan teks
• Coba ambil foto dari jarak yang berbeda
""", unsafe_allow_html=True)
        return

    # OCR output is untrusted and this markdown is rendered with raw HTML
    # enabled, so escape it before interpolating.
    safe_text = html.escape(extracted_text)
    st.markdown(f'''
📝 Teks yang Terdeteksi:
{safe_text}
''', unsafe_allow_html=True)

    # Show confidence and individual text elements
    with st.expander(f"📊 Detail OCR (Confidence: {confidence:.2f})", expanded=False):
        st.markdown("**Teks Individual yang Terdeteksi:**")
        for i, text in enumerate(text_list, 1):
            st.write(f"{i}. {text}")

    # Show tips for better results
    if confidence < 0.5:
        st.info("💡 **Tips untuk hasil yang lebih baik:** Confidence rendah terdeteksi. Coba ambil foto dengan pencahayaan yang lebih baik, hindari bayangan, dan pastikan teks tidak buram.")
# === Display results with custom styling matching the image ===
def display_results(results, probabilities, labels):
    """Render one line per allergen plus a summary of what was detected.

    Args:
        results: Mapping of allergen name to predicted status; a status equal
            to 1 means "detected".
        probabilities: Indexable sequence of positive-class probabilities
            (0..1) in the same order as ``results``. Missing or unusable
            entries fall back to 100% / 0% based on the predicted status.
        labels: Unused; kept so existing callers keep working.
    """
    st.markdown("### 🎯 Hasil Analisis Alergen")

    # Emoji mapping for each allergen
    allergen_emojis = {
        'Susu': '🥛',
        'Kacang': '🥜',
        'Telur': '🥚',
        'Makanan Laut': '🦐',
        'Gandum': '🌾',
    }
    detected_allergens = []

    for i, (allergen, status) in enumerate(results.items()):
        emoji = allergen_emojis.get(allergen, '📋')
        detected = status == 1

        # Positive-class probability as a percentage; float() also guards the
        # ":.2f" format below against non-scalar entries.
        try:
            confidence = float(probabilities[i]) * 100
        except (IndexError, KeyError, TypeError, ValueError):
            # No usable probability: derive it from the hard prediction.
            confidence = 100.0 if detected else 0.0

        if detected:
            detected_allergens.append(allergen)
            st.markdown(f'''
{emoji} {allergen}: Terdeteksi ⚠️ ({confidence:.2f}%)
''', unsafe_allow_html=True)
        else:
            # Confidence in "not detected" is the complement of the
            # positive-class probability, whatever its magnitude.
            negative_confidence = 100 - confidence
            st.markdown(f'''
{emoji} {allergen}: Tidak Terdeteksi ✓ ({negative_confidence:.2f}%)
''', unsafe_allow_html=True)

    # Display summary
    if detected_allergens:
        allergen_list = ", ".join(detected_allergens)
        st.markdown(f'''
Resep ini mengandung alergen: {allergen_list}
''', unsafe_allow_html=True)
    else:
        st.markdown('''
🎉 Tidak ada alergen berbahaya terdeteksi dalam resep ini!
''', unsafe_allow_html=True)
# === Main UI ===
def _predict_results(model, vectorizer, labels, text):
    """Classify ``text`` and pair each label with its predicted status."""
    pred, probs = predict_allergen(model, vectorizer, text)
    return dict(zip(labels, pred)), probs


def _render_sidebar():
    """Render the static sidebar: detectable allergens and usage tips."""
    with st.sidebar:
        st.markdown("### 📋 Informasi Alergen")
        st.markdown("""
**Alergen yang dapat dideteksi:**
- 🥛 Susu
- 🥜 Kacang
- 🥚 Telur
- 🦐 Makanan Laut
- 🌾 Gandum
""")
        st.markdown("### 💡 Tips Penggunaan")
        st.markdown("""
**Input Manual:**
- Masukkan bahan dengan detail
- Gunakan nama bahan dalam bahasa Indonesia
**Kamera OCR:**
- Pastikan teks terlihat jelas
- Gunakan pencahayaan yang baik
- Hindari blur atau teks terpotong
**URL Cookpad:**
- Pastikan link valid
- Maksimal 20 URL per analisis
""")


def _render_manual_mode(model, vectorizer, labels):
    """Manual mode: analyse ingredients typed into a text area."""
    st.markdown("### 📝 Masukkan Bahan Makanan")
    # Info card
    st.markdown("""
💡 Petunjuk: Masukkan daftar bahan makanan yang ingin dianalisis.
Pisahkan setiap bahan dengan koma atau baris baru.
""", unsafe_allow_html=True)
    input_text = st.text_area(
        "Masukkan bahan makanan",
        height=150,
        placeholder="Contoh: telur, susu, tepung terigu, garam, mentega...",
        label_visibility="collapsed"
    )
    _, col_btn2, _ = st.columns([2, 2, 2])
    with col_btn2:
        if st.button("🔍 Analisis Alergen", use_container_width=True):
            if not input_text.strip():
                st.warning("⚠️ Mohon masukkan bahan makanan terlebih dahulu.")
            else:
                with st.spinner("🔄 Sedang menganalisis..."):
                    results, probs = _predict_results(model, vectorizer, labels, input_text)
                st.success("✅ Analisis selesai!")
                display_results(results, probs, labels)


def _run_debug_ocr(reader, image_array, model, vectorizer, labels):
    """Retry OCR with more aggressive grouping and analyse whatever is found."""
    st.info("🔧 Mode debug: Mencoba ekstraksi dengan parameter yang lebih agresif...")
    try:
        ocr_items = reader.readtext(image_array, detail=1, paragraph=True, width_ths=0.1, height_ths=0.1)
        found = []
        for item in ocr_items:
            text = item[1].strip()
            if text:
                # With paragraph=True EasyOCR returns (bbox, text) items with
                # no confidence value, so treat the confidence as optional.
                conf = item[2] if len(item) > 2 else None
                found.append((text, conf))

        if not found:
            st.warning("Bahkan dalam mode debug, tidak ada teks yang dapat diekstrak.")
            return

        st.write("🔍 **Teks yang ditemukan dalam mode debug:**")
        for text, conf in found:
            if conf is None:
                st.write(f"• {text}")
            else:
                st.write(f"• {text} (conf: {conf:.2f})")

        # Try analysis with debug text
        debug_combined = " ".join(text for text, _ in found)
        results, probs = _predict_results(model, vectorizer, labels, debug_combined)
        st.markdown("### 🧪 Hasil Analisis Debug")
        display_results(results, probs, labels)
    except Exception as e:
        st.error(f"Error in debug mode: {str(e)}")


def _render_camera_mode(model, vectorizer, labels):
    """Camera mode: OCR a captured photo, then analyse the recognised text."""
    st.markdown("### 📷 Deteksi Alergen dari Gambar")
    # Info card for camera
    st.markdown("""
📷 Petunjuk Kamera: Ambil foto langsung dari daftar bahan, kemasan makanan,
atau resep. Pastikan teks terlihat jelas dan pencahayaan memadai untuk hasil OCR terbaik.
""", unsafe_allow_html=True)

    camera_image = st.camera_input("📸 Ambil foto dengan kamera")
    if camera_image is None:
        # No photo: forget the outcome of any earlier OCR attempt.
        st.session_state["ocr_no_text"] = False
        return

    # Decode once; OCR receives the decoded image, not the upload object.
    img = Image.open(camera_image)
    ocr_image = img.convert("RGB")

    _, col_img2, _ = st.columns([1, 3, 1])
    with col_img2:
        st.image(camera_image, caption="📷 Gambar yang diambil", use_container_width=True)
        # Show image info
        width, height = img.size
        st.info(f"📐 Dimensi gambar: {width} x {height} pixels")

    with st.spinner("🔄 Memuat OCR engine..."):
        reader = load_ocr_reader()
    if reader is None:
        st.error("❌ Gagal memuat OCR engine. Pastikan EasyOCR telah terinstall.")
        return

    _, col_btn2, _ = st.columns([2, 2, 2])
    with col_btn2:
        if st.button("🔍 Ekstrak Teks & Analisis", use_container_width=True, key="ocr_analyze"):
            with st.spinner("📖 Mengekstrak teks dari gambar... (ini mungkin memakan waktu)"):
                extracted_text, text_list, confidence = extract_text_from_image(ocr_image, reader)
            # Display OCR results (will show tips if no text found)
            display_ocr_results(extracted_text, text_list, confidence)

            has_text = bool(extracted_text.strip())
            # Remembered across reruns so the debug button below still works
            # after the click that triggers it reruns the script.
            st.session_state["ocr_no_text"] = not has_text
            if has_text:
                with st.spinner("🔄 Menganalisis alergen..."):
                    results, probs = _predict_results(model, vectorizer, labels, extracted_text)
                st.success("✅ Analisis selesai!")
                display_results(results, probs, labels)

        # A button nested inside another button's branch never fires: its
        # click reruns the script and the outer button is False again. The
        # debug button is therefore driven by session state instead.
        if st.session_state.get("ocr_no_text") and st.button(
            "🔧 Coba Analisis Paksa (Debug Mode)", key="debug_mode"
        ):
            _run_debug_ocr(reader, np.array(ocr_image), model, vectorizer, labels)


def _render_url_mode(model, vectorizer, labels):
    """URL mode: scrape up to 20 Cookpad recipes and analyse each one."""
    import html  # function-scope import keeps this block self-contained

    st.markdown("### 🔗 Analisis dari URL Cookpad")
    # Info card
    st.markdown("""
💡 Petunjuk: Masukkan hingga 20 URL resep dari Cookpad.
Setiap URL harus dalam baris terpisah.
""", unsafe_allow_html=True)
    urls_input = st.text_area(
        "Masukkan URL Cookpad",
        placeholder="https://cookpad.com/id/resep/...\nhttps://cookpad.com/id/resep/...",
        height=200,
        label_visibility="collapsed"
    )
    urls = [url.strip() for url in urls_input.splitlines() if url.strip()]
    if len(urls) > 20:
        st.warning("⚠️ Maksimal hanya bisa memproses 20 URL. Menggunakan 20 URL pertama.")
        urls = urls[:20]
    if urls:
        st.info(f"📊 Siap memproses {len(urls)} URL")

    if not st.button("🔍 Analisis dari URL", use_container_width=True):
        return
    if not urls:
        st.warning("⚠️ Mohon masukkan minimal satu URL.")
        return

    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(urls)
    for i, url in enumerate(urls):
        progress_bar.progress((i + 1) / total)
        # Kept on one line: the original literal was split across two lines.
        status_text.markdown(f'Memproses resep {i+1} dari {total}', unsafe_allow_html=True)
        ingredients, error = get_ingredients_from_cookpad(url)
        with st.expander(f"📖 Resep #{i+1}", expanded=False):
            st.markdown(f"**URL:** {url}")
            if error:
                st.error(f"❌ {error}")
                continue
            st.success("✅ Bahan berhasil diambil!")
            # Scraped text is untrusted and rendered with raw HTML enabled.
            ingredients_text = html.escape(", ".join(ingredients))
            st.markdown(f'''
🧾 Daftar Bahan:
{ingredients_text}
''', unsafe_allow_html=True)
            # Predict allergens
            joined_ingredients = " ".join(ingredients)
            results, probs = _predict_results(model, vectorizer, labels, joined_ingredients)
            st.markdown("---")
            display_results(results, probs, labels)

    # Clear progress indicators
    progress_bar.empty()
    status_text.empty()
    st.success("🎉 Semua resep telah dianalisis!")


def main():
    """Build the Streamlit page and dispatch to the selected input mode.

    Configures the page, renders the header and sidebar, loads the
    vectorizer and model (stopping the script when either is unavailable),
    then renders the manual, camera-OCR or Cookpad-URL workflow chosen by
    the user.
    """
    st.set_page_config(
        page_title="Deteksi Alergen Makanan",
        page_icon="🥘",
        layout="wide",
        initial_sidebar_state="expanded"
    )
    # Load custom CSS
    load_css()
    # Header
    st.markdown("""
🥘 Deteksi Alergen Makanan
Analisis kandungan alergen dalam resep makanan dengan teknologi AI & OCR
""", unsafe_allow_html=True)

    _render_sidebar()

    # Main content lives in the wide middle column.
    _, col2, _ = st.columns([1, 6, 1])
    with col2:
        st.markdown("### 🔧 Pilih Metode Input")
        input_mode = st.radio(
            "Pilih metode input data",
            ["📝 Input Manual", "📷 Kamera OCR", "🔗 URL Cookpad"],
            horizontal=True,
            label_visibility="collapsed"
        )

        # Load model components
        try:
            vectorizer = load_vectorizer()
            model = load_model()
            if vectorizer is None or model is None:
                st.stop()
            labels = ['Susu', 'Kacang', 'Telur', 'Makanan Laut', 'Gandum']
        except Exception as e:
            st.error(f"❌ Gagal memuat komponen model: {str(e)}")
            st.stop()

        st.markdown("---")
        if input_mode == "📝 Input Manual":
            _render_manual_mode(model, vectorizer, labels)
        elif input_mode == "📷 Kamera OCR":
            _render_camera_mode(model, vectorizer, labels)
        elif input_mode == "🔗 URL Cookpad":
            _render_url_mode(model, vectorizer, labels)

    # Footer
    st.markdown("""
""", unsafe_allow_html=True)
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()