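# NOTE(review): the line below looks like scraped page residue (author / commit
# message / short hash); it is kept as a comment so the module parses.
# rdsarjito - "Update kode untuk fitur baru" - 0dee53b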
import html
import pickle
import warnings

import cv2
import easyocr
import numpy as np
import requests
import streamlit as st
from bs4 import BeautifulSoup
from PIL import Image
# Suppress sklearn version warnings: every UserWarning raised from the
# ``sklearn`` module is ignored for the lifetime of the process.
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")
# === Custom CSS for better styling ===
def load_css():
    """Inject the application's custom stylesheet into the current page.

    The stylesheet is a single ``<style>`` element pushed through
    ``st.markdown`` with ``unsafe_allow_html=True``. It defines the classes
    used by the rest of the UI (header, info/camera cards, allergen result
    rows, OCR result box, ingredient container, footer) and restyles a few
    built-in Streamlit widgets.
    """
    stylesheet = """
<style>
/* Main app styling */
.main-header {
text-align: center;
padding: 2rem 0;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border-radius: 10px;
margin-bottom: 2rem;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.main-header h1 {
font-size: 2.5rem;
margin-bottom: 0.5rem;
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
}
.main-header p {
font-size: 1.1rem;
opacity: 0.9;
margin: 0;
}
/* Card styling */
.info-card {
background: white;
padding: 1.5rem;
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
border-left: 4px solid #667eea;
margin: 1rem 0;
}
/* Camera card styling */
.camera-card {
background: linear-gradient(135deg, #f8f9fa, #e9ecef);
padding: 1.5rem;
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
border-left: 4px solid #28a745;
margin: 1rem 0;
}
/* Results styling - matching the image design */
.allergen-result {
padding: 1rem 1.5rem;
border-radius: 8px;
margin: 0.5rem 0;
font-size: 1rem;
font-weight: 500;
display: flex;
align-items: center;
gap: 0.5rem;
}
.allergen-detected {
background-color: #f8d7da;
color: #721c24;
border: 1px solid #f1aeb5;
}
.allergen-safe {
background-color: #d1e7dd;
color: #0f5132;
border: 1px solid #a3cfbb;
}
.allergen-summary {
background-color: #fff3cd;
color: #664d03;
border: 1px solid #ffecb5;
padding: 1rem 1.5rem;
border-radius: 8px;
margin: 1rem 0;
font-weight: 600;
text-align: center;
}
/* OCR result styling */
.ocr-result {
background: #f8f9fa;
padding: 1rem 1.5rem;
margin: 1rem 0;
border-radius: 10px;
border-left: 4px solid #17a2b8;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
line-height: 1.6;
font-size: 1rem;
}
.ocr-result strong {
color: #495057;
font-weight: 600;
}
/* Button styling */
.stButton > button {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
border: none;
border-radius: 25px;
padding: 0.75rem 2rem;
font-weight: bold;
transition: all 0.3s ease;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.stButton > button:hover {
transform: translateY(-2px);
box-shadow: 0 6px 8px rgba(0, 0, 0, 0.15);
}
/* Camera button styling */
.camera-button {
background: linear-gradient(135deg, #28a745 0%, #20c997 100%) !important;
}
/* Radio button styling */
.stRadio > div {
background: white;
padding: 1rem;
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
/* Text area styling */
.stTextArea > div > div > textarea {
border-radius: 10px;
border: 2px solid #e0e0e0;
transition: border-color 0.3s ease;
}
.stTextArea > div > div > textarea:focus {
border-color: #667eea;
box-shadow: 0 0 10px rgba(102, 126, 234, 0.2);
}
/* Expander styling */
.streamlit-expanderHeader {
background: linear-gradient(135deg, #f8f9fa, #e9ecef);
border-radius: 10px;
border: 1px solid #dee2e6;
}
/* Progress indicator */
.progress-text {
text-align: center;
font-weight: bold;
color: #667eea;
margin: 1rem 0;
}
/* Improved ingredient list styling - single div */
.ingredients-container {
background: #f8f9fa;
padding: 1rem 1.5rem;
margin: 1rem 0;
border-radius: 10px;
border-left: 4px solid #667eea;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
line-height: 1.6;
font-size: 1rem;
}
.ingredients-container strong {
color: #495057;
font-weight: 600;
}
/* Footer */
.footer {
text-align: center;
padding: 2rem 0;
color: #6c757d;
border-top: 1px solid #e9ecef;
margin-top: 3rem;
}
</style>
"""
    # Raw HTML is required for the <style> element to reach the browser.
    st.markdown(stylesheet, unsafe_allow_html=True)
# === Load EasyOCR Reader ===
@st.cache_resource
def _build_ocr_reader():
    """Construct the EasyOCR reader (Indonesian + English, CPU only).

    Deliberately has no error handling: an exception raised here is not
    cached by ``st.cache_resource``, so the next rerun tries again.
    """
    return easyocr.Reader(['id', 'en'], gpu=False)  # Indonesian and English


def load_ocr_reader():
    """Load the EasyOCR reader with Indonesian and English language support.

    The reader itself is cached across reruns by ``_build_ocr_reader``. The
    failure path lives outside the cached function so that a transient
    failure does not get a ``None`` result cached for the rest of the
    process lifetime.

    Returns:
        The ``easyocr.Reader`` instance, or ``None`` if it could not be
        created (an error message is shown in the UI in that case).
    """
    try:
        return _build_ocr_reader()
    except Exception as e:  # UI boundary: report the failure instead of crashing the page
        st.error(f"❌ Gagal memuat EasyOCR: {str(e)}")
        return None
# === Load TF-IDF Vectorizer ===
@st.cache_resource
def _read_vectorizer_pickle():
    """Unpickle the TF-IDF vectorizer from ``saved_models/``.

    Raises whatever ``open``/``pickle.load`` raise; exceptions are not
    cached by ``st.cache_resource``, so a later rerun retries the load.
    """
    # SECURITY: pickle.load can execute arbitrary code. Only ship model files
    # from a trusted source alongside this app.
    with open("saved_models/tfidf_vectorizer.pkl", "rb") as f:
        return pickle.load(f)


def load_vectorizer():
    """Load the pickled TF-IDF vectorizer.

    The loaded object is cached across reruns by ``_read_vectorizer_pickle``.
    Error handling is kept outside the cached function so that a failed load
    does not leave ``None`` cached until the process restarts.

    Returns:
        The unpickled vectorizer, or ``None`` when loading failed (an error
        and an installation hint are shown in the UI).
    """
    try:
        return _read_vectorizer_pickle()
    except Exception as e:  # UI boundary: report the failure instead of crashing the page
        st.error(f"❌ Gagal memuat vectorizer: {str(e)}")
        st.warning("⚠️ Jika error terkait versi sklearn, coba install ulang dengan: pip install scikit-learn==1.2.2")
        return None
# === Load XGBoost Model ===
@st.cache_resource
def _read_model_pickle():
    """Unpickle the XGBoost model from ``saved_models/``.

    Raises whatever ``open``/``pickle.load`` raise; exceptions are not
    cached by ``st.cache_resource``, so a later rerun retries the load.
    """
    # SECURITY: pickle.load can execute arbitrary code. Only ship model files
    # from a trusted source alongside this app.
    with open("saved_models/XGBoost_model.pkl", "rb") as f:
        return pickle.load(f)


def load_model():
    """Load the pickled XGBoost classifier.

    The loaded object is cached across reruns by ``_read_model_pickle``.
    Error handling is kept outside the cached function so that a failed load
    does not leave ``None`` cached until the process restarts.

    Returns:
        The unpickled model, or ``None`` when loading failed (an error and an
        installation hint are shown in the UI).
    """
    try:
        return _read_model_pickle()
    except Exception as e:  # UI boundary: report the failure instead of crashing the page
        st.error(f"❌ Gagal memuat model: {str(e)}")
        st.warning("⚠️ Jika error terkait versi sklearn, coba install ulang dengan: pip install scikit-learn==1.2.2")
        return None
# === OCR Text Extraction ===
def extract_text_from_image(image, reader):
    """Extract text from an image using EasyOCR.

    Args:
        image: One of
            * a ``numpy.ndarray``, a file path/URL ``str`` or raw ``bytes``
              (handed to the reader unchanged),
            * a ``PIL.Image.Image``,
            * a binary file-like object such as the value returned by
              ``st.camera_input`` (decoded with Pillow first, because the
              reader does not accept file objects).
        reader: Object exposing ``readtext(image)`` that yields
            ``(bbox, text, confidence)`` triples.

    Returns:
        ``(full_text, extracted_texts, avg_confidence)`` where ``full_text``
        is the kept fragments joined by single spaces, ``extracted_texts`` is
        the list of kept fragments and ``avg_confidence`` is their mean
        confidence. On any failure, or when nothing is kept, the result is
        ``("", [], 0)``; failures are additionally reported in the UI.
    """
    min_confidence = 0.3  # fragments at or below this score are treated as noise
    try:
        if isinstance(image, (np.ndarray, str, bytes)):
            # Already in a form the reader understands.
            ocr_input = image
        else:
            if not isinstance(image, Image.Image):
                # File-like object: rewind in case a previous consumer
                # already read from it, then decode.
                if hasattr(image, "seek"):
                    image.seek(0)
                image = Image.open(image)
            # Normalise palette/alpha/greyscale modes to a plain RGB array.
            ocr_input = np.array(image.convert("RGB"))

        results = reader.readtext(ocr_input)

        extracted_texts = []
        confidence_scores = []
        for _bbox, text, confidence in results:
            if confidence > min_confidence:
                extracted_texts.append(text)
                confidence_scores.append(confidence)

        full_text = " ".join(extracted_texts)
        avg_confidence = float(np.mean(confidence_scores)) if confidence_scores else 0
        return full_text, extracted_texts, avg_confidence
    except Exception as e:  # best-effort OCR: report the failure, then return the empty result
        st.error(f"❌ Gagal mengekstrak teks dari gambar: {str(e)}")
        return "", [], 0
# === Prediction ===
def predict_allergen(model, vectorizer, input_text):
    """Predict allergen labels and per-label confidence for one text.

    Args:
        model: Fitted classifier exposing ``predict`` and, optionally,
            ``predict_proba`` or ``decision_function``.
        vectorizer: Fitted vectorizer exposing ``transform``.
        input_text: Ingredient text to classify.

    Returns:
        ``(prediction, scores)`` where ``prediction`` is the first row of
        ``model.predict`` and ``scores`` holds one confidence per label in
        the 0-1 range. Scores come from, in order of preference:

        1. ``predict_proba`` - the positive-class column of each per-label
           array when a list is returned, otherwise the first row as is;
        2. ``decision_function`` squashed through a sigmoid;
        3. the hard predictions themselves (0.0 or 1.0).
    """
    X_input = vectorizer.transform([input_text])
    prediction = model.predict(X_input)

    try:
        probabilities = model.predict_proba(X_input)
        if isinstance(probabilities, list):
            # Multi-label estimators return one (n_samples, n_classes) array
            # per label; keep the positive-class probability of each, or the
            # only column when the label was trained with a single class.
            positive_probs = [
                prob_array[0][1] if prob_array.shape[1] == 2 else prob_array[0][0]
                for prob_array in probabilities
            ]
            return prediction[0], positive_probs
        # Single array output: first (and only) sample row.
        return prediction[0], probabilities[0]
    except Exception:
        # predict_proba is missing or returned an unexpected shape; fall
        # through to the weaker confidence sources below.
        pass

    try:
        decision_scores = model.decision_function(X_input)
        # Map raw margins to the 0-1 range with a sigmoid.
        return prediction[0], 1 / (1 + np.exp(-decision_scores[0]))
    except Exception:
        # Last resort: report the hard predictions as 0% / 100% confidence.
        return prediction[0], [float(pred) for pred in prediction[0]]
# === Scrape ingredients from Cookpad ===
def get_ingredients_from_cookpad(url, timeout=10):
    """Fetch a recipe page and return its ingredient lines.

    Args:
        url: Address of the recipe page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would hang the whole Streamlit script.

    Returns:
        ``(ingredients, None)`` on success, where ``ingredients`` is a
        non-empty list of strings, or ``(None, message)`` when the page
        could not be fetched or no ingredients were found.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return None, "Gagal mengambil halaman."

        soup = BeautifulSoup(response.text, "html.parser")
        ingredient_div = soup.find("div", class_="ingredient-list")
        if ingredient_div is None:
            return None, "Tidak menemukan elemen bahan."

        ingredients = []
        for item in ingredient_div.find_all("li"):
            amount = item.find("bdi")
            name = item.find("span")
            if amount is not None and name is not None:
                ingredients.append(f"{amount.get_text(strip=True)} {name.get_text(strip=True)}")
            else:
                # No amount/name split available: keep the whole line.
                ingredients.append(item.get_text(strip=True))

        if not ingredients:
            # An empty list is not a usable result; report it like a missing
            # ingredient element instead of letting callers analyse "".
            return None, "Tidak menemukan elemen bahan."
        return ingredients, None
    except Exception as e:  # best-effort scraper: network/parse errors become a message
        return None, f"Terjadi kesalahan: {str(e)}"
# === Display OCR Results ===
def display_ocr_results(extracted_text, text_list, confidence):
    """Display OCR extraction results.

    Args:
        extracted_text: All recognised text joined into one string.
        text_list: The individual recognised fragments.
        confidence: Mean OCR confidence, formatted with two decimals.

    When ``extracted_text`` is non-blank the text, a collapsible per-fragment
    list and (for confidence below 0.5) a photography tip are shown.
    Otherwise a warning and a troubleshooting card are shown.
    """
    import html  # local import keeps this block self-contained

    st.markdown("### 📖 Hasil Ekstraksi Teks")

    if not extracted_text.strip():
        st.warning("⚠️ Tidak ada teks yang dapat diekstrak dari gambar.")
        # Provide detailed troubleshooting tips
        st.markdown("""
        <div class="info-card">
        <strong>🔧 Tips Troubleshooting:</strong><br>
        • Pastikan pencahayaan cukup terang<br>
        • Hindari bayangan pada teks<br>
        • Pastikan teks tidak buram atau kabur<br>
        • Coba pegang kamera lebih stabil<br>
        • Pastikan teks berukuran cukup besar di foto<br>
        • Hindari refleksi cahaya pada permukaan teks<br>
        • Coba ambil foto dari jarak yang berbeda
        </div>
        """, unsafe_allow_html=True)
        return

    # OCR output is untrusted (it is whatever was printed on the photographed
    # object), so escape it before embedding it in raw HTML. The markup is
    # built without leading indentation so Markdown can never mistake it for
    # an indented code block.
    safe_text = html.escape(extracted_text)
    st.markdown(
        '<div class="ocr-result">'
        '<strong>📝 Teks yang Terdeteksi:</strong><br>'
        f'{safe_text}'
        '</div>',
        unsafe_allow_html=True,
    )

    # Show confidence and individual text elements
    with st.expander(f"📊 Detail OCR (Confidence: {confidence:.2f})", expanded=False):
        st.markdown("**Teks Individual yang Terdeteksi:**")
        for i, text in enumerate(text_list, 1):
            st.write(f"{i}. {text}")

    # Show tips for better results
    if confidence < 0.5:
        st.info("💡 **Tips untuk hasil yang lebih baik:** Confidence rendah terdeteksi. Coba ambil foto dengan pencahayaan yang lebih baik, hindari bayangan, dan pastikan teks tidak buram.")
# === Display results with custom styling matching the image ===
def display_results(results, probabilities, labels):
    """Render one result row per allergen plus a summary banner.

    Args:
        results: Mapping of allergen name to predicted status; ``1`` means
            detected. Rows are rendered in the mapping's iteration order.
        probabilities: Sequence of positive-class probabilities (0-1), one
            per entry of ``results`` in the same order. May be shorter,
            ``None`` or otherwise unusable, in which case the hard
            prediction is shown as 100% confidence.
        labels: Unused; kept so existing callers keep working.

    The percentage shown is always the confidence in the *displayed*
    verdict: P(positive) for "detected" rows and 100 - P(positive) for
    "not detected" rows.
    """
    st.markdown("### 🎯 Hasil Analisis Alergen")

    # Emoji mapping for each allergen
    allergen_emojis = {
        'Susu': '🥛',
        'Kacang': '🥜',
        'Telur': '🥚',
        'Makanan Laut': '🦐',
        'Gandum': '🌾',
    }

    detected_allergens = []
    for i, (allergen, status) in enumerate(results.items()):
        emoji = allergen_emojis.get(allergen, '📋')
        detected = status == 1

        # Positive-class probability as a percentage; fall back to the hard
        # prediction when no usable probability exists for this index.
        try:
            positive_pct = float(probabilities[i]) * 100
        except (IndexError, KeyError, TypeError, ValueError):
            positive_pct = 100.0 if detected else 0.0

        if detected:
            detected_allergens.append(allergen)
            st.markdown(
                f'<div class="allergen-result allergen-detected">'
                f'{emoji} {allergen}: Terdeteksi ⚠️ ({positive_pct:.2f}%)'
                f'</div>',
                unsafe_allow_html=True,
            )
        else:
            # Confidence that the allergen is absent is the complement of the
            # positive-class probability.
            negative_confidence = 100 - positive_pct
            st.markdown(
                f'<div class="allergen-result allergen-safe">'
                f'{emoji} {allergen}: Tidak Terdeteksi ✓ ({negative_confidence:.2f}%)'
                f'</div>',
                unsafe_allow_html=True,
            )

    # Display summary
    if detected_allergens:
        allergen_list = ", ".join(detected_allergens)
        st.markdown(
            f'<div class="allergen-summary">'
            f'Resep ini mengandung alergen: {allergen_list}'
            f'</div>',
            unsafe_allow_html=True,
        )
    else:
        st.markdown(
            '<div class="allergen-summary">'
            '🎉 Tidak ada alergen berbahaya terdeteksi dalam resep ini!'
            '</div>',
            unsafe_allow_html=True,
        )
# === Main UI ===
def _render_sidebar():
    """Render the static sidebar: detectable allergens and usage tips."""
    with st.sidebar:
        st.markdown("### 📋 Informasi Alergen")
        st.markdown("""
        **Alergen yang dapat dideteksi:**
        - 🥛 Susu
        - 🥜 Kacang
        - 🥚 Telur
        - 🦐 Makanan Laut
        - 🌾 Gandum
        """)
        st.markdown("### 💡 Tips Penggunaan")
        st.markdown("""
        **Input Manual:**
        - Masukkan bahan dengan detail
        - Gunakan nama bahan dalam bahasa Indonesia

        **Kamera OCR:**
        - Pastikan teks terlihat jelas
        - Gunakan pencahayaan yang baik
        - Hindari blur atau teks terpotong

        **URL Cookpad:**
        - Pastikan link valid
        - Maksimal 20 URL per analisis
        """)


def _render_manual_input(model, vectorizer, labels):
    """Manual mode: analyse ingredient text typed by the user."""
    st.markdown("### 📝 Masukkan Bahan Makanan")
    # Info card
    st.markdown("""
    <div class="info-card">
    <strong>💡 Petunjuk:</strong> Masukkan daftar bahan makanan yang ingin dianalisis.
    Pisahkan setiap bahan dengan koma atau baris baru.
    </div>
    """, unsafe_allow_html=True)

    input_text = st.text_area(
        "Masukkan bahan makanan",
        height=150,
        placeholder="Contoh: telur, susu, tepung terigu, garam, mentega...",
        label_visibility="collapsed"
    )

    col_btn1, col_btn2, col_btn3 = st.columns([2, 2, 2])
    with col_btn2:
        if st.button("🔍 Analisis Alergen", use_container_width=True):
            if not input_text.strip():
                st.warning("⚠️ Mohon masukkan bahan makanan terlebih dahulu.")
            else:
                with st.spinner("🔄 Sedang menganalisis..."):
                    pred, probs = predict_allergen(model, vectorizer, input_text)
                    results = dict(zip(labels, pred))
                st.success("✅ Analisis selesai!")
                display_results(results, probs, labels)


def _run_debug_ocr(image_array, reader, model, vectorizer, labels):
    """Retry OCR with looser grouping thresholds and analyse whatever is found."""
    st.info("🔧 Mode debug: Mencoba ekstraksi dengan parameter yang lebih agresif...")
    try:
        raw_results = reader.readtext(
            image_array, detail=1, paragraph=True, width_ths=0.1, height_ths=0.1
        )
        # With paragraph=True EasyOCR returns [bbox, text] entries without a
        # confidence value, so accept both the 2- and 3-element shapes.
        found = []
        for item in raw_results:
            text = str(item[1]).strip()
            if text:
                found.append((text, item[2] if len(item) > 2 else None))

        if not found:
            st.warning("Bahkan dalam mode debug, tidak ada teks yang dapat diekstrak.")
            return

        st.write("🔍 **Teks yang ditemukan dalam mode debug:**")
        for text, conf in found:
            suffix = f" (conf: {conf:.2f})" if conf is not None else ""
            st.write(f"• {text}{suffix}")

        # Try analysis with debug text
        debug_combined = " ".join(text for text, _ in found)
        pred, probs = predict_allergen(model, vectorizer, debug_combined)
        results = dict(zip(labels, pred))
        st.markdown("### 🧪 Hasil Analisis Debug")
        display_results(results, probs, labels)
    except Exception as e:  # debug aid: show the error rather than abort the page
        st.error(f"Error in debug mode: {str(e)}")


def _render_camera_ocr(model, vectorizer, labels):
    """Camera mode: photograph an ingredient list, OCR it, analyse the text."""
    # Session key remembering which photo produced no text. A button nested
    # inside another button's branch can never fire (its click triggers a
    # rerun in which the outer button is False again), so the debug button is
    # driven by this persisted state instead.
    failed_key = "ocr_failed_image_key"

    st.markdown("### 📷 Deteksi Alergen dari Gambar")
    # Info card for camera
    st.markdown("""
    <div class="camera-card">
    <strong>📷 Petunjuk Kamera:</strong> Ambil foto langsung dari daftar bahan, kemasan makanan,
    atau resep. Pastikan teks terlihat jelas dan pencahayaan memadai untuk hasil OCR terbaik.
    </div>
    """, unsafe_allow_html=True)

    # Camera input
    camera_image = st.camera_input("📸 Ambil foto dengan kamera")
    if camera_image is None:
        return

    # Display the captured image
    col_img1, col_img2, col_img3 = st.columns([1, 3, 1])
    with col_img2:
        st.image(camera_image, caption="📷 Gambar yang diambil", use_container_width=True)
        # Show image info
        img = Image.open(camera_image)
        width, height = img.size
        st.info(f"📐 Dimensi gambar: {width} x {height} pixels")

    # Load OCR reader
    with st.spinner("🔄 Memuat OCR engine..."):
        reader = load_ocr_reader()
    if reader is None:
        st.error("❌ Gagal memuat OCR engine. Pastikan EasyOCR telah terinstall.")
        return

    # Hand OCR a decoded RGB array: the reader does not accept the uploaded
    # file object itself.
    image_array = np.array(img.convert("RGB"))
    # Identifies this particular photo within the running process.
    image_key = hash(camera_image.getvalue())

    col_btn1, col_btn2, col_btn3 = st.columns([2, 2, 2])
    with col_btn2:
        if st.button("🔍 Ekstrak Teks & Analisis", use_container_width=True, key="ocr_analyze"):
            # Extract text from image
            with st.spinner("📖 Mengekstrak teks dari gambar... (ini mungkin memakan waktu)"):
                extracted_text, text_list, confidence = extract_text_from_image(image_array, reader)
            # Display OCR results (will show tips if no text found)
            display_ocr_results(extracted_text, text_list, confidence)

            if extracted_text.strip():
                st.session_state.pop(failed_key, None)
                # Analyze allergens
                with st.spinner("🔄 Menganalisis alergen..."):
                    pred, probs = predict_allergen(model, vectorizer, extracted_text)
                    results = dict(zip(labels, pred))
                st.success("✅ Analisis selesai!")
                display_results(results, probs, labels)
            else:
                st.session_state[failed_key] = image_key

        # Offer the aggressive retry only for the photo that just failed.
        if st.session_state.get(failed_key) == image_key:
            if st.button("🔧 Coba Analisis Paksa (Debug Mode)", key="debug_mode"):
                _run_debug_ocr(image_array, reader, model, vectorizer, labels)


def _render_cookpad_urls(model, vectorizer, labels):
    """URL mode: scrape up to 20 Cookpad recipes and analyse each one."""
    import html  # local import keeps this block self-contained

    max_urls = 20

    st.markdown("### 🔗 Analisis dari URL Cookpad")
    # Info card
    st.markdown("""
    <div class="info-card">
    <strong>💡 Petunjuk:</strong> Masukkan hingga 20 URL resep dari Cookpad.
    Setiap URL harus dalam baris terpisah.
    </div>
    """, unsafe_allow_html=True)

    urls_input = st.text_area(
        "Masukkan URL Cookpad",
        placeholder="https://cookpad.com/id/resep/...\nhttps://cookpad.com/id/resep/...",
        height=200,
        label_visibility="collapsed"
    )
    urls = [url.strip() for url in urls_input.splitlines() if url.strip()]
    if len(urls) > max_urls:
        st.warning("⚠️ Maksimal hanya bisa memproses 20 URL. Menggunakan 20 URL pertama.")
        urls = urls[:max_urls]
    if urls:
        st.info(f"📊 Siap memproses {len(urls)} URL")

    if not st.button("🔍 Analisis dari URL", use_container_width=True):
        return
    if not urls:
        st.warning("⚠️ Mohon masukkan minimal satu URL.")
        return

    # Progress bar
    progress_bar = st.progress(0)
    status_text = st.empty()
    for i, url in enumerate(urls):
        # Update progress
        progress_bar.progress((i + 1) / len(urls))
        status_text.markdown(f'<div class="progress-text">Memproses resep {i+1} dari {len(urls)}</div>', unsafe_allow_html=True)

        ingredients, error = get_ingredients_from_cookpad(url)
        with st.expander(f"📖 Resep #{i+1}", expanded=False):
            st.markdown(f"**URL:** {url}")
            if error or not ingredients:
                st.error(f"❌ {error or 'Tidak menemukan elemen bahan.'}")
                continue

            st.success("✅ Bahan berhasil diambil!")
            # Scraped page text is untrusted: escape it before embedding it
            # in raw HTML.
            ingredients_text = html.escape(", ".join(ingredients))
            st.markdown(
                '<div class="ingredients-container">'
                '<strong>🧾 Daftar Bahan:</strong><br>'
                f'{ingredients_text}'
                '</div>',
                unsafe_allow_html=True,
            )

            # Predict allergens
            joined_ingredients = " ".join(ingredients)
            pred, probs = predict_allergen(model, vectorizer, joined_ingredients)
            results = dict(zip(labels, pred))
            st.markdown("---")
            display_results(results, probs, labels)

    # Clear progress indicators
    progress_bar.empty()
    status_text.empty()
    st.success("🎉 Semua resep telah dianalisis!")


def main():
    """Build the Streamlit page.

    Configures the page, injects the stylesheet, renders the header and
    sidebar, loads the vectorizer and model (stopping the script if either
    is unavailable) and then dispatches to the renderer for the input mode
    picked by the user: manual text, camera OCR, or Cookpad URLs.
    """
    mode_manual = "📝 Input Manual"
    mode_camera = "📷 Kamera OCR"
    mode_url = "🔗 URL Cookpad"

    st.set_page_config(
        page_title="Deteksi Alergen Makanan",
        page_icon="🥘",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    # Load custom CSS
    load_css()

    # Header
    st.markdown("""
    <div class="main-header">
    <h1>🥘 Deteksi Alergen Makanan</h1>
    <p>Analisis kandungan alergen dalam resep makanan dengan teknologi AI & OCR</p>
    </div>
    """, unsafe_allow_html=True)

    # Sidebar info
    _render_sidebar()

    # Main content
    col1, col2, col3 = st.columns([1, 6, 1])
    with col2:
        # Input method selection
        st.markdown("### 🔧 Pilih Metode Input")
        input_mode = st.radio(
            "Pilih metode input data",
            [mode_manual, mode_camera, mode_url],
            horizontal=True,
            label_visibility="collapsed"
        )

        # Load model components
        try:
            vectorizer = load_vectorizer()
            model = load_model()
            if vectorizer is None or model is None:
                st.stop()
            labels = ['Susu', 'Kacang', 'Telur', 'Makanan Laut', 'Gandum']
        except Exception as e:
            st.error(f"❌ Gagal memuat komponen model: {str(e)}")
            st.stop()

        st.markdown("---")

        if input_mode == mode_manual:
            _render_manual_input(model, vectorizer, labels)
        elif input_mode == mode_camera:
            _render_camera_ocr(model, vectorizer, labels)
        elif input_mode == mode_url:
            _render_cookpad_urls(model, vectorizer, labels)

    # Footer
    st.markdown("""
    <div class="footer">
    <p>🔬 Powered by XGBoost, TF-IDF & EasyOCR | Made with ❤️ using Streamlit</p>
    </div>
    """, unsafe_allow_html=True)
# Build the page only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()