|
import streamlit as st |
|
import tensorflow as tf |
|
from PIL import Image |
|
import numpy as np |
|
import json |
|
from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input |
|
from tensorflow.keras.preprocessing.image import img_to_array |
|
from tensorflow.keras.preprocessing.text import Tokenizer,tokenizer_from_json |
|
from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
from tensorflow.keras.models import Model |
|
|
|
from keras.models import load_model |
|
|
|
|
|
# Page configuration is issued before anything else that may talk to the
# Streamlit frontend: the cached loaders below can render a spinner, and
# Streamlit releases that enforce ordering require set_page_config to come
# first.
st.set_page_config(page_title="Image Captioning App", layout="wide")

# Streamlit re-executes this whole script on every widget interaction.
# Wrapping the model construction in st.cache_resource builds each network
# once per server process instead of on every rerun. On Streamlit releases
# that do not provide st.cache_resource, fall back to plain (uncached) calls.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_caption_model():
    """Load the trained captioning network from ``image_caption.h5``."""
    return load_model('image_caption.h5')


@_cache_resource
def _load_feature_extractor():
    """Build VGG16 truncated at its second-to-last layer."""
    base = VGG16()
    return Model(inputs=base.inputs, outputs=base.layers[-2].output)


# Caption generator; consumes image features plus the partial caption sequence.
model = _load_caption_model()

# Tokenizer state is restored from the JSON file saved alongside the model.
with open('tokenizer_config.json', 'r') as f:
    tokenizer_config = json.load(f)
tokenizer = tokenizer_from_json(tokenizer_config)

# Passed to predict_caption as both the maximum number of decoding steps and
# the padded input sequence length.
max_length = 35

# Image encoder whose output is fed to the caption model.
vgg_model = _load_feature_extractor()
|
|
|
|
|
|
|
def preprocess_image(image):
    """Convert an image into a preprocessed single-image batch for VGG16.

    Args:
        image: An object with PIL-style ``convert`` and ``resize`` methods.

    Returns:
        The array produced by ``preprocess_input`` for the RGB, 224x224
        version of ``image`` with a leading batch axis of size 1.
    """
    rgb = image.convert("RGB").resize((224, 224))
    pixels = img_to_array(rgb)
    # Prepend a batch axis of size 1 in front of the existing axes.
    batch = pixels.reshape((1,) + pixels.shape)
    return preprocess_input(batch)
|
|
|
|
|
def predict(image):
    """Generate a caption for ``image``.

    The image is preprocessed, encoded with ``vgg_model``, and decoded with
    ``predict_caption``; the ``startseq`` / ``endseq`` markers are then
    removed from the decoded text.

    Args:
        image: The image to caption, in the form ``preprocess_image`` accepts.

    Returns:
        The caption text without sequence markers. The text retains the
        space that separated it from the start marker.
    """
    batch = preprocess_image(image)
    feature = vgg_model.predict(batch, verbose=0)
    caption = predict_caption(model, feature, tokenizer, max_length)

    # Decoding may stop without emitting "endseq" (length limit reached or an
    # index with no word). Strip each marker only when it is actually there;
    # slicing off fixed character counts would eat real caption text in that
    # case.
    start_marker = "startseq"
    end_marker = " endseq"
    if caption.startswith(start_marker):
        caption = caption[len(start_marker):]
    if caption.endswith(end_marker):
        caption = caption[:-len(end_marker)]
    return caption
|
|
|
def idx_word(integer, tok):
    """Return the word that ``tok`` maps to index ``integer``.

    Args:
        integer: Vocabulary index to look up (a plain or NumPy integer).
        tok: Tokenizer-like object exposing ``word_index`` (word -> index)
            and, optionally, ``index_word`` (index -> word).

    Returns:
        The matching word, or ``None`` if no word has that index.
    """
    # Fast path: use the reverse map when the tokenizer provides one, so a
    # lookup does not have to walk the whole vocabulary for every word.
    reverse_index = getattr(tok, "index_word", None)
    if reverse_index:
        word = reverse_index.get(integer)
        if word is not None:
            return word

    # Fallback for tokenizers without a (complete) reverse map.
    for word, index in tok.word_index.items():
        if index == integer:
            return word
    return None
|
|
|
def predict_caption(model, image, tok, max_len):
    """Decode a caption one word at a time, always taking the top prediction.

    Starting from ``"startseq"``, each step tokenizes and pads the caption
    built so far, asks ``model`` for a prediction given ``image`` and that
    sequence, and appends the word whose index has the highest score.

    Args:
        model: Object whose ``predict([image, sequence], verbose=0)`` returns
            scores over the vocabulary (presumably shape (1, vocab) — confirm).
        image: Image features passed through to ``model.predict`` unchanged.
        tok: Tokenizer providing ``texts_to_sequences`` and the word lookup
            used by ``idx_word``.
        max_len: Maximum number of decoding steps and the padded length.

    Returns:
        The decoded text, beginning with ``"startseq"`` and ending with
        ``"endseq"`` only if the model produced it before decoding stopped.
    """
    caption = "startseq"
    for _ in range(max_len):
        encoded = tok.texts_to_sequences([caption])[0]
        padded = pad_sequences([encoded], max_len)
        scores = model.predict([image, padded], verbose=0)
        next_word = idx_word(np.argmax(scores), tok)
        if next_word is None:
            # The predicted index has no vocabulary entry; stop decoding.
            break
        caption = caption + " " + next_word
        if next_word == 'endseq':
            break
    return caption
|
|
|
|
|
def main():
    """Render the captioning page.

    Shows a file uploader; once an image is uploaded it is previewed, and
    pressing "Generate Caption" runs ``predict`` on it and displays the
    resulting text.
    """
    st.title("Image Captioning App")
    # The app produces captions, not class labels.
    st.write("Upload an image and the app will generate a caption for it.")

    uploaded_image = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])
    if uploaded_image is None:
        return

    try:
        image = Image.open(uploaded_image)
    except OSError:
        # The extension filter does not guarantee the bytes are a readable
        # image; report the problem instead of crashing the page.
        st.error("The uploaded file could not be read as an image.")
        return

    st.image(image, caption='Uploaded Image', use_column_width=True)
    st.write("")

    if st.button("Generate Caption"):
        with st.spinner("Generating..."):
            predictions = predict(image)

        st.write(f"Top Caption:{predictions}")
|
|
|
|
|
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|