UtkarshShivhare
/

image_captioning

Model card Files Files and versions

image_captioning / app.py

UtkarshShivhare's picture

UtkarshShivhare

Upload 2 files

6e7fbce verified over 1 year ago

history blame contribute delete

2.75 kB

	import streamlit as st
	import tensorflow as tf
	from PIL import Image
	import numpy as np
	import json
	from tensorflow.keras.applications.vgg16 import VGG16,preprocess_input
	from tensorflow.keras.preprocessing.image import img_to_array
	from tensorflow.keras.preprocessing.text import Tokenizer,tokenizer_from_json
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	from tensorflow.keras.models import Model

	from keras.models import load_model

	# Streamlit re-executes this script from the top on every widget interaction.
	# Page configuration is issued before any other Streamlit call, and the heavy
	# models are loaded once via st.cache_resource instead of on every rerun.
	st.set_page_config(page_title="Image Captioning App", layout="wide")

	# Number of decoding steps and the length token sequences are padded to.
	max_length = 35


	@st.cache_resource
	def _load_caption_resources():
	    """Load the caption model, tokenizer and VGG16 feature extractor once.

	    Returns:
	        A ``(model, tokenizer, vgg_model)`` tuple: the model loaded from
	        ``image_caption.h5``, the tokenizer rebuilt from
	        ``tokenizer_config.json``, and a VGG16 network whose output is taken
	        from its second-to-last layer.
	    """
	    caption_model = load_model('image_caption.h5')

	    with open('tokenizer_config.json', 'r', encoding='utf-8') as f:
	        tokenizer_config = json.load(f)
	    caption_tokenizer = tokenizer_from_json(tokenizer_config)

	    # Use the output of VGG16's second-to-last layer as the image features.
	    base_vgg = VGG16()
	    feature_extractor = Model(
	        inputs=base_vgg.inputs, outputs=base_vgg.layers[-2].output
	    )
	    return caption_model, caption_tokenizer, feature_extractor


	model, tokenizer, vgg_model = _load_caption_resources()


	def preprocess_image(image):
	    """Turn a PIL image into a single-image batch for the VGG16 extractor.

	    The image is converted to RGB, resized to 224x224, converted to an array,
	    given a leading batch axis of size 1, and finally passed through
	    ``preprocess_input``.

	    Args:
	        image: A PIL image.

	    Returns:
	        The preprocessed array with a leading batch dimension.
	    """
	    rgb = image.convert("RGB").resize((224, 224))
	    pixels = img_to_array(rgb)
	    # Prepend the batch axis: (h, w, c) -> (1, h, w, c).
	    batch = pixels.reshape((1,) + pixels.shape[:3])
	    return preprocess_input(batch)

	def predict(image):
	    """Generate a caption for a PIL image.

	    The image is preprocessed, run through the VGG16 feature extractor, and
	    the resulting features are decoded into text by ``predict_caption``.

	    Args:
	        image: A PIL image.

	    Returns:
	        The caption with the ``startseq`` prefix and, when present, the
	        trailing ``endseq`` marker removed. The space that followed
	        ``startseq`` is kept as the first character, so the result is
	        unchanged from earlier behavior whenever decoding ended on ``endseq``.
	    """
	    features = vgg_model.predict(preprocess_image(image), verbose=0)
	    caption = predict_caption(model, features, tokenizer, max_length)

	    # The decoded text always begins with the "startseq" seed.
	    caption = caption[len("startseq"):]

	    # predict_caption can stop without emitting "endseq" (step limit reached
	    # or an index with no word). Only strip the marker when it is really
	    # there, otherwise the last characters of the caption would be lost.
	    end_marker = " endseq"
	    if caption.endswith(end_marker):
	        caption = caption[:-len(end_marker)]
	    return caption

	def idx_word(integer, tok):
	    """Return the word that the tokenizer maps to ``integer``.

	    Args:
	        integer: Vocabulary index to look up.
	        tok: Tokenizer exposing ``word_index`` (word -> index) and, optionally,
	            ``index_word`` (index -> word).

	    Returns:
	        The matching word, or ``None`` when no word has that index.
	    """
	    # Prefer the reverse map: one dict lookup instead of scanning the whole
	    # vocabulary for every generated word.
	    reverse_map = getattr(tok, "index_word", None)
	    if reverse_map:
	        return reverse_map.get(integer)

	    # Fallback for tokenizers whose reverse map is missing or empty.
	    for word, index in tok.word_index.items():
	        if index == integer:
	            return word
	    return None

	def predict_caption(model, image, tok, max_len):
	    """Decode a caption one word at a time, always taking the argmax word.

	    Starting from ``startseq``, each step tokenizes the text so far, pads it
	    to ``max_len``, asks ``model`` for the next-word scores and appends the
	    highest-scoring word.

	    Args:
	        model: Caption model called as ``model.predict([image, seq])``.
	        image: Image features passed through to the model unchanged.
	        tok: Tokenizer used to encode the text and look up predicted indices.
	        max_len: Padding length and upper bound on the number of steps.

	    Returns:
	        The generated text, beginning with ``startseq``. It ends with
	        ``endseq`` only if the model produced that word before decoding
	        stopped.
	    """
	    words = ["startseq"]
	    for _ in range(max_len):
	        encoded = tok.texts_to_sequences([" ".join(words)])[0]
	        padded = pad_sequences([encoded], max_len)
	        scores = model.predict([image, padded], verbose=0)
	        next_word = idx_word(np.argmax(scores), tok)
	        # An index with no vocabulary entry ends decoding early.
	        if next_word is None:
	            break
	        words.append(next_word)
	        if next_word == 'endseq':
	            break
	    return " ".join(words)

	def main():
	    """Render the page: upload an image, preview it, and caption it on demand.

	    Nothing beyond the title, intro text and uploader is drawn until a file
	    has been uploaded. The caption is generated only when the button is
	    pressed.
	    """
	    st.title("Image Captioning App")
	    # The app produces captions, not class labels; the intro text says so.
	    st.write("Upload an image and the app will generate a caption for it.")

	    uploaded_image = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])

	    if uploaded_image is not None:
	        image = Image.open(uploaded_image)
	        st.image(image, caption='Uploaded Image', use_column_width=True)
	        st.write("")

	        if st.button("Generate Caption"):
	            with st.spinner("Generating..."):
	                predictions = predict(image)

	            st.write(f"Top Caption:{predictions}")


	# Run the app
	if __name__ == "__main__":
	    main()