""" | |
Copyright 2024 X_G85 | |
Model Integration Utils | |
------------------------- | |
""" | |
# Author: Adam-Al-Rahman <adam.al.rahman.dev@gmail.com> | |
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import tokenizer_from_json
from tensorflow.keras.preprocessing.sequence import pad_sequences


def tokenizer(arch: str, tokenizer_json: str, text, max_length: int = 300):
    """
    Tokenize input text for the given model architecture.

    :param arch: type of model, `Lstm` or `Bert`
    :param tokenizer_json: path to a Keras tokenizer exported as JSON
    :param text: iterable of input texts (strings)
    :param max_length: length to pad/truncate each sequence to
    """
    tokenized_data = None
    if arch == "Lstm":
        # Load the tokenizer from the JSON file
        with open(tokenizer_json) as file:
            data = file.read()
        tokenizer = tokenizer_from_json(data)

        # Use the tokenizer to transform the input text
        tokenized_text = tokenizer.texts_to_sequences(text)
        tokenized_data = pad_sequences(tokenized_text, maxlen=max_length)
        tokenized_data = tokenized_data.astype(np.float32)

    return tokenized_data
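
# Example usage (a sketch; "tokenizer.json" is an assumed path to a Keras
# tokenizer exported with `tokenizer.to_json()`, and the sample text is
# illustrative):
#
#   sample = ["Breaking: markets rally after policy announcement"]
#   sequences = tokenizer("Lstm", "tokenizer.json", sample, max_length=300)
#   # sequences is a float32 NumPy array of shape (1, 300)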


def modelx(
    arch: str,
    model_path: str,
    text,
    tokenizer_json: str = "",
    batch_size: int = 32,
    max_length: int = 300,
):
    """
    Load a saved model and classify `text` as real or fake news.

    :param arch: type of model, `Lstm` or `Bert`
    :param model_path: path to the saved Keras model
    :param text: iterable of input texts (strings)
    :param tokenizer_json: optional path to a Keras tokenizer JSON;
        if given, `text` is tokenized before prediction
    :param batch_size: batch size used for prediction
    :param max_length: padding length passed to the tokenizer
    """
    model_result = None

    if tokenizer_json:
        text = tokenizer(arch, tokenizer_json, text, max_length)
    else:
        text = pd.Series(text)

    if arch == "Lstm":
        model = tf.keras.models.load_model(model_path)
        model_result = model.predict(text, batch_size=batch_size)

        # Round the sigmoid output and convert it to a plain float so the
        # comparisons below are unambiguous
        model_result = float(tf.squeeze(tf.round(model_result)))
        if model_result == 1.0:
            model_result = "REAL NEWS"
        elif model_result == 0.0:
            model_result = "FAKE NEWS"

    return model_result
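
# Example usage (a sketch; "fake_news_lstm.h5" and "tokenizer.json" are
# assumed file names, not files shipped with this module):
#
#   label = modelx(
#       arch="Lstm",
#       model_path="fake_news_lstm.h5",
#       text=["Breaking: markets rally after policy announcement"],
#       tokenizer_json="tokenizer.json",
#   )
#   print(label)  # "REAL NEWS" or "FAKE NEWS"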