LSTM Text Generation Model

This model was trained using TensorFlow/Keras for financial article generation tasks.

Model Details

- Model Type: LSTM
- Framework: TensorFlow/Keras
- Task: Text Generation
- Vocabulary Size: 30000
- Architecture: Bidirectional Long Short-Term Memory (LSTM)

Usage

from huggingface_hub import snapshot_download
import tensorflow as tf
import json
import pickle
import numpy as np

# Download model files: fetch the repository from the Hugging Face Hub and
# keep the returned local path, which is used below as the folder that holds
# the model and tokenizer files.
model_path = snapshot_download(repo_id="firobeid/L4_LSTM_financial_News_Headlines_generator")

# Load the LSTM model stored under "lstm_model" inside the downloaded folder
model = tf.keras.models.load_model(f"{model_path}/lstm_model")

# Load tokenizer (used later to turn text into integer sequences and back)
try:
    # Try JSON format first
    with open(f"{model_path}/tokenizer.json", 'r', encoding='utf-8') as f:
        tokenizer_json = f.read() 
    tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(tokenizer_json)
except FileNotFoundError:
    # Fallback to pickle format, used only when tokenizer.json is absent.
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only run this against a repository you trust.
    with open(f"{model_path}/tokenizer.pkl", 'rb') as f:
        tokenizer = pickle.load(f)

# Text generation function
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def preprocess(texts, max_sequence_length = 71):
    """Encode a single text as a padded integer sequence for the model.

    The text is lower-cased, prefixed with the ``<s>`` start tag, converted
    to token ids with the module-level ``tokenizer``, and padded on the left
    ('pre') to ``max_sequence_length`` entries.

    Args:
        texts: The input string to encode.
        max_sequence_length: Target length passed to ``pad_sequences`` as
            ``maxlen``.

    Returns:
        The array produced by ``pad_sequences`` for the one-element batch.
    """
    tagged_text = '<s> {}'.format(texts.lower())
    token_ids = tokenizer.texts_to_sequences([tagged_text])
    batch = np.array(token_ids)
    return pad_sequences(batch, maxlen=max_sequence_length, padding='pre')
  
def next_word(model, tokenizer,
              text, num_gen_words=1, 
              randome_sampling = False,
              temperature=1):  
    """Extend ``text`` with up to ``num_gen_words`` words predicted by ``model``.

    At every step the running text is re-encoded with ``preprocess``, the
    model is run once, and the next word is chosen from the model output at
    the last time step of the first (only) sequence. Generation stops early
    as soon as the end tag ``'</s>'`` is produced.

    Args:
        model: Object whose ``predict(X, verbose=0)`` result is indexed here
            as ``[sequence, time step, vocabulary index]``; the values are
            treated as probabilities (their log is used as logits).
        tokenizer: Tokenizer providing ``index_word`` and
            ``sequences_to_texts`` for mapping ids back to words.
        text: Seed text to continue.
        num_gen_words: Maximum number of words to append.
        randome_sampling: If true, sample the next word from a categorical
            distribution instead of taking the most probable word.
        temperature: Divisor applied to the log-probabilities before
            sampling. Low temperatures result in more predictable text,
            higher temperatures in more surprising text. Ignored when
            ``randome_sampling`` is false.

    Returns:
        The seed text followed by the generated words, joined by spaces.

    Raises:
        ValueError: If sampling is requested with a non-positive temperature.
    """
    if randome_sampling and temperature <= 0:
        # Dividing by zero (or a negative value) would yield inf/NaN or an
        # inverted distribution instead of a usable sampling distribution.
        raise ValueError("temperature must be positive when random sampling is enabled")

    input_text = text
    output_text = [input_text]

    for _ in range(num_gen_words):
        X_new = preprocess(input_text)
        # Run the model once per step; keep the first (only) sequence.
        y_proba = model.predict(X_new, verbose=0)[0]

        if randome_sampling:
            # Keep a leading axis of size 1: tf.random.categorical expects
            # 2-D logits and returns one sampled id per row.
            rescaled_logits = tf.math.log(y_proba[-1:, :]) / temperature
            pred_word_ind = tf.random.categorical(rescaled_logits, num_samples=1)
            pred_word = tokenizer.sequences_to_texts(pred_word_ind.numpy())[0]
        else:
            # Most probable id at the last time step.
            pred_word_ind = int(np.argmax(y_proba[-1]))
            # Ids without a vocabulary entry (e.g. the padding id 0) map to
            # an empty string, matching what sequences_to_texts yields in
            # the sampling branch, instead of raising KeyError.
            pred_word = tokenizer.index_word.get(pred_word_ind, '')

        input_text += ' ' + pred_word
        output_text.append(pred_word)

        if pred_word == '</s>':
            break

    return ' '.join(output_text)
  
def generate_text(model, tokenizer, text, num_gen_words=25, temperature=1, random_sampling=False):  
    """Generate a continuation of ``text`` by delegating to ``next_word``.

    Args:
        model: Model forwarded to ``next_word``.
        tokenizer: Tokenizer forwarded to ``next_word``.
        text: Seed text to continue.
        num_gen_words: Maximum number of words to generate.
        temperature: Sampling temperature forwarded to ``next_word``.
        random_sampling: Forwarded as ``next_word``'s ``randome_sampling``
            flag (note the different spelling of that parameter).

    Returns:
        Whatever ``next_word`` returns for these arguments.
    """
    return next_word(
        model,
        tokenizer,
        text,
        num_gen_words=num_gen_words,
        randome_sampling=random_sampling,
        temperature=temperature,
    )

# Example usage
# Pass the seed text as-is: preprocess() lower-cases it and prepends the <s>
# start tag before tokenizing, so neither needs to be done by hand.
# The call returns the generated string; wrap it in print() when running this
# as a script rather than in a notebook cell.
# NOTE(review): the log-probabilities are divided by the temperature, so a
# value of 10 should make the output close to random; try values around 1 or
# lower for more predictable text.
generate_text(model, 
              tokenizer,
              "Apple", 
              num_gen_words = 10, 
              random_sampling = True,
              temperature= 10)

Training

This model was trained on text data using an LSTM architecture for next-word prediction.

Limitations

- Model performance depends on training data quality and size.
- Generated text may not always be coherent for longer sequences.
- Model architecture is optimized for the specific vocabulary it was trained on.