import torch
from transformers import pipeline
from sentence_transformers import SentenceTransformer
class NLPEngine:
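    """Small wrapper around several Hugging Face pipelines (sentiment analysis, summarization,
    NER, question answering, text generation) plus a SentenceTransformer model for embeddings,
    exposing one convenience method per task."""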
    def __init__(self, device=-1):
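        """Load all task pipelines.

        device: -1 for CPU, 0 for the first CUDA GPU, or 'mps' for Apple Silicon;
        the value is passed straight through to each transformers pipeline.
        """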
        device_name = 'cuda' if device == 0 else 'mps' if device == 'mps' else 'cpu'
        print(f"Initializing NLPEngine on device: {device_name}")
        self.sentiment = pipeline(
            'sentiment-analysis',
            model='distilbert-base-uncased-finetuned-sst-2-english',
            device=device
        )
        self.summarizer = pipeline(
            'summarization',
            model='facebook/bart-large-cnn',
            device=device
        )
        self.ner = pipeline(
            'ner',
            model='dslim/bert-base-NER',
            aggregation_strategy='simple',
            device=device
        )
        self.qa = pipeline(
            'question-answering',
            model='deepset/roberta-base-squad2',
            device=device
        )
        self.generator = pipeline(
            'text-generation',
            model='gpt2',
            device=device
        )
        ## For initial tests of semantic search
        # self.retriever = pipeline(
        #     'feature-extraction',
        #     model='sentence-transformers/all-MiniLM-L6-v2',
        #     device=device
        # )
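        # Sentence-level embeddings now come from sentence-transformers directly,
        # replacing the mean-pooled feature-extraction approach sketched above.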
        self.sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        print("NLPEngine initialized successfully.")

    def analyze_sentiment(self, text):
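        """Classify `text`; returns a list of dicts with POSITIVE/NEGATIVE labels and scores."""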
        return self.sentiment(text)

    def summarize_text(self, text, max_length=150, min_length=30):
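        """Produce an abstractive summary of `text`; returns a list with a 'summary_text' entry."""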
        return self.summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)

    def extract_entities(self, text):
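        """Extract named entities from `text`, with word pieces grouped into whole entities."""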
        return self.ner(text)

    def answer_question(self, question, context):
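        """Extract the answer span for `question` from the given `context`."""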
        return self.qa(question=question, context=context)

    def generate_text(self, prompt, max_length=50, num_return_sequences=1):
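        """Continue `prompt` with GPT-2; `max_length` caps the total token count, prompt included."""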
        return self.generator(prompt, max_length=max_length, num_return_sequences=num_return_sequences)

    def get_embeddings(self, text_or_texts):
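        """Encode a string or list of strings into dense embeddings, returned as a torch.Tensor."""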
        ## For initial tests of semantic search
        ## I tried to manually calculate and adjust the embedding sizes, until I directly used the sentence transformer from HF
        # embeddings = self.retriever(text_or_texts)
        # if isinstance(text_or_texts, str):
        #     return torch.mean(torch.tensor(embeddings[0]), dim=0)
        # else:
        #     return torch.stack([torch.mean(torch.tensor(emb), dim=0) for emb in embeddings])
        return torch.tensor(self.sentence_model.encode(text_or_texts))


if __name__ == "__main__":
    pass
    # Uncomment the code below if you would like to run this file as a standalone script and test from your terminal how each pipeline works
    # =================================
    # # Check for available hardware acceleration
    # if torch.cuda.is_available():
    #     selected_device = 0 # CUDA
    # elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    #     selected_device = 'mps' # MPS for Apple Silicon
    # else:
    #     selected_device = -1 # CPU
# print("Starting NLPEngine tests...")
# engine = NLPEngine(device=selected_device)
# sample_text_sentiment = "Hugging Face is a great platform for NLP."
# print(f"\nSentiment for '{sample_text_sentiment}': {engine.analyze_sentiment(sample_text_sentiment)}")
# sample_text_summarize = """
# The Hugging Face ecosystem provides a wide array of tools and models for natural language processing.
# It includes transformers for state-of-the-art models, datasets for accessing and sharing data,
# and a model hub for discovering and using pre-trained models. Developers can leverage these
# resources to build powerful NLP applications with relative ease. The platform also supports
# various tasks such as text classification, summarization, translation, and question answering.
# The quick brown fox jumps over the lazy dog. This sentence is repeated multiple times to ensure
# the text is long enough for summarization to be meaningful. The quick brown fox jumps over the lazy dog.
# The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog.
# """
# print(f"\nSummary: {engine.summarize_text(sample_text_summarize, min_length=20, max_length=50)}")
# sample_text_ner = "Apple Inc. is looking at buying U.K. startup for $1 billion. Tim Cook is the CEO. The meeting is in New York."
# print(f"\nNER for '{sample_text_ner}': {engine.extract_entities(sample_text_ner)}")
# sample_context_qa = "The capital of France is Paris. It is known for the Eiffel Tower and the Louvre Museum."
# sample_question_qa = "What is Paris known for?"
# print(f"\nQA for context '{sample_context_qa}' and question '{sample_question_qa}': {engine.answer_question(question=sample_question_qa, context=sample_context_qa)}")
# sample_prompt_generate = "In a world powered by AI,"
# print(f"\nGenerated Text from prompt '{sample_prompt_generate}': {engine.generate_text(sample_prompt_generate, max_length=30)}")
    # # sample_text_retriever1 = "This is a test sentence for semantic search."
    # # sample_text_retriever2 = "Another sentence to compare for similarity."
    # # embedding1 = engine.get_embeddings(sample_text_retriever1)
    # # embedding2 = engine.get_embeddings(sample_text_retriever2)
    # # print(f"\nEmbedding shape for a single sentence: {embedding1.shape}")
    # corpus = ["The weather is sunny today.", "I enjoy walking in the park on a beautiful day.", "AI is transforming many industries."]
    # query = "What is the forecast for today?"
    # query_embedding = engine.get_embeddings(query)
    # corpus_embeddings = engine.get_embeddings(corpus)
    # print(f"Query embedding shape: {query_embedding.shape}")
    # print(f"Corpus embeddings shape: {corpus_embeddings.shape}")
    # if query_embedding.ndim == 1:
    #     query_embedding = query_embedding.unsqueeze(0)
    # similarities = torch.nn.functional.cosine_similarity(query_embedding, corpus_embeddings, dim=1)
    # print(f"\nSimilarities between '{query}' and corpus sentences: {similarities.tolist()}")
    # print("\nNLPEngine tests completed.")