mateuo committed on
Commit
92c000b
·
1 Parent(s): cf01cfa

handler ok

Files changed (5)
  1. __pycache__/handler.cpython-310.pyc +0 -0
  2. handler.py +34 -0
  3. poetry.lock +0 -0
  4. pyproject.toml +17 -0
  5. test.py +31 -0
__pycache__/handler.cpython-310.pyc ADDED
Binary file (2.19 kB).
handler.py ADDED
@@ -0,0 +1,34 @@
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from typing import Dict, List, Any
+ import json
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # Load the model and tokenizer
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16).to(self.device).eval()
+         self.tokenizer = AutoTokenizer.from_pretrained(path)
+ 
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+         # Handle the incoming request
+         input_text = data["inputs"]["text"]
+         template = data["inputs"]["template"]
+ 
+         # Use the predict function
+         output = self.predict_NuExtract([input_text], template)
+         return [{"extracted_information": output}]
+ 
+     def predict_NuExtract(self, texts, template, batch_size=1, max_length=10_000, max_new_tokens=4_000):
+         # Generate prompts based on the template
+         template = json.dumps(json.loads(template), indent=4)
+         prompts = [f"""<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>""" for text in texts]
+         outputs = []
+ 
+         with torch.no_grad():
+             for i in range(0, len(prompts), batch_size):
+                 batch_prompts = prompts[i:i+batch_size]
+                 batch_encodings = self.tokenizer(batch_prompts, return_tensors="pt", truncation=True, padding=True, max_length=max_length).to(self.device)
+                 pred_ids = self.model.generate(**batch_encodings, max_new_tokens=max_new_tokens)
+                 outputs += self.tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
+ 
+         return [output.split("<|output|>")[1] for output in outputs]
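The handler above follows the Hugging Face custom-handler interface: it receives a payload under "inputs" with "text" and "template" keys and returns a list containing the extracted information. A minimal sketch of calling it once deployed as an Inference Endpoint might look like the following; the endpoint URL and token are placeholders, not values from this repository:

import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_..."  # placeholder

# Payload shape matches what EndpointHandler.__call__ expects
payload = {
    "inputs": {
        "text": "Mistral 7B is a 7-billion-parameter language model.",
        "template": '{"Model": {"Name": "", "Number of parameters": ""}}',
    }
}
response = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
    json=payload,
)
print(response.json())  # expected shape: [{"extracted_information": [...]}]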
poetry.lock ADDED
The diff for this file is too large to render.
pyproject.toml ADDED
@@ -0,0 +1,17 @@
+ [tool.poetry]
+ name = "visado2"
+ version = "0.1.0"
+ description = ""
+ authors = ["mateuor2d2 <mateuor2d2@gmail.com>"]
+ readme = "README.md"
+ 
+ [tool.poetry.dependencies]
+ python = "^3.10"
+ transformers = "^4.45.2"
+ torch = "^2.5.0"
+ 
+ 
+ 
+ [build-system]
+ requires = ["poetry-core"]
+ build-backend = "poetry.core.masonry.api"
test.py ADDED
@@ -0,0 +1,31 @@
+ from handler import EndpointHandler
+ import json
+ # init handler
+ my_handler = EndpointHandler(path=".")
+ # prepare sample payload
+ text = """We introduce Mistral 7B, a 7–billion-parameter language model engineered for
+ superior performance and efficiency. Mistral 7B outperforms the best open 13B
+ model (Llama 2) across all evaluated benchmarks, and the best released 34B
+ model (Llama 1) in reasoning, mathematics, and code generation. Our model
+ leverages grouped-query attention (GQA) for faster inference, coupled with sliding
+ window attention (SWA) to effectively handle sequences of arbitrary length with a
+ reduced inference cost. We also provide a model fine-tuned to follow instructions,
+ Mistral 7B – Instruct, that surpasses Llama 2 13B – chat model both on human and
+ automated benchmarks. Our models are released under the Apache 2.0 license.
+ Code: <https://github.com/mistralai/mistral-src>
+ Webpage: <https://mistral.ai/news/announcing-mistral-7b/>"""
+ 
+ template = """{
+     "Model": {
+         "Name": "",
+         "Number of parameters": "",
+         "Number of max token": "",
+         "Architecture": []
+     },
+     "Usage": {
+         "Use case": [],
+         "Licence": ""
+     }
+ }"""
+ resultado = my_handler({"inputs": {"text": text, "template": template}})
+ print(resultado)
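Since predict_NuExtract returns the raw text after the <|output|> marker, the result printed by test.py is a list of strings rather than parsed objects. A small follow-up sketch, continuing from test.py and assuming the model emits valid JSON for the template (which is not guaranteed), could parse the first result:

# continuing from test.py; assumes the model's output is valid JSON
raw = resultado[0]["extracted_information"][0]
try:
    extracted = json.loads(raw)
    print(extracted["Model"]["Name"])
except json.JSONDecodeError:
    print("Model output was not valid JSON:", raw)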