mateuo committed on
Commit
92c000b
·
1 Parent(s): cf01cfa

handler ok

Files changed (5)
  1. __pycache__/handler.cpython-310.pyc +0 -0
  2. handler.py +34 -0
  3. poetry.lock +0 -0
  4. pyproject.toml +17 -0
  5. test.py +31 -0
__pycache__/handler.cpython-310.pyc ADDED
Binary file (2.19 kB).
handler.py ADDED
@@ -0,0 +1,34 @@
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from typing import Dict, List, Any
+ import json
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # Load the model and tokenizer
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16).to(self.device).eval()
+         self.tokenizer = AutoTokenizer.from_pretrained(path)
+ 
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+         # Handle the incoming request
+         input_text = data["inputs"]["text"]
+         template = data["inputs"]["template"]
+ 
+         # Use the predict function
+         output = self.predict_NuExtract([input_text], template)
+         return [{"extracted_information": output}]
+ 
+     def predict_NuExtract(self, texts, template, batch_size=1, max_length=10_000, max_new_tokens=4_000):
+         # Generate prompts based on the template
+         template = json.dumps(json.loads(template), indent=4)
+         prompts = [f"""<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>""" for text in texts]
+         outputs = []
+ 
+         with torch.no_grad():
+             for i in range(0, len(prompts), batch_size):
+                 batch_prompts = prompts[i:i+batch_size]
+                 batch_encodings = self.tokenizer(batch_prompts, return_tensors="pt", truncation=True, padding=True, max_length=max_length).to(self.device)
+                 pred_ids = self.model.generate(**batch_encodings, max_new_tokens=max_new_tokens)
+                 outputs += self.tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
+ 
+         return [output.split("<|output|>")[1] for output in outputs]
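The handler above follows the Hugging Face custom-handler interface: it receives a payload under "inputs" with "text" and "template" keys and returns a list containing the extracted information. A minimal sketch of calling it once deployed as an Inference Endpoint might look like the following; the endpoint URL and token are placeholders, not values from this repository:

import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"  # placeholder
HF_TOKEN = "hf_..."  # placeholder

# Payload shape matches what EndpointHandler.__call__ expects
payload = {
    "inputs": {
        "text": "Mistral 7B is a 7-billion-parameter language model.",
        "template": '{"Model": {"Name": "", "Number of parameters": ""}}',
    }
}
response = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
    json=payload,
)
print(response.json())  # expected shape: [{"extracted_information": [...]}]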
poetry.lock ADDED
The diff for this file is too large to render.
pyproject.toml ADDED
@@ -0,0 +1,17 @@
+ [tool.poetry]
+ name = "visado2"
+ version = "0.1.0"
+ description = ""
+ authors = ["mateuor2d2 <mateuor2d2@gmail.com>"]
+ readme = "README.md"
+ 
+ [tool.poetry.dependencies]
+ python = "^3.10"
+ transformers = "^4.45.2"
+ torch = "^2.5.0"
+ 
+ 
+ 
+ [build-system]
+ requires = ["poetry-core"]
+ build-backend = "poetry.core.masonry.api"
test.py ADDED
@@ -0,0 +1,31 @@
+ from handler import EndpointHandler
+ import json
+ # init handler
+ my_handler = EndpointHandler(path=".")
+ # prepare sample payload
+ text = """We introduce Mistral 7B, a 7–billion-parameter language model engineered for
+ superior performance and efficiency. Mistral 7B outperforms the best open 13B
+ model (Llama 2) across all evaluated benchmarks, and the best released 34B
+ model (Llama 1) in reasoning, mathematics, and code generation. Our model
+ leverages grouped-query attention (GQA) for faster inference, coupled with sliding
+ window attention (SWA) to effectively handle sequences of arbitrary length with a
+ reduced inference cost. We also provide a model fine-tuned to follow instructions,
+ Mistral 7B – Instruct, that surpasses Llama 2 13B – chat model both on human and
+ automated benchmarks. Our models are released under the Apache 2.0 license.
+ Code: <https://github.com/mistralai/mistral-src>
+ Webpage: <https://mistral.ai/news/announcing-mistral-7b/>"""
+ 
+ template = """{
+     "Model": {
+         "Name": "",
+         "Number of parameters": "",
+         "Number of max token": "",
+         "Architecture": []
+     },
+     "Usage": {
+         "Use case": [],
+         "Licence": ""
+     }
+ }"""
+ resultado = my_handler({"inputs": {"text": text, "template": template}})
+ print(resultado)
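Since predict_NuExtract returns the raw text after the <|output|> marker, the result printed by test.py is a list of strings rather than parsed objects. A small follow-up sketch, continuing from test.py and assuming the model emits valid JSON for the template (which is not guaranteed), could parse the first result:

# continuing from test.py; assumes the model's output is valid JSON
raw = resultado[0]["extracted_information"][0]
try:
    extracted = json.loads(raw)
    print(extracted["Model"]["Name"])
except json.JSONDecodeError:
    print("Model output was not valid JSON:", raw)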