handler ok
- __pycache__/handler.cpython-310.pyc +0 -0
- handler.py +34 -0
- poetry.lock +0 -0
- pyproject.toml +17 -0
- test.py +31 -0
__pycache__/handler.cpython-310.pyc
ADDED
Binary file (2.19 kB)
handler.py
ADDED
@@ -0,0 +1,34 @@
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import Dict, List, Any
import json

class EndpointHandler:
    def __init__(self, path=""):
        # Load the model and tokenizer
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16).to(self.device).eval()
        self.tokenizer = AutoTokenizer.from_pretrained(path)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        # Handle the incoming request
        input_text = data["inputs"]["text"]
        template = data["inputs"]["template"]

        # Use the predict function
        output = self.predict_NuExtract([input_text], template)
        return [{"extracted_information": output}]

    def predict_NuExtract(self, texts, template, batch_size=1, max_length=10_000, max_new_tokens=4_000):
        # Generate prompts based on the template
        template = json.dumps(json.loads(template), indent=4)
        prompts = [f"""<|input|>\n### Template:\n{template}\n### Text:\n{text}\n\n<|output|>""" for text in texts]
        outputs = []

        with torch.no_grad():
            for i in range(0, len(prompts), batch_size):
                batch_prompts = prompts[i:i+batch_size]
                batch_encodings = self.tokenizer(batch_prompts, return_tensors="pt", truncation=True, padding=True, max_length=max_length).to(self.device)
                pred_ids = self.model.generate(**batch_encodings, max_new_tokens=max_new_tokens)
                outputs += self.tokenizer.batch_decode(pred_ids, skip_special_tokens=True)

        return [output.split("<|output|>")[1] for output in outputs]
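
Once this repository is deployed as a custom Hugging Face Inference Endpoint, the JSON body of each request is passed to EndpointHandler.__call__. A minimal sketch of calling the deployed handler over HTTP; ENDPOINT_URL and HF_TOKEN are hypothetical placeholders, not values from this repository:

import requests

# Hypothetical placeholders: substitute the deployed endpoint URL and an access token.
ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"
HF_TOKEN = "hf_..."

# Same payload shape that test.py builds and EndpointHandler.__call__ expects.
payload = {
    "inputs": {
        "text": "We introduce Mistral 7B, a 7-billion-parameter language model ...",
        "template": '{"Model": {"Name": "", "Number of parameters": ""}}',
    }
}

response = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
    json=payload,
)
print(response.json())  # expected shape: [{"extracted_information": [...]}]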
poetry.lock
ADDED
The diff for this file is too large to render.
pyproject.toml
ADDED
@@ -0,0 +1,17 @@
[tool.poetry]
name = "visado2"
version = "0.1.0"
description = ""
authors = ["mateuor2d2 <mateuor2d2@gmail.com>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
transformers = "^4.45.2"
torch = "^2.5.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
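
Note that the Poetry files above drive local development only; a custom Inference Endpoint installs additional Python packages from a requirements.txt at the repository root, which this commit does not add. A hypothetical export matching the pinned caret ranges would contain:

transformers>=4.45.2,<5.0.0
torch>=2.5.0,<3.0.0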
test.py
ADDED
@@ -0,0 +1,31 @@
from handler import EndpointHandler
import json
# init handler
my_handler = EndpointHandler(path=".")
# prepare sample payload
text = """We introduce Mistral 7B, a 7–billion-parameter language model engineered for
superior performance and efficiency. Mistral 7B outperforms the best open 13B
model (Llama 2) across all evaluated benchmarks, and the best released 34B
model (Llama 1) in reasoning, mathematics, and code generation. Our model
leverages grouped-query attention (GQA) for faster inference, coupled with sliding
window attention (SWA) to effectively handle sequences of arbitrary length with a
reduced inference cost. We also provide a model fine-tuned to follow instructions,
Mistral 7B – Instruct, that surpasses Llama 2 13B – chat model both on human and
automated benchmarks. Our models are released under the Apache 2.0 license.
Code: <https://github.com/mistralai/mistral-src>
Webpage: <https://mistral.ai/news/announcing-mistral-7b/>"""

template = """{
    "Model": {
        "Name": "",
        "Number of parameters": "",
        "Number of max token": "",
        "Architecture": []
    },
    "Usage": {
        "Use case": [],
        "Licence": ""
    }
}"""
resultado = my_handler({"inputs": {"text": text, "template": template}})
print(resultado)
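
The handler returns the text after the <|output|> marker as a list of raw strings, so the caller still has to parse it. A minimal follow-up sketch for the end of test.py (json is already imported there), assuming the model emitted well-formed JSON, which is not guaranteed, hence the fallback:

# resultado has the shape [{"extracted_information": ["<json string>", ...]}]
raw = resultado[0]["extracted_information"][0]
try:
    extracted = json.loads(raw)  # a dict mirroring the template structure
    print(extracted["Model"]["Name"])
except json.JSONDecodeError:
    print("Model did not return valid JSON:", raw)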