Spaces:
Running
Running
Istvan-Adem
committed on
Commit
·
008ffd7
1
Parent(s):
e722da4
add pytesseract
Browse files
- ocr/api/message/openai_request.py +0 -32
- ocr/api/message/prompts.py +0 -34
- ocr/api/message/views.py +1 -4
ocr/api/message/openai_request.py
DELETED
@@ -1,32 +0,0 @@
|
|
1 |
-
from ocr.api.message.prompts import OCRPrompts
|
2 |
-
from ocr.core.wrappers import openai_wrapper
|
3 |
-
|
4 |
-
|
5 |
-
@openai_wrapper(model='gpt-4o-mini')
|
6 |
-
async def generate_report(content: str):
|
7 |
-
messages = [
|
8 |
-
{
|
9 |
-
"role": "system",
|
10 |
-
"content": OCRPrompts.generate_general_answer
|
11 |
-
},
|
12 |
-
{
|
13 |
-
"role": "user",
|
14 |
-
"content": content
|
15 |
-
}
|
16 |
-
]
|
17 |
-
return messages
|
18 |
-
|
19 |
-
|
20 |
-
@openai_wrapper(model='gpt-4o-mini')
|
21 |
-
async def extract_original_text(content: str):
|
22 |
-
messages = [
|
23 |
-
{
|
24 |
-
"role": "system",
|
25 |
-
"content": OCRPrompts.extract_original_text
|
26 |
-
},
|
27 |
-
{
|
28 |
-
"role": "user",
|
29 |
-
"content": content
|
30 |
-
}
|
31 |
-
]
|
32 |
-
return messages
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ocr/api/message/prompts.py
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
class OCRPrompts:
|
2 |
-
generate_general_answer = """## Task
|
3 |
-
|
4 |
-
You must analyze the text extracted from medical document and generate a simple overview in **Markdown2** format. Your output must strictly follow the required format.
|
5 |
-
|
6 |
-
## Report Structure
|
7 |
-
|
8 |
-
The report must be structured as follows, with each section containing only relevant information from the document:
|
9 |
-
|
10 |
-
```markdown
|
11 |
-
## Simple Overview of the CT Report
|
12 |
-
|
13 |
-
[Provide a brief and clear medical summary of the main points from the extracted text.]
|
14 |
-
|
15 |
-
## Conclusion
|
16 |
-
|
17 |
-
[Summarize key insights and provide any recommendations based on the findings in 2 sentences.]
|
18 |
-
```
|
19 |
-
|
20 |
-
[INST]
|
21 |
-
|
22 |
-
## Instructions
|
23 |
-
|
24 |
-
- **Do not invent or infer any information.** Only use data provided in the user request.
|
25 |
-
- Ensure that the format is followed strictly, and the output is complete without any deviations.
|
26 |
-
|
27 |
-
[/INST]"""
|
28 |
-
extract_original_text = """## Task
|
29 |
-
|
30 |
-
You must return ALL provided text, but not include the patient's name, contact details, or demographic data.
|
31 |
-
|
32 |
-
## Important notes
|
33 |
-
|
34 |
-
- You must return all text but exclude any information related to the name, contact details, and demographic data."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ocr/api/message/views.py
CHANGED
@@ -1,9 +1,6 @@
|
|
1 |
-
import asyncio
|
2 |
-
|
3 |
from fastapi import File, UploadFile, HTTPException
|
4 |
|
5 |
from ocr.api.message import ocr_router
|
6 |
-
from ocr.api.message.openai_request import generate_report, extract_original_text
|
7 |
from ocr.api.message.schemas import OcrResponse
|
8 |
from ocr.api.message.utils import divide_images, clean_response, extract_text_from_images
|
9 |
from ocr.core.wrappers import OcrResponseWrapper
|
@@ -26,7 +23,7 @@ async def get_all_chat_messages(
|
|
26 |
# extract_original_text(text_content),
|
27 |
# generate_report(text_content)
|
28 |
# )
|
29 |
-
cleaned_original_text =
|
30 |
return OcrResponseWrapper(data=OcrResponse(text=clean_response(text_content), originalText=cleaned_original_text))
|
31 |
finally:
|
32 |
await file.close()
|
|
|
|
|
|
|
1 |
from fastapi import File, UploadFile, HTTPException
|
2 |
|
3 |
from ocr.api.message import ocr_router
|
|
|
4 |
from ocr.api.message.schemas import OcrResponse
|
5 |
from ocr.api.message.utils import divide_images, clean_response, extract_text_from_images
|
6 |
from ocr.core.wrappers import OcrResponseWrapper
|
|
|
23 |
# extract_original_text(text_content),
|
24 |
# generate_report(text_content)
|
25 |
# )
|
26 |
+
cleaned_original_text = text_content
|
27 |
return OcrResponseWrapper(data=OcrResponse(text=clean_response(text_content), originalText=cleaned_original_text))
|
28 |
finally:
|
29 |
await file.close()
|