Istvan-Adem committed on
Commit
008ffd7
·
1 Parent(s): e722da4

add pytesseract

Browse files
ocr/api/message/openai_request.py DELETED
@@ -1,32 +0,0 @@
1
- from ocr.api.message.prompts import OCRPrompts
2
- from ocr.core.wrappers import openai_wrapper
3
-
4
-
5
- @openai_wrapper(model='gpt-4o-mini')
6
- async def generate_report(content: str):
7
- messages = [
8
- {
9
- "role": "system",
10
- "content": OCRPrompts.generate_general_answer
11
- },
12
- {
13
- "role": "user",
14
- "content": content
15
- }
16
- ]
17
- return messages
18
-
19
-
20
- @openai_wrapper(model='gpt-4o-mini')
21
- async def extract_original_text(content: str):
22
- messages = [
23
- {
24
- "role": "system",
25
- "content": OCRPrompts.extract_original_text
26
- },
27
- {
28
- "role": "user",
29
- "content": content
30
- }
31
- ]
32
- return messages
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ocr/api/message/prompts.py DELETED
@@ -1,34 +0,0 @@
1
- class OCRPrompts:
2
- generate_general_answer = """## Task
3
-
4
- You must analyze the text extracted from medical document and generate a simple overview in **Markdown2** format. Your output must strictly follow the required format.
5
-
6
- ## Report Structure
7
-
8
- The report must be structured as follows, with each section containing only relevant information from the document:
9
-
10
- ```markdown
11
- ## Simple Overview of the CT Report
12
-
13
- [Provide a brief and clear medical summary of the main points from the extracted text.]
14
-
15
- ## Conclusion
16
-
17
- [Summarize key insights and provide any recommendations based on the findings in 2 sentences.]
18
- ```
19
-
20
- [INST]
21
-
22
- ## Instructions
23
-
24
- - **Do not invent or infer any information.** Only use data provided in the user request.
25
- - Ensure that the format is followed strictly, and the output is complete without any deviations.
26
-
27
- [/INST]"""
28
- extract_original_text = """## Task
29
-
30
- You must return ALL provided text, but not include the patient's name, contact details, or demographic data.
31
-
32
- ## Important notes
33
-
34
- - You must return all text but exclude any information related to the name, contact details, and demographic data."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ocr/api/message/views.py CHANGED
@@ -1,9 +1,6 @@
1
- import asyncio
2
-
3
  from fastapi import File, UploadFile, HTTPException
4
 
5
  from ocr.api.message import ocr_router
6
- from ocr.api.message.openai_request import generate_report, extract_original_text
7
  from ocr.api.message.schemas import OcrResponse
8
  from ocr.api.message.utils import divide_images, clean_response, extract_text_from_images
9
  from ocr.core.wrappers import OcrResponseWrapper
@@ -26,7 +23,7 @@ async def get_all_chat_messages(
26
  # extract_original_text(text_content),
27
  # generate_report(text_content)
28
  # )
29
- cleaned_original_text = await extract_original_text(text_content)
30
  return OcrResponseWrapper(data=OcrResponse(text=clean_response(text_content), originalText=cleaned_original_text))
31
  finally:
32
  await file.close()
 
 
 
1
  from fastapi import File, UploadFile, HTTPException
2
 
3
  from ocr.api.message import ocr_router
 
4
  from ocr.api.message.schemas import OcrResponse
5
  from ocr.api.message.utils import divide_images, clean_response, extract_text_from_images
6
  from ocr.core.wrappers import OcrResponseWrapper
 
23
  # extract_original_text(text_content),
24
  # generate_report(text_content)
25
  # )
26
+ cleaned_original_text = text_content
27
  return OcrResponseWrapper(data=OcrResponse(text=clean_response(text_content), originalText=cleaned_original_text))
28
  finally:
29
  await file.close()