Spaces:

brestok
/

ocr-backend

Running

Istvan-Adem committed on Feb 26

Commit

008ffd7

1 Parent(s): e722da4

add pytesseract

Files changed (3) hide show

ocr/api/message/openai_request.py DELETED Viewed

@@ -1,32 +0,0 @@
-from ocr.api.message.prompts import OCRPrompts
-from ocr.core.wrappers import openai_wrapper
-@openai_wrapper(model='gpt-4o-mini')
-async def generate_report(content: str):
-    messages = [
-        {
-            "role": "system",
-            "content": OCRPrompts.generate_general_answer
-        },
-        {
-            "role": "user",
-            "content": content
-        }
-    ]
-    return messages
-@openai_wrapper(model='gpt-4o-mini')
-async def extract_original_text(content: str):
-    messages = [
-        {
-            "role": "system",
-            "content": OCRPrompts.extract_original_text
-        },
-        {
-            "role": "user",
-            "content": content
-        }
-    ]
-    return messages

ocr/api/message/prompts.py DELETED Viewed

@@ -1,34 +0,0 @@
-class OCRPrompts:
-    generate_general_answer = """## Task
-You must analyze the text extracted from medical document and generate a simple overview in **Markdown2** format. Your output must strictly follow the required format.
-## Report Structure
-The report must be structured as follows, with each section containing only relevant information from the document:
-```markdown
-## Simple Overview of the CT Report
-[Provide a brief and clear medical summary of the main points from the extracted text.]
-## Conclusion
-[Summarize key insights and provide any recommendations based on the findings in 2 sentences.]
-```
-[INST]
-## Instructions
-- **Do not invent or infer any information.** Only use data provided in the user request.
-- Ensure that the format is followed strictly, and the output is complete without any deviations.
-[/INST]"""
-    extract_original_text = """## Task
-You must return ALL provided text, but not include the patient's name, contact details, or demographic data.
-## Important notes
-- You must return all text but exclude any information related to the name, contact details, and demographic data."""

ocr/api/message/views.py CHANGED Viewed

@@ -1,9 +1,6 @@
-import asyncio
 from fastapi import File, UploadFile, HTTPException
 from ocr.api.message import ocr_router
-from ocr.api.message.openai_request import generate_report, extract_original_text
 from ocr.api.message.schemas import OcrResponse
 from ocr.api.message.utils import divide_images, clean_response, extract_text_from_images
 from ocr.core.wrappers import OcrResponseWrapper
@@ -26,7 +23,7 @@ async def get_all_chat_messages(
         #     extract_original_text(text_content),
             # generate_report(text_content)
         # )
-        cleaned_original_text = await extract_original_text(text_content)
         return OcrResponseWrapper(data=OcrResponse(text=clean_response(text_content), originalText=cleaned_original_text))
     finally:
         await file.close()

 from fastapi import File, UploadFile, HTTPException
 from ocr.api.message import ocr_router
 from ocr.api.message.schemas import OcrResponse
 from ocr.api.message.utils import divide_images, clean_response, extract_text_from_images
 from ocr.core.wrappers import OcrResponseWrapper
         #     extract_original_text(text_content),
             # generate_report(text_content)
         # )
+        cleaned_original_text = text_content
         return OcrResponseWrapper(data=OcrResponse(text=clean_response(text_content), originalText=cleaned_original_text))
     finally:
         await file.close()