# grobid-evaluation / service.py
# lfoppiano's picture
# Update service.py
# 99b8cb9 verified
import http.server
import socketserver
import subprocess
import threading
import queue
import io
import select
import re
import os
import urllib.request
import urllib.parse
import urllib.error
# TCP port the HTTP server listens on.
PORT = 8080
# Becomes True on the first GET request that starts the evaluation run; later
# requests then report the collected output instead of starting it again.
command_executed = False
# Sanitized output lines of the commands run so far. Nothing in this file
# removes items from it: the HTTP handler reads a snapshot of its contents.
command_output = queue.Queue()
# Commands (argument lists) waiting to be executed by the worker thread.
command_queue = queue.Queue()
# Matches every run of characters that is neither printable ASCII
# (space through tilde) nor a carriage return.
_DISALLOWED_CHARS = re.compile(r'[^\x20-\x7E\r]+')


def sanitize_output(output):
    """Return *output* reduced to printable ASCII plus carriage returns.

    Everything else is dropped rather than replaced: newlines, tabs, other
    control characters (such as the ESC byte of terminal colour codes) and
    all non-ASCII text.
    """
    return _DISALLOWED_CHARS.sub('', output)
def run_command(command):
    """Run *command* to completion, streaming its output to ``command_output``.

    Each line the child writes is sanitized with ``sanitize_output`` and
    queued with a trailing newline. stderr is merged into stdout so that a
    single blocking reader sees both streams in the order they were written;
    reading until end-of-file guarantees that output produced just before the
    child exits is not lost.

    Args:
        command: Argument list handed to ``subprocess.Popen`` (no shell).

    A command that cannot be started (for example a missing executable) is
    reported through ``command_output`` instead of raising, so the calling
    worker can continue with the next command.
    """
    print("Running command ", command)
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # one stream: no select() juggling needed
            text=True,
            errors="replace",  # never abort on bytes the locale cannot decode
            bufsize=1,  # Line-buffered
        )
    except OSError as exc:
        command_output.put(
            sanitize_output(f"Failed to start {command}: {exc}") + "\n"
        )
        command_output.put("Leaving")
        return

    # Leaving the ``with`` block closes the pipe and waits for the child.
    with process:
        for line in process.stdout:
            command_output.put(sanitize_output(line) + "\n")

    # End-of-command marker (no trailing newline, unchanged from before).
    command_output.put("Leaving")
def command_worker():
    """Execute queued commands one at a time until a ``None`` sentinel arrives.

    Commands are taken from ``command_queue`` and passed to ``run_command``.
    Every item taken from the queue -- including the sentinel, and a command
    whose execution raised -- is acknowledged with ``task_done()`` so the
    queue's unfinished-task count stays accurate.
    """
    print("Starting worker command")
    while True:
        command = command_queue.get()
        try:
            if command is None:
                break
            run_command(command)
        finally:
            # Runs on normal completion, on ``break`` and on exceptions.
            command_queue.task_done()
def start_commands():
    """Queue the full evaluation run and start the worker thread.

    For each flavor (the standard process, ``article/light`` and
    ``article/light-ref``) a header line is echoed, then every dataset is
    evaluated with ``./gradlew jatsEval`` and the resulting ``report.md`` is
    copied to a report file named after the dataset and flavor.

    A ``None`` sentinel is queued after the last command so that the worker
    thread terminates once everything has run instead of blocking forever on
    an empty queue.
    """
    eval_root = "/opt/grobid/evaluation"
    tmp_dir = "/opt/grobid/grobid-home/tmp"

    # (flavor, report-name suffix, ((dataset directory, report slug), ...)).
    # Report slugs are spelled out because the existing file names are not
    # derived uniformly from the dataset names (note "elife_984" versus
    # "eLife_984"); they are kept exactly as they were.
    runs = (
        (None, "", (
            ("PMC_sample_1943", "pmc_sample_1943"),
            ("PLOS_1000", "plos_1000"),
            ("biorxiv-10k-test-2000", "biorxiv-10k-test-2000"),
            ("eLife_984", "elife_984"),
        )),
        ("article/light", "-article_light", (
            ("PMC_sample_1943", "pmc_sample_1943"),
            ("PLOS_1000", "plos_1000"),
            ("biorxiv-10k-test-2000", "biorxiv-10k-test-2000"),
            ("eLife_984", "eLife_984"),
        )),
        ("article/light-ref", "-article_light_ref", (
            ("PMC_sample_1943", "pmc_sample_1943"),
            ("PLOS_1000", "plos_1000"),
            ("biorxiv-10k-test-2000", "biorxiv-10k-test-2000"),
            ("eLife_984", "eLife_984"),
        )),
    )

    for flavor, suffix, datasets in runs:
        title = "standard process" if flavor is None else flavor
        command_queue.put(["echo", f"## {title}"])
        for dataset, slug in datasets:
            evaluate = [
                "./gradlew",
                "jatsEval",
                f"-Pp2t={eval_root}/{dataset}",
                "-Prun=1",
                "-PfileRatio=1",
            ]
            if flavor is not None:
                evaluate.append(f"-Pflavor={flavor}")
            command_queue.put(evaluate)
            # Each evaluation writes report.md; keep a copy per dataset/flavor
            # before the next evaluation is queued to run.
            command_queue.put(
                ["cp", f"{tmp_dir}/report.md", f"{tmp_dir}/report-{slug}{suffix}.md"]
            )

    # Sentinel: tells the worker that no more commands will follow.
    command_queue.put(None)
    threading.Thread(target=command_worker).start()
class Handler(http.server.SimpleHTTPRequestHandler):
    """HTTP front end for the evaluation run.

    GET behaviour:
      * ``/fetch?filename=<name>`` returns the content of ``<name>`` from the
        report directory as plain text.
      * Any other path starts the evaluation on the first request and, on
        later requests, returns the command output collected so far.
    """

    # Only files located under this directory may be served by /fetch.
    REPORT_DIR = '/opt/grobid/grobid-home/tmp/'

    def do_GET(self):
        """Dispatch a GET request as described in the class docstring."""
        global command_executed

        if self.path.startswith('/fetch?filename='):
            response = self._read_requested_file()
            if response is None:
                # An error response has already been sent.
                return
        elif not command_executed:
            command_executed = True
            threading.Thread(target=start_commands).start()
            response = "Starting evaluation."
        else:
            # Snapshot the queue under its lock: the worker thread may be
            # appending output while this request is being served.
            with command_output.mutex:
                collected = list(command_output.queue)
            response = "Command output:\n" + "".join(collected)

        self.send_response(200)
        self.send_header("Content-type", "text/plain")
        self.end_headers()
        self.wfile.write(response.encode())

    def _read_requested_file(self):
        """Return the text of the file named in the query string.

        Returns ``None`` after sending an error response when the name is
        malformed (400), points outside ``REPORT_DIR`` (403), does not name
        an existing file (404) or cannot be read (500).
        """
        filename = urllib.parse.unquote(self.path.split('=', 1)[1])

        # Resolve "..", absolute paths and symlinks before checking that the
        # target is still inside the report directory.
        try:
            base_dir = os.path.realpath(self.REPORT_DIR)
            file_path = os.path.realpath(os.path.join(base_dir, filename))
            inside = os.path.commonpath([base_dir, file_path]) == base_dir
        except (ValueError, OSError):
            # e.g. an embedded NUL byte in the requested name
            self.send_error(400, "Bad request", "Invalid file name.")
            return None

        # The client-supplied name goes into the (HTML-escaped) explanation
        # only, never into the status line.
        if not inside:
            self.send_error(
                403, "Forbidden",
                f"File '{filename}' is outside the report directory.")
            return None

        if not os.path.isfile(file_path):
            self.send_error(
                404, "File not found", f"File '{filename}' not found.")
            return None

        try:
            with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
                return file.read()
        except OSError as e:
            self.send_error(
                500, "Failed to read file", f"An error occurred: {e}")
            return None
# Bind to PORT on all interfaces (empty host string) and handle requests with
# Handler until the process is stopped; leaving the ``with`` block closes the
# server socket.
with socketserver.TCPServer(("", PORT), Handler) as httpd:
    print(f"Serving on port {PORT}")
    httpd.serve_forever()