# grobid-evaluation / service.py
# lfoppiano's picture
# Update service.py
# 99b8cb9 verified
# raw
# history blame
# 6.8 kB
import http.server
import socketserver
import subprocess
import threading
import queue
import io
import select
import re
import os
import urllib.request
import urllib.parse
import urllib.error
# TCP port the HTTP server listens on.
PORT = 8080
# Set to True by the first non-/fetch GET request, which launches the
# evaluation batch; later requests return the collected output instead.
command_executed = False
# Append-only log of sanitized output lines produced by the commands.
# The HTTP handler reads a snapshot of it without consuming the items.
command_output = queue.Queue()
# Commands (argv lists) waiting to be executed by the worker thread.
command_queue = queue.Queue()
def sanitize_output(output):
    """Return *output* reduced to printable ASCII plus carriage returns.

    Every run of characters outside the range 0x20-0x7E is dropped, with
    the single exception of ``\\r``. Note that this also removes newlines,
    tabs, ANSI escape bytes and all non-ASCII characters.
    """
    disallowed = re.compile(r'[^\x20-\x7E\r]+')
    return disallowed.sub('', output)
def run_command(command):
    """Run *command* and stream its output into ``command_output``.

    Each line written by the child process (stdout and stderr alike) is
    passed through ``sanitize_output`` and queued with a trailing newline.
    When the command is finished the marker ``"Leaving"`` is queued.

    Args:
        command: The argv list handed to ``subprocess.Popen``.
    """
    print("Running command ", command)
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            # Merge stderr into stdout: both streams end up in the same
            # queue anyway, and a single pipe can simply be read to EOF.
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",  # undecodable bytes must not abort the run
            bufsize=1,  # Line-buffered
        )
    except OSError as e:
        # E.g. the executable does not exist. Report it in the output log
        # rather than letting the exception kill the calling thread.
        command_output.put(
            sanitize_output(f"Failed to start {command}: {e}") + "\n"
        )
        command_output.put("Leaving")
        return

    # Iterating the pipe reads until EOF, so nothing written shortly before
    # the process exits is dropped. Leaving the ``with`` block closes the
    # pipe and waits for the process.
    with process:
        for line in process.stdout:
            command_output.put(sanitize_output(line) + "\n")

    command_output.put("Leaving")
def command_worker():
    """Execute queued commands one at a time until a ``None`` sentinel.

    Every item taken from ``command_queue`` is acknowledged with
    ``task_done()``, including the sentinel and commands whose execution
    raised, so the queue's unfinished-task count stays accurate and
    ``command_queue.join()`` cannot hang on an unacknowledged item.
    """
    print("Starting worker command")
    while True:
        command = command_queue.get()
        try:
            if command is None:
                break
            run_command(command)
        finally:
            # Runs on normal completion, on the sentinel ``break`` and when
            # run_command raises.
            command_queue.task_done()
def start_commands():
    """Queue the full evaluation batch and start the worker thread.

    The batch runs the ``jatsEval`` Gradle task on four corpora, first with
    the default settings and then with the ``article/light`` and
    ``article/light-ref`` flavors. After each run the generated
    ``report.md`` is copied to a run-specific file name so that the next
    run does not overwrite it.

    A ``None`` sentinel is queued after the last command so the worker
    thread terminates once the batch is done instead of blocking forever
    on an empty queue.
    """
    # NOTE(review): the eLife report name is lower-cased for the standard
    # run ("report-elife_984.md") but keeps its capital L for the flavored
    # runs. The names are left untouched because they are what gets copied
    # to disk and requested through /fetch.
    commands = [
        ["echo", "## standard process"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PMC_sample_1943', '-Prun=1', '-PfileRatio=1'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-pmc_sample_1943.md"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PLOS_1000', '-Prun=1', '-PfileRatio=1'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-plos_1000.md"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/biorxiv-10k-test-2000', '-Prun=1', '-PfileRatio=1'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-biorxiv-10k-test-2000.md"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/eLife_984', '-Prun=1', '-PfileRatio=1'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-elife_984.md"],
        ["echo", "## article/light"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PMC_sample_1943', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-pmc_sample_1943-article_light.md"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PLOS_1000', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-plos_1000-article_light.md"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/biorxiv-10k-test-2000', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-biorxiv-10k-test-2000-article_light.md"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/eLife_984', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-eLife_984-article_light.md"],
        ["echo", "## article/light-ref"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PMC_sample_1943', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light-ref'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-pmc_sample_1943-article_light_ref.md"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PLOS_1000', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light-ref'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-plos_1000-article_light_ref.md"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/biorxiv-10k-test-2000', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light-ref'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-biorxiv-10k-test-2000-article_light_ref.md"],
        ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/eLife_984', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light-ref'],
        ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-eLife_984-article_light_ref.md"],
    ]
    for command in commands:
        command_queue.put(command)
    # Sentinel: tells command_worker to leave its loop after the last
    # command, so the non-daemon thread does not keep the process alive.
    command_queue.put(None)
    threading.Thread(target=command_worker).start()
class Handler(http.server.SimpleHTTPRequestHandler):
    """HTTP front end for the evaluation run.

    GET requests are handled as follows:

    * ``/fetch?filename=<name>`` returns the content of ``<name>`` from
      ``REPORT_DIR``.
    * The first request of any other kind starts the evaluation batch.
    * Every later request returns the output collected so far.
    """

    # The only directory /fetch is allowed to serve files from.
    REPORT_DIR = '/opt/grobid/grobid-home/tmp/'

    def do_GET(self):
        """Dispatch a GET request and send a plain-text response."""
        global command_executed

        if self.path.startswith('/fetch?filename='):
            response = self._read_report()
            if response is None:
                # An error response has already been sent; sending a 200
                # on top of it would corrupt the reply.
                return
        elif not command_executed:
            command_executed = True
            threading.Thread(target=start_commands).start()
            response = "Starting evaluation."
        else:
            # Snapshot the log under the queue's own lock because the
            # worker thread appends to it concurrently.
            with command_output.mutex:
                chunks = list(command_output.queue)
            response = "Command output:\n" + "".join(chunks)

        self.send_response(200)
        # The body is always encoded as UTF-8 below, so say so.
        self.send_header("Content-type", "text/plain; charset=utf-8")
        self.end_headers()
        self.wfile.write(response.encode("utf-8"))

    def _read_report(self):
        """Return the text of the requested report, or None after an error.

        The file name comes from the request and is untrusted: it is
        resolved against ``REPORT_DIR`` and rejected with a 404 when the
        resolved path lies outside that directory (``..`` components,
        absolute paths, symlinks) or is not a regular file. Read failures
        produce a 500. In both error cases the error response has been
        sent before this method returns None.
        """
        filename = urllib.parse.unquote(self.path.split('=', 1)[1])
        try:
            base_dir = os.path.realpath(self.REPORT_DIR)
            file_path = os.path.realpath(os.path.join(base_dir, filename))
            inside = os.path.commonpath([base_dir, file_path]) == base_dir
            if inside and os.path.isfile(file_path):
                with open(file_path, 'r', encoding='utf-8') as file:
                    return file.read()
        except (OSError, ValueError) as e:
            # ValueError covers undecodable content and malformed paths.
            # The detail goes into the HTML-escaped body, never into the
            # status line.
            self.send_error(500, "Failed to read file",
                            f"An error occurred: {e}")
            return None

        # The client-supplied name is passed as ``explain`` (escaped body
        # text); the status line itself carries only a fixed message.
        self.send_error(404, "File not found",
                        f"File '{filename}' not found.")
        return None
# Listen on every interface; the server socket is closed when the ``with``
# block is left.
server_address = ("", PORT)
with socketserver.TCPServer(server_address, Handler) as httpd:
    print(f"Serving on port {PORT}")
    # Blocks, handling one request at a time, until interrupted.
    httpd.serve_forever()