"""Minimal HTTP front end for launching a fixed batch of evaluation commands.

Behaviour of a GET request:

* ``/fetch?filename=<name>`` returns the content of ``<name>`` from
  ``REPORT_DIR``.
* The first request of any other kind starts the command batch in a
  background thread and answers ``Starting evaluation.``.
* Every later request answers with the command output captured so far.
"""

import http.server
import socketserver
import subprocess
import threading
import queue
import io
import select
import re
import os
import urllib.request
import urllib.parse
import urllib.error

PORT = 8080

# Directory the /fetch endpoint is allowed to serve files from.
REPORT_DIR = '/opt/grobid/grobid-home/tmp/'

# Request-path prefix that selects the file download endpoint.
FETCH_PREFIX = '/fetch?filename='

# Set to True once the command batch has been started.
command_executed = False
# Lines captured from the commands; never consumed, only snapshotted.
command_output = queue.Queue()
# Commands waiting to be executed by the worker thread.
command_queue = queue.Queue()

# Commands executed, in order, by the worker thread.
EVALUATION_COMMANDS = [
    ["echo", "## standard process"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PMC_sample_1943', '-Prun=1', '-PfileRatio=1'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-pmc_sample_1943.md"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PLOS_1000', '-Prun=1', '-PfileRatio=1'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-plos_1000.md"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/biorxiv-10k-test-2000', '-Prun=1', '-PfileRatio=1'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-biorxiv-10k-test-2000.md"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/eLife_984', '-Prun=1', '-PfileRatio=1'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-elife_984.md"],

    ["echo", "## article/light"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PMC_sample_1943', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-pmc_sample_1943-article_light.md"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PLOS_1000', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-plos_1000-article_light.md"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/biorxiv-10k-test-2000', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-biorxiv-10k-test-2000-article_light.md"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/eLife_984', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-eLife_984-article_light.md"],

    ["echo", "## article/light-ref"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PMC_sample_1943', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light-ref'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-pmc_sample_1943-article_light_ref.md"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/PLOS_1000', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light-ref'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-plos_1000-article_light_ref.md"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/biorxiv-10k-test-2000', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light-ref'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-biorxiv-10k-test-2000-article_light_ref.md"],
    ['./gradlew', 'jatsEval', '-Pp2t=/opt/grobid/evaluation/eLife_984', '-Prun=1', '-PfileRatio=1', '-Pflavor=article/light-ref'],
    ["cp", "/opt/grobid/grobid-home/tmp/report.md", "/opt/grobid/grobid-home/tmp/report-eLife_984-article_light_ref.md"],
]


def sanitize_output(output):
    """Return *output* without non-printable characters.

    Everything outside the printable ASCII range (0x20-0x7E) is removed,
    except carriage returns. Line feeds are therefore removed as well; the
    caller re-appends one per captured line.
    """
    return re.sub(r'[^\x20-\x7E\r]+', '', output)


def run_command(command):
    """Run *command* and append its output, line by line, to ``command_output``.

    stderr is merged into stdout so that both streams are captured in the
    order they were written, and the pipe is read until EOF so that no
    trailing output is lost when the process exits. A ``"Leaving"`` marker
    is appended once the command is finished (or could not be started).

    Args:
        command: Argument list passed to ``subprocess.Popen``.
    """
    print("Running command ", command)
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors="replace",  # undecodable bytes must not abort the capture
            bufsize=1  # Line-buffered
        )
    except OSError as exc:
        # E.g. executable missing or not executable: report it instead of
        # letting the exception kill the worker thread.
        command_output.put(f"Failed to start {command!r}: {exc}\n")
        command_output.put("Leaving")
        return

    # The context manager closes the pipe and waits for the process.
    with process:
        for line in process.stdout:
            command_output.put(sanitize_output(line) + "\n")

    command_output.put("Leaving")


def command_worker():
    """Execute queued commands sequentially until a ``None`` sentinel arrives."""
    print("Starting worker command")
    while True:
        command = command_queue.get()
        try:
            if command is None:
                break
            run_command(command)
        except Exception as exc:
            # Thread boundary: report the failure and keep processing the
            # remaining commands rather than dying silently.
            command_output.put(f"Command {command!r} failed: {exc}\n")
        finally:
            # Always balance get() so command_queue.join() cannot hang.
            command_queue.task_done()


def start_commands():
    """Queue every evaluation command and start the worker thread."""
    for command in EVALUATION_COMMANDS:
        command_queue.put(command)
    # Sentinel: lets the worker thread exit once the batch is finished.
    command_queue.put(None)

    threading.Thread(target=command_worker).start()


def resolve_report_path(filename, base_dir=REPORT_DIR):
    """Resolve *filename* inside *base_dir*, refusing paths that escape it.

    Args:
        filename: Client-supplied file name, possibly containing ``..``
            components or an absolute path.
        base_dir: Directory the result must be located in.

    Returns:
        The resolved absolute path, or ``None`` when the path would end up
        outside *base_dir* or cannot be resolved at all.
    """
    try:
        base = os.path.realpath(base_dir)
        candidate = os.path.realpath(os.path.join(base, filename))
        inside = os.path.commonpath([base, candidate]) == base
    except ValueError:
        # Embedded NUL byte, or paths that cannot be compared.
        return None
    return candidate if inside else None


class Handler(http.server.SimpleHTTPRequestHandler):
    """Request handler exposing the evaluation trigger, status and downloads."""

    def do_GET(self):
        """Serve a report file, start the evaluation, or report its output."""
        global command_executed

        if self.path.startswith(FETCH_PREFIX):
            response = self._read_report()
            if response is None:
                # An error response has already been sent.
                return
        elif not command_executed:
            command_executed = True
            threading.Thread(target=start_commands).start()
            response = "Starting evaluation."
        else:
            # Snapshot under the queue's lock: the worker thread may be
            # appending concurrently.
            with command_output.mutex:
                captured = list(command_output.queue)
            response = "Command output:\n" + "".join(captured)

        self.send_response(200)
        self.send_header("Content-type", "text/plain")
        self.end_headers()
        self.wfile.write(response.encode())

    def _read_report(self):
        """Return the content of the requested report file.

        Returns:
            The file content, or ``None`` after an error response (403, 404
            or 500) has been sent to the client.
        """
        # Everything after the first '=' is the (URL-encoded) file name.
        filename = urllib.parse.unquote(self.path.split('=', 1)[1])

        file_path = resolve_report_path(filename)
        if file_path is None:
            self.send_error(
                403, "Forbidden",
                "The requested file is outside the report directory.")
            return None

        if not os.path.isfile(file_path):
            # Client-controlled text goes into `explain` (HTML-escaped body)
            # and never into the status line.
            self.send_error(
                404, "File not found", f"File '{filename}' not found.")
            return None

        try:
            with open(file_path, 'r', encoding='utf-8',
                      errors='replace') as file:
                return file.read()
        except OSError as exc:
            self.send_error(500, "An error occurred", str(exc))
            return None


def main():
    """Serve requests on ``PORT`` until interrupted."""
    with socketserver.TCPServer(("", PORT), Handler) as httpd:
        print(f"Serving on port {PORT}")
        httpd.serve_forever()


if __name__ == "__main__":
    main()