# Spaces:
# Paused
# Paused
import http.server | |
import socketserver | |
import subprocess | |
import threading | |
import queue | |
import io | |
import select | |
import re | |
import os | |
import urllib.request | |
import urllib.parse | |
import urllib.error | |
# TCP port the HTTP server binds to.
PORT = 8080

# Commands waiting to be executed, consumed one at a time by command_worker().
command_queue = queue.Queue()

# Sanitized output lines produced by the executed commands; the HTTP handler
# reads this queue without consuming it.
command_output = queue.Queue()

# Becomes True once the first non-fetch request has started the evaluation.
command_executed = False
def sanitize_output(output):
    """Return ``output`` reduced to printable ASCII plus carriage returns.

    Characters in the range 0x20-0x7E and ``\\r`` are kept; every other
    character (newlines, tabs, control codes, non-ASCII text) is removed.
    """
    unwanted = re.compile(r'[^\x20-\x7E\r]+')
    return unwanted.sub('', output)
def run_command(command):
    """Run ``command`` and stream its output into ``command_output``.

    Every line written by the child to stdout or stderr is passed through
    ``sanitize_output`` and appended to ``command_output`` followed by a
    newline. Once the child has exited and both pipes have been read to
    end-of-file, the marker ``"Leaving"`` is appended.

    Args:
        command: Argument list handed to ``subprocess.Popen``.

    If the process cannot be started (for example the executable does not
    exist), the error is reported through ``command_output`` instead of being
    raised, and ``"Leaving"`` is still appended.
    """
    print("Running command ", command)
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",  # undecodable bytes must not abort the reader
            bufsize=1,  # Line-buffered
        )
    except OSError as exc:
        command_output.put(sanitize_output(f"Failed to start {command}: {exc}") + "\n")
        command_output.put("Leaving")
        return

    def _pump(stream):
        # Blocking line iteration sees every line up to EOF, including data
        # already sitting in Python's read buffer and anything written just
        # before the child exits.
        for line in stream:
            command_output.put(sanitize_output(line) + "\n")

    # The context manager closes both pipes and reaps the child on exit.
    with process:
        # One reader per pipe so a full stderr pipe can never stall stdout
        # (or the other way round).
        readers = [
            threading.Thread(target=_pump, args=(stream,))
            for stream in (process.stdout, process.stderr)
        ]
        for reader in readers:
            reader.start()
        process.wait()
        # Wait for the readers to drain whatever is left in the pipes.
        for reader in readers:
            reader.join()
    command_output.put("Leaving")
def command_worker():
    """Execute queued commands one after another until a ``None`` sentinel.

    Each item taken from ``command_queue`` is passed to ``run_command``.
    A failure while running one command is reported (on stdout and in
    ``command_output``) and the worker moves on to the next command, so one
    broken step cannot silently drop the rest of the queue. ``task_done()``
    is called for every item taken, including the sentinel, so
    ``command_queue.join()`` cannot hang.
    """
    print("Starting worker command")
    while True:
        command = command_queue.get()
        try:
            if command is None:
                break
            run_command(command)
        except Exception as exc:  # thread boundary: report and keep going
            print(f"Command {command} failed: {exc}")
            command_output.put(f"Command {command} failed: {exc}\n")
        finally:
            # Runs on normal completion, on failure and on the sentinel break.
            command_queue.task_done()
def start_commands():
    """Queue the full evaluation plan and start the worker thread.

    Three passes are queued in order: the standard process, then the
    ``article/light`` flavor, then the ``article/light-ref`` flavor. Each
    pass echoes a section title and, for every corpus, runs the ``jatsEval``
    Gradle task and copies the resulting ``report.md`` to a per-corpus,
    per-flavor file name.
    """
    evaluation_root = "/opt/grobid/evaluation/"
    report_dir = "/opt/grobid/grobid-home/tmp/"

    # (corpus directory, report slug for the standard pass,
    #  report slug for the flavored passes).
    # The eLife slug is lower-case only in the standard pass; the existing
    # report file names are reproduced exactly.
    corpora = (
        ("PMC_sample_1943", "pmc_sample_1943", "pmc_sample_1943"),
        ("PLOS_1000", "plos_1000", "plos_1000"),
        ("biorxiv-10k-test-2000", "biorxiv-10k-test-2000", "biorxiv-10k-test-2000"),
        ("eLife_984", "elife_984", "eLife_984"),
    )

    # (section title, value of -Pflavor or None, report file-name suffix)
    passes = (
        ("standard process", None, ""),
        ("article/light", "article/light", "-article_light"),
        ("article/light-ref", "article/light-ref", "-article_light_ref"),
    )

    for title, flavor, suffix in passes:
        command_queue.put(["echo", "## " + title])
        for corpus, plain_slug, flavored_slug in corpora:
            evaluate = [
                './gradlew',
                'jatsEval',
                '-Pp2t=' + evaluation_root + corpus,
                '-Prun=1',
                '-PfileRatio=1',
            ]
            if flavor is not None:
                evaluate.append('-Pflavor=' + flavor)
            command_queue.put(evaluate)

            slug = plain_slug if flavor is None else flavored_slug
            command_queue.put([
                "cp",
                report_dir + "report.md",
                report_dir + "report-" + slug + suffix + ".md",
            ])

    # The worker is started only after the whole plan has been queued.
    threading.Thread(target=command_worker).start()
class Handler(http.server.SimpleHTTPRequestHandler):
    """HTTP front-end for the evaluation run.

    GET behaviour:
      * ``/fetch?filename=<name>`` returns the content of ``<name>`` from
        ``REPORT_DIR`` as plain text (404 if it is missing or lies outside
        that directory, 500 if it cannot be read).
      * The first request for any other path starts the evaluation and
        answers ``"Starting evaluation."``.
      * Every later request returns the command output collected so far.
    """

    # The only directory /fetch is allowed to serve files from.
    REPORT_DIR = '/opt/grobid/grobid-home/tmp/'

    @classmethod
    def _resolve_report_path(cls, filename):
        """Return the real path of ``filename`` inside ``REPORT_DIR``.

        Returns ``None`` when the resolved path escapes ``REPORT_DIR``
        (``..`` components, absolute paths, symlinks pointing elsewhere) or
        cannot be resolved at all.
        """
        try:
            base = os.path.realpath(cls.REPORT_DIR)
            # os.path.join drops ``base`` for an absolute ``filename``; the
            # containment check below rejects that case as well.
            candidate = os.path.realpath(os.path.join(base, filename))
            inside = os.path.commonpath([base, candidate]) == base
        except (ValueError, OSError):
            return None
        return candidate if inside else None

    def _read_report(self):
        """Return the text of the requested report file.

        On failure an error response is sent and ``None`` is returned.
        """
        # Everything after the first '=' is the percent-encoded file name.
        filename = urllib.parse.unquote(self.path.split('=', 1)[1])
        file_path = self._resolve_report_path(filename)
        if file_path is None or not os.path.isfile(file_path):
            # Client-controlled text goes into ``explain`` (HTML-escaped body
            # only), never into the status line, where CR/LF or non-latin-1
            # characters would corrupt or break the response.
            self.send_error(404, "File not found", f"File '{filename}' not found.")
            return None
        try:
            with open(file_path, 'r') as file:
                return file.read()
        except (OSError, ValueError) as e:
            self.send_error(500, "An error occurred", str(e))
            return None

    def do_GET(self):
        """Serve a report file, start the evaluation, or report its output."""
        global command_executed
        if self.path.startswith('/fetch?filename='):
            response = self._read_report()
            if response is None:
                # An error response has already been sent; do not send a 200.
                return
        elif not command_executed:
            command_executed = True
            threading.Thread(target=start_commands).start()
            response = "Starting evaluation."
        else:
            # Snapshot the queue without consuming it so that every request
            # sees the complete output so far.
            response = "Command output:\n" + "".join(list(command_output.queue))
        self.send_response(200)
        self.send_header("Content-type", "text/plain")
        self.end_headers()
        self.wfile.write(response.encode())
# Bind to every interface on PORT and handle requests until the process is
# stopped; leaving the ``with`` block closes the listening socket.
httpd = socketserver.TCPServer(("", PORT), Handler)
with httpd:
    print(f"Serving on port {PORT}")
    httpd.serve_forever()