File size: 6,802 Bytes
2813cc9
 
4762d21
667c4ef
1017e87
47e5c00
cc380b1
 
8c48418
 
 
 
2813cc9
9be8c9c
4762d21
47e5c00
1017e87
667c4ef
8c48418
cc380b1
 
 
47e5c00
8c48418
1017e87
cc380b1
1017e87
 
 
 
cc380b1
 
1017e87
 
f52c6ed
cc380b1
f52c6ed
 
 
 
cc380b1
 
f52c6ed
 
cc380b1
f52c6ed
1017e87
cc380b1
 
1017e87
 
 
f52c6ed
47e5c00
8c48418
1017e87
cc380b1
1017e87
 
 
 
 
 
 
47e5c00
1017e87
87f128f
8c48418
cc380b1
8c48418
cc380b1
8c48418
cc380b1
8c48418
210f4ab
8c48418
 
210f4ab
8c48418
210f4ab
8c48418
210f4ab
8c48418
210f4ab
8c48418
 
210f4ab
8c48418
210f4ab
8c48418
210f4ab
8c48418
 
 
87f128f
 
1017e87
 
6cb824f
47e5c00
2813cc9
 
47e5c00
8c48418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99b8cb9
8c48418
 
 
 
 
 
 
4762d21
1017e87
cc380b1
8c48418
4762d21
47e5c00
 
 
 
 
 
4762d21
2813cc9
 
 
4762d21
6cb824f
47e5c00
2813cc9
 
8c48418
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import http.server
import socketserver
import subprocess
import threading
import queue
import io
import select
import re
import os
import urllib.request
import urllib.parse
import urllib.error

# TCP port the HTTP server listens on.
PORT = 8080
# Flipped to True by the first non-fetch GET request so the evaluation
# commands are only started once per server process.
command_executed = False
# Sanitized output chunks produced by the executed commands; the HTTP handler
# reads (without consuming) this queue to build the progress page.
command_output = queue.Queue()
# Commands (argument lists) waiting to be executed by the worker thread.
command_queue = queue.Queue()


def sanitize_output(output):
    """Return *output* reduced to printable ASCII plus carriage returns.

    Every character outside the range space (0x20) .. tilde (0x7E) is
    dropped, with the single exception of ``\\r``. Newlines, tabs, escape
    bytes and non-ASCII characters are therefore all removed.
    """
    # Collect the runs of characters we want to keep and glue them together;
    # this is the mirror image of deleting every run of unwanted characters.
    kept_runs = re.findall(r'[\x20-\x7E\r]+', output)
    return ''.join(kept_runs)


def run_command(command):
    """Run *command* and stream its output into ``command_output``.

    Each line written by the child process (stdout and stderr) is passed
    through ``sanitize_output`` and queued followed by a newline. When the
    command has finished, the marker string ``"Leaving"`` is queued.

    Args:
        command: Argument list handed to ``subprocess.Popen`` (no shell).

    A command that cannot be launched (missing executable, permission
    problem, ...) does not raise; the failure is reported through
    ``command_output`` instead so the HTTP client can see it.
    """
    print("Running command ", command)
    try:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            # Merge stderr into stdout: one pipe read to EOF cannot lose
            # trailing output, cannot deadlock on a full second pipe, and
            # keeps both streams in the order the child wrote them.
            stderr=subprocess.STDOUT,
            text=True,
            # Undecodable bytes must not abort the stream; the replacement
            # character is stripped by sanitize_output anyway.
            errors="replace",
            bufsize=1,  # Line-buffered
        )
    except OSError as exc:
        command_output.put(sanitize_output(f"Failed to start {command}: {exc}") + "\n")
        command_output.put("Leaving")
        return

    # The context manager closes the pipe and waits for the child on exit.
    with process:
        # Iterating reads until EOF, i.e. until the child has closed its end
        # of the pipe, so every line is delivered even after the child exits.
        for line in process.stdout:
            command_output.put(sanitize_output(line) + "\n")
    command_output.put("Leaving")


def command_worker():
    """Execute queued commands one after another until a ``None`` sentinel.

    Commands are taken from ``command_queue`` and passed to ``run_command``.
    A failure in one command is reported through ``command_output`` and does
    not stop the worker, so the remaining commands still run.
    """
    print("Starting worker command")
    while True:
        command = command_queue.get()
        try:
            if command is None:
                # Sentinel: stop the worker (task_done still runs below).
                break
            run_command(command)
        except Exception as exc:
            # Worker-loop boundary: surface the problem to the HTTP client
            # instead of letting the thread die silently.
            command_output.put(f"Command {command} failed: {exc}\n")
        finally:
            # Always balance get() so command_queue.join() can never hang.
            command_queue.task_done()


def start_commands():
    """Queue the full evaluation run and start the worker thread.

    For each flavor (the standard process, ``article/light`` and
    ``article/light-ref``) an ``echo`` heading is queued, followed, for every
    dataset, by the ``./gradlew jatsEval`` invocation and a ``cp`` that saves
    the generated ``report.md`` under a dataset/flavor specific name.

    A ``None`` sentinel is queued after the last command so the worker thread
    terminates once everything has run instead of blocking forever on an
    empty queue.
    """
    evaluation_dir = "/opt/grobid/evaluation"
    tmp_dir = "/opt/grobid/grobid-home/tmp"

    # (dataset directory, report name for the standard run,
    #  report name for the flavored runs).
    # The report names are kept exactly as they have always been written
    # (including the differing case of the eLife reports) because clients
    # fetch the reports by name.
    datasets = [
        ("PMC_sample_1943", "pmc_sample_1943", "pmc_sample_1943"),
        ("PLOS_1000", "plos_1000", "plos_1000"),
        ("biorxiv-10k-test-2000", "biorxiv-10k-test-2000", "biorxiv-10k-test-2000"),
        ("eLife_984", "elife_984", "eLife_984"),
    ]
    # (value of -Pflavor or None, heading echoed to the output, report suffix)
    flavors = [
        (None, "## standard process", ""),
        ("article/light", "## article/light", "-article_light"),
        ("article/light-ref", "## article/light-ref", "-article_light_ref"),
    ]

    for flavor, heading, suffix in flavors:
        command_queue.put(["echo", heading])
        for dataset, standard_name, flavored_name in datasets:
            gradle = [
                "./gradlew",
                "jatsEval",
                f"-Pp2t={evaluation_dir}/{dataset}",
                "-Prun=1",
                "-PfileRatio=1",
            ]
            if flavor is not None:
                gradle.append(f"-Pflavor={flavor}")
            report_name = standard_name if flavor is None else flavored_name
            command_queue.put(gradle)
            command_queue.put([
                "cp",
                f"{tmp_dir}/report.md",
                f"{tmp_dir}/report-{report_name}{suffix}.md",
            ])

    # Sentinel: lets command_worker leave its loop after the last command.
    command_queue.put(None)
    threading.Thread(target=command_worker).start()


class Handler(http.server.SimpleHTTPRequestHandler):
    """HTTP front end for the evaluation run.

    GET behavior:
      * ``/fetch?filename=<name>`` returns the content of ``<name>`` from
        ``REPORT_DIR`` as plain text.
      * The first other request starts the evaluation commands in a
        background thread and answers ``Starting evaluation.``.
      * Every later request returns the command output collected so far.
    """

    # Only files located inside this directory may be fetched.
    REPORT_DIR = '/opt/grobid/grobid-home/tmp/'

    def do_GET(self):
        """Dispatch a GET request as described in the class docstring."""
        global command_executed

        # Check if the request is for downloading a report file
        if self.path.startswith('/fetch?filename='):
            filename = urllib.parse.unquote(self.path.split('=', 1)[1])
            response = self._read_report(filename)
            if response is None:
                # An error response has already been sent.
                return

        elif not command_executed:
            command_executed = True
            threading.Thread(target=start_commands).start()
            response = "Starting evaluation."

        else:
            # Snapshot the queue without consuming it so that every request
            # sees the complete output produced so far.
            response = "Command output:\n" + "".join(list(command_output.queue))

        body = response.encode("utf-8")
        self.send_response(200)
        self.send_header("Content-type", "text/plain; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _read_report(self, filename):
        """Return the text of *filename* inside ``REPORT_DIR``.

        On any problem an HTTP error response is sent and ``None`` is
        returned, so the caller must not send a second response.
        The client-supplied name is only ever placed in the (HTML-escaped)
        error body, never in the status line.
        """
        base_dir = os.path.realpath(self.REPORT_DIR)
        try:
            # realpath resolves ".." components and symlinks before the
            # containment check below.
            file_path = os.path.realpath(os.path.join(base_dir, filename))
        except (OSError, ValueError):
            self.send_error(400, "Invalid file name")
            return None

        # Reject anything that escapes the report directory ("../", absolute
        # paths, symlinks pointing elsewhere).
        if not file_path.startswith(base_dir + os.sep):
            self.send_error(403, "Forbidden",
                            "Only files inside the report directory can be fetched.")
            return None

        if not os.path.isfile(file_path):
            self.send_error(404, "File not found", f"File '{filename}' not found.")
            return None

        try:
            # NOTE(review): reports are assumed to be UTF-8 text; undecodable
            # bytes are replaced rather than failing the request.
            with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
                return file.read()
        except OSError as e:
            self.send_error(500, "Failed to read file", str(e))
            return None


# Set SO_REUSEADDR on the listening socket so the server can be restarted
# immediately; otherwise bind() can fail with "Address already in use" while
# connections from the previous run are still in TIME_WAIT.
socketserver.TCPServer.allow_reuse_address = True

# Serve requests on all interfaces until the process is interrupted; the
# context manager closes the listening socket on exit.
with socketserver.TCPServer(("", PORT), Handler) as httpd:
    print(f"Serving on port {PORT}")
    httpd.serve_forever()