File size: 24,480 Bytes
821e9b3
 
3f8b483
821e9b3
9ed8b92
b3013af
 
 
 
 
 
 
5521e44
053ea77
 
 
9ed8b92
 
 
b3013af
9ed8b92
5521e44
b3013af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821e9b3
 
 
3f8b483
b3013af
 
 
3f8b483
 
821e9b3
5ce1ae0
 
 
54a0bc8
 
3f8b483
 
 
47934fb
 
 
b3013af
3f8b483
 
b3013af
3f8b483
54a0bc8
 
 
 
 
 
 
 
3f8b483
 
54a0bc8
 
5521e44
b3013af
821e9b3
5521e44
821e9b3
 
b3013af
 
821e9b3
 
 
b3013af
bee7793
 
b3013af
bee7793
f5e6b21
b3013af
9ed8b92
 
 
 
821e9b3
9ed8b92
 
 
64feb25
 
821e9b3
b3013af
 
 
 
 
 
 
 
 
9ed8b92
 
 
 
b3013af
 
9ed8b92
 
b3013af
 
 
 
 
 
 
 
 
 
 
9ed8b92
821e9b3
 
64feb25
 
 
 
821e9b3
b3013af
 
 
 
64feb25
 
 
 
b3013af
821e9b3
64feb25
 
821e9b3
64feb25
821e9b3
 
9ed8b92
053ea77
821e9b3
64feb25
821e9b3
053ea77
 
821e9b3
e22d08b
821e9b3
e22d08b
64feb25
 
e22d08b
64feb25
e22d08b
 
64feb25
e22d08b
64feb25
 
 
e22d08b
64feb25
 
 
 
 
053ea77
64feb25
 
9c745ef
053ea77
 
64feb25
 
 
 
 
 
 
9c745ef
64feb25
 
053ea77
64feb25
 
9c745ef
053ea77
 
64feb25
 
 
 
 
b3013af
 
64feb25
ff54288
 
 
 
 
 
 
7e8b548
ff54288
 
 
 
dff6d96
ff54288
 
 
 
 
7e8b548
ff54288
 
 
 
dff6d96
ff54288
64feb25
 
ff54288
 
 
 
7e8b548
ff54288
 
 
 
dff6d96
ff54288
64feb25
122c32d
 
b3013af
122c32d
b3013af
122c32d
 
7e8b548
122c32d
 
 
b3013af
dff6d96
122c32d
b3013af
 
64feb25
122c32d
 
 
 
 
7e8b548
122c32d
 
 
b3013af
dff6d96
122c32d
b3013af
 
122c32d
 
 
 
b3013af
7e8b548
 
122c32d
 
 
821e9b3
 
b3013af
821e9b3
 
7e8b548
122c32d
821e9b3
 
b3013af
dff6d96
 
821e9b3
b3013af
 
9ed8b92
64feb25
 
b3013af
 
 
 
 
 
 
 
 
 
 
 
 
9ed8b92
 
 
5521e44
821e9b3
 
9ed8b92
821e9b3
 
5771b1d
8096fdc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d68333
 
 
8096fdc
 
 
 
 
 
 
 
 
 
8086484
8096fdc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1feb2ff
54a0bc8
 
1feb2ff
8096fdc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54a0bc8
 
8096fdc
 
534392a
8096fdc
534392a
8096fdc
534392a
8096fdc
54a0bc8
8096fdc
 
 
9ed8b92
821e9b3
7446fcd
64feb25
 
821e9b3
9ed8b92
821e9b3
64feb25
 
 
 
 
 
 
 
 
 
 
 
d7b6b69
 
 
 
 
 
 
 
64feb25
9ed8b92
821e9b3
634125e
 
 
 
 
 
 
9ed8b92
 
634125e
64feb25
634125e
821e9b3
634125e
821e9b3
5521e44
 
ae3cd11
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
import os
import re
import json
import gradio as gr
from openai import OpenAI
import gspread
from google.oauth2.service_account import Credentials

# OAuth scopes requested for the Google service account; passed to
# Credentials.from_service_account_info() in get_google_sheet().
SCOPES = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive"
]

# Model and sampling temperature used for every chat-completion call in this
# file. The instructions shown to applicants in build_interface() quote the
# same values, so keep the two in sync when changing them.
MODEL_NAME = "gpt-4o-mini"  # Ensure this matches your deployed model.
TEMPERATURE = 0.2

# Initialize the OpenAI client with the API key from environment variables.
# This runs at import time: a missing OPENAI_API_KEY raises KeyError before
# the interface is built.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# In-memory set to track submitted emails (this resets when the app restarts).
# submit_prompt() additionally checks the Google Sheet for earlier submissions.
submitted_emails = set()

def get_google_sheet():
    """
    Open the "Submissions" worksheet of the configured spreadsheet.

    The service-account credentials are read as JSON from the
    "GOOGLE_CREDS_JSON" environment variable and the spreadsheet key from
    "SPREADSHEET_ID". Both are looked up with os.environ[...], so a missing
    variable raises KeyError.
    """
    service_account_info = json.loads(os.environ["GOOGLE_CREDS_JSON"])
    credentials = Credentials.from_service_account_info(service_account_info, scopes=SCOPES)
    spreadsheet = gspread.authorize(credentials).open_by_key(os.environ["SPREADSHEET_ID"])
    return spreadsheet.worksheet("Submissions")

def get_evaluation_questions():
    """
    Loads the evaluation set (questions, documents, expected answers) from
    environment variables.

    Environment variables read (each must hold a JSON array; all three arrays
    must have the same length):
    - TEST_QUESTION_1: user query strings.
    - TEST_DOCUMENTS_1: one entry per question. submit_prompt() splits each
      entry on "---", so an entry is presumably a string of paragraphs
      separated by "---" (an empty entry means "no documents").
    - TEST_EXPECTED_1: expected outputs. submit_prompt() treats a string entry
      as "query is irrelevant" and any other entry as the expected JSON object.

    Returns:
        A list of {"question": ..., "expected": ..., "docs": ...} dicts, one
        per question, or an empty list when a variable is missing/empty, is
        not valid JSON, is not a JSON array, or the lengths differ.
    """
    # (label used in log messages, environment variable), in parsing order.
    sources = (
        ("questions", "TEST_QUESTION_1"),
        ("expected answers", "TEST_EXPECTED_1"),
        ("documents", "TEST_DOCUMENTS_1"),
    )
    raw_values = {label: os.environ.get(var) for label, var in sources}
    if not all(raw_values.values()):
        return []

    parsed = {}
    for label, var in sources:
        try:
            value = json.loads(raw_values[label])
        except ValueError as e:  # json.JSONDecodeError is a ValueError.
            print(f"Error parsing {label}: {str(e)}")
            return []
        # Valid JSON that is not an array (e.g. `5`, `"text"`, `{...}`) would
        # either crash len() below or be zipped character-by-character.
        if not isinstance(value, list):
            print(f"Error parsing {label}: {var} must contain a JSON array.")
            return []
        parsed[label] = value

    questions_list = parsed["questions"]
    expected_list = parsed["expected answers"]
    docs_list = parsed["documents"]

    # Ensure all lists are of the same length.
    if not (len(questions_list) == len(expected_list) == len(docs_list)):
        print(
            "Mismatch in length: questions, expected answers and documents "
            "lists must all have the same length."
        )
        return []

    return [
        {"question": q, "expected": e, "docs": d}
        for q, e, d in zip(questions_list, expected_list, docs_list)
    ]

# Load evaluation questions at startup.
# Evaluated once at import time and never refreshed. The list is empty when an
# environment variable is missing or fails to parse (see
# get_evaluation_questions), in which case the loop in submit_prompt() runs
# zero times.
EVALUATION_QUESTIONS = get_evaluation_questions()

def sanitize_input(text):
    """
    Return a cleaned copy of a short free-text field.

    Every character other than ASCII letters, digits, whitespace and the
    punctuation . , ! ? @ : - is removed; the result is then stripped of
    surrounding whitespace and cut to at most 500 characters.
    """
    disallowed = re.compile(r"[^a-zA-Z0-9\s.,!?@:\-]")
    filtered = disallowed.sub("", text)
    trimmed = filtered.strip()
    return trimmed[:500]

def sanitize_prompt(text):
    """
    Return the system prompt with surrounding whitespace removed and capped
    at 8000 characters. The prompt's content is otherwise left untouched.
    """
    max_length = 8000
    trimmed = text.strip()
    return trimmed[:max_length]

def validate_email(email):
    """
    Validates that the provided email is in a valid format.

    The whole value must match `local@domain.tld`, where the local part uses
    letters, digits and `_ . + -`, and the domain uses letters, digits, `-`
    and `.`. Surrounding whitespace is not tolerated.

    Returns True if valid, False otherwise (including for non-string input
    such as None).
    """
    if not isinstance(email, str):
        return False
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let "user@example.com\n" through.
    email_regex = r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+"
    return re.fullmatch(email_regex, email) is not None


# Tags LLM1 is expected to wrap its reply in when the query is irrelevant.
_USER_MESSAGE_START = "<user_message>"
_USER_MESSAGE_END = "</user_message>"

# Keys the final LLM3 JSON object must contain and that are compared
# against the expected answer.
_REQUIRED_ANSWER_KEYS = ("buyer_firm", "seller_firm", "third_party", "contains_target_firm")


def _chat(system_prompt, user_content):
    """Send one system+user exchange to the model and return the stripped reply text."""
    response = client.chat.completions.create(
        model=MODEL_NAME,  # Ensure this model identifier matches your deployed model.
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        temperature=TEMPERATURE,
    )
    return response.choices[0].message.content.strip()


def _run_pipeline(question, docs, system_prompt_1, system_prompt_2, system_prompt_3):
    """
    Run the LLM1 -> LLM2 -> LLM3 chain for one evaluation question.

    Returns a tuple (output1, answer, not_relevant):
    - output1: LLM1's reply (or an error description if the call failed).
    - answer: the text between the user-message tags when LLM1 flagged the
      query as irrelevant, otherwise LLM3's reply (or an error description).
    - not_relevant: True when LLM1's reply contained both user-message tags.

    API failures are deliberately converted into text instead of raised, so a
    single failing call is graded as an incorrect answer rather than aborting
    the whole submission.
    """
    try:
        output1 = _chat(system_prompt_1, question)
    except Exception as e:
        output1 = f"Error during OpenAI API call: {str(e)}"

    if _USER_MESSAGE_START in output1 and _USER_MESSAGE_END in output1:
        # LLM1 deemed the query irrelevant: keep only the tagged message and
        # skip LLM2/LLM3 for this question.
        start_index = output1.index(_USER_MESSAGE_START) + len(_USER_MESSAGE_START)
        end_index = output1.index(_USER_MESSAGE_END)
        return output1, output1[start_index:end_index].strip(), True

    output1 = output1.strip()

    # LLM2 analyses every paragraph independently; results are concatenated.
    extracted = []
    for doc in docs:
        try:
            extracted.append(
                _chat(system_prompt_2, f"Target company context: \n{output1} \n\n Paragraph:\n {doc}")
            )
        except Exception as e:
            extracted.append(f"Error processing document: {str(e)}")
    output2 = "\n".join(extracted).strip()

    # LLM3 compiles the per-paragraph extractions into the final answer.
    try:
        answer = _chat(system_prompt_3, f"Extracted information: \n{output2}")
    except Exception as e:
        answer = f"Error during final OpenAI API call: {str(e)}"
    return output1, answer, False


def _grade_answer(expected, output1, answer, not_relevant):
    """
    Grade one pipeline result against the expected output.

    Returns a tuple (verdict, stored_answer, is_correct), where stored_answer
    is the text written to the sheet for this question.
    """
    raw_record = f"{output1}\n --- \n{answer}\n"

    # When the expected output is a string, it indicates that the query was irrelevant.
    if isinstance(expected, str):
        if not_relevant:
            return "Correct", raw_record, True
        return "Incorrect (Query was irrelevant, but no user message found)", raw_record, False

    # The expected output is a JSON object but LLM1 rejected the query.
    if not_relevant:
        return "Incorrect (Query was relevant, but user message found)", raw_record, False

    try:
        parsed_answer = json.loads(answer)
    except json.JSONDecodeError as e:
        return f"Incorrect (Invalid JSON: {str(e)})", raw_record, False

    # Store the normalized JSON rather than the raw model text from here on.
    parsed_record = f"{output1}\n --- \n{json.dumps(parsed_answer)}\n"

    # Valid JSON that is not an object (number, null, list, ...) cannot hold
    # any key; report every key as missing instead of crashing on `in`.
    if isinstance(parsed_answer, dict):
        missing_keys = [key for key in _REQUIRED_ANSWER_KEYS if key not in parsed_answer]
    else:
        missing_keys = list(_REQUIRED_ANSWER_KEYS)
    if missing_keys:
        return f"Incorrect (Missing Keys: {', '.join(missing_keys)})", parsed_record, False

    # Compare values for each required key.
    incorrect_values = [
        key for key in _REQUIRED_ANSWER_KEYS if parsed_answer[key] != expected[key]
    ]
    if len(incorrect_values) > 1:
        verdict = f"Incorrect (Values for keys {', '.join([repr(k) for k in incorrect_values])} are incorrect)"
    elif len(incorrect_values) == 1:
        verdict = f"Incorrect (Value for key '{incorrect_values[0]}' is incorrect)"
    else:
        verdict = "Correct"
    return verdict, parsed_record, not incorrect_values


def submit_prompt(email, name, system_prompt_1, system_prompt_2, system_prompt_3):
    """
    Handles the full submission process:
     - Validates email format.
     - Checks if the email has already been used (by in-memory set and Google Sheet).
     - Sanitizes input fields.
     - Runs the three system prompts against each evaluation question using the OpenAI API.
     - For each evaluation question, records the verdict and answer.
     - Appends the submission as a new row in the Google Sheet with columns:
         Name, Email, System Prompt (the three prompts joined by "---"), Score,
         and for each evaluation question: verdict and answer.

    Returns a short message for the applicant: an error/duplicate notice, or a
    thank-you note on success. Scores and verdicts are only written to the
    sheet; they are not included in the returned message.
    """
    # Validate email format.
    if not validate_email(email):
        return "Invalid email address. Please enter a valid email."

    # Normalise the email once, BEFORE the duplicate checks, so the value that
    # is compared is exactly the value that gets stored. It is intentionally
    # not passed through sanitize_input(): that would strip "_" and "+", which
    # validate_email() accepts, corrupting the stored address and letting the
    # same address bypass the once-only check.
    email = email.strip()[:500]

    # Check if this email has already been submitted (in-memory).
    if email in submitted_emails:
        return f"Submission already received for {email}. You can only submit once."

    # Connect to Google Sheet and check if the email already exists.
    try:
        sheet = get_google_sheet()
        email_col = sheet.col_values(2)  # Assumes column 2 contains the email addresses.
        if email in email_col[1:]:  # Skip header row.
            return f"Submission already received for {email}. You can only submit once."
    except Exception as e:
        print(f"Error accessing Google Sheet: {str(e)}")
        return f"Error accessing Google Sheet: {str(e)}"

    # Sanitize the remaining inputs.
    name = sanitize_input(name)
    system_prompt_1 = sanitize_prompt(system_prompt_1)
    system_prompt_2 = sanitize_prompt(system_prompt_2)
    system_prompt_3 = sanitize_prompt(system_prompt_3)

    score = 0
    verdicts = []      # Each question's verdict, in question order.
    answers_list = []  # Each question's stored answer, in question order.

    # Process each evaluation question.
    for item in EVALUATION_QUESTIONS:
        docs = item["docs"].split("---") if item["docs"] else []
        output1, answer, not_relevant = _run_pipeline(
            item["question"], docs, system_prompt_1, system_prompt_2, system_prompt_3
        )
        verdict, stored_answer, is_correct = _grade_answer(
            item["expected"], output1, answer, not_relevant
        )
        if is_correct:
            score += 1
        verdicts.append(verdict)
        answers_list.append(stored_answer)

    system_prompt = f"{system_prompt_1}\n---\n{system_prompt_2}\n---\n{system_prompt_3}"

    # Prepare the row for Google Sheets:
    # Name, Email, System Prompt, Score, then for each question: Verdict, Answer.
    row = [name, email, system_prompt, str(score)]
    for v, a in zip(verdicts, answers_list):
        row.extend([v, a])

    # Append the new row to the Google Sheet.
    try:
        sheet.append_row(row)
    except Exception as e:
        print(f"Error appending row to Google Sheet: {str(e)}")
        return f"Error saving submission: {str(e)}"

    # Remember the email only once the row is safely stored; otherwise a
    # failed save would lock the applicant out without any recorded result.
    submitted_emails.add(email)

    return (
        f"Thank you for your submission, {name}!\n\n"
    )

def build_interface():
    """
    Constructs the Gradio interface: task description, example workflow,
    instructions, the submission form and its Submit button.

    Clicking Submit calls submit_prompt(). The button is disabled and a
    confirmation is shown only when the submission was actually recorded;
    on any error or rejection the button stays enabled so the applicant can
    correct the input and try again.

    Returns:
        The gr.Blocks instance (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("""
        # Applicant Task: Target Company & Law Firm Identification

        This task involves processing a user query to determine the relevance to the intended task, followed by analyzing textual data  to extract information about law firms representing parties (Buyer, Seller, and Third Parties) and verifying the presence of a target company. For reference, see this sample agreement: [SEC Agreement Example](https://www.sec.gov/Archives/edgar/data/28452/000119312505012401/dex101.htm)

        > **Note:**  
        > This evaluation system uses the `gpt-4o-mini` model with a temperature setting of `0.2` for all LLM steps.

        The system is designed to sequentially leverage three LLM functions:

        ### Step 1: LLM1
        - Determines if the user's query mentions any target company.
        - If no target company is found, LLM1 responds with a message wrapped in `<user_message></user_message>` XML tags to inform the user that the query is irrelevant to this task.
        - If the query contains a target company, LLM1 moves forward with a formatted acknowledgment of the identified target company.

        ### Step 2: LLM2
        - Examines four separate paragraphs independently.
        - For each paragraph, extracts:
            - Buyer's representative law firm
            - Seller's representative law firm
            - Any third-party law firm present
            - Whether the target company is mentioned in the paragraph
        - Each paragraph's results are formatted and concatenated for the next step.

        ### Step 3: LLM3
        - Compiles the information from all analyzed paragraphs and outputs a structured JSON object:

        ```json
        {
        "buyer_firm": "string",
        "seller_firm": "string",
        "third_party": "string",
        "contains_target_firm": boolean
        }
        ```

        | Field                  | Default Value if Missing | Type      |
        | ---------------------- | ------------------------ | --------- |
        | `buyer_firm`           | `"unknown"`                 | `string`  |
        | `seller_firm`          | `"unknown"`                 | `string`  |
        | `third_party`          | `"unknown"`                 | `string`  |
        | `contains_target_firm` | `false`                  | `boolean` |

        The goal is to identify the representative law firms of involved parties and determine if the target company is mentioned, ensuring the results are structured and accurate.

        ---

        **Key Considerations:**
        - The output must adhere to the prescribed JSON format for the final step.
        - Ensure the system can accurately extract and classify relevant information from the input paragraphs.
        """)
        gr.Image("mermaid_chart.png", label="LLM Flowchart")

        # Example Inputs and Outputs in an Accordion
        with gr.Accordion("Example Workflow", open=False):
            gr.Markdown("""
        **User Query:**
        ```
        Is Kirkland & Ellis present in the agreement?
        ```

        **Document Provided:**

        **Paragraph 1:**
        ```
        This Stock and Asset Purchase Agreement is entered into as of October 28, 2021, among Purolite Corporation, a Delaware corporation, along with Stefan E. Brodie and Don B. Brodie (collectively referred to as the Sellers), and Ecolab Inc., a Delaware corporation, as the Purchaser. Additionally, Gibson, Dunn & Crutcher LLP, as an independent third-party representative, is engaged for specific advisory roles outlined in this Agreement.
        ```

        **Paragraph 2:**
        ```
        This Agreement shall be governed by and construed in accordance with the internal laws of the State of Delaware, without giving effect to any choice or conflict of law provision. Each clause within this Agreement shall be interpreted independently, and the invalidity of one clause shall not affect the enforceability of the remaining provisions. Headings are for convenience only and shall not affect the interpretation of this Agreement. Nothing herein shall be construed as limiting or waiving any rights or obligations under applicable law unless expressly stated.
        ```

        **Paragraph 3:**
        ```
        Such notices, demands, and other communications shall be directed to the Parties at their respective addresses. One Party may be contacted at:
        1 Ecolab Place
        St. Paul, Minnesota 55102
        Attention: General Counsel
        with a copy (which shall not constitute notice) to:
        Shearman & Sterling LLP
        599 Lexington Avenue
        New York, New York 10022
        Attention: Adam Miller
        Another Party may be reached at:
        Purolite Corporation
        2201 Renaissance Boulevard
        King of Prussia, Pennsylvania 19406
        Attention: Stefan E. Brodie; Howard Brodie
        with a copy (which shall not constitute notice) to:
        Cleary Gottlieb Steen & Hamilton LLP
        One Liberty Plaza
        New York, New York 10006
        Attention: John Reynolds; Sarah Lee
        Additional communications relating to the role of the third-party representative shall be directed to:
        Gibson, Dunn & Crutcher LLP
        200 Park Avenue
        New York, New York 10166
        Attention: Jane Smith
        ```

        **Paragraph 4:**
        ```
        All references to the singular include the plural and vice versa, and all references to any gender include all genders. The Parties agree that any ambiguities in the language of this Agreement shall not be construed against either Party. Section headings used in this Agreement are for reference only and shall not affect the meaning or interpretation of any provision.
        ```

        ---

        **Expected Steps and Outputs:**

        **Step 1 (LLM1):**
        - If no target company is identified:
        ```
        <user_message>Query is not relevant to the intended task.</user_message>
        ```
        - If a target company is identified:
        ```
        The target company is Kirkland & Ellis LLP.
        ```

        **Step 2 (LLM2 for Paragraphs):**
        - Example Input:
        ```
        This Stock and Asset Purchase Agreement is entered into as of October 28, 2021, among Purolite Corporation, a Delaware corporation, along with Stefan E. Brodie and Don B. Brodie (collectively referred to as the Sellers), and Ecolab Inc., a Delaware corporation, as the Purchaser. Additionally, Gibson, Dunn & Crutcher LLP, as an independent third-party representative, is engaged for specific advisory roles outlined in this Agreement.
        ```

        - Example Output:
        ```
        Buyer: Ecolab Inc.
        Buyer Representative: Not stated
        Seller: Purolite Corporation
        Seller Representative: Not stated
        Third-Party Representation: Advisory roles, Gibson, Dunn & Crutcher LLP
        Target Company Mentioned: No
        ```

        **Step 3 (LLM3 Final Output):**
        - Compiled JSON:
        ```json
        {
        "buyer_firm": "Shearman & Sterling LLP",
        "seller_firm": "Cleary Gottlieb Steen & Hamilton LLP",
        "third_party": "Gibson, Dunn & Crutcher LLP",
        "contains_target_firm": false
        }
        ```

        """)

        # Challenge instructions and testing guidance
        with gr.Accordion("Task Instructions and Testing", open=False):
            gr.Markdown("""
        ---
        **Task Instructions:**
        - Design prompts that ensure proper interaction between the three LLM systems, with each step contributing to the final output.
        - Ensure strict adherence to JSON formatting requirements (e.g., no extra characters that may cause JSON parsing errors).
        - Test extensively to verify accurate law firm and target company identification.

        **Output Requirements:**
        - Ensure final LLM3 JSON output has the following keys:
            - `"buyer_firm"`
            - `"seller_firm"`
            - `"third_party"`
            - `"contains_target_firm"`
        - Values must be accurately extracted or classified based on LLM2's parsed data.

        **Hints for Crafting System Prompts:**
        - Explicitly specify formatting requirements at each step.
        - Clarify the task definitions and expected classifications in each system prompt for LLM1, LLM2, and LLM3.
        - Test using diverse sample data for robustness.
        ---
        """)

        gr.Markdown("""
        ---
        ### Submission Instructions

        Enter your name and email below, as listed in your CV, and submit your designed prompts.

        You can only submit once, so validate your system prompts thoroughly using mock queries and example data before final submission.

        **Good Luck!**

        _Remember: Focus on clarity, accuracy, and structured responses to achieve a high score!_
        ---
        """)

        email_input = gr.Textbox(label="Email", placeholder="your.email@example.com")
        name_input = gr.Textbox(label="First Name, Last Name", placeholder="John, Smith")
        system_prompt_input_1 = gr.Textbox(
            label="System Prompt for LLM1",
            placeholder="Enter your system prompt here...",
            lines=6,
        )

        system_prompt_input_2 = gr.Textbox(
            label="System Prompt for LLM2",
            placeholder="Enter your system prompt here...",
            lines=10,
        )

        system_prompt_input_3 = gr.Textbox(
            label="System Prompt for LLM3",
            placeholder="Enter your system prompt here...",
            lines=6,
        )
        gr.Markdown("""
        <div style="background-color:#fff7e6; padding:16px; border-radius:8px; border:1px solid #ffe5b4; margin-bottom:1em;">
        <b>⏳ Please note:</b><br>
        Submitting may take up to <b>120 seconds</b>.<br>
        <strong>After clicking <span style='color:#006ce1;'>Submit</span>, please wait and <span style='color:crimson;'>do not press it again</span>.</strong>
        </div>
        """)

        submit_button = gr.Button("Submit")
        output_text = gr.Textbox(label="Results", lines=15)
        feedback_md = gr.Markdown("", visible=False)

        # submit_prompt() reports its outcome only through the returned text;
        # a recorded submission is the one message that starts with this
        # prefix. Keep it in sync with submit_prompt()'s success message.
        success_prefix = "Thank you for your submission"

        def submit_and_disable(email, name, s1, s2, s3):
            """Run the submission and lock the form only if it was recorded."""
            message = submit_prompt(email, name, s1, s2, s3)
            if message.startswith(success_prefix):
                # Feedback to be shown in the Markdown field
                feedback = "✅ **Submission received! Thank you.**<br>Please wait for results to appear above. You can close the page."
                return message, gr.update(interactive=False), gr.update(value=feedback, visible=True)
            # Invalid email, duplicate, or a sheet error: nothing was stored by
            # this attempt, so do not claim success and leave the button usable.
            feedback = "⚠️ **This submission was not recorded.**<br>Please read the message in the Results box, fix the problem and submit again."
            return message, gr.update(interactive=True), gr.update(value=feedback, visible=True)

        submit_button.click(
            fn=submit_and_disable,
            inputs=[email_input, name_input, system_prompt_input_1, system_prompt_input_2, system_prompt_input_3],
            outputs=[output_text, submit_button, feedback_md],
        )

    return demo

if __name__ == "__main__":
    # Bind to every network interface (0.0.0.0) on port 7860 so the app can be
    # reached from outside the host, e.g. when running inside a container.
    build_interface().launch(server_name="0.0.0.0", server_port=7860)