# File size: 65,074 Bytes
# 744e5e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
from jupyter_handler import JupyterNotebook
import json
import logging
import os
import datetime
from pathlib import Path
from typing import Dict, List, Any, Optional
from tavily import TavilyClient

# Phoenix tracing imports
# The openinference import is optional: PHOENIX_AVAILABLE records whether it
# succeeded, and the outcome is printed to stdout at import time.
try:
    from openinference.instrumentation import using_session
    PHOENIX_AVAILABLE = True
    print("Phoenix session tracking imports successful")
except ImportError:
    # Only ImportError is handled; using_session stays undefined in this case,
    # so callers must check PHOENIX_AVAILABLE before using it.
    PHOENIX_AVAILABLE = False
    print("Phoenix session tracking not available - missing openinference packages")

# Configure logging for utils module
# Module-level logger named after this module (__name__).
logger = logging.getLogger(__name__)

# Initialize Tavily client
# The client is only constructed when the TAVILY_API_KEY environment variable
# is set to a non-empty value; otherwise tavily_client is None.
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
tavily_client = TavilyClient(api_key=TAVILY_API_KEY) if TAVILY_API_KEY else None


def _single_string_param_tool(name, description, param_name, param_description):
    """Build one function-tool schema that takes a single required string parameter."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {
                    param_name: {
                        "type": "string",
                        "description": param_description,
                    },
                },
                "required": [param_name],
            },
        },
    }


# Function-tool schemas. Every tool declares exactly one required string
# parameter, so each entry is produced by the same builder.
TOOLS = [
    _single_string_param_tool(
        "add_and_execute_jupyter_code_cell",
        "A Python code execution environment that runs code in a Jupyter notebook interface. This is stateful - variables and imports persist between executions.",
        "code",
        "The Python code to execute.",
    ),
    _single_string_param_tool(
        "edit_and_execute_current_cell",
        "Edit the current/last code cell and execute the new code. Use this to fix errors or modify the previous code instead of creating a new cell.",
        "code",
        "The updated Python code to replace the current cell with and execute.",
    ),
    _single_string_param_tool(
        "execute_shell_command",
        "Execute shell/system commands like ls, cat, mkdir, etc. This runs independently of Python and provides terminal-style output.",
        "command",
        "The shell command to execute (e.g., 'ls -la', 'cat file.txt', 'mkdir new_folder').",
    ),
    _single_string_param_tool(
        "web_search",
        "Search the web for current information, documentation, tutorials, and solutions to coding problems. Use this to get context before starting tasks or when encountering errors.",
        "query",
        "Search query (max 400 characters). Be specific and include relevant keywords.",
    ),
]

# Uncomment to keep only the first tool (the code-cell executor).
# TOOLS = TOOLS[:1]

# Turn limit; recorded as "max_turns" in a freshly created session state.
MAX_TURNS = 20


def create_phoenix_session_context(session_id: str, user_id: str = None, metadata: Dict = None):
    """
    Build the context manager used to group LLM calls under one Phoenix session.

    Args:
        session_id: Unique identifier for the session
        user_id: Optional user identifier (accepted but not used by this function)
        metadata: Additional metadata (accepted but not used by this function)

    Returns:
        The result of ``using_session(session_id)`` when Phoenix is available
        and that call succeeds; otherwise a no-op ``contextlib.nullcontext()``.
    """
    from contextlib import nullcontext

    if PHOENIX_AVAILABLE:
        try:
            logger.debug(f"Creating Phoenix session context for session_id: {session_id}")
            session_context = using_session(session_id)
        except Exception as e:
            # Tracing is best-effort: report the problem and fall through to the no-op.
            logger.warning(f"Failed to create Phoenix session context for {session_id}: {e}")
        else:
            return session_context

    # Phoenix is missing, or creating the session context failed.
    return nullcontext()


class SessionStateManager:
    """Manages comprehensive session state in a single JSON file.

    The state is a plain dict stored at
    ``<base_dir>/<session_id>/session_state.json``. Most methods take that dict
    as ``state`` and mutate it in place; the file is only written by
    ``save_state`` (which ``validate_and_repair_conversation`` calls after a
    repair).
    """

    def __init__(self, session_id: str, base_dir: str = './temp/'):
        """Record the session paths and create the session directory if missing.

        Args:
            session_id: Identifier of the session; also the directory name.
            base_dir: Directory under which per-session directories live.
        """
        self.session_id = session_id
        self.base_dir = Path(base_dir)
        self.session_dir = self.base_dir / session_id
        self.state_file = self.session_dir / 'session_state.json'
        self.session_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"SessionStateManager initialized for {session_id}")

    def create_initial_state(self, hardware_config: Dict, api_config: Dict,
                             environment: Dict, system_prompt: str) -> Dict:
        """Create initial session state structure.

        The returned dict is not written to disk; call ``save_state`` for that.

        Args:
            hardware_config: Stored unchanged under ``"hardware_config"``.
            api_config: Stored unchanged under ``"api_config"``.
            environment: Stored unchanged under ``"environment"``.
            system_prompt: Content of the initial system message.

        Returns:
            A new state dict whose conversation history holds only the system
            message.
        """
        timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()

        initial_state = {
            "session_id": self.session_id,
            "created_at": timestamp,
            "last_updated": timestamp,
            "version": "1.0",

            "hardware_config": hardware_config,
            "api_config": api_config,
            "environment": environment,

            "conversation_history": [
                {
                    "role": "system",
                    "content": system_prompt,
                    "timestamp": timestamp,
                    "metadata": {"type": "system_initialization"}
                }
            ],

            "llm_interactions": [],  # Complete API call logs
            "tool_executions": [],   # All tool calls and results

            "notebook_data": {
                "cells": [],
                "metadata": {
                    "kernel_info": {"name": "python3"},
                    "language_info": {"name": "python", "version": "3.12"},
                },
                "nbformat": 4,
                "nbformat_minor": 0
            },

            "execution_state": {
                "current_turn": 0,
                "max_turns": MAX_TURNS,
                "is_running": False,
                "is_paused": False,
                "last_execution_successful": None,
                "sandbox_active": False,
                "sandbox_info": None
            },

            "session_stats": {
                "total_messages": 1,
                "total_code_executions": 0,
                "total_searches": 0,
                "total_errors": 0,
                "session_duration_seconds": 0
            }
        }

        logger.info("Created initial session state for %s", self.session_id)
        return initial_state

    def load_state(self) -> Optional[Dict]:
        """Load session state from file with improved error handling.

        Returns:
            The parsed state dict, or ``None`` when the file does not exist or
            cannot be read/parsed. If the file contains invalid JSON, a copy is
            kept next to it with a ``.corrupted`` suffix (best effort).
        """
        if not self.state_file.exists():
            logger.info(f"No existing session state found for {self.session_id}")
            return None

        try:
            with open(self.state_file, 'r', encoding='utf-8') as f:
                state = json.load(f)
            logger.info(f"Loaded session state for {self.session_id} with {len(state.get('conversation_history', []))} messages")
            return state
        except json.JSONDecodeError as e:
            logger.error(f"JSON corruption in session state for {self.session_id}: {str(e)}")
            logger.info(f"Creating backup of corrupted file: {self.state_file}.corrupted")
            try:
                import shutil
                shutil.copy2(self.state_file, str(self.state_file) + ".corrupted")
                logger.info("Backup created successfully")
            except Exception as backup_error:
                logger.warning(f"Failed to create backup: {backup_error}")
            return None
        except Exception as e:
            logger.error(f"Failed to load session state for {self.session_id}: {str(e)}")
            return None

    def save_state(self, state: Dict) -> bool:
        """Save session state to file with improved error handling.

        Refreshes ``last_updated`` and the duration/message counters in
        ``state``, serializes it once, writes the text to a ``.tmp`` sibling and
        then renames that over the state file.

        Args:
            state: Session state dict; must contain an ISO-format
                ``"created_at"`` value.

        Returns:
            ``True`` on success, ``False`` if anything failed (the error is
            logged and a leftover temp file is removed).
        """
        temp_file = self.state_file.with_suffix('.tmp')
        try:
            current_time = datetime.datetime.now(datetime.timezone.utc)
            state["last_updated"] = current_time.isoformat()

            # Update session stats
            stats = state.setdefault("session_stats", {})
            created_at = datetime.datetime.fromisoformat(state["created_at"])
            stats["session_duration_seconds"] = int((current_time - created_at).total_seconds())
            stats["total_messages"] = len(state.get("conversation_history", []))

            # Serialize once: this both validates the state and produces the
            # exact text that is written below.
            try:
                payload = json.dumps(state, indent=2, ensure_ascii=False)
            except (TypeError, ValueError) as e:
                logger.error(f"State contains non-serializable data: {e}")
                logger.info("Attempting to clean non-serializable data...")
                state = self._clean_non_serializable_data(state)
                payload = json.dumps(state, indent=2, ensure_ascii=False)

            # Write to temporary file first, then rename over the real file
            with open(temp_file, 'w', encoding='utf-8') as f:
                f.write(payload)
            temp_file.replace(self.state_file)

            logger.debug(f"Saved session state for {self.session_id} ({len(payload)} characters)")
            return True
        except Exception as e:
            logger.error(f"Failed to save session state for {self.session_id}: {str(e)}")
            # Clean up temp file if it exists
            if temp_file.exists():
                try:
                    temp_file.unlink()
                except Exception:
                    pass
            return False

    def _clean_non_serializable_data(self, obj, _active=None):
        """Return a copy of ``obj`` in which non-serializable leaves are replaced.

        Dicts and lists are walked recursively, so a single bad value deep
        inside a structure is replaced by ``"<non-serializable: TypeName>"``
        while all of its siblings are preserved. Dict keys that JSON cannot
        encode are converted with ``str()``. A container that contains itself
        is replaced by the same kind of placeholder instead of recursing
        forever.

        Args:
            obj: Value to clean.
            _active: Internal; ids of the containers currently being walked.

        Returns:
            The cleaned value (new dict/list objects for containers).
        """
        if _active is None:
            _active = set()

        if not isinstance(obj, (dict, list)):
            # Leaf value: keep it only if json can encode it on its own.
            try:
                json.dumps(obj)
            except (TypeError, ValueError):
                return f"<non-serializable: {type(obj).__name__}>"
            return obj

        if id(obj) in _active:
            # Circular reference back to a container we are already inside.
            return f"<non-serializable: {type(obj).__name__}>"

        _active.add(id(obj))
        try:
            if isinstance(obj, list):
                return [self._clean_non_serializable_data(item, _active) for item in obj]

            cleaned = {}
            for key, value in obj.items():
                # json only accepts str/int/float/bool/None keys.
                if key is None or isinstance(key, (str, int, float)):
                    safe_key = key
                else:
                    safe_key = str(key)
                cleaned_value = self._clean_non_serializable_data(value, _active)
                if cleaned_value is not value and not isinstance(value, (dict, list)):
                    logger.warning(f"Removing non-serializable field: {key}")
                cleaned[safe_key] = cleaned_value
            return cleaned
        finally:
            _active.discard(id(obj))

    def log_llm_interaction(self, state: Dict, request_data: Dict, response_data: Dict,
                            model: str, turn: int) -> None:
        """Log complete LLM API interaction.

        Appends a summary of the request (counts, model, tool_choice) and of
        the first response choice (content, tool_calls, finish_reason, usage)
        to ``state["llm_interactions"]``. A missing or empty ``choices`` list is
        recorded as ``None`` values instead of raising.
        """
        timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()

        # Tolerate absent/empty "choices" and a null "message".
        choices = response_data.get("choices") or [{}]
        first_choice = choices[0] or {}
        message = first_choice.get("message") or {}
        usage = response_data.get("usage")

        interaction = {
            "timestamp": timestamp,
            "turn": turn,
            "model": model,
            "request": {
                "messages_count": len(request_data.get("messages") or []),
                "tools_count": len(request_data.get("tools") or []),
                "model": request_data.get("model"),
                "tool_choice": request_data.get("tool_choice")
            },
            "response": {
                "content": message.get("content"),
                "tool_calls": message.get("tool_calls"),
                "finish_reason": first_choice.get("finish_reason"),
                "usage": usage
            }
        }

        state.setdefault("llm_interactions", []).append(interaction)

        # Log Phoenix session information for easy debugging
        logger.debug(f"Logged LLM interaction for turn {turn} in session {self.session_id}")
        logger.debug(f"Phoenix session tracking: session_id={self.session_id}, turn={turn}, model={model}")

        # Log usage information if available for monitoring
        if usage:
            logger.info(f"Session {self.session_id} turn {turn}: "
                        f"prompt_tokens={usage.get('prompt_tokens', 0)}, "
                        f"completion_tokens={usage.get('completion_tokens', 0)}, "
                        f"total_tokens={usage.get('total_tokens', 0)}")

    def log_tool_execution(self, state: Dict, tool_call_id: str, tool_name: str,
                           tool_args: Dict, result: str, execution_data: Any = None) -> None:
        """Log tool execution with full details.

        Appends an entry to ``state["tool_executions"]`` and updates the
        counters in ``state["session_stats"]`` (created if missing). Only a
        JSON-safe summary of ``execution_data`` is stored, and ``result`` is
        truncated to 500 characters in the entry.
        """
        timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()

        # Safely summarize execution_data to prevent JSON corruption
        safe_execution_data = None
        if execution_data is not None:
            try:
                if hasattr(execution_data, '__dict__'):
                    error = getattr(execution_data, 'error', None)
                    logs = getattr(execution_data, 'logs', None)
                    safe_execution_data = {
                        "type": type(execution_data).__name__,
                        "error": str(error) if error else None,
                        "has_results": bool(getattr(execution_data, 'results', None)),
                        "has_stdout": bool(getattr(logs, 'stdout', None)),
                        "has_stderr": bool(getattr(logs, 'stderr', None))
                    }
                else:
                    # For simple types, convert to string safely
                    safe_execution_data = str(execution_data)[:200]  # Limit length
            except Exception as e:
                logger.warning(f"Failed to serialize execution_data for {tool_call_id}: {e}")
                safe_execution_data = {"serialization_error": str(e)}

        # No (or falsy) execution data counts as success; otherwise the object
        # must expose an ``error`` attribute that is None.
        # NOTE(review): truthy data without an ``error`` attribute (e.g. a
        # plain string) is therefore recorded as a failure - confirm intended.
        if execution_data:
            succeeded = hasattr(execution_data, 'error') and execution_data.error is None
        else:
            succeeded = True

        tool_execution = {
            "timestamp": timestamp,
            "tool_call_id": tool_call_id,
            "tool_name": tool_name,
            "arguments": tool_args,
            "result_summary": result[:500] + "..." if len(result) > 500 else result,
            "result_length": len(result),
            "execution_data": safe_execution_data,
            "success": succeeded
        }

        state.setdefault("tool_executions", []).append(tool_execution)

        # Update stats (tolerate states that have no stats section yet)
        stats = state.setdefault("session_stats", {})
        if tool_name == "add_and_execute_jupyter_code_cell":
            stats["total_code_executions"] = stats.get("total_code_executions", 0) + 1
        elif tool_name == "web_search":
            stats["total_searches"] = stats.get("total_searches", 0) + 1

        if not succeeded:
            stats["total_errors"] = stats.get("total_errors", 0) + 1

        logger.debug(f"Logged tool execution {tool_name} ({tool_call_id}) in session {self.session_id}")

    def add_message(self, state: Dict, role: str, content: str,
                    tool_calls: Optional[List] = None, tool_call_id: Optional[str] = None,
                    raw_execution: Any = None, metadata: Optional[Dict] = None) -> None:
        """Add message to conversation history with full context.

        ``role``, ``content`` and a UTC timestamp are always stored; each of
        the optional arguments is stored only when it is truthy.
        """
        timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()

        message = {
            "role": role,
            "content": content,
            "timestamp": timestamp
        }

        optional_fields = (
            ("tool_calls", tool_calls),
            ("tool_call_id", tool_call_id),
            ("raw_execution", raw_execution),
            ("metadata", metadata),
        )
        for field_name, field_value in optional_fields:
            if field_value:
                message[field_name] = field_value

        state["conversation_history"].append(message)
        logger.debug(f"Added {role} message to session {self.session_id} conversation history")

    def update_execution_state(self, state: Dict, **kwargs) -> None:
        """Update execution state fields.

        Only keys that already exist in ``state["execution_state"]`` are
        updated there; unknown keys are ignored for the state dict.
        """
        for key, value in kwargs.items():
            if key in state["execution_state"]:
                state["execution_state"][key] = value
                logger.debug(f"Updated execution state {key}={value} for session {self.session_id}")

        # Try to sync with global EXECUTION_STATES for UI consistency (if available):
        # when a module named 'app' is already loaded and its EXECUTION_STATES
        # has an entry for this session, every keyword argument is copied there.
        try:
            import sys
            if 'app' in sys.modules:
                execution_states = getattr(sys.modules['app'], 'EXECUTION_STATES', None)
                if execution_states and self.session_id in execution_states:
                    for key, value in kwargs.items():
                        execution_states[self.session_id][key] = value
        except (ImportError, AttributeError):
            pass  # Ignore if we can't sync with global state

    def update_notebook_data(self, state: Dict, notebook_data: Dict) -> None:
        """Update notebook data in session state (replaces the previous value)."""
        state["notebook_data"] = notebook_data
        logger.debug(f"Updated notebook data for session {self.session_id} ({len(notebook_data.get('cells', []))} cells)")

    def get_conversation_history(self, state: Dict) -> List[Dict]:
        """Return ``state["conversation_history"]``, or an empty list if absent."""
        return state.get("conversation_history", [])

    def validate_and_repair_conversation(self, state: Dict) -> None:
        """Validate and repair conversation history to ensure tool calls have responses.

        If any assistant message contains a tool call that no tool message
        answers, that assistant message is removed together with the tool
        responses to its other calls (which would otherwise be left without a
        matching assistant message). The repaired state is then saved. A
        complete conversation is left untouched and nothing is written.
        """
        conversation = state.get("conversation_history", [])
        if not conversation:
            return

        # First pass: collect ids of tool calls that never receive a response.
        pending_tool_calls = set()
        for message in conversation:
            role = message.get("role")
            if role == "assistant" and message.get("tool_calls"):
                pending_tool_calls.update(tc["id"] for tc in message["tool_calls"])
            elif role == "tool" and message.get("tool_call_id"):
                pending_tool_calls.discard(message["tool_call_id"])

        if not pending_tool_calls:
            return

        logger.warning(f"Found incomplete tool calls in conversation: {pending_tool_calls}")
        logger.warning("Removing incomplete assistant messages to repair conversation")

        # Second pass: drop incomplete assistant messages and their responses.
        repaired_messages = []
        removed_call_ids = set()
        for message in conversation:
            role = message.get("role")
            tool_calls = message.get("tool_calls")
            if (role == "assistant" and tool_calls and
                    any(tc["id"] in pending_tool_calls for tc in tool_calls)):
                removed_call_ids.update(tc["id"] for tc in tool_calls)
                logger.debug("Removing assistant message with incomplete tool calls")
                continue
            if role == "tool" and message.get("tool_call_id") in removed_call_ids:
                logger.debug("Removing tool response orphaned by a removed assistant message")
                continue
            repaired_messages.append(message)

        # Update conversation history
        state["conversation_history"] = repaired_messages
        logger.info(f"Repaired conversation: {len(conversation)} -> {len(repaired_messages)} messages")

        # Save the repaired state
        self.save_state(state)

    def session_exists(self) -> bool:
        """Check if session state file exists."""
        return self.state_file.exists()

    def get_session_summary(self, state: Dict) -> str:
        """Get human-readable session summary.

        Args:
            state: Session state dict; must contain an ISO-format
                ``"created_at"`` value.

        Returns:
            A multi-line string with creation time, counters, hardware and model.
        """
        stats = state.get("session_stats", {})
        created = datetime.datetime.fromisoformat(state["created_at"])

        summary_lines = [
            f"Session {self.session_id}:",
            f"- Created: {created.strftime('%Y-%m-%d %H:%M:%S UTC')}",
            f"- Messages: {stats.get('total_messages', 0)}",
            f"- Code Executions: {stats.get('total_code_executions', 0)}",
            f"- Web Searches: {stats.get('total_searches', 0)}",
            f"- Errors: {stats.get('total_errors', 0)}",
            f"- Duration: {stats.get('session_duration_seconds', 0)}s",
            f"- Hardware: {state.get('hardware_config', {}).get('gpu_type', 'unknown')}",
            f"- Model: {state.get('api_config', {}).get('model_name', 'unknown')}",
        ]
        return "\n".join(summary_lines)


def execute_code(sbx, code):
    """Run ``code`` through ``sbx.run_code`` and collect its textual output.

    Stdout lines, stderr lines and the error traceback (when present) are
    concatenated in that order; lines within each stream are joined with
    newlines, and nothing is inserted between the three parts.

    Returns:
        A ``(output, execution)`` tuple: the combined text and the object
        returned by ``sbx.run_code``.
    """
    logger.debug(f"Executing code in sandbox ({len(code)} characters)")

    def _log_stdout(data):
        # Streamed stdout is only forwarded to the debug log.
        logger.debug(f'stdout: {data}')

    execution = sbx.run_code(code, on_stdout=_log_stdout)

    pieces = []

    stdout_count = len(execution.logs.stdout)
    if stdout_count > 0:
        pieces.append("\n".join(execution.logs.stdout))
        logger.debug(f"Execution produced {stdout_count} stdout lines")

    stderr_count = len(execution.logs.stderr)
    if stderr_count > 0:
        pieces.append("\n".join(execution.logs.stderr))
        logger.debug(f"Execution produced {stderr_count} stderr lines")

    failure = execution.error
    if failure is not None:
        pieces.append(failure.traceback)
        logger.warning(f"Execution error: {failure.name}: {failure.value}")

    output = "".join(pieces)
    logger.debug(f"Code execution completed, output length: {len(output)}")
    return output, execution


def parse_exec_result_llm(execution, max_code_output=1000):
    """Flatten an execution object into text suitable for an LLM message.

    Sections are emitted in this order and joined with newlines: rich
    results, stdout, stderr, error traceback. Each section is truncated
    independently to ``max_code_output`` characters, with a notice appended
    when truncation happens.

    Args:
        execution: Object with ``results``, ``logs.stdout``, ``logs.stderr``
            and ``error`` attributes.
        max_code_output: Per-section character limit.

    Returns:
        The combined text (empty string when nothing was produced).
    """
    logger.debug(f"Parsing execution result for LLM (max_output: {max_code_output})")

    def clip(text):
        # Sections within the limit pass through untouched.
        if len(text) <= max_code_output:
            return text
        notice = f"\n[Output is truncated as it is more than {max_code_output} characters]"
        return text[:max_code_output] + notice

    sections = []

    results = execution.results
    if results:
        rendered = []
        plot_count = 0

        for item in results:
            # First matching representation wins: text, then png, then html.
            if getattr(item, 'text', None):
                rendered.append(item.text)
                continue
            if getattr(item, 'png', None):
                plot_count += 1
                rendered.append(f"[Plot {plot_count} generated and displayed]")
                continue
            if getattr(item, 'html', None):
                rendered.append("[HTML output generated]")

        if rendered:
            sections.append(clip("\n".join(rendered)))

        logger.debug(f"Added {len(results)} execution results (including {plot_count} plots)")

    for stream_name in ("stdout", "stderr"):
        lines = getattr(execution.logs, stream_name)
        if lines:
            sections.append(clip("\n".join(lines)))
            logger.debug(f"Added {stream_name} output ({len(lines)} lines)")

    failure = execution.error
    if failure is not None:
        sections.append(clip(failure.traceback))
        logger.debug(f"Added error traceback: {failure.name}")

    final_output = "\n".join(sections)
    logger.debug(f"Parsed execution result for LLM: {len(final_output)} characters")
    return final_output

def clean_messages_for_api(messages):
    """
    Build an API-safe copy of the conversation.

    Every message is shallow-copied and stripped of the ``raw_execution``,
    ``metadata`` and ``timestamp`` fields (intended to avoid oversized
    requests / 413 errors). The input list and its dicts are not modified.

    The tool-call structure is then validated: any assistant message that
    contains at least one tool call without a matching ``tool`` response is
    removed. Tool responses to the *other* calls of such a removed message
    are removed as well, because a ``tool`` message whose originating
    assistant message is gone is itself rejected by the API.

    Args:
        messages: List of message dicts (``role``, ``content``, optionally
            ``tool_calls`` / ``tool_call_id`` plus bookkeeping fields).

    Returns:
        A new list of cleaned message dicts, in the original order.
    """
    logger.debug(f"Cleaning {len(messages)} messages for API call")
    cleaned_messages = []
    raw_execution_count = 0
    metadata_count = 0
    pending_tool_calls = set()

    for message in messages:
        cleaned_message = message.copy()

        # Remove raw_execution data
        if "raw_execution" in cleaned_message:
            cleaned_message.pop("raw_execution")
            raw_execution_count += 1

        # Remove metadata and timestamp
        if "metadata" in cleaned_message:
            cleaned_message.pop("metadata")
            metadata_count += 1
        cleaned_message.pop("timestamp", None)

        # Track tool calls and responses for validation
        role = cleaned_message.get("role")
        if role == "assistant" and cleaned_message.get("tool_calls"):
            for tool_call in cleaned_message["tool_calls"]:
                pending_tool_calls.add(tool_call["id"])
        elif role == "tool" and cleaned_message.get("tool_call_id"):
            pending_tool_calls.discard(cleaned_message["tool_call_id"])

        cleaned_messages.append(cleaned_message)

    # Anything still pending here is a tool call that never got a response.
    if pending_tool_calls:
        logger.warning(f"Found {len(pending_tool_calls)} tool calls without responses: {pending_tool_calls}")
        logger.warning("Removing incomplete tool call messages to prevent API errors")

        # Collect every call id issued by an assistant message that is about
        # to be dropped, including the calls that WERE answered: their tool
        # responses would otherwise be left without a parent message.
        dropped_call_ids = set()
        for message in cleaned_messages:
            if message.get("role") == "assistant" and message.get("tool_calls"):
                call_ids = {tc["id"] for tc in message["tool_calls"]}
                if call_ids & pending_tool_calls:
                    dropped_call_ids |= call_ids

        filtered_messages = []
        for message in cleaned_messages:
            role = message.get("role")
            if (role == "assistant" and
                    message.get("tool_calls") and
                    any(tc["id"] in dropped_call_ids for tc in message["tool_calls"])):
                logger.debug("Removing assistant message with incomplete tool calls")
                continue
            if role == "tool" and message.get("tool_call_id") in dropped_call_ids:
                logger.debug("Removing tool response whose assistant message was removed")
                continue
            filtered_messages.append(message)

        cleaned_messages = filtered_messages

    logger.debug(f"Cleaned messages: removed raw_execution from {raw_execution_count}, metadata from {metadata_count}")
    logger.debug(f"Final cleaned message count: {len(cleaned_messages)}")
    return cleaned_messages


def web_search(query):
    """
    Run a Tavily web search and return the results as LLM-ready text.

    The query is cut to 400 characters, and the current year is appended
    when it is not already part of the query and the result still fits in
    400 characters.

    Args:
        query (str): Search query (max 400 characters)

    Returns:
        str: Formatted search results, or a "❌ ..." message when the client
        is not configured or the search raises.
    """
    if not tavily_client:
        logger.error("Tavily client not initialized - API key missing")
        return "❌ Search unavailable: Tavily API key not configured"

    # Enforce the query length limit before anything is appended.
    if len(query) > 400:
        logger.warning(f"Query too long ({len(query)} chars), truncating to 400")
        query = query[:400]

    # Bias towards recent results by mentioning the current year, but only
    # when that does not push the query over the limit.
    year_text = str(datetime.datetime.now().year)
    if year_text not in query:
        with_year = f"{query} {year_text}"
        if len(with_year) <= 400:
            query = with_year
            logger.debug(f"Added current year to query: {year_text}")

    logger.info(f"Performing Tavily search: '{query}' ({len(query)} chars)")

    search_options = {
        "query": query,
        "search_depth": "basic",        # basic depth for faster results
        "max_results": 5,               # keep the context small
        "include_answer": True,         # ask for the generated answer
        "include_raw_content": False,   # skip raw page content to save tokens
        "include_images": False,        # no images
    }

    try:
        response = tavily_client.search(**search_options)
        hits = response.get('results', [])
        logger.info(f"Search completed: {len(hits)} results found")

        formatted_results = format_search_results_for_llm(response)
        logger.debug(f"Formatted search results: {len(formatted_results)} characters")
        return formatted_results
    except Exception as e:
        logger.error(f"Tavily search failed: {str(e)}")
        return f"❌ Search failed: {str(e)}"


def format_search_results_for_llm(response):
    """Render a Tavily search response dict as markdown-style text.

    Reads the ``query``, ``answer`` and ``results`` keys of ``response``.
    Each result contributes its ``title``, ``url``, ``content`` (included
    in full, not truncated) and ``score`` formatted to two decimals.

    Returns:
        str: Header, optional quick answer, then either a numbered list of
        sources or a "No results found." line.
    """
    query = response.get('query', 'Unknown query')
    answer = response.get('answer', '')
    results = response.get('results', [])

    chunks = [f"🔍 **Web Search Results for:** {query}\n\n"]

    if answer:
        chunks.append(f"**Quick Answer:** {answer}\n\n")

    if not results:
        chunks.append("No results found.\n")
        return "".join(chunks)

    chunks.append(f"**Found {len(results)} relevant sources:**\n\n")

    for position, entry in enumerate(results, start=1):
        title = entry.get('title', 'Untitled')
        url = entry.get('url', '')
        content = entry.get('content', '')
        score = entry.get('score', 0)

        chunks.append(
            f"**{position}. {title}** (Relevance: {score:.2f})\n"
            f"   🔗 {url}\n"
            f"   📄 {content}\n\n"
        )

    return "".join(chunks)


def run_interactive_notebook_with_session_state(client, model, session_state_manager, session_state, sbx, stop_event=None, tools=None):
    logger.info(f"Starting interactive notebook with session state for {session_state_manager.session_id}")
    
    # Get conversation history from session state
    messages = session_state_manager.get_conversation_history(session_state)
    notebook = JupyterNotebook(messages)
    
    # Update execution state
    session_state_manager.update_execution_state(session_state, is_running=True, sandbox_active=True, current_phase="initializing")
    
    # Use provided tools or default to all tools
    if tools is None:
        tools = TOOLS
    
    try:
        sbx_info = sbx.get_info()
        notebook.add_sandbox_countdown(sbx_info.started_at, sbx_info.end_at)
        
        # Store sandbox info in session state
        session_state["execution_state"]["sandbox_info"] = {
            "started_at": sbx_info.started_at.isoformat(),
            "end_at": sbx_info.end_at.isoformat(),
            "timeout_seconds": int((sbx_info.end_at - sbx_info.started_at).total_seconds())
        }
        
        logger.debug(f"Added sandbox countdown: {sbx_info.started_at} to {sbx_info.end_at}")
    except Exception as e:
        logger.warning(f"Failed to get sandbox info: {str(e)}")
    
    logger.debug("Initial notebook yield in 'generating' mode")
    
    # Update notebook data in session state
    session_state_manager.update_notebook_data(session_state, notebook.data)
    
    # Save initial state
    session_state_manager.save_state(session_state)
    
    yield notebook.render(mode="generating"), notebook.data, messages
    
    max_code_output = 1000
    turns = session_state["execution_state"]["current_turn"]
    done = False
    previous_execution_had_error = False
    previous_execution_had_warnings = False
    
    logger.info(f"Starting interactive loop from turn {turns} with max_output={max_code_output}, max_turns={MAX_TURNS}")

    while not done and (turns <= MAX_TURNS) and (stop_event is None or not stop_event.is_set()):
        turns += 1
        logger.info(f"Starting turn {turns}/{MAX_TURNS}")
        
        try:
            # Update phase to generating
            session_state_manager.update_execution_state(session_state, current_phase="generating")
            
            # Refresh messages from session state before API call
            messages = session_state_manager.get_conversation_history(session_state)
            logger.debug(f"Making API call to {model} with {len(messages)} messages")

            # Prepare request data for logging
            request_data = {
                "messages": clean_messages_for_api(messages),
                "model": model,
                "tools": tools,
                "tool_choice": "auto"
            }
            
            # Prepare session metadata for Phoenix tracing
            session_metadata = {
                "turn": turns,
                "max_turns": MAX_TURNS,
                "model": model,
                "tools_count": len(tools),
                "messages_count": len(messages),
                "current_phase": "generating"
            }
            
            # Add hardware config if available
            if "hardware_config" in session_state:
                hw_config = session_state["hardware_config"]
                session_metadata.update({
                    "gpu_type": hw_config.get("gpu_type", "unknown"),
                    "cpu_cores": hw_config.get("cpu_cores", "unknown"),
                    "memory_gb": hw_config.get("memory_gb", "unknown")
                })
            
            # Wrap OpenAI API call with Phoenix session context for proper grouping
            with create_phoenix_session_context(
                session_id=session_state_manager.session_id,
                user_id=None,  # Could be extracted from request context if available
                metadata=session_metadata
            ):
                logger.debug(f"Making OpenAI API call with Phoenix session context: {session_state_manager.session_id}")
                response = client.chat.completions.create(**request_data)
                logger.debug("API call successful within Phoenix session context")
            
            # Log the complete LLM interaction
            session_state_manager.log_llm_interaction(
                session_state, request_data, response.model_dump(), model, turns
            )
        except Exception as e:
            # Handle inference client errors
            logger.error(f"Inference failed on turn {turns}: {str(e)}")
            
            # Add detailed error information to the notebook
            error_message = str(e)
            if "429" in error_message or "too_many_requests" in error_message.lower():
                detailed_error = f"""**API Rate Limit Exceeded** 🚫

The inference service has reached its rate limit. This typically means:
- Too many requests have been sent in a short period
- Daily quota has been exceeded
- Service is temporarily overloaded

**What you can try:**
- Wait a few minutes and try again
- If using Cerebras API, check your daily quota
- Try using a different model or service
- Contact support if the issue persists

**Technical details:**
```
{error_message}
```"""
            elif "401" in error_message or "unauthorized" in error_message.lower():
                detailed_error = f"""**Authentication Error** 🔐

There's an issue with API authentication:
- API key might be missing or invalid
- API key might have expired
- Insufficient permissions

**Technical details:**
```
{error_message}
```"""
            elif "500" in error_message or "internal" in error_message.lower():
                detailed_error = f"""**Server Error** 🔧

The inference service encountered an internal error:
- Service might be temporarily unavailable
- Try again in a few moments
- If the issue persists, it's likely a service-side problem

**Technical details:**
```
{error_message}
```"""
            else:
                detailed_error = f"""**Inference Service Error** ⚠️

An error occurred while communicating with the AI service:

**Technical details:**
```
{error_message}
```

**What you can try:**
- Check your internet connection
- Try again in a few moments
- If the problem persists, contact support"""
            
            notebook.add_error(detailed_error)
            
            # Add error to session state
            session_state_manager.add_message(
                session_state, "assistant", detailed_error, 
                metadata={"type": "error", "error_type": "api_error", "turn": turns}
            )
            
            # Update execution state
            session_state_manager.update_execution_state(
                session_state, is_running=False, last_execution_successful=False
            )
            
            # Update notebook data and save state
            session_state_manager.update_notebook_data(session_state, notebook.data)
            session_state_manager.save_state(session_state)
            
            yield notebook.render(mode="error"), notebook.data, messages
            return

        # Get the response content and tool calls
        full_response = response.choices[0].message.content or ""
        tool_calls = response.choices[0].message.tool_calls or []
        
        logger.debug(f"Turn {turns}: Response content length: {len(full_response)}, Tool calls: {len(tool_calls)}")

        # Add markdown cell for assistant's thinking
        if full_response.strip():
            logger.debug(f"Adding assistant response as markdown ({len(full_response)} chars)")
            notebook.add_markdown(full_response, "assistant")
        else:
            logger.debug("Skipping empty assistant response")

        # Handle tool calls and add assistant message to session state only
        if tool_calls:
            logger.info(f"Processing {len(tool_calls)} tool calls on turn {turns}")
            # Add assistant message to session state (messages will be derived from this)
            session_state_manager.add_message(
                session_state, "assistant", full_response,
                tool_calls=[{
                    "id": tc.id,
                    "type": "function",
                    "function": {"name": tc.function.name, "arguments": tc.function.arguments}
                } for tc in tool_calls],
                metadata={"turn": turns, "type": "thinking"}
            )
            logger.debug(f"Added assistant message with {len(tool_calls)} tool calls to session state")
        elif full_response.strip():
            # If no tool calls but we have content, add regular assistant message
            session_state_manager.add_message(
                session_state, "assistant", full_response,
                metadata={"turn": turns, "type": "thinking"}
            )
            logger.debug("Added regular assistant message to session state")
        
        for i, tool_call in enumerate(tool_calls):
            logger.debug(f"Processing tool call {i+1}/{len(tool_calls)}: {tool_call.function.name}")

            if tool_call.function.name == "add_and_execute_jupyter_code_cell":
                # Update phase to executing code
                session_state_manager.update_execution_state(session_state, current_phase="executing_code")
                
                logger.debug(f"Processing code execution tool call: {tool_call.id}")
                tool_args = json.loads(tool_call.function.arguments)
                code = tool_args["code"]
                logger.debug(f"Code to execute: {len(code)} characters")
                
                # Determine if we should reuse the last cell or create a new one
                # Reuse if there were errors (not just warnings) in the previous execution
                should_reuse_cell = (previous_execution_had_error and 
                                   notebook.get_last_cell_type() == "code")
                
                if should_reuse_cell:
                    logger.info("Reusing last code cell due to previous execution error")
                    # Update the existing cell's code instead of creating a new one
                    notebook.update_last_code_cell(code)
                else:
                    logger.debug("Creating new code cell")
                    # Create a new cell (normal behavior)
                    notebook.add_code(code)
                
                logger.debug("Yielding notebook in 'executing' mode")
                yield notebook.render(mode="executing"), notebook.data, messages

                try:
                    # Check for stop event before execution
                    if stop_event and stop_event.is_set():
                        logger.info("Stop event detected before code execution")
                        stopped_message = """**Execution Stopped** ⏸️

The execution was stopped by user request before the code could run."""
                        notebook.add_markdown(stopped_message, "assistant")
                        yield notebook.render(mode="stopped"), notebook.data, messages
                        return
                    
                    # Execution sandbox call - might timeout
                    logger.info("Executing code in sandbox")
                    execution = sbx.run_code(code)
                    notebook.append_execution(execution)
                    
                    # Update error and warning tracking for next iteration
                    previous_execution_had_error = notebook.has_execution_error(execution)
                    previous_execution_had_warnings = notebook.has_execution_warnings(execution)
                    # Log tool execution in session state
                    tool_args = json.loads(tool_call.function.arguments)
                    tool_response_content = parse_exec_result_llm(execution, max_code_output=max_code_output)
                    session_state_manager.log_tool_execution(
                        session_state, tool_call.id, "add_and_execute_jupyter_code_cell",
                        tool_args, tool_response_content, execution
                    )
                    
                    if previous_execution_had_error:
                        logger.warning("Code execution resulted in error")
                    elif previous_execution_had_warnings:
                        logger.info("Code execution completed with warnings")
                    else:
                        logger.info("Code execution completed successfully")
                    
                except Exception as e:
                    # Handle sandbox timeout/execution errors
                    logger.error(f"Code execution failed: {str(e)}")
                    
                    # Add detailed error information for code execution failures
                    error_message = str(e)
                    if "timeout" in error_message.lower():
                        detailed_error = f"""**Code Execution Timeout** ⏰

The code execution took too long and was terminated:
- Code may have entered an infinite loop
- Processing large datasets can cause timeouts
- Complex computations may exceed time limits

**What you can try:**
- Optimize your code for better performance
- Break down complex operations into smaller steps
- Increase the timeout limit in settings
- Check for infinite loops or blocking operations

**Technical details:**
```
{error_message}
```"""
                    else:
                        detailed_error = f"""**Code Execution Failed** 💥

An error occurred while executing the code in the sandbox:

**Technical details:**
```
{error_message}
```

**What you can try:**
- Check the code for syntax errors
- Verify all required packages are available
- Try simplifying the code
- Check the sandbox logs for more details"""
                    
                    notebook.add_error(detailed_error)
                    yield notebook.render(mode="error"), notebook.data, messages
                    return

                # Prepare tool response (already computed above)
                raw_execution = notebook.parse_exec_result_nb(execution)
                
                logger.debug(f"Tool response: {len(tool_response_content)} chars content, {len(raw_execution)} raw outputs")

                # Add tool response to session state only
                session_state_manager.add_message(
                    session_state, "tool", tool_response_content,
                    tool_call_id=tool_call.id, raw_execution=raw_execution,
                    metadata={"turn": turns, "execution_successful": not previous_execution_had_error}
                )
            elif tool_call.function.name == "web_search":
                # Update phase to searching
                session_state_manager.update_execution_state(session_state, current_phase="searching")
                
                logger.debug(f"Processing search tool call: {tool_call.id}")
                tool_args = json.loads(tool_call.function.arguments)
                query = tool_args["query"]
                logger.debug(f"Search query: '{query}' ({len(query)} chars)")
                
                # Add search status to notebook
                notebook.add_markdown("🔍 **Searching the web...**", "assistant")
                yield notebook.render(mode="generating"), notebook.data, messages
                
                try:
                    # Perform search
                    search_results = web_search(query)
                    logger.info("Search completed successfully")
                    
                    # Log search tool execution
                    tool_args = json.loads(tool_call.function.arguments)
                    session_state_manager.log_tool_execution(
                        session_state, tool_call.id, "web_search",
                        tool_args, search_results
                    )
                    
                    # Add search results to notebook
                    notebook.add_markdown(search_results, "assistant")
                    
                    # Add tool response to session state only
                    session_state_manager.add_message(
                        session_state, "tool", search_results,
                        tool_call_id=tool_call.id,
                        metadata={"turn": turns, "search_successful": True}
                    )
                    
                except Exception as e:
                    error_message = f"❌ Search failed: {str(e)}"
                    logger.error(f"Search tool call failed: {str(e)}")
                    
                    # Log failed search
                    tool_args = json.loads(tool_call.function.arguments)
                    session_state_manager.log_tool_execution(
                        session_state, tool_call.id, "web_search",
                        tool_args, error_message
                    )
                    
                    # Add error to notebook
                    notebook.add_markdown(error_message, "assistant")
                    
                    # Add error response to session state only
                    session_state_manager.add_message(
                        session_state, "tool", error_message,
                        tool_call_id=tool_call.id,
                        metadata={"turn": turns, "search_successful": False, "error": str(e)}
                    )
            elif tool_call.function.name == "edit_and_execute_current_cell":
                # Update phase to executing code
                session_state_manager.update_execution_state(session_state, current_phase="executing_code")
                
                logger.debug(f"Processing edit current cell tool call: {tool_call.id}")
                tool_args = json.loads(tool_call.function.arguments)
                code = tool_args["code"]
                logger.debug(f"Code to execute in current cell: {len(code)} characters")
                
                # Check if we have a code cell to edit
                if notebook.get_last_cell_type() == "code":
                    logger.info("Editing last code cell with new code")
                    notebook.update_last_code_cell(code)
                else:
                    logger.info("No code cell to edit, creating new cell")
                    notebook.add_code(code)
                
                logger.debug("Yielding notebook in 'executing' mode")
                yield notebook.render(mode="executing"), notebook.data, messages

                try:
                    # Check for stop event before execution
                    if stop_event and stop_event.is_set():
                        logger.info("Stop event detected before code execution")
                        stopped_message = """**Execution Stopped** ⏸️

The execution was stopped by user request before the code could run."""
                        notebook.add_markdown(stopped_message, "assistant")
                        yield notebook.render(mode="stopped"), notebook.data, messages
                        return
                    
                    # Execution sandbox call - might timeout
                    logger.info("Executing edited code in sandbox")
                    execution = sbx.run_code(code)
                    notebook.append_execution(execution)
                    
                    # Update error and warning tracking for next iteration
                    previous_execution_had_error = notebook.has_execution_error(execution)
                    previous_execution_had_warnings = notebook.has_execution_warnings(execution)
                    # Log tool execution in session state
                    tool_response_content = parse_exec_result_llm(execution, max_code_output=max_code_output)
                    session_state_manager.log_tool_execution(
                        session_state, tool_call.id, "edit_and_execute_current_cell",
                        tool_args, tool_response_content, execution
                    )
                    
                    if previous_execution_had_error:
                        logger.warning("Edited code execution resulted in error")
                    elif previous_execution_had_warnings:
                        logger.info("Edited code execution completed with warnings")
                    else:
                        logger.info("Edited code execution completed successfully")
                    
                except Exception as e:
                    # Handle sandbox timeout/execution errors
                    logger.error(f"Edited code execution failed: {str(e)}")
                    
                    # Add detailed error information for code execution failures
                    error_message = str(e)
                    if "timeout" in error_message.lower():
                        detailed_error = f"""**Code Execution Timeout** ⏰

The edited code execution took too long and was terminated:
- Code may have entered an infinite loop
- Processing large datasets can cause timeouts
- Complex computations may exceed time limits

**What you can try:**
- Optimize your code for better performance
- Break down complex operations into smaller steps
- Increase the timeout limit in settings
- Check for infinite loops or blocking operations

**Technical details:**
```
{error_message}
```"""
                    else:
                        detailed_error = f"""**Code Execution Failed** 💥

An error occurred while executing the edited code in the sandbox:

**Technical details:**
```
{error_message}
```

**What you can try:**
- Check the code for syntax errors
- Verify all required packages are available
- Try simplifying the code
- Check the sandbox logs for more details"""
                    
                    notebook.add_error(detailed_error)
                    yield notebook.render(mode="error"), notebook.data, messages
                    return

                # Prepare tool response
                raw_execution = notebook.parse_exec_result_nb(execution)
                
                logger.debug(f"Tool response: {len(tool_response_content)} chars content, {len(raw_execution)} raw outputs")

                # Add tool response to session state only
                session_state_manager.add_message(
                    session_state, "tool", tool_response_content,
                    tool_call_id=tool_call.id, raw_execution=raw_execution,
                    metadata={"turn": turns, "execution_successful": not previous_execution_had_error, "action": "edit_cell"}
                )
            elif tool_call.function.name == "execute_shell_command":
                # Update phase to executing shell command
                session_state_manager.update_execution_state(session_state, current_phase="executing_shell")
                
                logger.debug(f"Processing shell command tool call: {tool_call.id}")
                tool_args = json.loads(tool_call.function.arguments)
                command = tool_args["command"]
                logger.debug(f"Shell command to execute: '{command}'")
                
                # Add shell command to notebook with special styling
                notebook.add_shell_command(command)
                
                logger.debug("Yielding notebook in 'executing' mode")
                yield notebook.render(mode="executing"), notebook.data, messages

                try:
                    # Check for stop event before execution
                    if stop_event and stop_event.is_set():
                        logger.info("Stop event detected before shell execution")
                        stopped_message = """**Execution Stopped** ⏸️

The execution was stopped by user request before the shell command could run."""
                        notebook.add_markdown(stopped_message, "assistant")
                        yield notebook.render(mode="stopped"), notebook.data, messages
                        return
                    
                    # Execute shell command in sandbox using raw shell execution
                    logger.info(f"Executing raw shell command in sandbox: {command}")
                    
                    try:
                        # Use the new raw shell execution method
                        if hasattr(sbx, 'run_shell'):
                            shell_execution = sbx.run_shell(command, timeout=60)
                            logger.info("Shell command executed using raw shell method")
                        else:
                            # Fallback: Execute shell command using Python subprocess within sandbox
                            shell_code = f"""
import subprocess
import sys

try:
    result = subprocess.run(
        {repr(command)}, 
        shell=True, 
        capture_output=True, 
        text=True, 
        timeout=60
    )
    
    if result.stdout:
        print("STDOUT:")
        print(result.stdout)
    
    if result.stderr:
        print("STDERR:")
        print(result.stderr)
    
    print(f"Exit code: {{result.returncode}}")
    
except subprocess.TimeoutExpired:
    print("Error: Command timed out after 60 seconds")
except Exception as e:
    print(f"Error executing command: {{e}}")
"""
                            shell_execution = sbx.run_code(shell_code)
                            logger.info("Shell command executed via Python subprocess fallback")
                        
                        # Add shell execution results to notebook
                        notebook.append_shell_execution(shell_execution)
                        
                        # Prepare response content for LLM
                        shell_response_content = parse_exec_result_llm(shell_execution, max_code_output=max_code_output)
                        
                        # Log tool execution in session state
                        session_state_manager.log_tool_execution(
                            session_state, tool_call.id, "execute_shell_command",
                            tool_args, shell_response_content, shell_execution
                        )
                        
                        # Check for errors
                        shell_had_error = notebook.has_execution_error(shell_execution)
                        
                        if shell_had_error:
                            logger.warning("Shell command execution resulted in error")
                        else:
                            logger.info("Shell command execution completed successfully")
                        
                    except Exception as shell_error:
                        logger.error(f"Shell command execution failed: {str(shell_error)}")
                        
                        # Create error message
                        detailed_error = f"""**Shell Command Failed** 🔧

An error occurred while executing the shell command:

**Command:** `{command}`

**Technical details:**
```
{str(shell_error)}
```

**What you can try:**
- Check if the command exists in the sandbox environment
- Verify command syntax
- Try a simpler version of the command
- Check if required tools/packages are installed"""
                        
                        notebook.add_error(detailed_error)
                        
                        # Log failed execution
                        session_state_manager.log_tool_execution(
                            session_state, tool_call.id, "execute_shell_command",
                            tool_args, detailed_error
                        )
                        
                        yield notebook.render(mode="error"), notebook.data, messages
                        return
                    
                except Exception as e:
                    # Handle general execution errors
                    logger.error(f"Shell command execution failed: {str(e)}")
                    
                    detailed_error = f"""**Shell Execution Error** ⚠️

An unexpected error occurred while executing the shell command:

**Command:** `{command}`

**Technical details:**
```
{str(e)}
```"""
                    
                    notebook.add_error(detailed_error)
                    yield notebook.render(mode="error"), notebook.data, messages
                    return

                # Prepare tool response for LLM and session state
                raw_execution = notebook.parse_exec_result_nb(shell_execution)
                
                logger.debug(f"Shell tool response: {len(shell_response_content)} chars content")

                # Add tool response to session state
                session_state_manager.add_message(
                    session_state, "tool", shell_response_content,
                    tool_call_id=tool_call.id, raw_execution=raw_execution,
                    metadata={"turn": turns, "command": command, "execution_successful": not shell_had_error, "action": "shell_command"}
                )
            else:
                logger.warning(f"Unknown tool call function: {tool_call.function.name}")

        if not tool_calls:
            logger.info(f"No tool calls on turn {turns}, conversation ending")
            if len(full_response.strip())==0:
                logger.error("Assistant provided no content and no tool calls")
                notebook.add_error(f"No tool call and empty assistant response:\n{response.model_dump_json(indent=2)}")

            # Only add the final assistant message if we didn't already add it above
            # (in the elif full_response.strip() block)
            if full_response.strip():
                # Since we're now only using session state, we can safely add the message
                # The session state manager will handle any deduplication if needed
                session_state_manager.add_message(
                    session_state, "assistant", full_response,
                    metadata={"turn": turns, "type": "final_response"}
                )
                logger.debug("Added final assistant response to session state")
            
            done = True
            
        # Update session state after each turn
        session_state_manager.update_execution_state(
            session_state, current_turn=turns, last_execution_successful=not previous_execution_had_error
        )
        session_state_manager.update_notebook_data(session_state, notebook.data)
        session_state_manager.save_state(session_state)
        
        if done:
            logger.info(f"Interactive notebook completed after {turns} turns")
            session_state_manager.update_execution_state(
                session_state, is_running=False, sandbox_active=True
            )
            session_state_manager.save_state(session_state)
            yield notebook.render(mode="done"), notebook.data, messages
        else:
            logger.debug(f"Turn {turns} completed, yielding in 'generating' mode")
            yield notebook.render(mode="generating"), notebook.data, messages
    
    if turns > MAX_TURNS:
        logger.warning(f"Interactive notebook reached maximum turns ({MAX_TURNS})")
        error_msg = f"**Maximum Turns Reached** 🔄\n\nThe conversation has reached the maximum number of turns ({MAX_TURNS}). This is a safety limit to prevent infinite loops.\n\n**What you can try:**\n- Start a new conversation\n- Clear the notebook and begin fresh\n- Contact support if you need a higher turn limit"
        notebook.add_error(error_msg)
        
        # Add error to session state
        session_state_manager.add_message(
            session_state, "assistant", error_msg,
            metadata={"type": "error", "error_type": "max_turns_exceeded", "turn": turns}
        )
        
        # Update final state
        session_state_manager.update_execution_state(
            session_state, is_running=False, last_execution_successful=False
        )
        session_state_manager.update_notebook_data(session_state, notebook.data)
        session_state_manager.save_state(session_state)
        
        yield notebook.render(mode="error"), notebook.data, messages
    elif stop_event and stop_event.is_set():
        logger.info("Interactive notebook stopped by user")
        
        # Add a stopped message to the notebook
        stopped_message = """**Execution Stopped** ⏸️

The execution was stopped by user request. You can resume by clicking Run again."""
        notebook.add_markdown(stopped_message, "assistant")
        
        # Add stopped message to session state
        session_state_manager.add_message(
            session_state, "assistant", stopped_message,
            metadata={"type": "status", "status_type": "stopped_by_user", "turn": turns}
        )
        
        # Update state to indicate pause
        session_state_manager.update_execution_state(
            session_state, is_running=False, is_paused=True
        )
        session_state_manager.update_notebook_data(session_state, notebook.data)
        session_state_manager.save_state(session_state)
        
        yield notebook.render(mode="stopped"), notebook.data, messages


def run_interactive_notebook(client, model, messages, sbx, stop_event=None, tools=None):
    """Legacy generator entry point layered on top of the session-state runner.

    Creates a throwaway session (random 8-character id) filled with
    placeholder hardware/API/environment settings, seeds that session's
    conversation history with ``messages`` and then hands control to
    ``run_interactive_notebook_with_session_state``.

    Because this is a generator function, none of the work below (including
    the deprecation warning) happens until the caller starts iterating.

    Args:
        client: Forwarded unchanged to the session-state runner.
        model: Model name; stored in the session's API config and forwarded.
        messages: Existing conversation. If its first entry has role
            ``"system"``, that entry's content becomes the session's system
            prompt; otherwise the prompt is empty. The same list object (not
            a copy) is installed as the session's conversation history.
        sbx: Sandbox object, forwarded unchanged.
        stop_event: Optional stop signal, forwarded unchanged.
        tools: Optional tool definitions, forwarded unchanged.

    Yields:
        Every item produced by ``run_interactive_notebook_with_session_state``.
    """
    logger.warning("Using legacy run_interactive_notebook - this should be replaced with session state version")

    import uuid

    # The session only needs to live for this call, so a short random id is enough.
    # UUID.hex[:8] is the same eight characters as str(uuid)[:8].
    session_manager = SessionStateManager(uuid.uuid4().hex[:8])

    # Reuse a leading system message as the session's system prompt, if present.
    system_prompt = ""
    if messages and messages[0].get("role") == "system":
        system_prompt = messages[0].get("content", "")

    # Legacy callers supply none of these details, so record placeholders.
    placeholder_hardware = {"gpu_type": "unknown", "cpu_cores": 2, "memory_gb": 8, "timeout_sec": 300}
    placeholder_api = {"model_name": model, "provider_type": "unknown"}
    placeholder_environment = {"variables": "", "files_uploaded": []}

    session_state = session_manager.create_initial_state(
        hardware_config=placeholder_hardware,
        api_config=placeholder_api,
        environment=placeholder_environment,
        system_prompt=system_prompt,
    )

    # Start the session from the caller's existing conversation.
    session_state["conversation_history"] = messages

    # Delegate the actual run to the session-based implementation.
    yield from run_interactive_notebook_with_session_state(
        client, model, session_manager, session_state, sbx, stop_event, tools
    )