grlll Claude committed on
Commit 4b1e061 · 1 Parent(s): d72e914

feat: Add Apple Health XML parser and SQLite database generation


- Add comprehensive Apple Health XML parser with SQLModel
- Parse XML data into SQLite database for efficient querying
- Update app to upload both XML and parsed SQLite to dataset
- Modify MCP server to query SQLite instead of parsing XML
- Add support for workouts query type
- Include all Apple Health data types (records, workouts, correlations, etc.)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
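
The end-to-end flow this commit introduces, as a minimal sketch (not part of the commit itself): construct the parser, stream the XML export into SQLite, then query the database directly. The constructor and method signatures are taken from `src/parser/parser.py` below; the step-count query is illustrative and assumes the SQLModel default table name `record` from `src/parser/models.py`.

```python
import sqlite3

from src.parser.parser import AppleHealthParser

# Stream the Apple Health export into a SQLite database; the parser
# creates the schema, dedupes, and (by default) keeps the last 180 days.
parser = AppleHealthParser(db_path="data/health_data.db")
parser.parse_file("export.xml")

# Query the parsed data directly, e.g. daily step totals for the last week.
# (Record values are stored as strings, hence the CAST.)
conn = sqlite3.connect("data/health_data.db")
rows = conn.execute(
    "SELECT DATE(start_date) AS day, SUM(CAST(value AS REAL)) AS steps "
    "FROM record "
    "WHERE type = 'HKQuantityTypeIdentifierStepCount' "
    "GROUP BY day ORDER BY day DESC LIMIT 7"
).fetchall()
conn.close()
```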

Files changed (8)
  1. .gitignore +3 -0
  2. app.py +116 -59
  3. pyproject.toml +3 -0
  4. src/__init__.py +0 -0
  5. src/parser/__init__.py +0 -0
  6. src/parser/models.py +402 -0
  7. src/parser/parser.py +1215 -0
  8. uv.lock +123 -0
.gitignore CHANGED
@@ -8,3 +8,6 @@ wheels/
 
 # Virtual environments
 .venv
+.DS_Store
+
+data
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
 from huggingface_hub import HfApi, create_repo
 from huggingface_hub.utils import RepositoryNotFoundError
 import os
+import tempfile
+from src.parser.parser import AppleHealthParser
 
 
 def create_interface():
@@ -87,22 +89,42 @@ def create_interface():
                 token=token
             )
 
-            # Upload the export.xml file
+            # Upload the export.xml file (first commit)
             api.upload_file(
                 path_or_fileobj=file_path,
                 path_in_repo="export.xml",
                 repo_id=dataset_repo_id,
                 repo_type="dataset",
-                token=token
+                token=token,
+                commit_message="Initial upload: export.xml"
             )
 
+            # Parse the XML file and create SQLite database
+            with tempfile.TemporaryDirectory() as temp_dir:
+                db_path = os.path.join(temp_dir, "health_data.db")
+
+                # Parse the XML file (the parser takes the database path at
+                # construction time; parse_file takes only the XML path)
+                parser = AppleHealthParser(db_path=db_path)
+                parser.parse_file(file_path)
+
+                # Upload the SQLite database (second commit)
+                api.upload_file(
+                    path_or_fileobj=db_path,
+                    path_in_repo="health_data.db",
+                    repo_id=dataset_repo_id,
+                    repo_type="dataset",
+                    token=token,
+                    commit_message="Add parsed SQLite database"
+                )
+
             # Create README for dataset
             dataset_readme = f"""# Apple Health Data
 
 This is a private dataset containing Apple Health export data for {username}.
 
 ## Files
-- `export.xml`: The Apple Health export file
+- `export.xml`: The original Apple Health export file
+- `health_data.db`: SQLite database with parsed health data
 
 ## Associated MCP Server
 - Space: [{space_repo_id}](https://huggingface.co/spaces/{space_repo_id})
@@ -131,7 +153,7 @@ This dataset is private and contains personal health information. Do not share a
     # Create MCP server app.py
     mcp_app_content = f'''import gradio as gr
 from huggingface_hub import hf_hub_download
-import xml.etree.ElementTree as ET
+import sqlite3
 import pandas as pd
 from datetime import datetime
 import json
@@ -139,64 +161,98 @@ import json
 # Download the health data
 DATA_REPO = "{dataset_repo_id}"
 
-def load_health_data():
-    """Load and parse the Apple Health export.xml file."""
+def get_db_connection():
+    """Get a connection to the SQLite database."""
+    # Download the health_data.db file from the dataset
+    db_path = hf_hub_download(
+        repo_id=DATA_REPO,
+        filename="health_data.db",
+        repo_type="dataset",
+        use_auth_token=True
+    )
+    return sqlite3.connect(db_path)
+
+def query_health_data(query_type, start_date=None, end_date=None):
+    """Query the health data based on user input."""
     try:
-        # Download the export.xml file from the dataset
-        file_path = hf_hub_download(
-            repo_id=DATA_REPO,
-            filename="export.xml",
-            repo_type="dataset",
-            use_auth_token=True
-        )
-
-        # Parse the XML file
-        tree = ET.parse(file_path)
-        root = tree.getroot()
-
-        # Extract records
-        records = []
-        for record in root.findall('.//Record'):
-            records.append(record.attrib)
-
-        return pd.DataFrame(records)
+        conn = get_db_connection()
+
+        if query_type == "summary":
+            # Get summary statistics
+            summary = {{}}
+
+            # Total records (table names follow the SQLModel defaults
+            # created by the parser: "record" and "workout")
+            total_records = pd.read_sql_query("SELECT COUNT(*) as count FROM record", conn).iloc[0]['count']
+            summary["Total Records"] = total_records
+
+            # Record types distribution
+            record_types = pd.read_sql_query(
+                "SELECT type, COUNT(*) as count FROM record GROUP BY type ORDER BY count DESC LIMIT 20",
+                conn
+            )
+            summary["Top Record Types"] = record_types.set_index('type')['count'].to_dict()
+
+            # Date range
+            date_range = pd.read_sql_query(
+                "SELECT MIN(start_date) as min_date, MAX(start_date) as max_date FROM record",
+                conn
+            )
+            summary["Date Range"] = f"{{date_range.iloc[0]['min_date']}} to {{date_range.iloc[0]['max_date']}}"
+
+            # Workouts count
+            workout_count = pd.read_sql_query("SELECT COUNT(*) as count FROM workout", conn).iloc[0]['count']
+            summary["Total Workouts"] = workout_count
+
+            conn.close()
+            return json.dumps(summary, indent=2)
+
+        elif query_type == "recent":
+            # Build query with date filters
+            query = "SELECT type, value, unit, start_date FROM record"
+            conditions = []
+
+            if start_date:
+                conditions.append(f"start_date >= '{{start_date}}'")
+            if end_date:
+                conditions.append(f"start_date <= '{{end_date}}'")
+
+            if conditions:
+                query += " WHERE " + " AND ".join(conditions)
+
+            query += " ORDER BY start_date DESC LIMIT 20"
+
+            # Get recent records
+            recent_records = pd.read_sql_query(query, conn)
+
+            conn.close()
+            return json.dumps(recent_records.to_dict('records'), indent=2)
 
+        elif query_type == "workouts":
+            # Get recent workouts
+            query = "SELECT workout_activity_type, duration, total_energy_burned, start_date FROM workout"
+            conditions = []
+
+            if start_date:
+                conditions.append(f"start_date >= '{{start_date}}'")
+            if end_date:
+                conditions.append(f"start_date <= '{{end_date}}'")
+
+            if conditions:
+                query += " WHERE " + " AND ".join(conditions)
+
+            query += " ORDER BY start_date DESC LIMIT 20"
+
+            workouts = pd.read_sql_query(query, conn)
+
+            conn.close()
+            return json.dumps(workouts.to_dict('records'), indent=2)
 
+        else:
+            conn.close()
+            return "Invalid query type"
+
     except Exception as e:
-        return None
-
-# Load data on startup
-health_df = load_health_data()
-
-def query_health_data(query_type, start_date=None, end_date=None):
-    """Query the health data based on user input."""
-    if health_df is None:
-        return "Error: Could not load health data."
-
-    df = health_df.copy()
-
-    # Filter by date if provided
-    if start_date:
-        df = df[df['startDate'] >= start_date]
-    if end_date:
-        df = df[df['endDate'] <= end_date]
-
-    if query_type == "summary":
-        # Get summary statistics
-        summary = {{
-            "Total Records": len(df),
-            "Record Types": df['type'].value_counts().to_dict() if 'type' in df.columns else {{}},
-            "Date Range": f"{{df['startDate'].min()}} to {{df['endDate'].max()}}" if 'startDate' in df.columns else "N/A"
-        }}
-        return json.dumps(summary, indent=2)
-
-    elif query_type == "recent":
-        # Get recent records
-        if 'startDate' in df.columns:
-            recent = df.nlargest(10, 'startDate')[['type', 'value', 'startDate', 'unit']].to_dict('records')
-            return json.dumps(recent, indent=2)
-        return "No date information available"
-
-    else:
-        return "Invalid query type"
+        return f"Error querying database: {{str(e)}}"
 
 # MCP Server Interface
 with gr.Blocks(title="Apple Health MCP Server") as demo:
@@ -205,7 +261,7 @@ with gr.Blocks(title="Apple Health MCP Server") as demo:
 
     with gr.Tab("Query Interface"):
         query_type = gr.Dropdown(
-            choices=["summary", "recent"],
+            choices=["summary", "recent", "workouts"],
             value="summary",
             label="Query Type"
         )
@@ -278,9 +334,10 @@ pandas>=2.0.0
 
 **Private Dataset:** [{dataset_repo_id}]({dataset_url})
 - Your export.xml file has been securely uploaded
+- SQLite database (health_data.db) has been generated from your data
 
 **MCP Server Space:** [{space_repo_id}]({space_url})
-- Query interface for your health data
+- Query interface for your health data using SQLite
 - MCP endpoint configuration included
 
 Both repositories are private and only accessible by you."""
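
One caveat on the generated MCP server code above: the `recent` and `workouts` branches interpolate the date filters directly into the SQL string. Since those values come from the UI, bound parameters are safer; a sketch of the same query under the commit's schema (this helper is hypothetical, not part of the diff):

```python
import sqlite3

import pandas as pd


def recent_records(conn: sqlite3.Connection,
                   start_date: str | None = None,
                   end_date: str | None = None) -> pd.DataFrame:
    """Fetch the 20 most recent records, binding date bounds as parameters."""
    query = "SELECT type, value, unit, start_date FROM record"
    conditions: list[str] = []
    params: list[str] = []
    if start_date:
        conditions.append("start_date >= ?")
        params.append(start_date)
    if end_date:
        conditions.append("start_date <= ?")
        params.append(end_date)
    if conditions:
        query += " WHERE " + " AND ".join(conditions)
    query += " ORDER BY start_date DESC LIMIT 20"
    # pandas forwards `params` to sqlite3, so values are bound rather
    # than spliced into the SQL text.
    return pd.read_sql_query(query, conn, params=params)
```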
pyproject.toml CHANGED
@@ -7,6 +7,9 @@ requires-python = ">=3.12"
 dependencies = [
     "gradio>=5.34.0",
     "huggingface-hub>=0.33.0",
+    "lxml>=5.4.0",
+    "sqlmodel>=0.0.24",
+    "tqdm>=4.67.1",
 ]
 
 [dependency-groups]
src/__init__.py ADDED
File without changes
src/parser/__init__.py ADDED
File without changes
src/parser/models.py ADDED
@@ -0,0 +1,402 @@
+from datetime import datetime
+from enum import Enum
+from typing import TYPE_CHECKING, Optional
+
+from sqlmodel import Field, Relationship, SQLModel
+
+if TYPE_CHECKING:
+    pass
+
+
+# Enums for known types
+class BiologicalSex(str, Enum):
+    MALE = "HKBiologicalSexMale"
+    FEMALE = "HKBiologicalSexFemale"
+    OTHER = "HKBiologicalSexOther"
+    NOT_SET = "HKBiologicalSexNotSet"
+
+
+class BloodType(str, Enum):
+    A_POSITIVE = "HKBloodTypeAPositive"
+    A_NEGATIVE = "HKBloodTypeANegative"
+    B_POSITIVE = "HKBloodTypeBPositive"
+    B_NEGATIVE = "HKBloodTypeBNegative"
+    AB_POSITIVE = "HKBloodTypeABPositive"
+    AB_NEGATIVE = "HKBloodTypeABNegative"
+    O_POSITIVE = "HKBloodTypeOPositive"
+    O_NEGATIVE = "HKBloodTypeONegative"
+    NOT_SET = "HKBloodTypeNotSet"
+
+
+class EyeSide(str, Enum):
+    LEFT = "left"
+    RIGHT = "right"
+
+
+# Base Models
+class TimestampedBase(SQLModel):
+    """Base model for entities with date tracking"""
+
+    start_date: datetime = Field(index=True)  # Indexed for date range queries
+    end_date: datetime = Field(index=True)
+    creation_date: datetime | None = None
+
+
+class SourcedBase(TimestampedBase):
+    """Base model for entities with source tracking"""
+
+    source_name: str
+    source_version: str | None = None
+    device: str | None = None
+
+
+# Main Models
+class HealthData(SQLModel, table=True):
+    """Root health data container"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    locale: str
+    export_date: datetime
+
+    # Personal info
+    date_of_birth: str
+    biological_sex: str
+    blood_type: str
+    fitzpatrick_skin_type: str
+    cardio_fitness_medications_use: str
+
+    # Relationships
+    records: list["Record"] = Relationship(back_populates="health_data")
+    correlations: list["Correlation"] = Relationship(back_populates="health_data")
+    workouts: list["Workout"] = Relationship(back_populates="health_data")
+    activity_summaries: list["ActivitySummary"] = Relationship(
+        back_populates="health_data"
+    )
+    clinical_records: list["ClinicalRecord"] = Relationship(
+        back_populates="health_data"
+    )
+    audiograms: list["Audiogram"] = Relationship(back_populates="health_data")
+    vision_prescriptions: list["VisionPrescription"] = Relationship(
+        back_populates="health_data"
+    )
+
+
+class MetadataEntry(SQLModel, table=True):
+    """Key-value metadata entries with proper polymorphic pattern"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    key: str = Field(index=True)
+    value: str
+
+    # Polymorphic discriminator and ID
+    parent_type: str = Field(index=True)  # 'record', 'correlation', 'workout', etc.
+    parent_id: int = Field(index=True)
+
+
+class CorrelationRecord(SQLModel, table=True):
+    """Link table for Correlation-Record many-to-many"""
+
+    correlation_id: int = Field(foreign_key="correlation.id", primary_key=True)
+    record_id: int = Field(foreign_key="record.id", primary_key=True)
+
+
+class Record(SourcedBase, table=True):
+    """Generic health record"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    type: str = Field(index=True)  # Indexed for filtering
+    unit: str | None = None
+    value: str | None = None
+
+    # Foreign key
+    health_data_id: int | None = Field(
+        default=None, foreign_key="healthdata.id", index=True
+    )
+    health_data: HealthData | None = Relationship(back_populates="records")
+
+    # Relationships
+    heart_rate_variability_list: Optional["HeartRateVariabilityMetadataList"] = (
+        Relationship(back_populates="record")
+    )
+
+    # Many-to-many with correlations
+    correlations: list["Correlation"] = Relationship(
+        back_populates="records", link_model=CorrelationRecord
+    )
+
+
+class Correlation(SourcedBase, table=True):
+    """Groups related records together"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    type: str = Field(index=True)
+
+    # Foreign key
+    health_data_id: int | None = Field(
+        default=None, foreign_key="healthdata.id", index=True
+    )
+    health_data: HealthData | None = Relationship(back_populates="correlations")
+
+    # Relationships
+    records: list[Record] = Relationship(
+        back_populates="correlations", link_model=CorrelationRecord
+    )
+
+
+class HeartRateVariabilityMetadataList(SQLModel, table=True):
+    """Container for HRV instantaneous readings"""
+
+    id: int | None = Field(default=None, primary_key=True)
+
+    # Foreign key
+    record_id: int = Field(
+        foreign_key="record.id", unique=True, index=True
+    )  # One-to-one
+    record: Record = Relationship(back_populates="heart_rate_variability_list")
+
+    # Relationships
+    instantaneous_bpm: list["InstantaneousBeatsPerMinute"] = Relationship(
+        back_populates="hrv_list"
+    )
+
+
+class InstantaneousBeatsPerMinute(SQLModel, table=True):
+    """Individual heart rate reading"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    bpm: int
+    time: datetime
+
+    # Foreign key
+    hrv_list_id: int = Field(
+        foreign_key="heartratevariabilitymetadatalist.id", index=True
+    )
+    hrv_list: HeartRateVariabilityMetadataList = Relationship(
+        back_populates="instantaneous_bpm"
+    )
+
+
+class Workout(SourcedBase, table=True):
+    """Workout activity record"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    workout_activity_type: str = Field(index=True)
+    duration: float | None = None
+    duration_unit: str | None = None
+    total_distance: float | None = None
+    total_distance_unit: str | None = None
+    total_energy_burned: float | None = None
+    total_energy_burned_unit: str | None = None
+
+    # Foreign key
+    health_data_id: int | None = Field(
+        default=None, foreign_key="healthdata.id", index=True
+    )
+    health_data: HealthData | None = Relationship(back_populates="workouts")
+
+    # Relationships
+    events: list["WorkoutEvent"] = Relationship(back_populates="workout")
+    statistics: list["WorkoutStatistics"] = Relationship(back_populates="workout")
+    route: Optional["WorkoutRoute"] = Relationship(back_populates="workout")
+
+
+class WorkoutEvent(SQLModel, table=True):
+    """Event during a workout"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    type: str
+    date: datetime = Field(index=True)
+    duration: float | None = None
+    duration_unit: str | None = None
+
+    # Foreign key
+    workout_id: int = Field(foreign_key="workout.id", index=True)
+    workout: Workout = Relationship(back_populates="events")
+
+
+class WorkoutStatistics(TimestampedBase, table=True):
+    """Statistics for a workout period"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    type: str = Field(index=True)
+    average: float | None = None
+    minimum: float | None = None
+    maximum: float | None = None
+    sum: float | None = None
+    unit: str | None = None
+
+    # Foreign key
+    workout_id: int = Field(foreign_key="workout.id", index=True)
+    workout: Workout = Relationship(back_populates="statistics")
+
+
+class WorkoutRoute(SourcedBase, table=True):
+    """GPS route for workout"""
+
+    id: int | None = Field(default=None, primary_key=True)
+
+    # Foreign key
+    workout_id: int = Field(
+        foreign_key="workout.id", unique=True, index=True
+    )  # One-to-one
+    workout: Workout = Relationship(back_populates="route")
+
+    # File reference
+    file_path: str | None = None
+
+
+class ActivitySummary(SQLModel, table=True):
+    """Daily activity summary"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    date_components: str = Field(
+        index=True, unique=True
+    )  # Indexed and unique for date lookups
+    active_energy_burned: float | None = None
+    active_energy_burned_goal: float | None = None
+    active_energy_burned_unit: str | None = None
+    apple_move_time: float | None = None
+    apple_move_time_goal: float | None = None
+    apple_exercise_time: float | None = None
+    apple_exercise_time_goal: float | None = None
+    apple_stand_hours: int | None = None
+    apple_stand_hours_goal: int | None = None
+
+    # Foreign key
+    health_data_id: int | None = Field(
+        default=None, foreign_key="healthdata.id", index=True
+    )
+    health_data: HealthData | None = Relationship(back_populates="activity_summaries")
+
+
+class ClinicalRecord(SQLModel, table=True):
+    """FHIR clinical record reference"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    type: str
+    identifier: str
+    source_name: str
+    source_url: str
+    fhir_version: str
+    received_date: datetime
+    resource_file_path: str
+
+    # Foreign key
+    health_data_id: int | None = Field(default=None, foreign_key="healthdata.id")
+    health_data: HealthData | None = Relationship(back_populates="clinical_records")
+
+
+class Audiogram(SourcedBase, table=True):
+    """Hearing test data"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    type: str
+
+    # Foreign key
+    health_data_id: int | None = Field(
+        default=None, foreign_key="healthdata.id", index=True
+    )
+    health_data: HealthData | None = Relationship(back_populates="audiograms")
+
+    # Relationships
+    sensitivity_points: list["SensitivityPoint"] = Relationship(
+        back_populates="audiogram"
+    )
+
+
+class SensitivityPoint(SQLModel, table=True):
+    """Hearing sensitivity measurement"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    frequency_value: float
+    frequency_unit: str
+
+    # Left ear measurements
+    left_ear_value: float | None = None
+    left_ear_unit: str | None = None
+    left_ear_masked: bool | None = None
+    left_ear_clamping_range_lower_bound: float | None = None
+    left_ear_clamping_range_upper_bound: float | None = None
+
+    # Right ear measurements
+    right_ear_value: float | None = None
+    right_ear_unit: str | None = None
+    right_ear_masked: bool | None = None
+    right_ear_clamping_range_lower_bound: float | None = None
+    right_ear_clamping_range_upper_bound: float | None = None
+
+    # Foreign key
+    audiogram_id: int = Field(foreign_key="audiogram.id", index=True)
+    audiogram: Audiogram = Relationship(back_populates="sensitivity_points")
+
+
+class VisionPrescription(SQLModel, table=True):
+    """Eye prescription data"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    type: str
+    date_issued: datetime
+    expiration_date: datetime | None = None
+    brand: str | None = None
+
+    # Foreign key
+    health_data_id: int | None = Field(
+        default=None, foreign_key="healthdata.id", index=True
+    )
+    health_data: HealthData | None = Relationship(back_populates="vision_prescriptions")
+
+    # Relationships
+    eye_prescriptions: list["EyePrescription"] = Relationship(
+        back_populates="vision_prescription"
+    )
+    attachments: list["VisionAttachment"] = Relationship(
+        back_populates="vision_prescription"
+    )
+
+
+class EyePrescription(SQLModel, table=True):
+    """Individual eye prescription data"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    eye_side: EyeSide
+
+    # Prescription values
+    sphere: float | None = None
+    sphere_unit: str | None = None
+    cylinder: float | None = None
+    cylinder_unit: str | None = None
+    axis: float | None = None
+    axis_unit: str | None = None
+    add: float | None = None
+    add_unit: str | None = None
+    vertex: float | None = None
+    vertex_unit: str | None = None
+    prism_amount: float | None = None
+    prism_amount_unit: str | None = None
+    prism_angle: float | None = None
+    prism_angle_unit: str | None = None
+    far_pd: float | None = None
+    far_pd_unit: str | None = None
+    near_pd: float | None = None
+    near_pd_unit: str | None = None
+    base_curve: float | None = None
+    base_curve_unit: str | None = None
+    diameter: float | None = None
+    diameter_unit: str | None = None
+
+    # Foreign key
+    vision_prescription_id: int = Field(foreign_key="visionprescription.id", index=True)
+    vision_prescription: VisionPrescription = Relationship(
+        back_populates="eye_prescriptions"
+    )
+
+
+class VisionAttachment(SQLModel, table=True):
+    """Attachment reference for vision prescription"""
+
+    id: int | None = Field(default=None, primary_key=True)
+    identifier: str | None = None
+
+    # Foreign key
+    vision_prescription_id: int = Field(foreign_key="visionprescription.id", index=True)
+    vision_prescription: VisionPrescription = Relationship(back_populates="attachments")
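
To see the schema above in action, a short round-trip sketch (illustrative only; the in-memory engine URL and sample values are not from the commit):

```python
from datetime import datetime, timezone

from sqlmodel import Session, SQLModel, create_engine, select

from src.parser.models import Record

# In-memory database for illustration; the parser writes to a file instead.
engine = create_engine("sqlite://")
SQLModel.metadata.create_all(engine)

with Session(engine) as session:
    session.add(
        Record(
            type="HKQuantityTypeIdentifierStepCount",
            source_name="iPhone",
            unit="count",
            value="1200",
            start_date=datetime(2024, 1, 1, 8, 0, tzinfo=timezone.utc),
            end_date=datetime(2024, 1, 1, 9, 0, tzinfo=timezone.utc),
        )
    )
    session.commit()

    # The indexed `type` column backs the filtering that both the parser's
    # duplicate checks and the MCP server's queries rely on.
    steps = session.exec(
        select(Record).where(Record.type == "HKQuantityTypeIdentifierStepCount")
    ).all()
    print(len(steps))  # -> 1
```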
src/parser/parser.py ADDED
@@ -0,0 +1,1215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from datetime import datetime, timedelta
3
+ from pathlib import Path
4
+ from typing import Any
5
+ from zoneinfo import ZoneInfo
6
+
7
+ from lxml import etree # type: ignore[import-untyped]
8
+ from sqlmodel import Session, SQLModel, create_engine, select
9
+ from tqdm import tqdm
10
+
11
+ from .models import (
12
+ ActivitySummary,
13
+ Audiogram,
14
+ ClinicalRecord,
15
+ Correlation,
16
+ CorrelationRecord,
17
+ EyePrescription,
18
+ EyeSide,
19
+ HealthData,
20
+ HeartRateVariabilityMetadataList,
21
+ InstantaneousBeatsPerMinute,
22
+ MetadataEntry,
23
+ Record,
24
+ SensitivityPoint,
25
+ VisionAttachment,
26
+ VisionPrescription,
27
+ Workout,
28
+ WorkoutEvent,
29
+ WorkoutRoute,
30
+ WorkoutStatistics,
31
+ )
32
+
33
+
34
+ class AppleHealthParser:
35
+ """Parser for Apple Health export XML files with streaming support."""
36
+
37
+ def __init__(self, db_path: str = "data/sqlite.db", bulk_mode: bool = True, data_cutoff: timedelta = timedelta(days=180)):
38
+ """Initialize parser with database connection.
39
+
40
+ Args:
41
+ db_path: Path to SQLite database
42
+ bulk_mode: Enable bulk processing for better performance
43
+ data_cutoff: Only process records newer than this timedelta (default: 6 months)
44
+ """
45
+ # Create data directory if it doesn't exist
46
+ Path(db_path).parent.mkdir(parents=True, exist_ok=True)
47
+
48
+ # Create database engine
49
+ self.engine = create_engine(f"sqlite:///{db_path}")
50
+ SQLModel.metadata.create_all(self.engine)
51
+
52
+ # Add performance indexes
53
+ self._create_indexes()
54
+
55
+ # Batch processing settings
56
+ self.bulk_mode = bulk_mode
57
+ self.batch_size = 5000 if bulk_mode else 1000
58
+ self.transaction_batch_size = 100 # Commit every N entities
59
+ self.current_batch: list[Any] = []
60
+ self.pending_commits = 0
61
+
62
+ # Data filtering settings
63
+ self.data_cutoff = data_cutoff
64
+ self.cutoff_date = datetime.now(ZoneInfo("Europe/Zurich")) - data_cutoff
65
+
66
+ # Bulk processing collections
67
+ self.records_batch: list[Record] = []
68
+ self.workouts_batch: list[Workout] = []
69
+ self.correlations_batch: list[Correlation] = []
70
+ self.metadata_batch: list[MetadataEntry] = []
71
+
72
+ # Maps for deferred ID resolution
73
+ self.record_temp_ids: dict[str, int] = {} # temp_id -> actual_id
74
+ self.workout_temp_ids: dict[str, int] = {}
75
+ self.correlation_temp_ids: dict[str, int] = {}
76
+ self.temp_id_counter = 0
77
+
78
+ self.stats = {
79
+ "records": 0,
80
+ "workouts": 0,
81
+ "correlations": 0,
82
+ "activity_summaries": 0,
83
+ "clinical_records": 0,
84
+ "audiograms": 0,
85
+ "vision_prescriptions": 0,
86
+ "metadata_entries": 0,
87
+ "hrv_lists": 0,
88
+ "correlation_records": 0,
89
+ "errors": 0,
90
+ "duplicates": 0,
91
+ "filtered_old": 0,
92
+ }
93
+
94
+ def parse_file(self, xml_path: str) -> None:
95
+ """Parse Apple Health export XML file using streaming."""
96
+ print(f"Starting to parse: {xml_path}")
97
+
98
+ # Check if file exists
99
+ if not os.path.exists(xml_path):
100
+ raise FileNotFoundError(f"XML file not found: {xml_path}")
101
+
102
+ # Get file size for progress tracking
103
+ file_size = os.path.getsize(xml_path)
104
+ print(f"File size: {file_size / (1024**3):.2f} GB")
105
+
106
+ # Clear events to free memory during parsing
107
+ context = etree.iterparse(
108
+ xml_path,
109
+ events=("start", "end"),
110
+ tag=None, # Process all tags
111
+ huge_tree=True, # Enable parsing of large files
112
+ )
113
+
114
+ # Make iterator return start-end event pairs
115
+ context = iter(context)
116
+
117
+ # Skip to root element
118
+ event, root = next(context)
119
+
120
+ # Current element being processed
121
+ health_data: HealthData | None = None
122
+ current_correlation: Correlation | None = None
123
+ current_workout: Workout | None = None
124
+ current_audiogram: Audiogram | None = None
125
+ current_vision_prescription: VisionPrescription | None = None
126
+ current_record: Record | None = None
127
+ current_hrv_list: HeartRateVariabilityMetadataList | None = None
128
+
129
+ # Track parent elements for metadata
130
+ current_parent_type: str | None = None
131
+ current_parent_id: int | None = None
132
+
133
+ with Session(self.engine) as session:
134
+ try:
135
+ # Process root element first
136
+ if root.tag == "HealthData":
137
+ # Check if HealthData already exists
138
+ existing_health_data = session.exec(select(HealthData)).first()
139
+ if existing_health_data:
140
+ health_data = existing_health_data
141
+ print(
142
+ f"Using existing HealthData record with ID: {health_data.id}"
143
+ )
144
+ else:
145
+ health_data = self._parse_health_data(root)
146
+ session.add(health_data)
147
+ session.commit()
148
+ print(f"Created HealthData record with ID: {health_data.id}")
149
+
150
+ # Create progress bar
151
+ pbar = tqdm(desc="Processing", unit=" elements", miniters=1000)
152
+
153
+ for event, elem in context:
154
+ if event == "start":
155
+ # Update progress bar
156
+ pbar.update(1)
157
+
158
+ # Update description with current stats every 5000 records
159
+ total_processed = (
160
+ self.stats["records"] + self.stats["duplicates"] + self.stats["filtered_old"]
161
+ )
162
+ if total_processed % 5000 == 0 and total_processed > 0:
163
+ pbar.set_description(
164
+ f"Records: {self.stats['records']:,} | "
165
+ f"Duplicates: {self.stats['duplicates']:,} | "
166
+ f"Filtered: {self.stats['filtered_old']:,} | "
167
+ f"Errors: {self.stats['errors']:,}"
168
+ )
169
+
170
+ try:
171
+ if elem.tag == "HealthData" and not health_data:
172
+ # Check if HealthData already exists
173
+ existing_health_data = session.exec(
174
+ select(HealthData)
175
+ ).first()
176
+ if existing_health_data:
177
+ health_data = existing_health_data
178
+ else:
179
+ health_data = self._parse_health_data(elem)
180
+ session.add(health_data)
181
+ session.commit()
182
+
183
+ elif elem.tag == "ExportDate" and health_data:
184
+ # Update health_data with export date
185
+ export_date_str = elem.get("value")
186
+ if export_date_str:
187
+ health_data.export_date = self._parse_datetime(
188
+ export_date_str
189
+ )
190
+ session.add(health_data)
191
+ session.commit()
192
+
193
+ elif elem.tag == "Me" and health_data:
194
+ # Update health_data with personal info
195
+ health_data.date_of_birth = elem.get(
196
+ "HKCharacteristicTypeIdentifierDateOfBirth", ""
197
+ )
198
+ health_data.biological_sex = elem.get(
199
+ "HKCharacteristicTypeIdentifierBiologicalSex", ""
200
+ )
201
+ health_data.blood_type = elem.get(
202
+ "HKCharacteristicTypeIdentifierBloodType", ""
203
+ )
204
+ health_data.fitzpatrick_skin_type = elem.get(
205
+ "HKCharacteristicTypeIdentifierFitzpatrickSkinType",
206
+ "",
207
+ )
208
+ health_data.cardio_fitness_medications_use = elem.get(
209
+ "HKCharacteristicTypeIdentifierCardioFitnessMedicationsUse",
210
+ "",
211
+ )
212
+ session.add(health_data)
213
+ session.commit()
214
+
215
+ elif (
216
+ elem.tag == "Record" and health_data and health_data.id
217
+ ):
218
+ record = self._parse_record(elem, health_data.id)
219
+
220
+ # Filter by cutoff date
221
+ if record.start_date < self.cutoff_date:
222
+ self.stats["filtered_old"] += 1
223
+ continue
224
+
225
+ # Check if inside a correlation - always use individual processing
226
+ if current_correlation and current_correlation.id:
227
+ existing = self._check_duplicate_record(
228
+ session, record
229
+ )
230
+ if existing:
231
+ self.stats["duplicates"] += 1
232
+ record = existing
233
+ else:
234
+ session.add(record)
235
+ session.commit()
236
+
237
+ if record.id:
238
+ existing_link = (
239
+ self._check_duplicate_correlation_record(
240
+ session,
241
+ current_correlation.id,
242
+ record.id,
243
+ )
244
+ )
245
+ if not existing_link:
246
+ link = CorrelationRecord(
247
+ correlation_id=current_correlation.id,
248
+ record_id=record.id,
249
+ )
250
+ self._add_to_batch(session, link)
251
+ self.stats["correlation_records"] += 1
252
+ else:
253
+ # Regular records - check for duplicate and use batched commits
254
+ existing = self._check_duplicate_record(
255
+ session, record
256
+ )
257
+ if existing:
258
+ self.stats["duplicates"] += 1
259
+ current_record = existing
260
+ current_parent_type = "record"
261
+ current_parent_id = existing.id
262
+ else:
263
+ session.add(record)
264
+ self.pending_commits += 1
265
+ current_record = record
266
+ current_parent_type = "record"
267
+ # Defer commit for batching
268
+ if (
269
+ self.pending_commits
270
+ >= self.transaction_batch_size
271
+ ):
272
+ session.commit()
273
+ self.pending_commits = 0
274
+ else:
275
+ session.flush() # Get ID without committing
276
+ current_parent_id = record.id
277
+ self.stats["records"] += 1
278
+
279
+ elif (
280
+ elem.tag == "Correlation"
281
+ and health_data
282
+ and health_data.id
283
+ ):
284
+ correlation = self._parse_correlation(
285
+ elem, health_data.id
286
+ )
287
+
288
+ # Filter by cutoff date
289
+ if correlation.start_date < self.cutoff_date:
290
+ self.stats["filtered_old"] += 1
291
+ continue
292
+
293
+ # Check for duplicate
294
+ existing = self._check_duplicate_correlation(
295
+ session, correlation
296
+ )
297
+ if existing:
298
+ self.stats["duplicates"] += 1
299
+ current_correlation = existing
300
+ else:
301
+ session.add(correlation)
302
+ self.pending_commits += 1
303
+ current_correlation = correlation
304
+ # Defer commit for batching
305
+ if (
306
+ self.pending_commits
307
+ >= self.transaction_batch_size
308
+ ):
309
+ session.commit()
310
+ self.pending_commits = 0
311
+ else:
312
+ session.flush() # Get ID without committing
313
+ self.stats["correlations"] += 1
314
+
315
+ current_parent_type = "correlation"
316
+ current_parent_id = current_correlation.id
317
+
318
+ elif (
319
+ elem.tag == "Workout" and health_data and health_data.id
320
+ ):
321
+ workout = self._parse_workout(elem, health_data.id)
322
+
323
+ # Filter by cutoff date
324
+ if workout.start_date < self.cutoff_date:
325
+ self.stats["filtered_old"] += 1
326
+ continue
327
+
328
+ # Check for duplicate
329
+ existing = self._check_duplicate_workout(
330
+ session, workout
331
+ )
332
+ if existing:
333
+ self.stats["duplicates"] += 1
334
+ current_workout = existing
335
+ else:
336
+ session.add(workout)
337
+ self.pending_commits += 1
338
+ current_workout = workout
339
+ # Defer commit for batching
340
+ if (
341
+ self.pending_commits
342
+ >= self.transaction_batch_size
343
+ ):
344
+ session.commit()
345
+ self.pending_commits = 0
346
+ else:
347
+ session.flush() # Get ID without committing
348
+ self.stats["workouts"] += 1
349
+
350
+ current_parent_type = "workout"
351
+ current_parent_id = current_workout.id
352
+
353
+ elif (
354
+ elem.tag == "ActivitySummary"
355
+ and health_data
356
+ and health_data.id
357
+ ):
358
+ summary = self._parse_activity_summary(
359
+ elem, health_data.id
360
+ )
361
+
362
+ # Check for duplicate
363
+ existing = self._check_duplicate_activity_summary(
364
+ session, summary
365
+ )
366
+ if existing:
367
+ self.stats["duplicates"] += 1
368
+ else:
369
+ self._add_to_batch(session, summary)
370
+ self.stats["activity_summaries"] += 1
371
+
372
+ elif (
373
+ elem.tag == "ClinicalRecord"
374
+ and health_data
375
+ and health_data.id
376
+ ):
377
+ clinical = self._parse_clinical_record(
378
+ elem, health_data.id
379
+ )
380
+
381
+ # Check for duplicate
382
+ existing = self._check_duplicate_clinical_record(
383
+ session, clinical
384
+ )
385
+ if existing:
386
+ self.stats["duplicates"] += 1
387
+ else:
388
+ self._add_to_batch(session, clinical)
389
+ self.stats["clinical_records"] += 1
390
+
391
+ elif (
392
+ elem.tag == "Audiogram"
393
+ and health_data
394
+ and health_data.id
395
+ ):
396
+ audiogram = self._parse_audiogram(elem, health_data.id)
397
+
398
+ # Filter by cutoff date
399
+ if audiogram.start_date < self.cutoff_date:
400
+ self.stats["filtered_old"] += 1
401
+ continue
402
+
403
+ # Check for duplicate
404
+ existing = self._check_duplicate_audiogram(
405
+ session, audiogram
406
+ )
407
+ if existing:
408
+ self.stats["duplicates"] += 1
409
+ current_audiogram = existing
410
+ else:
411
+ session.add(audiogram)
412
+ session.commit()
413
+ current_audiogram = audiogram
414
+ self.stats["audiograms"] += 1
415
+
416
+ elif (
417
+ elem.tag == "VisionPrescription"
418
+ and health_data
419
+ and health_data.id
420
+ ):
421
+ prescription = self._parse_vision_prescription(
422
+ elem, health_data.id
423
+ )
424
+
425
+ # Check for duplicate
426
+ existing = self._check_duplicate_vision_prescription(
427
+ session, prescription
428
+ )
429
+ if existing:
430
+ self.stats["duplicates"] += 1
431
+ current_vision_prescription = existing
432
+ else:
433
+ session.add(prescription)
434
+ session.commit()
435
+ current_vision_prescription = prescription
436
+ self.stats["vision_prescriptions"] += 1
437
+
438
+ elif (
439
+ elem.tag == "MetadataEntry"
440
+ and current_parent_type
441
+ and current_parent_id
442
+ ):
443
+ metadata = self._parse_metadata_entry(
444
+ elem, current_parent_type, current_parent_id
445
+ )
446
+ self._add_to_batch(session, metadata)
447
+ self.stats["metadata_entries"] += 1
448
+
449
+ elif (
450
+ elem.tag == "HeartRateVariabilityMetadataList"
451
+ and current_record
452
+ and current_record.id
453
+ ):
454
+ # Check for existing HRV list
455
+ existing_hrv = self._check_duplicate_hrv_list(
456
+ session, current_record.id
457
+ )
458
+ if existing_hrv:
459
+ current_hrv_list = existing_hrv
460
+ self.stats["duplicates"] += 1
461
+ else:
462
+ current_hrv_list = self._parse_hrv_list(
463
+ current_record.id
464
+ )
465
+ session.add(current_hrv_list)
466
+ session.commit() # Need ID for relationships
467
+ self.stats["hrv_lists"] += 1
468
+
469
+ # Handle nested elements
470
+ elif (
471
+ elem.tag == "WorkoutEvent"
472
+ and current_workout
473
+ and current_workout.id
474
+ ):
475
+ event_obj = self._parse_workout_event(
476
+ elem, current_workout.id
477
+ )
478
+ self._add_to_batch(session, event_obj)
479
+
480
+ elif (
481
+ elem.tag == "WorkoutStatistics"
482
+ and current_workout
483
+ and current_workout.id
484
+ ):
485
+ stat = self._parse_workout_statistics(
486
+ elem, current_workout.id
487
+ )
488
+ self._add_to_batch(session, stat)
489
+
490
+ elif (
491
+ elem.tag == "WorkoutRoute"
492
+ and current_workout
493
+ and current_workout.id
494
+ ):
495
+ route = self._parse_workout_route(
496
+ elem, current_workout.id
497
+ )
498
+
499
+ # Check for duplicate WorkoutRoute
500
+ existing = self._check_duplicate_workout_route(
501
+ session, route
502
+ )
503
+ if existing:
504
+ self.stats["duplicates"] += 1
505
+ else:
506
+ session.add(route)
507
+ session.commit() # Immediate commit due to unique constraint
508
+
509
+ elif (
510
+ elem.tag == "SensitivityPoint"
511
+ and current_audiogram
512
+ and current_audiogram.id
513
+ ):
514
+ point = self._parse_sensitivity_point(
515
+ elem, current_audiogram.id
516
+ )
517
+ self._add_to_batch(session, point)
518
+
519
+ elif (
520
+ elem.tag == "Prescription"
521
+ and current_vision_prescription
522
+ and current_vision_prescription.id
523
+ ):
524
+ prescription = self._parse_eye_prescription(
525
+ elem, current_vision_prescription.id
526
+ )
527
+ self._add_to_batch(session, prescription)
528
+
529
+ elif (
530
+ elem.tag == "Attachment"
531
+ and current_vision_prescription
532
+ and current_vision_prescription.id
533
+ ):
534
+ attachment = self._parse_vision_attachment(
535
+ elem, current_vision_prescription.id
536
+ )
537
+ self._add_to_batch(session, attachment)
538
+
539
+ elif (
540
+ elem.tag == "InstantaneousBeatsPerMinute"
541
+ and current_hrv_list
542
+ and current_hrv_list.id
543
+ ):
544
+ bpm = self._parse_instantaneous_bpm(
545
+ elem, current_hrv_list.id, current_record.start_date if current_record else None
546
+ )
547
+ self._add_to_batch(session, bpm)
548
+
549
+ except Exception as e:
550
+ self.stats["errors"] += 1
551
+ if self.stats["errors"] <= 10: # Only print first 10 errors
552
+ print(f"Error parsing {elem.tag}: {e}")
553
+
554
+ elif event == "end":
555
+ # Clear completed elements
556
+ if elem.tag == "Correlation":
557
+ current_correlation = None
558
+ current_parent_type = None
559
+ current_parent_id = None
560
+ elif elem.tag == "Workout":
561
+ current_workout = None
562
+ current_parent_type = None
563
+ current_parent_id = None
564
+ elif elem.tag == "Audiogram":
565
+ current_audiogram = None
566
+ elif elem.tag == "VisionPrescription":
567
+ current_vision_prescription = None
568
+ elif elem.tag == "Record" and not current_correlation:
569
+ current_record = None
570
+ current_parent_type = None
571
+ current_parent_id = None
572
+ elif elem.tag == "HeartRateVariabilityMetadataList":
573
+ current_hrv_list = None
574
+
575
+ # Clear the element to free memory
576
+ elem.clear()
577
+ # Also remove preceding siblings
578
+ while elem.getprevious() is not None:
579
+ del elem.getparent()[0]
580
+
581
+ # Final commit for any pending transactions
582
+ if self.pending_commits > 0:
583
+ session.commit()
584
+ self.pending_commits = 0
585
+
586
+ # Flush any remaining batches
587
+ self._flush_all_batches(session)
588
+ pbar.close()
589
+
590
+ except Exception as e:
591
+ pbar.close()
592
+ print(f"Fatal error during parsing: {e}")
593
+ raise
594
+
595
+ # Final statistics
596
+ self._print_progress()
597
+ print(f"Parsing complete! Data cutoff: {self.cutoff_date.isoformat()}")
598
+
599
+ def _add_to_batch(self, session: Session, obj: Any) -> None:
600
+ """Add object to batch and flush if necessary."""
601
+ self.current_batch.append(obj)
602
+ if len(self.current_batch) >= self.batch_size:
603
+ self._flush_batch(session)
604
+
605
+ def _flush_batch(self, session: Session) -> None:
606
+ """Flush current batch to database."""
607
+ if self.current_batch:
608
+ session.add_all(self.current_batch)
609
+ session.commit()
610
+ self.current_batch = []
611
+
612
+ def _create_indexes(self) -> None:
613
+ """Create database indexes for performance."""
614
+ from sqlalchemy import text
615
+
616
+ with Session(self.engine) as session:
617
+ # Indexes for duplicate checking
618
+ indexes = [
619
+ "CREATE INDEX IF NOT EXISTS idx_record_duplicate ON record (type, start_date, end_date, health_data_id, value)",
620
+ "CREATE INDEX IF NOT EXISTS idx_workout_duplicate ON workout (workout_activity_type, start_date, end_date, health_data_id)",
621
+ "CREATE INDEX IF NOT EXISTS idx_correlation_duplicate ON correlation (type, start_date, end_date, health_data_id)",
622
+ "CREATE INDEX IF NOT EXISTS idx_activity_summary_duplicate ON activitysummary (date_components, health_data_id)",
623
+ "CREATE INDEX IF NOT EXISTS idx_clinical_record_duplicate ON clinicalrecord (identifier, health_data_id)",
624
+ "CREATE INDEX IF NOT EXISTS idx_audiogram_duplicate ON audiogram (type, start_date, end_date, health_data_id)",
625
+ "CREATE INDEX IF NOT EXISTS idx_vision_prescription_duplicate ON visionprescription (type, date_issued, health_data_id)",
626
+ "CREATE INDEX IF NOT EXISTS idx_correlation_record_duplicate ON correlationrecord (correlation_id, record_id)",
627
+ ]
628
+ for index_sql in indexes:
629
+ try:
630
+ session.execute(text(index_sql))
631
+ except Exception as e:
632
+ print(f"Index creation warning: {e}")
633
+ session.commit()
634
+
635
+ def _bulk_insert_records(self, session: Session) -> None:
636
+ """Bulk insert records with batch duplicate checking."""
637
+ if not self.records_batch:
638
+ return
639
+
640
+ # Group records by type for efficient duplicate checking
641
+ records_by_type: dict[tuple[str | None, int], list[Record]] = {}
642
+ for record in self.records_batch:
643
+ key = (record.type, record.health_data_id or 0)
644
+ if key not in records_by_type:
645
+ records_by_type[key] = []
646
+ records_by_type[key].append(record)
647
+
648
+ new_records = []
649
+ for (record_type, health_data_id), type_records in records_by_type.items():
650
+ # Batch check for existing records of this type
651
+ start_dates = [r.start_date for r in type_records]
652
+ end_dates = [r.end_date for r in type_records]
653
+
654
+ # Build query conditions
655
+ stmt = select(Record).where(
656
+ Record.type == record_type,
657
+ Record.health_data_id == health_data_id,
658
+ )
659
+
660
+ if start_dates:
661
+ from sqlalchemy import or_
662
+
663
+ date_conditions = []
664
+ for i, (start_date, end_date) in enumerate(zip(start_dates, end_dates)):
665
+ date_conditions.append(
666
+ (Record.start_date == start_date)
667
+ & (Record.end_date == end_date)
668
+ )
669
+ if date_conditions:
670
+ stmt = stmt.where(or_(*date_conditions))
671
+
672
+ existing_records = session.exec(stmt).all()
673
+
674
+ # Create lookup set for existing records
675
+ existing_set: set[tuple[datetime, datetime, str | None]] = set()
676
+ for existing in existing_records:
677
+ lookup_key = (existing.start_date, existing.end_date, existing.value)
678
+ existing_set.add(lookup_key)
679
+
680
+ # Filter out duplicates
681
+ for record in type_records:
682
+ record_key = (record.start_date, record.end_date, record.value)
683
+ if record_key in existing_set:
684
+ self.stats["duplicates"] += 1
685
+ else:
686
+ new_records.append(record)
687
+
688
+ if new_records:
689
+ session.add_all(new_records)
690
+ session.commit()
691
+ self.stats["records"] += len(new_records)
692
+
693
+ self.records_batch = []
694
+
695
+ def _bulk_insert_workouts(self, session: Session) -> None:
696
+ """Bulk insert workouts with duplicate checking."""
697
+ if not self.workouts_batch:
698
+ return
699
+
700
+ new_workouts = []
701
+ for workout in self.workouts_batch:
702
+ existing = self._check_duplicate_workout(session, workout)
703
+ if existing:
704
+ self.stats["duplicates"] += 1
705
+ else:
706
+ new_workouts.append(workout)
707
+
708
+ if new_workouts:
709
+ session.add_all(new_workouts)
710
+ session.commit()
711
+ self.stats["workouts"] += len(new_workouts)
712
+
713
+ self.workouts_batch = []
714
+
715
+ def _bulk_insert_correlations(self, session: Session) -> None:
716
+ """Bulk insert correlations with duplicate checking."""
717
+ if not self.correlations_batch:
718
+ return
719
+
720
+ new_correlations = []
721
+ for correlation in self.correlations_batch:
722
+ existing = self._check_duplicate_correlation(session, correlation)
723
+ if existing:
724
+ self.stats["duplicates"] += 1
725
+ else:
726
+ new_correlations.append(correlation)
727
+
728
+ if new_correlations:
729
+ session.add_all(new_correlations)
730
+ session.commit()
731
+ self.stats["correlations"] += len(new_correlations)
732
+
733
+ self.correlations_batch = []
734
+
735
+ def _flush_all_batches(self, session: Session) -> None:
736
+ """Flush all bulk batches to database."""
737
+ if self.bulk_mode:
738
+ self._bulk_insert_records(session)
739
+ self._bulk_insert_workouts(session)
740
+ self._bulk_insert_correlations(session)
741
+ session.commit()
742
+ self._flush_batch(session) # Handle remaining objects
743
+
744
+ def _print_progress(self) -> None:
745
+ """Print current parsing progress."""
746
+ print("Final Statistics:")
747
+ for key, value in self.stats.items():
748
+ print(f" {key}: {value:,}")
749
+
750
+ # Duplicate checking methods
751
+ def _check_duplicate_record(
752
+ self, session: Session, record: Record
753
+ ) -> Record | None:
754
+ """Check if a record already exists."""
755
+ stmt = select(Record).where(
756
+ Record.type == record.type,
757
+ Record.start_date == record.start_date,
758
+ Record.end_date == record.end_date,
759
+ Record.health_data_id == record.health_data_id,
760
+ )
761
+
762
+ # Also check value if present
763
+ if record.value is not None:
764
+ stmt = stmt.where(Record.value == record.value)
765
+ else:
766
+ stmt = stmt.where(Record.value.is_(None))
767
+
768
+ return session.exec(stmt).first()
769
+
770
+ def _check_duplicate_workout(
771
+ self, session: Session, workout: Workout
772
+ ) -> Workout | None:
773
+ """Check if a workout already exists."""
774
+ return session.exec(
775
+ select(Workout).where(
776
+ Workout.workout_activity_type == workout.workout_activity_type,
777
+ Workout.start_date == workout.start_date,
778
+ Workout.end_date == workout.end_date,
779
+ Workout.health_data_id == workout.health_data_id,
780
+ )
781
+ ).first()
782
+
783
+ def _check_duplicate_correlation(
784
+ self, session: Session, correlation: Correlation
785
+ ) -> Correlation | None:
786
+ """Check if a correlation already exists."""
787
+ return session.exec(
788
+ select(Correlation).where(
789
+ Correlation.type == correlation.type,
790
+ Correlation.start_date == correlation.start_date,
791
+ Correlation.end_date == correlation.end_date,
792
+ Correlation.health_data_id == correlation.health_data_id,
793
+ )
794
+ ).first()
795
+
796
+ def _check_duplicate_activity_summary(
797
+ self, session: Session, summary: ActivitySummary
798
+ ) -> ActivitySummary | None:
799
+ """Check if an activity summary already exists."""
800
+ return session.exec(
801
+ select(ActivitySummary).where(
802
+ ActivitySummary.date_components == summary.date_components,
803
+ ActivitySummary.health_data_id == summary.health_data_id,
804
+ )
805
+ ).first()
806
+
807
+ def _check_duplicate_clinical_record(
808
+ self, session: Session, record: ClinicalRecord
809
+ ) -> ClinicalRecord | None:
810
+ """Check if a clinical record already exists."""
811
+ return session.exec(
812
+ select(ClinicalRecord).where(
813
+ ClinicalRecord.identifier == record.identifier,
814
+ ClinicalRecord.health_data_id == record.health_data_id,
815
+ )
816
+ ).first()
817
+
818
+ def _check_duplicate_audiogram(
819
+ self, session: Session, audiogram: Audiogram
820
+ ) -> Audiogram | None:
821
+ """Check if an audiogram already exists."""
822
+ return session.exec(
823
+ select(Audiogram).where(
824
+ Audiogram.type == audiogram.type,
825
+ Audiogram.start_date == audiogram.start_date,
826
+ Audiogram.end_date == audiogram.end_date,
827
+ Audiogram.health_data_id == audiogram.health_data_id,
828
+ )
829
+ ).first()
830
+
831
+ def _check_duplicate_vision_prescription(
832
+ self, session: Session, prescription: VisionPrescription
833
+ ) -> VisionPrescription | None:
834
+ """Check if a vision prescription already exists."""
835
+ return session.exec(
836
+ select(VisionPrescription).where(
837
+ VisionPrescription.type == prescription.type,
838
+ VisionPrescription.date_issued == prescription.date_issued,
839
+ VisionPrescription.health_data_id == prescription.health_data_id,
840
+ )
841
+ ).first()
842
+
843
+ def _check_duplicate_correlation_record(
844
+ self, session: Session, correlation_id: int, record_id: int
845
+ ) -> CorrelationRecord | None:
846
+ """Check if a correlation-record link already exists."""
847
+ return session.exec(
848
+ select(CorrelationRecord).where(
849
+ CorrelationRecord.correlation_id == correlation_id,
850
+ CorrelationRecord.record_id == record_id,
851
+ )
852
+ ).first()
853
+
854
+ def _check_duplicate_workout_route(
855
+ self, session: Session, route: WorkoutRoute
856
+ ) -> WorkoutRoute | None:
857
+ """Check if a workout route already exists."""
858
+ return session.exec(
859
+ select(WorkoutRoute).where(
860
+ WorkoutRoute.workout_id == route.workout_id,
861
+ )
862
+ ).first()
863
+
864
+ def _check_duplicate_hrv_list(
865
+ self, session: Session, record_id: int
866
+ ) -> HeartRateVariabilityMetadataList | None:
867
+ """Check if an HRV list already exists for this record."""
868
+ return session.exec(
869
+ select(HeartRateVariabilityMetadataList).where(
870
+ HeartRateVariabilityMetadataList.record_id == record_id,
871
+ )
872
+ ).first()
873
+
874
+ # Parsing methods remain the same
875
+ def _parse_datetime(self, date_str: str, base_date: datetime | None = None) -> datetime:
876
+ """Parse datetime string from Apple Health format.
877
+
878
+ Args:
879
+ date_str: The datetime or time string to parse
880
+ base_date: Base date to use for time-only strings (for InstantaneousBeatsPerMinute)
881
+ """
882
+ # Check if this is a time-only format like "7:47:41.86 PM"
883
+ if base_date and ("AM" in date_str or "PM" in date_str) and ":" in date_str and "-" not in date_str:
884
+ # Parse time-only format and combine with base date
885
+ try:
886
+ # Handle formats like "7:47:41.86 PM"
887
+ time_part = datetime.strptime(date_str, "%I:%M:%S.%f %p").time()
888
+ except ValueError:
889
+ try:
890
+ # Fallback for formats like "7:47:41 PM" (no microseconds)
891
+ time_part = datetime.strptime(date_str, "%I:%M:%S %p").time()
892
+ except ValueError:
893
+ # If all fails, try without seconds
894
+ time_part = datetime.strptime(date_str, "%I:%M %p").time()
895
+
896
+ # Combine with base date
897
+ combined = datetime.combine(base_date.date(), time_part)
898
+ # Use the same timezone as base_date
899
+ return combined.replace(tzinfo=base_date.tzinfo)
900
+ else:
901
+ # Apple Health standard format: "2023-12-31 23:59:59 +0000"
902
+ dt = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S %z")
903
+ # Convert to preferred timezone
904
+ return dt.astimezone(ZoneInfo("Europe/Zurich"))
905
+
906
+ def _parse_health_data(self, elem: Any) -> HealthData:
907
+ """Parse HealthData root element."""
908
+ # The HealthData element itself only has a locale attribute
909
+ # ExportDate and Me are child elements that we'll handle separately
910
+ return HealthData(
911
+ locale=elem.get("locale", ""),
912
+ export_date=datetime.now(
913
+ ZoneInfo("Europe/Zurich")
914
+ ), # Will be updated by ExportDate element
915
+ date_of_birth="", # Will be updated by Me element
916
+ biological_sex="", # Will be updated by Me element
917
+ blood_type="", # Will be updated by Me element
918
+ fitzpatrick_skin_type="", # Will be updated by Me element
919
+ cardio_fitness_medications_use="", # Will be updated by Me element
920
+ )
921
+
922
+ def _parse_record(self, elem: Any, health_data_id: int) -> Record:
923
+ """Parse Record element."""
924
+ return Record(
925
+ type=elem.get("type"),
926
+ source_name=elem.get("sourceName"),
927
+ source_version=elem.get("sourceVersion"),
928
+ device=elem.get("device"),
929
+ unit=elem.get("unit"),
930
+ value=elem.get("value"),
931
+ creation_date=self._parse_datetime(elem.get("creationDate"))
932
+ if elem.get("creationDate")
933
+ else None,
934
+ start_date=self._parse_datetime(elem.get("startDate")),
935
+ end_date=self._parse_datetime(elem.get("endDate")),
936
+ health_data_id=health_data_id,
937
+ )
938
+
939
+ def _parse_correlation(self, elem: Any, health_data_id: int) -> Correlation:
940
+ """Parse Correlation element."""
941
+ return Correlation(
942
+ type=elem.get("type"),
943
+ source_name=elem.get("sourceName"),
944
+ source_version=elem.get("sourceVersion"),
945
+ device=elem.get("device"),
946
+ creation_date=self._parse_datetime(elem.get("creationDate"))
947
+ if elem.get("creationDate")
948
+ else None,
949
+ start_date=self._parse_datetime(elem.get("startDate")),
950
+ end_date=self._parse_datetime(elem.get("endDate")),
951
+ health_data_id=health_data_id,
952
+ )
953
+
954
+ def _parse_workout(self, elem: Any, health_data_id: int) -> Workout:
955
+ """Parse Workout element."""
956
+ return Workout(
957
+ workout_activity_type=elem.get("workoutActivityType"),
958
+ duration=float(elem.get("duration")) if elem.get("duration") else None,
959
+ duration_unit=elem.get("durationUnit"),
960
+ total_distance=float(elem.get("totalDistance"))
961
+ if elem.get("totalDistance")
962
+ else None,
963
+ total_distance_unit=elem.get("totalDistanceUnit"),
964
+ total_energy_burned=float(elem.get("totalEnergyBurned"))
965
+ if elem.get("totalEnergyBurned")
966
+ else None,
967
+ total_energy_burned_unit=elem.get("totalEnergyBurnedUnit"),
968
+ source_name=elem.get("sourceName"),
969
+ source_version=elem.get("sourceVersion"),
970
+ device=elem.get("device"),
971
+ creation_date=self._parse_datetime(elem.get("creationDate"))
972
+ if elem.get("creationDate")
973
+ else None,
974
+ start_date=self._parse_datetime(elem.get("startDate")),
975
+ end_date=self._parse_datetime(elem.get("endDate")),
976
+ health_data_id=health_data_id,
977
+ )
978
+
979
+ def _parse_activity_summary(
980
+ self, elem: Any, health_data_id: int
981
+ ) -> ActivitySummary:
982
+ """Parse ActivitySummary element."""
983
+ return ActivitySummary(
984
+ date_components=elem.get("dateComponents"),
985
+ active_energy_burned=float(elem.get("activeEnergyBurned"))
986
+ if elem.get("activeEnergyBurned")
987
+ else None,
988
+ active_energy_burned_goal=float(elem.get("activeEnergyBurnedGoal"))
989
+ if elem.get("activeEnergyBurnedGoal")
990
+ else None,
991
+ active_energy_burned_unit=elem.get("activeEnergyBurnedUnit"),
992
+ apple_move_time=float(elem.get("appleMoveTime"))
993
+ if elem.get("appleMoveTime")
994
+ else None,
995
+ apple_move_time_goal=float(elem.get("appleMoveTimeGoal"))
996
+ if elem.get("appleMoveTimeGoal")
997
+ else None,
998
+ apple_exercise_time=float(elem.get("appleExerciseTime"))
999
+ if elem.get("appleExerciseTime")
1000
+ else None,
1001
+ apple_exercise_time_goal=float(elem.get("appleExerciseTimeGoal"))
1002
+ if elem.get("appleExerciseTimeGoal")
1003
+ else None,
1004
+ apple_stand_hours=int(elem.get("appleStandHours"))
1005
+ if elem.get("appleStandHours")
1006
+ else None,
1007
+ apple_stand_hours_goal=int(elem.get("appleStandHoursGoal"))
1008
+ if elem.get("appleStandHoursGoal")
1009
+ else None,
1010
+ health_data_id=health_data_id,
1011
+ )
1012
+
1013
+ def _parse_clinical_record(self, elem: Any, health_data_id: int) -> ClinicalRecord:
1014
+ """Parse ClinicalRecord element."""
1015
+ return ClinicalRecord(
1016
+ type=elem.get("type"),
1017
+ identifier=elem.get("identifier"),
1018
+ source_name=elem.get("sourceName"),
1019
+ source_url=elem.get("sourceURL"),
1020
+ fhir_version=elem.get("fhirVersion"),
1021
+ received_date=self._parse_datetime(elem.get("receivedDate")),
1022
+ resource_file_path=elem.get("resourceFilePath"),
1023
+ health_data_id=health_data_id,
1024
+ )
1025
+
1026
+ def _parse_audiogram(self, elem: Any, health_data_id: int) -> Audiogram:
1027
+ """Parse Audiogram element."""
1028
+ return Audiogram(
1029
+ type=elem.get("type"),
1030
+ source_name=elem.get("sourceName"),
1031
+ source_version=elem.get("sourceVersion"),
1032
+ device=elem.get("device"),
1033
+ creation_date=self._parse_datetime(elem.get("creationDate"))
1034
+ if elem.get("creationDate")
1035
+ else None,
1036
+ start_date=self._parse_datetime(elem.get("startDate")),
1037
+ end_date=self._parse_datetime(elem.get("endDate")),
1038
+ health_data_id=health_data_id,
1039
+ )
1040
+
1041
+ def _parse_vision_prescription(
1042
+ self, elem: Any, health_data_id: int
1043
+ ) -> VisionPrescription:
1044
+ """Parse VisionPrescription element."""
1045
+ return VisionPrescription(
1046
+ type=elem.get("type"),
1047
+ date_issued=self._parse_datetime(elem.get("dateIssued")),
1048
+ expiration_date=self._parse_datetime(elem.get("expirationDate"))
1049
+ if elem.get("expirationDate")
1050
+ else None,
1051
+ brand=elem.get("brand"),
1052
+ health_data_id=health_data_id,
1053
+ )
1054
+
1055
+ def _parse_workout_event(self, elem: Any, workout_id: int) -> WorkoutEvent:
1056
+ """Parse WorkoutEvent element."""
1057
+ return WorkoutEvent(
1058
+ type=elem.get("type"),
1059
+ date=self._parse_datetime(elem.get("date")),
1060
+ duration=float(elem.get("duration")) if elem.get("duration") else None,
1061
+ duration_unit=elem.get("durationUnit"),
1062
+ workout_id=workout_id,
1063
+ )
1064
+
1065
+ def _parse_workout_statistics(
1066
+ self, elem: Any, workout_id: int
1067
+ ) -> WorkoutStatistics:
1068
+ """Parse WorkoutStatistics element."""
1069
+ return WorkoutStatistics(
1070
+ type=elem.get("type"),
1071
+ start_date=self._parse_datetime(elem.get("startDate")),
1072
+ end_date=self._parse_datetime(elem.get("endDate")),
1073
+ average=float(elem.get("average")) if elem.get("average") else None,
1074
+ minimum=float(elem.get("minimum")) if elem.get("minimum") else None,
1075
+ maximum=float(elem.get("maximum")) if elem.get("maximum") else None,
1076
+ sum=float(elem.get("sum")) if elem.get("sum") else None,
1077
+ unit=elem.get("unit"),
1078
+ workout_id=workout_id,
1079
+ )
1080
+
1081
+ def _parse_workout_route(self, elem: Any, workout_id: int) -> WorkoutRoute:
1082
+ """Parse WorkoutRoute element."""
1083
+ return WorkoutRoute(
1084
+ source_name=elem.get("sourceName"),
1085
+ source_version=elem.get("sourceVersion"),
1086
+ device=elem.get("device"),
1087
+ creation_date=self._parse_datetime(elem.get("creationDate"))
1088
+ if elem.get("creationDate")
1089
+ else None,
1090
+ start_date=self._parse_datetime(elem.get("startDate")),
1091
+ end_date=self._parse_datetime(elem.get("endDate")),
1092
+ file_path=elem.get("filePath"),
1093
+ workout_id=workout_id,
1094
+ )
1095
+
1096
+ def _parse_sensitivity_point(
1097
+ self, elem: Any, audiogram_id: int
1098
+ ) -> SensitivityPoint:
1099
+ """Parse SensitivityPoint element."""
1100
+ return SensitivityPoint(
1101
+ frequency_value=float(elem.get("frequencyValue")),
1102
+ frequency_unit=elem.get("frequencyUnit"),
1103
+ left_ear_value=float(elem.get("leftEarValue"))
1104
+ if elem.get("leftEarValue")
1105
+ else None,
1106
+ left_ear_unit=elem.get("leftEarUnit"),
1107
+ left_ear_masked=elem.get("leftEarMasked") == "true"
1108
+ if elem.get("leftEarMasked")
1109
+ else None,
1110
+ left_ear_clamping_range_lower_bound=float(
1111
+ elem.get("leftEarClampingRangeLowerBound")
1112
+ )
1113
+ if elem.get("leftEarClampingRangeLowerBound")
1114
+ else None,
1115
+ left_ear_clamping_range_upper_bound=float(
1116
+ elem.get("leftEarClampingRangeUpperBound")
1117
+ )
1118
+ if elem.get("leftEarClampingRangeUpperBound")
1119
+ else None,
1120
+ right_ear_value=float(elem.get("rightEarValue"))
1121
+ if elem.get("rightEarValue")
1122
+ else None,
1123
+ right_ear_unit=elem.get("rightEarUnit"),
1124
+ right_ear_masked=elem.get("rightEarMasked") == "true"
1125
+ if elem.get("rightEarMasked")
1126
+ else None,
1127
+ right_ear_clamping_range_lower_bound=float(
1128
+ elem.get("rightEarClampingRangeLowerBound")
1129
+ )
1130
+ if elem.get("rightEarClampingRangeLowerBound")
1131
+ else None,
1132
+ right_ear_clamping_range_upper_bound=float(
1133
+ elem.get("rightEarClampingRangeUpperBound")
1134
+ )
1135
+ if elem.get("rightEarClampingRangeUpperBound")
1136
+ else None,
1137
+ audiogram_id=audiogram_id,
1138
+ )
1139
+
1140
+ def _parse_eye_prescription(
1141
+ self, elem: Any, vision_prescription_id: int
1142
+ ) -> EyePrescription:
1143
+ """Parse Prescription (eye) element."""
1144
+ eye_side = EyeSide.LEFT if elem.get("eye") == "left" else EyeSide.RIGHT
1145
+
1146
+ return EyePrescription(
1147
+ eye_side=eye_side,
1148
+ sphere=float(elem.get("sphere")) if elem.get("sphere") else None,
1149
+ sphere_unit=elem.get("sphereUnit"),
1150
+ cylinder=float(elem.get("cylinder")) if elem.get("cylinder") else None,
1151
+ cylinder_unit=elem.get("cylinderUnit"),
1152
+ axis=float(elem.get("axis")) if elem.get("axis") else None,
1153
+ axis_unit=elem.get("axisUnit"),
1154
+ add=float(elem.get("add")) if elem.get("add") else None,
1155
+ add_unit=elem.get("addUnit"),
1156
+ vertex=float(elem.get("vertex")) if elem.get("vertex") else None,
1157
+ vertex_unit=elem.get("vertexUnit"),
1158
+ prism_amount=float(elem.get("prismAmount"))
1159
+ if elem.get("prismAmount")
1160
+ else None,
1161
+ prism_amount_unit=elem.get("prismAmountUnit"),
1162
+ prism_angle=float(elem.get("prismAngle"))
1163
+ if elem.get("prismAngle")
1164
+ else None,
1165
+ prism_angle_unit=elem.get("prismAngleUnit"),
1166
+ far_pd=float(elem.get("farPD")) if elem.get("farPD") else None,
1167
+ far_pd_unit=elem.get("farPDUnit"),
1168
+ near_pd=float(elem.get("nearPD")) if elem.get("nearPD") else None,
1169
+ near_pd_unit=elem.get("nearPDUnit"),
1170
+ base_curve=float(elem.get("baseCurve")) if elem.get("baseCurve") else None,
1171
+ base_curve_unit=elem.get("baseCurveUnit"),
1172
+ diameter=float(elem.get("diameter")) if elem.get("diameter") else None,
1173
+ diameter_unit=elem.get("diameterUnit"),
1174
+ vision_prescription_id=vision_prescription_id,
1175
+ )
1176
+
1177
+ def _parse_vision_attachment(
1178
+ self, elem: Any, vision_prescription_id: int
1179
+ ) -> VisionAttachment:
1180
+ """Parse Attachment element."""
1181
+ return VisionAttachment(
1182
+ identifier=elem.get("identifier"),
1183
+ vision_prescription_id=vision_prescription_id,
1184
+ )
1185
+
1186
+ def _parse_metadata_entry(
1187
+ self, elem: Any, parent_type: str, parent_id: int
1188
+ ) -> MetadataEntry:
1189
+ """Parse MetadataEntry element."""
1190
+ return MetadataEntry(
1191
+ key=elem.get("key"),
1192
+ value=elem.get("value"),
1193
+ parent_type=parent_type,
1194
+ parent_id=parent_id,
1195
+ )
1196
+
1197
+ def _parse_hrv_list(self, record_id: int) -> HeartRateVariabilityMetadataList:
1198
+ """Parse HeartRateVariabilityMetadataList element."""
1199
+ return HeartRateVariabilityMetadataList(record_id=record_id)
1200
+
1201
+ def _parse_instantaneous_bpm(
1202
+ self, elem: Any, hrv_list_id: int, base_date: datetime | None = None
1203
+ ) -> InstantaneousBeatsPerMinute:
1204
+ """Parse InstantaneousBeatsPerMinute element."""
1205
+ return InstantaneousBeatsPerMinute(
1206
+ bpm=int(elem.get("bpm")),
1207
+ time=self._parse_datetime(elem.get("time"), base_date),
1208
+ hrv_list_id=hrv_list_id,
1209
+ )
1210
+
1211
+
1212
+ if __name__ == "__main__":
1213
+ # Example usage
1214
+ parser = AppleHealthParser()
1215
+ parser.parse_file("data/export.xml")
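+
+ # A hedged follow-up sketch: query the SQLite database the parser just
+ # produced, via SQLModel. "sqlite:///data/health.db" is an assumed path
+ # for illustration; substitute whatever path this parser writes to.
+ from sqlmodel import Session, create_engine, select
+
+ engine = create_engine("sqlite:///data/health.db")  # assumed output path
+ with Session(engine) as session:
+     workouts = session.exec(select(Workout)).all()
+     print(f"Parsed {len(workouts)} workouts")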
uv.lock CHANGED
@@ -44,6 +44,9 @@ source = { virtual = "." }
44
  dependencies = [
45
  { name = "gradio" },
46
  { name = "huggingface-hub" },
47
+ { name = "lxml" },
48
+ { name = "sqlmodel" },
49
+ { name = "tqdm" },
50
  ]
51
 
52
  [package.dev-dependencies]
 
@@ -58,6 +61,9 @@ dev = [
61
  requires-dist = [
62
  { name = "gradio", specifier = ">=5.34.0" },
63
  { name = "huggingface-hub", specifier = ">=0.33.0" },
64
+ { name = "lxml", specifier = ">=5.4.0" },
65
+ { name = "sqlmodel", specifier = ">=0.0.24" },
66
+ { name = "tqdm", specifier = ">=4.67.1" },
67
  ]
68
 
69
  [package.metadata.requires-dev]
 
@@ -271,6 +277,39 @@ wheels = [
277
  { url = "https://files.pythonhosted.org/packages/ea/72/1e76abc821f8efaaeb2e3bd727a6c97bf87c6a9a0ffacfed0647e587824a/gradio_client-1.10.3-py3-none-any.whl", hash = "sha256:941e7f8d9a160f88487e9780a3db2736a40ea2b8b69d53ffdb306e47ef658b76", size = 323599 },
278
  ]
279
 
280
+ [[package]]
281
+ name = "greenlet"
282
+ version = "3.2.3"
283
+ source = { registry = "https://pypi.org/simple" }
284
+ sdist = { url = "https://files.pythonhosted.org/packages/c9/92/bb85bd6e80148a4d2e0c59f7c0c2891029f8fd510183afc7d8d2feeed9b6/greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365", size = 185752 }
285
+ wheels = [
286
+ { url = "https://files.pythonhosted.org/packages/f3/94/ad0d435f7c48debe960c53b8f60fb41c2026b1d0fa4a99a1cb17c3461e09/greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d", size = 271992 },
287
+ { url = "https://files.pythonhosted.org/packages/93/5d/7c27cf4d003d6e77749d299c7c8f5fd50b4f251647b5c2e97e1f20da0ab5/greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b", size = 638820 },
288
+ { url = "https://files.pythonhosted.org/packages/c6/7e/807e1e9be07a125bb4c169144937910bf59b9d2f6d931578e57f0bce0ae2/greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d", size = 653046 },
289
+ { url = "https://files.pythonhosted.org/packages/9d/ab/158c1a4ea1068bdbc78dba5a3de57e4c7aeb4e7fa034320ea94c688bfb61/greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264", size = 647701 },
290
+ { url = "https://files.pythonhosted.org/packages/cc/0d/93729068259b550d6a0288da4ff72b86ed05626eaf1eb7c0d3466a2571de/greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688", size = 649747 },
291
+ { url = "https://files.pythonhosted.org/packages/f6/f6/c82ac1851c60851302d8581680573245c8fc300253fc1ff741ae74a6c24d/greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb", size = 605461 },
292
+ { url = "https://files.pythonhosted.org/packages/98/82/d022cf25ca39cf1200650fc58c52af32c90f80479c25d1cbf57980ec3065/greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c", size = 1121190 },
293
+ { url = "https://files.pythonhosted.org/packages/f5/e1/25297f70717abe8104c20ecf7af0a5b82d2f5a980eb1ac79f65654799f9f/greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163", size = 1149055 },
294
+ { url = "https://files.pythonhosted.org/packages/1f/8f/8f9e56c5e82eb2c26e8cde787962e66494312dc8cb261c460e1f3a9c88bc/greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849", size = 297817 },
295
+ { url = "https://files.pythonhosted.org/packages/b1/cf/f5c0b23309070ae93de75c90d29300751a5aacefc0a3ed1b1d8edb28f08b/greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad", size = 270732 },
296
+ { url = "https://files.pythonhosted.org/packages/48/ae/91a957ba60482d3fecf9be49bc3948f341d706b52ddb9d83a70d42abd498/greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef", size = 639033 },
297
+ { url = "https://files.pythonhosted.org/packages/6f/df/20ffa66dd5a7a7beffa6451bdb7400d66251374ab40b99981478c69a67a8/greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3", size = 652999 },
298
+ { url = "https://files.pythonhosted.org/packages/51/b4/ebb2c8cb41e521f1d72bf0465f2f9a2fd803f674a88db228887e6847077e/greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95", size = 647368 },
299
+ { url = "https://files.pythonhosted.org/packages/8e/6a/1e1b5aa10dced4ae876a322155705257748108b7fd2e4fae3f2a091fe81a/greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb", size = 650037 },
300
+ { url = "https://files.pythonhosted.org/packages/26/f2/ad51331a157c7015c675702e2d5230c243695c788f8f75feba1af32b3617/greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b", size = 608402 },
301
+ { url = "https://files.pythonhosted.org/packages/26/bc/862bd2083e6b3aff23300900a956f4ea9a4059de337f5c8734346b9b34fc/greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0", size = 1119577 },
302
+ { url = "https://files.pythonhosted.org/packages/86/94/1fc0cc068cfde885170e01de40a619b00eaa8f2916bf3541744730ffb4c3/greenlet-3.2.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:024571bbce5f2c1cfff08bf3fbaa43bbc7444f580ae13b0099e95d0e6e67ed36", size = 1147121 },
303
+ { url = "https://files.pythonhosted.org/packages/27/1a/199f9587e8cb08a0658f9c30f3799244307614148ffe8b1e3aa22f324dea/greenlet-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5195fb1e75e592dd04ce79881c8a22becdfa3e6f500e7feb059b1e6fdd54d3e3", size = 297603 },
304
+ { url = "https://files.pythonhosted.org/packages/d8/ca/accd7aa5280eb92b70ed9e8f7fd79dc50a2c21d8c73b9a0856f5b564e222/greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86", size = 271479 },
305
+ { url = "https://files.pythonhosted.org/packages/55/71/01ed9895d9eb49223280ecc98a557585edfa56b3d0e965b9fa9f7f06b6d9/greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97", size = 683952 },
306
+ { url = "https://files.pythonhosted.org/packages/ea/61/638c4bdf460c3c678a0a1ef4c200f347dff80719597e53b5edb2fb27ab54/greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728", size = 696917 },
307
+ { url = "https://files.pythonhosted.org/packages/22/cc/0bd1a7eb759d1f3e3cc2d1bc0f0b487ad3cc9f34d74da4b80f226fde4ec3/greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a", size = 692443 },
308
+ { url = "https://files.pythonhosted.org/packages/67/10/b2a4b63d3f08362662e89c103f7fe28894a51ae0bc890fabf37d1d780e52/greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892", size = 692995 },
309
+ { url = "https://files.pythonhosted.org/packages/5a/c6/ad82f148a4e3ce9564056453a71529732baf5448ad53fc323e37efe34f66/greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141", size = 655320 },
310
+ { url = "https://files.pythonhosted.org/packages/5c/4f/aab73ecaa6b3086a4c89863d94cf26fa84cbff63f52ce9bc4342b3087a06/greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a", size = 301236 },
311
+ ]
312
+
313
  [[package]]
314
  name = "groovy"
315
  version = "0.1.2"
 
@@ -399,6 +438,48 @@ wheels = [
438
  { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 },
439
  ]
440
 
441
+ [[package]]
442
+ name = "lxml"
443
+ version = "5.4.0"
444
+ source = { registry = "https://pypi.org/simple" }
445
+ sdist = { url = "https://files.pythonhosted.org/packages/76/3d/14e82fc7c8fb1b7761f7e748fd47e2ec8276d137b6acfe5a4bb73853e08f/lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd", size = 3679479 }
446
+ wheels = [
447
+ { url = "https://files.pythonhosted.org/packages/f8/4c/d101ace719ca6a4ec043eb516fcfcb1b396a9fccc4fcd9ef593df34ba0d5/lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4", size = 8127392 },
448
+ { url = "https://files.pythonhosted.org/packages/11/84/beddae0cec4dd9ddf46abf156f0af451c13019a0fa25d7445b655ba5ccb7/lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d", size = 4415103 },
449
+ { url = "https://files.pythonhosted.org/packages/d0/25/d0d93a4e763f0462cccd2b8a665bf1e4343dd788c76dcfefa289d46a38a9/lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779", size = 5024224 },
450
+ { url = "https://files.pythonhosted.org/packages/31/ce/1df18fb8f7946e7f3388af378b1f34fcf253b94b9feedb2cec5969da8012/lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e", size = 4769913 },
451
+ { url = "https://files.pythonhosted.org/packages/4e/62/f4a6c60ae7c40d43657f552f3045df05118636be1165b906d3423790447f/lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9", size = 5290441 },
452
+ { url = "https://files.pythonhosted.org/packages/9e/aa/04f00009e1e3a77838c7fc948f161b5d2d5de1136b2b81c712a263829ea4/lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5", size = 4820165 },
453
+ { url = "https://files.pythonhosted.org/packages/c9/1f/e0b2f61fa2404bf0f1fdf1898377e5bd1b74cc9b2cf2c6ba8509b8f27990/lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5", size = 4932580 },
454
+ { url = "https://files.pythonhosted.org/packages/24/a2/8263f351b4ffe0ed3e32ea7b7830f845c795349034f912f490180d88a877/lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4", size = 4759493 },
455
+ { url = "https://files.pythonhosted.org/packages/05/00/41db052f279995c0e35c79d0f0fc9f8122d5b5e9630139c592a0b58c71b4/lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e", size = 5324679 },
456
+ { url = "https://files.pythonhosted.org/packages/1d/be/ee99e6314cdef4587617d3b3b745f9356d9b7dd12a9663c5f3b5734b64ba/lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7", size = 4890691 },
457
+ { url = "https://files.pythonhosted.org/packages/ad/36/239820114bf1d71f38f12208b9c58dec033cbcf80101cde006b9bde5cffd/lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079", size = 4955075 },
458
+ { url = "https://files.pythonhosted.org/packages/d4/e1/1b795cc0b174efc9e13dbd078a9ff79a58728a033142bc6d70a1ee8fc34d/lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20", size = 4838680 },
459
+ { url = "https://files.pythonhosted.org/packages/72/48/3c198455ca108cec5ae3662ae8acd7fd99476812fd712bb17f1b39a0b589/lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8", size = 5391253 },
460
+ { url = "https://files.pythonhosted.org/packages/d6/10/5bf51858971c51ec96cfc13e800a9951f3fd501686f4c18d7d84fe2d6352/lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f", size = 5261651 },
461
+ { url = "https://files.pythonhosted.org/packages/2b/11/06710dd809205377da380546f91d2ac94bad9ff735a72b64ec029f706c85/lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc", size = 5024315 },
462
+ { url = "https://files.pythonhosted.org/packages/f5/b0/15b6217834b5e3a59ebf7f53125e08e318030e8cc0d7310355e6edac98ef/lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f", size = 3486149 },
463
+ { url = "https://files.pythonhosted.org/packages/91/1e/05ddcb57ad2f3069101611bd5f5084157d90861a2ef460bf42f45cced944/lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2", size = 3817095 },
464
+ { url = "https://files.pythonhosted.org/packages/87/cb/2ba1e9dd953415f58548506fa5549a7f373ae55e80c61c9041b7fd09a38a/lxml-5.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:773e27b62920199c6197130632c18fb7ead3257fce1ffb7d286912e56ddb79e0", size = 8110086 },
465
+ { url = "https://files.pythonhosted.org/packages/b5/3e/6602a4dca3ae344e8609914d6ab22e52ce42e3e1638c10967568c5c1450d/lxml-5.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9c671845de9699904b1e9df95acfe8dfc183f2310f163cdaa91a3535af95de", size = 4404613 },
466
+ { url = "https://files.pythonhosted.org/packages/4c/72/bf00988477d3bb452bef9436e45aeea82bb40cdfb4684b83c967c53909c7/lxml-5.4.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9454b8d8200ec99a224df8854786262b1bd6461f4280064c807303c642c05e76", size = 5012008 },
467
+ { url = "https://files.pythonhosted.org/packages/92/1f/93e42d93e9e7a44b2d3354c462cd784dbaaf350f7976b5d7c3f85d68d1b1/lxml-5.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cccd007d5c95279e529c146d095f1d39ac05139de26c098166c4beb9374b0f4d", size = 4760915 },
468
+ { url = "https://files.pythonhosted.org/packages/45/0b/363009390d0b461cf9976a499e83b68f792e4c32ecef092f3f9ef9c4ba54/lxml-5.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fce1294a0497edb034cb416ad3e77ecc89b313cff7adbee5334e4dc0d11f422", size = 5283890 },
469
+ { url = "https://files.pythonhosted.org/packages/19/dc/6056c332f9378ab476c88e301e6549a0454dbee8f0ae16847414f0eccb74/lxml-5.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24974f774f3a78ac12b95e3a20ef0931795ff04dbb16db81a90c37f589819551", size = 4812644 },
470
+ { url = "https://files.pythonhosted.org/packages/ee/8a/f8c66bbb23ecb9048a46a5ef9b495fd23f7543df642dabeebcb2eeb66592/lxml-5.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:497cab4d8254c2a90bf988f162ace2ddbfdd806fce3bda3f581b9d24c852e03c", size = 4921817 },
471
+ { url = "https://files.pythonhosted.org/packages/04/57/2e537083c3f381f83d05d9b176f0d838a9e8961f7ed8ddce3f0217179ce3/lxml-5.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e794f698ae4c5084414efea0f5cc9f4ac562ec02d66e1484ff822ef97c2cadff", size = 4753916 },
472
+ { url = "https://files.pythonhosted.org/packages/d8/80/ea8c4072109a350848f1157ce83ccd9439601274035cd045ac31f47f3417/lxml-5.4.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:2c62891b1ea3094bb12097822b3d44b93fc6c325f2043c4d2736a8ff09e65f60", size = 5289274 },
473
+ { url = "https://files.pythonhosted.org/packages/b3/47/c4be287c48cdc304483457878a3f22999098b9a95f455e3c4bda7ec7fc72/lxml-5.4.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:142accb3e4d1edae4b392bd165a9abdee8a3c432a2cca193df995bc3886249c8", size = 4874757 },
474
+ { url = "https://files.pythonhosted.org/packages/2f/04/6ef935dc74e729932e39478e44d8cfe6a83550552eaa072b7c05f6f22488/lxml-5.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1a42b3a19346e5601d1b8296ff6ef3d76038058f311902edd574461e9c036982", size = 4947028 },
475
+ { url = "https://files.pythonhosted.org/packages/cb/f9/c33fc8daa373ef8a7daddb53175289024512b6619bc9de36d77dca3df44b/lxml-5.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4291d3c409a17febf817259cb37bc62cb7eb398bcc95c1356947e2871911ae61", size = 4834487 },
476
+ { url = "https://files.pythonhosted.org/packages/8d/30/fc92bb595bcb878311e01b418b57d13900f84c2b94f6eca9e5073ea756e6/lxml-5.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f5322cf38fe0e21c2d73901abf68e6329dc02a4994e483adbcf92b568a09a54", size = 5381688 },
477
+ { url = "https://files.pythonhosted.org/packages/43/d1/3ba7bd978ce28bba8e3da2c2e9d5ae3f8f521ad3f0ca6ea4788d086ba00d/lxml-5.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0be91891bdb06ebe65122aa6bf3fc94489960cf7e03033c6f83a90863b23c58b", size = 5242043 },
478
+ { url = "https://files.pythonhosted.org/packages/ee/cd/95fa2201041a610c4d08ddaf31d43b98ecc4b1d74b1e7245b1abdab443cb/lxml-5.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:15a665ad90054a3d4f397bc40f73948d48e36e4c09f9bcffc7d90c87410e478a", size = 5021569 },
479
+ { url = "https://files.pythonhosted.org/packages/2d/a6/31da006fead660b9512d08d23d31e93ad3477dd47cc42e3285f143443176/lxml-5.4.0-cp313-cp313-win32.whl", hash = "sha256:d5663bc1b471c79f5c833cffbc9b87d7bf13f87e055a5c86c363ccd2348d7e82", size = 3485270 },
480
+ { url = "https://files.pythonhosted.org/packages/fc/14/c115516c62a7d2499781d2d3d7215218c0731b2c940753bf9f9b7b73924d/lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f", size = 3814606 },
481
+ ]
482
+
483
  [[package]]
484
  name = "markdown-it-py"
485
  version = "3.0.0"
 
@@ -943,6 +1024,48 @@ wheels = [
1024
  { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
1025
  ]
1026
 
1027
+ [[package]]
1028
+ name = "sqlalchemy"
1029
+ version = "2.0.41"
1030
+ source = { registry = "https://pypi.org/simple" }
1031
+ dependencies = [
1032
+ { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" },
1033
+ { name = "typing-extensions" },
1034
+ ]
1035
+ sdist = { url = "https://files.pythonhosted.org/packages/63/66/45b165c595ec89aa7dcc2c1cd222ab269bc753f1fc7a1e68f8481bd957bf/sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9", size = 9689424 }
1036
+ wheels = [
1037
+ { url = "https://files.pythonhosted.org/packages/3e/2a/f1f4e068b371154740dd10fb81afb5240d5af4aa0087b88d8b308b5429c2/sqlalchemy-2.0.41-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:81f413674d85cfd0dfcd6512e10e0f33c19c21860342a4890c3a2b59479929f9", size = 2119645 },
1038
+ { url = "https://files.pythonhosted.org/packages/9b/e8/c664a7e73d36fbfc4730f8cf2bf930444ea87270f2825efbe17bf808b998/sqlalchemy-2.0.41-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:598d9ebc1e796431bbd068e41e4de4dc34312b7aa3292571bb3674a0cb415dd1", size = 2107399 },
1039
+ { url = "https://files.pythonhosted.org/packages/5c/78/8a9cf6c5e7135540cb682128d091d6afa1b9e48bd049b0d691bf54114f70/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a104c5694dfd2d864a6f91b0956eb5d5883234119cb40010115fd45a16da5e70", size = 3293269 },
1040
+ { url = "https://files.pythonhosted.org/packages/3c/35/f74add3978c20de6323fb11cb5162702670cc7a9420033befb43d8d5b7a4/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6145afea51ff0af7f2564a05fa95eb46f542919e6523729663a5d285ecb3cf5e", size = 3303364 },
1041
+ { url = "https://files.pythonhosted.org/packages/6a/d4/c990f37f52c3f7748ebe98883e2a0f7d038108c2c5a82468d1ff3eec50b7/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b46fa6eae1cd1c20e6e6f44e19984d438b6b2d8616d21d783d150df714f44078", size = 3229072 },
1042
+ { url = "https://files.pythonhosted.org/packages/15/69/cab11fecc7eb64bc561011be2bd03d065b762d87add52a4ca0aca2e12904/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41836fe661cc98abfae476e14ba1906220f92c4e528771a8a3ae6a151242d2ae", size = 3268074 },
1043
+ { url = "https://files.pythonhosted.org/packages/5c/ca/0c19ec16858585d37767b167fc9602593f98998a68a798450558239fb04a/sqlalchemy-2.0.41-cp312-cp312-win32.whl", hash = "sha256:a8808d5cf866c781150d36a3c8eb3adccfa41a8105d031bf27e92c251e3969d6", size = 2084514 },
1044
+ { url = "https://files.pythonhosted.org/packages/7f/23/4c2833d78ff3010a4e17f984c734f52b531a8c9060a50429c9d4b0211be6/sqlalchemy-2.0.41-cp312-cp312-win_amd64.whl", hash = "sha256:5b14e97886199c1f52c14629c11d90c11fbb09e9334fa7bb5f6d068d9ced0ce0", size = 2111557 },
1045
+ { url = "https://files.pythonhosted.org/packages/d3/ad/2e1c6d4f235a97eeef52d0200d8ddda16f6c4dd70ae5ad88c46963440480/sqlalchemy-2.0.41-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4eeb195cdedaf17aab6b247894ff2734dcead6c08f748e617bfe05bd5a218443", size = 2115491 },
1046
+ { url = "https://files.pythonhosted.org/packages/cf/8d/be490e5db8400dacc89056f78a52d44b04fbf75e8439569d5b879623a53b/sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d4ae769b9c1c7757e4ccce94b0641bc203bbdf43ba7a2413ab2523d8d047d8dc", size = 2102827 },
1047
+ { url = "https://files.pythonhosted.org/packages/a0/72/c97ad430f0b0e78efaf2791342e13ffeafcbb3c06242f01a3bb8fe44f65d/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62448526dd9ed3e3beedc93df9bb6b55a436ed1474db31a2af13b313a70a7e1", size = 3225224 },
1048
+ { url = "https://files.pythonhosted.org/packages/5e/51/5ba9ea3246ea068630acf35a6ba0d181e99f1af1afd17e159eac7e8bc2b8/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc56c9788617b8964ad02e8fcfeed4001c1f8ba91a9e1f31483c0dffb207002a", size = 3230045 },
1049
+ { url = "https://files.pythonhosted.org/packages/78/2f/8c14443b2acea700c62f9b4a8bad9e49fc1b65cfb260edead71fd38e9f19/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c153265408d18de4cc5ded1941dcd8315894572cddd3c58df5d5b5705b3fa28d", size = 3159357 },
1050
+ { url = "https://files.pythonhosted.org/packages/fc/b2/43eacbf6ccc5276d76cea18cb7c3d73e294d6fb21f9ff8b4eef9b42bbfd5/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f67766965996e63bb46cfbf2ce5355fc32d9dd3b8ad7e536a920ff9ee422e23", size = 3197511 },
1051
+ { url = "https://files.pythonhosted.org/packages/fa/2e/677c17c5d6a004c3c45334ab1dbe7b7deb834430b282b8a0f75ae220c8eb/sqlalchemy-2.0.41-cp313-cp313-win32.whl", hash = "sha256:bfc9064f6658a3d1cadeaa0ba07570b83ce6801a1314985bf98ec9b95d74e15f", size = 2082420 },
1052
+ { url = "https://files.pythonhosted.org/packages/e9/61/e8c1b9b6307c57157d328dd8b8348ddc4c47ffdf1279365a13b2b98b8049/sqlalchemy-2.0.41-cp313-cp313-win_amd64.whl", hash = "sha256:82ca366a844eb551daff9d2e6e7a9e5e76d2612c8564f58db6c19a726869c1df", size = 2108329 },
1053
+ { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224 },
1054
+ ]
1055
+
1056
+ [[package]]
1057
+ name = "sqlmodel"
1058
+ version = "0.0.24"
1059
+ source = { registry = "https://pypi.org/simple" }
1060
+ dependencies = [
1061
+ { name = "pydantic" },
1062
+ { name = "sqlalchemy" },
1063
+ ]
1064
+ sdist = { url = "https://files.pythonhosted.org/packages/86/4b/c2ad0496f5bdc6073d9b4cef52be9c04f2b37a5773441cc6600b1857648b/sqlmodel-0.0.24.tar.gz", hash = "sha256:cc5c7613c1a5533c9c7867e1aab2fd489a76c9e8a061984da11b4e613c182423", size = 116780 }
1065
+ wheels = [
1066
+ { url = "https://files.pythonhosted.org/packages/16/91/484cd2d05569892b7fef7f5ceab3bc89fb0f8a8c0cde1030d383dbc5449c/sqlmodel-0.0.24-py3-none-any.whl", hash = "sha256:6778852f09370908985b667d6a3ab92910d0d5ec88adcaf23dbc242715ff7193", size = 28622 },
1067
+ ]
1068
+
1069
  [[package]]
1070
  name = "starlette"
1071
  version = "0.46.2"