acmc committed on
Commit
91009ad
·
verified ·
1 Parent(s): b32600d

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +131 -8
streamlit_app.py CHANGED
@@ -15,6 +15,7 @@ import tempfile
15
  import shutil
16
  import time
17
  from datetime import datetime, timezone
 
18
 
19
  # Set page config
20
  st.set_page_config(
@@ -68,8 +69,63 @@ class AttentionResultsExplorer:
68
  self.available_languages = self._get_available_languages_from_github()
69
  self.relation_types = None
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def _get_available_languages_from_github(self):
72
- """Get available languages from GitHub API without downloading"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  api_url = f"https://api.github.com/repos/{self.github_repo}/contents"
74
 
75
  response = self._make_github_request(api_url, "available languages")
@@ -89,6 +145,56 @@ class AttentionResultsExplorer:
89
  st.warning(f"Could not parse language list from GitHub: {str(e)}")
90
  # Fallback to local cache if available
91
  return self._get_available_languages_local()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  def _get_available_languages_local(self):
94
  """Get available languages from local cache"""
@@ -176,15 +282,25 @@ class AttentionResultsExplorer:
176
  def _discover_config_parameters(self, language=None):
177
  """Dynamically discover configuration parameters from available configs
178
 
179
- For performance optimization, we only inspect the first language since
180
- configurations are consistent across all languages and models.
 
181
  """
182
  try:
183
- # Use first available language if none specified (optimization)
184
  if language is None:
185
- if not self.available_languages:
 
 
186
  return {}
187
- language = self.available_languages[0]
 
 
 
 
 
 
 
188
 
189
  available_configs = self._get_experimental_configs(language)
190
  if not available_configs:
@@ -361,7 +477,14 @@ class AttentionResultsExplorer:
361
  return self._find_best_matching_config(language, target_params)
362
 
363
  def _get_models(self, language, config):
364
- """Get all models for a language and configuration from GitHub API"""
 
 
 
 
 
 
 
365
  api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}/{config}"
366
  response = self._make_github_request(api_url, f"models for {language}/{config}")
367
 
@@ -373,7 +496,7 @@ class AttentionResultsExplorer:
373
  except Exception as e:
374
  st.warning(f"Could not parse models for {language}/{config}: {str(e)}")
375
 
376
- # Fallback to local cache if available
377
  config_dir = self.base_path / f"results_{language}" / config
378
  if config_dir.exists():
379
  models = [d.name for d in config_dir.iterdir() if d.is_dir()]
 
15
  import shutil
16
  import time
17
  from datetime import datetime, timezone
18
+ import yaml
19
 
20
  # Set page config
21
  st.set_page_config(
 
69
  self.available_languages = self._get_available_languages_from_github()
70
  self.relation_types = None
71
 
72
def _download_experiment_config(self):
    """Return the parsed ``experiment_config.yaml``, preferring the local cache.

    Resolution order:

    1. If ``self.use_cache`` is truthy and a cached copy exists under
       ``self.cache_dir``, parse and return it.
    2. Otherwise request the file from the repository's ``master`` branch
       through ``self._make_github_request``.
    3. If that request yields ``None``, fall back to the cached copy (even
       when ``self.use_cache`` is falsy).

    A downloaded file is parsed *before* it is written to the cache, so an
    unparseable response never overwrites an existing cached copy, and a
    failure to write the cache does not discard a config that was downloaded
    and parsed successfully.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file, or ``None`` when
        no usable config could be obtained.
    """
    config_path = self.cache_dir / "experiment_config.yaml"

    def read_cached():
        # Single place for reading the cached copy; used by both the
        # cache-first path and the download-failed fallback.
        with open(config_path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)

    # Check if we have a cached version and use_cache is enabled
    if config_path.exists() and self.use_cache:
        try:
            return read_cached()
        except Exception as e:
            st.warning(f"Could not load cached config, downloading fresh: {str(e)}")

    # Download from GitHub
    config_url = f"https://raw.githubusercontent.com/{self.github_repo}/refs/heads/master/experiment_config.yaml"
    response = self._make_github_request(config_url, "experiment configuration file")

    if response is None:
        # Try to load from cache as fallback
        if config_path.exists():
            try:
                return read_cached()
            except Exception:
                # Deliberately best-effort: the download already failed, so an
                # unreadable cache simply means there is no config to return.
                return None
        return None

    try:
        config_content = response.text
        # safe_load accepts a string directly; no stream wrapper is needed.
        parsed_config = yaml.safe_load(config_content)
    except Exception as e:
        st.error(f"Could not parse experiment configuration: {str(e)}")
        return None

    # Cache only content that parsed to something: an empty document would
    # otherwise be served from the cache as "no config" on later calls.
    if parsed_config is not None:
        try:
            with open(config_path, 'w', encoding='utf-8') as f:
                f.write(config_content)
        except OSError as e:
            # Caching is an optimisation; the parsed config is still usable.
            st.warning(f"Could not cache experiment configuration: {str(e)}")

    return parsed_config
110
+
111
  def _get_available_languages_from_github(self):
112
+ """Get available languages from experiment config file"""
113
+ config = self._download_experiment_config()
114
+
115
+ if config is None:
116
+ # Fallback to directory-based discovery
117
+ return self._get_available_languages_from_directories()
118
+
119
+ try:
120
+ languages = list(config.get('languages', {}).keys())
121
+ return sorted(languages)
122
+ except Exception as e:
123
+ st.warning(f"Could not parse languages from config: {str(e)}")
124
+ # Fallback to directory-based discovery
125
+ return self._get_available_languages_from_directories()
126
+
127
+ def _get_available_languages_from_directories(self):
128
+ """Fallback method: Get available languages from GitHub API directory listing"""
129
  api_url = f"https://api.github.com/repos/{self.github_repo}/contents"
130
 
131
  response = self._make_github_request(api_url, "available languages")
 
145
  st.warning(f"Could not parse language list from GitHub: {str(e)}")
146
  # Fallback to local cache if available
147
  return self._get_available_languages_local()
148
+
149
def _get_models_for_language(self, language):
    """Get all models for a specific language from the experiment config.

    Combines the models listed under ``languages -> <language> -> models``
    with the top-level ``multilingual_models`` list.

    Args:
        language: Key of the language inside the config's ``languages``
            mapping.

    Returns:
        A sorted list of unique model names. The list is empty when the
        config is unavailable or reading it raises; a warning is shown in
        the latter case.
    """
    config = self._download_experiment_config()

    if config is None:
        return []

    try:
        # YAML keys written without a value parse to None, so every level is
        # normalised with `or`. Without this, one null entry would raise and
        # drop the multilingual models as well.
        language_entry = (config.get('languages') or {}).get(language) or {}
        language_models = language_entry.get('models') or []
        multilingual_models = config.get('multilingual_models') or []

        # Combine both lists, then remove duplicates and sort
        all_models = language_models + multilingual_models
        return sorted(set(all_models))

    except Exception as e:
        st.warning(f"Could not get models for {language}: {str(e)}")
        return []
170
+
171
def _get_first_language_model_pair(self):
    """Get the first language-model pair from the experiment config.

    Walks the config's ``languages`` mapping in its stored order and returns
    the first language that lists at least one model, paired with that
    language's first model. If no language lists a model, the first language
    is paired with the first entry of ``multilingual_models``.

    Returns:
        A ``(language, model)`` tuple, or ``(None, None)`` when the config
        is unavailable, holds no usable pair, or reading it raises (a
        warning is shown in the last case).
    """
    config = self._download_experiment_config()

    if config is None:
        return None, None

    try:
        # `or` guards: YAML keys written without a value parse to None.
        languages = config.get('languages') or {}
        multilingual_models = config.get('multilingual_models') or []

        # Find first language with models; entries that are null or have no
        # models are skipped instead of aborting the whole search.
        for language, lang_config in languages.items():
            models = (lang_config or {}).get('models') or []
            if models:
                return language, models[0]

        # If no language-specific models, use first language with first multilingual model
        if multilingual_models and languages:
            first_language = next(iter(languages))
            return first_language, multilingual_models[0]

        return None, None

    except Exception as e:
        st.warning(f"Could not find language-model pair: {str(e)}")
        return None, None
198
 
199
  def _get_available_languages_local(self):
200
  """Get available languages from local cache"""
 
282
  def _discover_config_parameters(self, language=None):
283
  """Dynamically discover configuration parameters from available configs
284
 
285
+ Now uses the first language-model pair from experiment config to discover
286
+ valid configuration parameters, since configurations are consistent across
287
+ all language-model combinations.
288
  """
289
  try:
290
+ # Get the first language-model pair from experiment config
291
  if language is None:
292
+ language, model = self._get_first_language_model_pair()
293
+ if language is None or model is None:
294
+ st.warning("Could not find any language-model pairs in experiment config")
295
  return {}
296
+ st.info(f"🔍 Discovering configurations using {language.upper()}/{model} (configurations are consistent across all languages and models)")
297
+ else:
298
+ # If language is specified, try to get first model for that language
299
+ models = self._get_models_for_language(language)
300
+ if not models:
301
+ st.warning(f"No models found for language {language}")
302
+ return {}
303
+ model = models[0]
304
 
305
  available_configs = self._get_experimental_configs(language)
306
  if not available_configs:
 
477
  return self._find_best_matching_config(language, target_params)
478
 
479
  def _get_models(self, language, config):
480
+ """Get all models for a language and configuration from experiment config"""
481
+ # First try to get models from experiment config
482
+ models = self._get_models_for_language(language)
483
+
484
+ if models:
485
+ return models
486
+
487
+ # Fallback to GitHub API directory listing if config unavailable
488
  api_url = f"https://api.github.com/repos/{self.github_repo}/contents/results_{language}/{config}"
489
  response = self._make_github_request(api_url, f"models for {language}/{config}")
490
 
 
496
  except Exception as e:
497
  st.warning(f"Could not parse models for {language}/{config}: {str(e)}")
498
 
499
+ # Final fallback to local cache if available
500
  config_dir = self.base_path / f"results_{language}" / config
501
  if config_dir.exists():
502
  models = [d.name for d in config_dir.iterdir() if d.is_dir()]