lisabdunlap committed on
Commit
5883076
·
verified ·
1 Parent(s): 468e203

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -21
app.py CHANGED
@@ -10,6 +10,9 @@ def load_community_alignment_dataset():
10
  """Load the Facebook Community Alignment Dataset"""
11
  try:
12
  dataset = load_dataset("facebook/community-alignment-dataset")
 
 
 
13
  return dataset
14
  except Exception as e:
15
  print(f"Error loading dataset: {e}")
@@ -45,12 +48,18 @@ def get_conversation_data(conversation_id: int) -> Dict[str, Any]:
45
  if not dataset:
46
  return None
47
 
48
- # Search for conversation in the dataset
49
- for split in dataset.keys():
50
- for item in dataset[split]:
51
- if item.get('conversation_id') == conversation_id:
52
- return item
53
- return None
 
 
 
 
 
 
54
 
55
  def format_annotator_info(item: Dict[str, Any]) -> str:
56
  """Format annotator information"""
@@ -142,10 +151,20 @@ def get_random_conversation() -> int:
142
  if not dataset:
143
  return 0
144
 
145
- # Get a random split and item
146
- split = random.choice(list(dataset.keys()))
147
- item = random.choice(dataset[split])
148
- return item.get('conversation_id', 0)
 
 
 
 
 
 
 
 
 
 
149
 
150
  def get_dataset_stats() -> str:
151
  """Get dataset statistics"""
@@ -174,16 +193,21 @@ def search_conversations(query: str, field: str) -> str:
174
  results = []
175
  query_lower = query.lower()
176
 
177
- for split_name, split_data in dataset.items():
178
- for item in split_data[:100]: # Limit search to first 100 items per split
179
- if field in item and item[field]:
180
- field_value = str(item[field]).lower()
181
- if query_lower in field_value:
182
- results.append({
183
- 'conversation_id': item.get('conversation_id'),
184
- 'split': split_name,
185
- 'field_value': str(item[field])[:100] + "..." if len(str(item[field])) > 100 else str(item[field])
186
- })
 
 
 
 
 
187
 
188
  if not results:
189
  return f"No results found for '{query}' in field '{field}'"
@@ -334,4 +358,4 @@ if __name__ == "__main__":
334
  server_port=7860,
335
  share=False,
336
  show_error=True
337
- )
 
10
  """Load the Facebook Community Alignment Dataset"""
11
  try:
12
  dataset = load_dataset("facebook/community-alignment-dataset")
13
+ print(f"Dataset loaded successfully. Available splits: {list(dataset.keys())}")
14
+ for split_name, split_data in dataset.items():
15
+ print(f"Split '{split_name}': {len(split_data)} items")
16
  return dataset
17
  except Exception as e:
18
  print(f"Error loading dataset: {e}")
 
48
  if not dataset:
49
  return None
50
 
51
+ try:
52
+ # Search for conversation in the dataset
53
+ for split in dataset.keys():
54
+ split_data = dataset[split]
55
+ for i in range(len(split_data)):
56
+ item = split_data[i]
57
+ if item.get('conversation_id') == conversation_id:
58
+ return item
59
+ return None
60
+ except Exception as e:
61
+ print(f"Error getting conversation data: {e}")
62
+ return None
63
 
64
  def format_annotator_info(item: Dict[str, Any]) -> str:
65
  """Format annotator information"""
 
151
  if not dataset:
152
  return 0
153
 
154
+ try:
155
+ # Get a random split
156
+ split = random.choice(list(dataset.keys()))
157
+ split_data = dataset[split]
158
+
159
+ # Get a random index
160
+ random_index = random.randint(0, len(split_data) - 1)
161
+ item = split_data[random_index]
162
+
163
+ return item.get('conversation_id', 0)
164
+ except Exception as e:
165
+ print(f"Error getting random conversation: {e}")
166
+ # Fallback: return a default conversation ID
167
+ return 1061830552573006 # The ID from your example
168
 
169
  def get_dataset_stats() -> str:
170
  """Get dataset statistics"""
 
193
  results = []
194
  query_lower = query.lower()
195
 
196
+ try:
197
+ for split_name, split_data in dataset.items():
198
+ # Limit search to first 100 items per split
199
+ for i in range(min(100, len(split_data))):
200
+ item = split_data[i]
201
+ if field in item and item[field]:
202
+ field_value = str(item[field]).lower()
203
+ if query_lower in field_value:
204
+ results.append({
205
+ 'conversation_id': item.get('conversation_id'),
206
+ 'split': split_name,
207
+ 'field_value': str(item[field])[:100] + "..." if len(str(item[field])) > 100 else str(item[field])
208
+ })
209
+ except Exception as e:
210
+ return f"Error during search: {e}"
211
 
212
  if not results:
213
  return f"No results found for '{query}' in field '{field}'"
 
358
  server_port=7860,
359
  share=False,
360
  show_error=True
361
+ )