Files changed (1) hide show
  1. classifier.py +65 -33
classifier.py CHANGED
@@ -268,62 +268,94 @@ class GarbageClassifier:
268
  def _extract_reasoning(self, response: str) -> str:
269
  """Extract only the reasoning content, removing all formatting markers and classification info"""
270
  import re
271
-
272
  # Remove all formatting markers
273
  cleaned_response = response.replace("**Classification**:", "")
274
  cleaned_response = cleaned_response.replace("**Reasoning**:", "")
275
- cleaned_response = re.sub(
276
- r"\*\*.*?\*\*:", "", cleaned_response
277
- ) # Remove any **text**: patterns
278
- cleaned_response = cleaned_response.replace(
279
- "**", ""
280
- ) # Remove remaining ** markers
281
-
282
  # Remove category names that might appear at the beginning
283
  categories = self.knowledge.get_categories()
284
  for category in categories:
285
  if cleaned_response.strip().startswith(category):
286
  cleaned_response = cleaned_response.replace(category, "", 1)
287
  break
288
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  # Split into sentences and clean up
290
  sentences = []
291
-
292
- # Split by common sentence endings
293
- parts = re.split(r"[.!?]\s+", cleaned_response)
294
-
295
- for part in parts:
 
 
 
 
 
 
 
 
 
296
  part = part.strip()
297
  if not part:
298
  continue
299
-
300
- # Skip parts that are just category names
301
- if part in categories:
302
  continue
303
-
304
- # Skip parts that start with category names
305
  is_category_line = False
306
- for category in categories:
307
- if part.startswith(category):
308
  is_category_line = True
309
  break
310
-
311
  if is_category_line:
312
  continue
313
-
314
  # Clean up the sentence
315
- part = re.sub(
316
- r"^[A-Za-z\s]+:", "", part
317
- ).strip() # Remove "Category:" type prefixes
318
-
319
  if part and len(part) > 3: # Only keep meaningful content
320
  sentences.append(part)
321
-
322
- # Join sentences and ensure proper punctuation
323
- reasoning = ". ".join(sentences)
324
- if reasoning and not reasoning.endswith((".", "!", "?")):
325
- reasoning += "."
326
-
 
 
 
 
 
 
 
 
 
 
 
 
327
  return reasoning if reasoning else "Analysis not available"
328
 
329
  def get_categories_info(self):
 
268
  def _extract_reasoning(self, response: str) -> str:
269
  """Extract only the reasoning content, removing all formatting markers and classification info"""
270
  import re
271
+
272
  # Remove all formatting markers
273
  cleaned_response = response.replace("**Classification**:", "")
274
  cleaned_response = cleaned_response.replace("**Reasoning**:", "")
275
+ cleaned_response = re.sub(r'\*\*.*?\*\*:', '', cleaned_response) # Remove any **text**: patterns
276
+ cleaned_response = cleaned_response.replace("**", "") # Remove remaining ** markers
277
+
 
 
 
 
278
  # Remove category names that might appear at the beginning
279
  categories = self.knowledge.get_categories()
280
  for category in categories:
281
  if cleaned_response.strip().startswith(category):
282
  cleaned_response = cleaned_response.replace(category, "", 1)
283
  break
284
+
285
+ # Remove common material names that might appear at the beginning
286
+ material_names = [
287
+ "Glass", "Plastic", "Metal", "Paper", "Cardboard", "Aluminum",
288
+ "Steel", "Iron", "Tin", "Foil", "Wood", "Ceramic", "Fabric",
289
+ "Recyclable Waste", "Food/Kitchen Waste", "Hazardous Waste", "Other Waste"
290
+ ]
291
+
292
+ # Clean the response
293
+ cleaned_response = cleaned_response.strip()
294
+
295
+ # Remove material names at the beginning
296
+ for material in material_names:
297
+ if cleaned_response.startswith(material):
298
+ # Remove the material name and any following punctuation/whitespace
299
+ cleaned_response = cleaned_response[len(material):].lstrip(" .,;:")
300
+ break
301
+
302
  # Split into sentences and clean up
303
  sentences = []
304
+
305
+ # Split by common sentence endings, but keep the endings
306
+ parts = re.split(r'([.!?])\s+', cleaned_response)
307
+
308
+ # Rejoin parts to maintain sentence structure
309
+ reconstructed_parts = []
310
+ for i in range(0, len(parts), 2):
311
+ if i < len(parts):
312
+ sentence = parts[i]
313
+ if i + 1 < len(parts):
314
+ sentence += parts[i + 1] # Add the punctuation back
315
+ reconstructed_parts.append(sentence)
316
+
317
+ for part in reconstructed_parts:
318
  part = part.strip()
319
  if not part:
320
  continue
321
+
322
+ # Skip parts that are just category names or material names
323
+ if part in categories or part.rstrip(".,;:") in material_names:
324
  continue
325
+
326
+ # Skip parts that start with category names or material names
327
  is_category_line = False
328
+ for item in categories + material_names:
329
+ if part.startswith(item):
330
  is_category_line = True
331
  break
332
+
333
  if is_category_line:
334
  continue
335
+
336
  # Clean up the sentence
337
+ part = re.sub(r'^[A-Za-z\s]+:', '', part).strip() # Remove "Category:" type prefixes
338
+
 
 
339
  if part and len(part) > 3: # Only keep meaningful content
340
  sentences.append(part)
341
+
342
+ # Join sentences
343
+ reasoning = ' '.join(sentences)
344
+
345
+ # Final cleanup - remove any remaining standalone material words at the beginning
346
+ reasoning_words = reasoning.split()
347
+ if reasoning_words and reasoning_words[0] in [m.lower() for m in material_names]:
348
+ reasoning_words = reasoning_words[1:]
349
+ reasoning = ' '.join(reasoning_words)
350
+
351
+ # Ensure proper capitalization
352
+ if reasoning:
353
+ reasoning = reasoning[0].upper() + reasoning[1:] if len(reasoning) > 1 else reasoning.upper()
354
+
355
+ # Ensure proper punctuation
356
+ if not reasoning.endswith(('.', '!', '?')):
357
+ reasoning += '.'
358
+
359
  return reasoning if reasoning else "Analysis not available"
360
 
361
  def get_categories_info(self):