CoffeBank committed on
Commit
95b0158
·
1 Parent(s): 821629c
Files changed (1) hide show
  1. demo/binary_classifier_demo.py +71 -12
demo/binary_classifier_demo.py CHANGED
@@ -101,30 +101,89 @@ def run_binary_classifier(text, show_analysis=False):
101
 
102
  # Basic statistics
103
  analysis_md += "### Основная статистика\n"
104
- analysis_md += f"- Всего токенов: {text_analysis['basic_stats']['total_tokens']}\n"
105
- analysis_md += f"- Всего слов: {text_analysis['basic_stats']['total_words']}\n"
106
- analysis_md += f"- Уникальных слов: {text_analysis['basic_stats']['unique_words']}\n"
107
- analysis_md += f"- Стоп-слов: {text_analysis['basic_stats']['stop_words']}\n"
108
- analysis_md += f"- Средняя длина слова: {text_analysis['basic_stats']['avg_word_length']:.2f} символов\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
  # Lexical diversity
111
  analysis_md += "### Лексическое разнообразие\n"
112
- analysis_md += f"- TTR (Type-Token Ratio): {text_analysis['lexical_diversity']['ttr']:.3f}\n"
113
- analysis_md += f"- MTLD (упрощенный): {text_analysis['lexical_diversity']['mtld']:.2f}\n\n"
 
 
 
 
114
 
115
  # Text structure
116
  analysis_md += "### Структура текста\n"
117
- analysis_md += f"- Количество предложений: {text_analysis['text_structure']['sentence_count']}\n"
118
- analysis_md += f"- Средняя длина предложения: {text_analysis['text_structure']['avg_sentence_length']:.2f} токенов\n\n"
 
 
 
 
119
 
120
  # Readability
121
  analysis_md += "### Читабельность\n"
122
- analysis_md += f"- Flesch-Kincaid score: {text_analysis['readability']['flesh_kincaid_score']:.2f}\n"
123
- analysis_md += f"- Процент длинных слов: {text_analysis['readability']['long_words_percent']:.2f}%\n\n"
 
 
 
 
124
 
125
  # Semantic coherence
126
  analysis_md += "### Семантическая связность\n"
127
- analysis_md += f"- Средняя связность между предложениями: {text_analysis['semantic_coherence']['avg_coherence_score']:.3f}\n"
 
 
 
 
128
 
129
  return gr.Markdown(result_md), gr.Markdown(analysis_md) if analysis_md else None, text
130
 
 
101
 
102
  # Basic statistics
103
  analysis_md += "### Основная статистика\n"
104
+ for key, value in text_analysis.get('basic_stats', {}).items():
105
+ if isinstance(value, float):
106
+ analysis_md += f"- {key}: {value:.2f}\n"
107
+ else:
108
+ analysis_md += f"- {key}: {value}\n"
109
+ analysis_md += "\n"
110
+
111
+ # Morphological analysis
112
+ analysis_md += "### Морфологический анализ\n"
113
+ morph_analysis = text_analysis.get('morphological_analysis', {})
114
+ for key, value in morph_analysis.items():
115
+ if key == 'pos_distribution':
116
+ analysis_md += "- Распределение частей речи:\n"
117
+ for pos, count in value.items():
118
+ analysis_md += f" - {pos}: {count}\n"
119
+ elif isinstance(value, float):
120
+ analysis_md += f"- {key}: {value:.3f}\n"
121
+ else:
122
+ analysis_md += f"- {key}: {value}\n"
123
+ analysis_md += "\n"
124
+
125
+ # Syntactic analysis
126
+ analysis_md += "### Синтаксический анализ\n"
127
+ synt_analysis = text_analysis.get('syntactic_analysis', {})
128
+ for key, value in synt_analysis.items():
129
+ if key == 'dependencies':
130
+ analysis_md += "- Зависимости:\n"
131
+ for dep, count in value.items():
132
+ analysis_md += f" - {dep}: {count}\n"
133
+ elif isinstance(value, float):
134
+ analysis_md += f"- {key}: {value:.3f}\n"
135
+ else:
136
+ analysis_md += f"- {key}: {value}\n"
137
+ analysis_md += "\n"
138
+
139
+ # Named entities
140
+ analysis_md += "### Именованные сущности\n"
141
+ entities = text_analysis.get('named_entities', {})
142
+ for key, value in entities.items():
143
+ if key == 'entity_types':
144
+ analysis_md += "- Типы сущностей:\n"
145
+ for ent, count in value.items():
146
+ analysis_md += f" - {ent}: {count}\n"
147
+ elif isinstance(value, float):
148
+ analysis_md += f"- {key}: {value:.3f}\n"
149
+ else:
150
+ analysis_md += f"- {key}: {value}\n"
151
+ analysis_md += "\n"
152
 
153
  # Lexical diversity
154
  analysis_md += "### Лексическое разнообразие\n"
155
+ for key, value in text_analysis.get('lexical_diversity', {}).items():
156
+ if isinstance(value, float):
157
+ analysis_md += f"- {key}: {value:.3f}\n"
158
+ else:
159
+ analysis_md += f"- {key}: {value}\n"
160
+ analysis_md += "\n"
161
 
162
  # Text structure
163
  analysis_md += "### Структура текста\n"
164
+ for key, value in text_analysis.get('text_structure', {}).items():
165
+ if isinstance(value, float):
166
+ analysis_md += f"- {key}: {value:.2f}\n"
167
+ else:
168
+ analysis_md += f"- {key}: {value}\n"
169
+ analysis_md += "\n"
170
 
171
  # Readability
172
  analysis_md += "### Читабельность\n"
173
+ for key, value in text_analysis.get('readability', {}).items():
174
+ if isinstance(value, float):
175
+ analysis_md += f"- {key}: {value:.2f}\n"
176
+ else:
177
+ analysis_md += f"- {key}: {value}\n"
178
+ analysis_md += "\n"
179
 
180
  # Semantic coherence
181
  analysis_md += "### Семантическая связность\n"
182
+ for key, value in text_analysis.get('semantic_coherence', {}).items():
183
+ if isinstance(value, float):
184
+ analysis_md += f"- {key}: {value:.3f}\n"
185
+ else:
186
+ analysis_md += f"- {key}: {value}\n"
187
 
188
  return gr.Markdown(result_md), gr.Markdown(analysis_md) if analysis_md else None, text
189