dejanseo committed on
Commit
28dd4be
·
verified ·
1 Parent(s): 1762723

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+ import torch
4
+ import pandas as pd
5
+
6
# Constants
# Identifier handed to `from_pretrained` for both the tokenizer and the classifier.
MODEL_ID = "dejanseo/substance"
8
+
9
@st.cache_resource
def load_model():
    """Load the tokenizer and sequence classifier identified by ``MODEL_ID``.

    The function is wrapped in ``st.cache_resource`` so the loaded objects can
    be reused instead of being rebuilt on every call.

    Returns:
        A ``(tokenizer, model)`` tuple; the model has been switched to
        evaluation mode via ``eval()``.
    """
    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    # ``eval()`` returns the module itself, so it can be chained onto the load.
    clf = AutoModelForSequenceClassification.from_pretrained(MODEL_ID).eval()
    return tok, clf
15
+
16
def classify(text, tokenizer, model, max_length=512):
    """Classify one piece of text and report the winning class and its probability.

    Args:
        text: The string to classify.
        tokenizer: Callable that turns ``text`` into model inputs. It is
            invoked with ``return_tensors="pt"``, ``truncation=True`` and
            ``max_length``.
        model: Callable that accepts the tokenizer output as keyword
            arguments and returns an object exposing a ``logits`` tensor.
        max_length: Token limit forwarded to the tokenizer; longer input is
            truncated. Defaults to 512, the value that used to be hard-coded.

    Returns:
        A ``(pred, confidence)`` tuple: the index of the highest-probability
        class as an ``int`` and that class's softmax probability as a
        ``float``.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    )
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits
    # Take the first (only) row explicitly rather than calling a bare
    # ``squeeze()``: squeeze drops *every* size-1 dimension, so a single-logit
    # head would collapse to a 0-d tensor and make ``probs[pred]`` raise.
    # assumes logits is (batch, num_labels) with batch == 1 — TODO confirm
    probs = torch.softmax(logits, dim=-1)[0]
    pred = int(torch.argmax(probs).item())
    confidence = float(probs[pred].item())
    return pred, confidence
24
+
25
# --- UI ---
# Top-level Streamlit script: page setup, text input, then one classification
# per non-empty input line, shown as a table.
st.set_page_config(layout="wide")
st.title("Content Substance Classifier")
st.markdown("This tool estimates the likelihood that content is thin or lacking in substance, using our [deep learning model](https://dejan.ai/blog/content-substance-classification/).")

tokenizer, model = load_model()

pasted_text = st.text_area("Enter full text for granular page analysis:", height=100)
run = st.button("🚀 Run Analysis", use_container_width=True)

# Strip once; the result drives both the hint and the analysis below.
stripped_text = pasted_text.strip()

if not stripped_text:
    st.info("Enter some text above, then click **Run Analysis**.")

# --- Classify Text ---
if run and stripped_text:
    st.markdown("### Results: Text Classification")
    # Each non-blank line of the input is classified on its own.
    lines = [line.strip() for line in stripped_text.split("\n") if line.strip()]
    results = []
    for line in lines:
        label, conf = classify(line, tokenizer, model)
        results.append({
            "Text": line,
            # NOTE(review): assumes class index 0 means "thin content" —
            # confirm against the model's label mapping.
            "Contains Thin Content": "Yes" if label == 0 else "No",
            # Probability of the predicted class, not of "thin" specifically.
            "Confidence": round(conf, 4),
        })
    df = pd.DataFrame(results)
    st.data_editor(
        df,
        column_config={
            "Confidence": st.column_config.ProgressColumn(
                label="Confidence",
                min_value=0.0,
                max_value=1.0,
                format="%.4f",
            ),
        },
        hide_index=True,
        use_container_width=True,
        # Results are model output; keep the table read-only so they cannot
        # be overwritten by accident in the UI.
        disabled=True,
    )