Michelle Lam committed · 37d1f1c · Parent(s): 51bb6f7

Sets default scaffolding method to 'personal' method; adjusts topic selection with new preds_df columns; removes print and log statements

Files changed:
- audit_utils.py +11 -9
- indie_label_svelte/src/ClusterResults.svelte +0 -3
- indie_label_svelte/src/Explore.svelte +0 -1
- indie_label_svelte/src/HypothesisPanel.svelte +3 -6
- indie_label_svelte/src/KeywordSearch.svelte +0 -1
- indie_label_svelte/src/Labeling.svelte +0 -1
- indie_label_svelte/src/TopicTraining.svelte +0 -2
- server.py +28 -24
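
The Python-side changes all apply one debug-gating pattern: each touched function gains a `debug` keyword argument (defaulting to the module-level `DEBUG` constant in server.py and to `False` in audit_utils.py), and formerly unconditional `print` calls now run only under `if debug:`. A minimal sketch of the pattern; the names `DEBUG` and `do_work` here are illustrative, not from the repo:

import time

DEBUG = False  # module-level default; flip to True for local debugging

def do_work(debug=DEBUG):
    start = time.time()
    result = sum(i * i for i in range(10_000))  # stand-in for an expensive step
    if debug:
        # Timing logs are emitted only when explicitly requested
        print("do_work:", time.time() - start, "seconds")
    return result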
audit_utils.py
CHANGED

@@ -115,8 +115,6 @@ readable_to_internal = {
 }
 internal_to_readable = {v: k for k, v in readable_to_internal.items()}
 
-def get_system_preds_df():
-    return system_preds_df
 
 ########################################
 # Data storage helper functions
@@ -455,7 +453,7 @@ def get_predictions_by_user_and_item(predictions):
 # - model: trained model
 # - user_ids: list of user IDs to compute predictions for
 # - sys_eval_df: dataframe of system eval labels (pre-computed)
-def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS):
+def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS, debug=False):
     # Prep dataframe for all predictions we'd like to request
     start = time.time()
     sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
@@ -464,7 +462,8 @@ def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS):
     for user_id in user_ids:
         empty_ratings_rows.extend([[user_id, c_id, 0] for c_id in sys_eval_comment_ids])
     empty_ratings_df = pd.DataFrame(empty_ratings_rows, columns=["user_id", "item_id", "rating"])
-    print("setup", time.time() - start)
+    if debug:
+        print("setup", time.time() - start)
 
     # Evaluate model to get predictions
     start = time.time()
@@ -472,7 +471,8 @@ def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS):
     eval_set_data = Dataset.load_from_df(empty_ratings_df, reader)
     _, testset = train_test_split(eval_set_data, test_size=1.)
     predictions = model.test(testset)
-    print("train_test_split", time.time() - start)
+    if debug:
+        print("train_test_split", time.time() - start)
 
     # Update dataframe with predictions
     start = time.time()
@@ -513,7 +513,7 @@ def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df,
 # - train_df: dataframe of training labels
 # - model_eval_df: dataframe of model eval labels (validation set)
 # - model_type: type of model to train
-def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_based=True):
+def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_based=True, debug=False):
     # Train model
     reader = Reader(rating_scale=(0, 4))
     train_data = Dataset.load_from_df(train_df, reader)
@@ -542,7 +542,8 @@ def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_b
     mae = accuracy.mae(predictions)
     mse = accuracy.mse(predictions)
 
-    print(f"MAE: {mae}, MSE: {mse}, RMSE: {rmse}, FCP: {fcp}")
+    if debug:
+        print(f"MAE: {mae}, MSE: {mse}, RMSE: {rmse}, FCP: {fcp}")
     perf = [mae, mse, rmse, fcp]
 
     return algo, perf
@@ -1038,7 +1039,7 @@ def plot_overall_vis_cluster(cur_user, preds_df, error_type, n_comments=None, bi
 
     return final_plot, df
 
-def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rating_sys", use_model=True):
+def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rating_sys", use_model=True, debug=False):
    df["user_color"] = [get_user_color(user, threshold) for user in df["pred"].tolist()] # get cell colors
    df["system_color"] = [get_user_color(sys, threshold) for sys in df[sys_col].tolist()] # get cell colors
    df["error_color"] = [get_system_color(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())] # get cell colors
@@ -1049,7 +1050,8 @@ def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rat
     if use_model:
         df = df.sort_values(by=["error_amt"], ascending=False) # surface largest errors first
     else:
-        print("get_cluster_comments; not using model")
+        if debug:
+            print("get_cluster_comments; not using model")
         df = df.sort_values(by=[sys_col], ascending=True)
 
     df["id"] = df["item_id"]
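
A note on the `get_preds_df` flow above: it scores every (user_id, item_id) pair by building a placeholder ratings frame (all ratings 0) and pushing it through Surprise's test machinery, where `test_size=1.` routes the entire frame into the testset. A self-contained sketch of that pattern, assuming the scikit-surprise package; the toy users, items, and ratings are invented for illustration:

import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

# Toy training data in the same (user_id, item_id, rating) layout as the diff
train_df = pd.DataFrame({
    "user_id": ["u1", "u1", "u2", "u2"],
    "item_id": ["c1", "c2", "c1", "c3"],
    "rating":  [0, 4, 2, 3],
})
reader = Reader(rating_scale=(0, 4))
model = SVD()
model.fit(Dataset.load_from_df(train_df, reader).build_full_trainset())

# Placeholder ratings (0) for the pairs we want scored; test_size=1. sends
# every row to the testset, mirroring the call in get_preds_df
empty_ratings_df = pd.DataFrame(
    [["u1", "c3", 0], ["u2", "c2", 0]],
    columns=["user_id", "item_id", "rating"])
eval_set_data = Dataset.load_from_df(empty_ratings_df, reader)
_, testset = train_test_split(eval_set_data, test_size=1.)
for p in model.test(testset):
    print(p.uid, p.iid, round(p.est, 2))  # predicted rating per (user, item)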
indie_label_svelte/src/ClusterResults.svelte
CHANGED

@@ -55,12 +55,10 @@
         //your code goes here on location change
         let cur_url = window.location.href;
         let cur_url_elems = cur_url.split("#");
-        // console.log(cur_url_elems)
         if (cur_url_elems.length > 0) {
             let path = cur_url_elems[2];
             if (path == "comment") {
                 let comment_id = cur_url_elems[1].split("/")[0];
-                console.log("comment_id", comment_id)
                 selected_comment_id = parseInt(comment_id);
                 let table_ind = null;
                 for (let i = 0; i < items.length; i++) {
@@ -130,7 +128,6 @@
             items = data["cluster_comments"];
             set_length = items.length;
         }
-        // console.log(set_length);
 
         let cur_open_evidence;
         open_evidence.subscribe(value => {
indie_label_svelte/src/Explore.svelte
CHANGED

@@ -48,7 +48,6 @@
         const text = await response.text();
         const data = JSON.parse(text);
         cur_examples = JSON.parse(data["examples"]);
-        console.log(cur_examples); // TEMP
         return true;
     }
 </script>
indie_label_svelte/src/HypothesisPanel.svelte
CHANGED

@@ -35,14 +35,11 @@
     // Handle routing
     let searchParams = new URLSearchParams(window.location.search);
     let scaffold_method = searchParams.get("scaffold");
+    if (scaffold_method == null) {
+        scaffold_method = "personal"; // Default to personalized model scaffold
+    }
     let topic_vis_method = searchParams.get("topic_vis_method");
 
-    // TODO: connect to selected["error_type"] so changes on main panel affect report panel
-    // let cur_error_type;
-    // error_type.subscribe(value => {
-    //     cur_error_type = value;
-    // });
-
     // Handle drawer
     let open = false;
     let selected = null;
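
This HypothesisPanel change is the commit's headline: when the URL carries no `scaffold` query parameter, the panel now falls back to the "personal" scaffolding method instead of leaving the method unset. The same defaulting could equally live server-side; a hypothetical Flask sketch of that pattern (this route and its default are illustrative, not the repo's):

from flask import Flask, request

app = Flask(__name__)

@app.route("/get_scaffold")
def get_scaffold():
    # request.args.get returns None for a missing key unless a default is given
    scaffold_method = request.args.get("scaffold", "personal")
    return {"scaffold_method": scaffold_method}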
indie_label_svelte/src/KeywordSearch.svelte
CHANGED

@@ -36,7 +36,6 @@
             keyword: keyword,
             error_type: cur_error_type,
         };
-        console.log("topic_df_ids", topic_df_ids);
         let params = new URLSearchParams(req_params).toString();
         const response = await fetch("./get_cluster_results?" + params);
         const text = await response.text();
indie_label_svelte/src/Labeling.svelte
CHANGED

@@ -93,7 +93,6 @@
         const response = await fetch("./get_group_model?" + params);
         const text = await response.text();
         const data = JSON.parse(text);
-        console.log("getGroupModel", data);
         return data
     }
 
indie_label_svelte/src/TopicTraining.svelte
CHANGED

@@ -75,7 +75,6 @@
             topic: topic,
         };
 
-        console.log("topic training model name", model_name);
         let params = new URLSearchParams(req_params).toString();
         const response = await fetch("./get_personalized_model_topic?" + params); // TODO
         const text = await response.text();
@@ -84,7 +83,6 @@
         model_name = data["new_model_name"];
         model_chosen.update((value) => model_name);
 
-        console.log("topicTraining", data);
         return data;
     }
 </script>
server.py
CHANGED

@@ -203,7 +203,7 @@ def get_group_size():
 ########################################
 # ROUTE: /GET_GROUP_MODEL
 @app.route("/get_group_model")
-def get_group_model():
+def get_group_model(debug=DEBUG):
     # Fetch info for initial labeling component
     model_name = request.args.get("model_name")
     user = request.args.get("user")
@@ -236,7 +236,8 @@ def get_group_model():
     mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings_grp, user)
 
     duration = time.time() - start
-    print("Time to train/cache:", duration)
+    if debug:
+        print("Time to train/cache:", duration)
 
     context = {
         "group_size": group_size,
@@ -360,13 +361,14 @@ def get_personalized_model(debug=DEBUG):
 ########################################
 # ROUTE: /GET_PERSONALIZED_MODEL_TOPIC
 @app.route("/get_personalized_model_topic")
-def get_personalized_model_topic():
+def get_personalized_model_topic(debug=DEBUG):
     model_name = request.args.get("model_name")
     ratings_json = request.args.get("ratings")
     user = request.args.get("user")
     ratings = json.loads(ratings_json)
     topic = request.args.get("topic")
-    print(ratings)
+    if debug:
+        print(ratings)
     start = time.time()
 
     # Modify model name
@@ -375,14 +377,13 @@ def get_personalized_model_topic():
 
     # Handle existing or new model cases
     # Train model and cache predictions using new labels
-    print("get_personalized_model_topic train")
+    if debug:
+        print("get_personalized_model_topic train")
     mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings, user, topic=topic)
 
-    duration = time.time() - start
-    print("Time to train/cache:", duration)
-
-    def round_metric(x):
-        return np.round(abs(x), 3)
+    if debug:
+        duration = time.time() - start
+        print("Time to train/cache:", duration)
 
     results = {
         "success": "success",
@@ -499,8 +500,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
     topic_errors = {}
     for topic in topics:
         t_df = df[df["topic"] == topic]
-        y_true = t_df["pred"].to_numpy()
-        y_pred = t_df["rating_sys"].to_numpy()
+        y_true = t_df["pred"].to_numpy()  # Predicted user rating (treated as ground truth)
+        y_pred = t_df["rating_sys"].to_numpy()  # System rating (which we're auditing)
         if topic_vis_method == "mae":
             t_err = mean_absolute_error(y_true, y_pred)
         elif topic_vis_method == "mse":
@@ -508,8 +509,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
         elif topic_vis_method == "avg_diff":
             t_err = np.mean(y_true - y_pred)
         elif topic_vis_method == "fp_proportion":
-            y_true = [0 if rating < threshold else 1 for rating in
-            y_pred = [0 if rating < threshold else 1 for rating in
+            y_true = [0 if rating < threshold else 1 for rating in y_true]
+            y_pred = [0 if rating < threshold else 1 for rating in y_pred]
             try:
                 tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
             except:
@@ -517,8 +518,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
             total = float(len(y_true))
             t_err = fp / total
         elif topic_vis_method == "fn_proportion":
-            y_true = [0 if rating < threshold else 1 for rating in
-            y_pred = [0 if rating < threshold else 1 for rating in
+            y_true = [0 if rating < threshold else 1 for rating in y_true]
+            y_pred = [0 if rating < threshold else 1 for rating in y_pred]
             try:
                 tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
             except:
@@ -529,16 +530,14 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
 
     return topic_errors
 
-def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5):
+def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5, debug=DEBUG):
     threshold = utils.get_toxic_threshold()
 
     # Get topics with greatest amount of error
     preds_file = utils.get_preds_file(cur_user, model)
     with open(preds_file, "rb") as f:
         preds_df = pickle.load(f)
-
-    preds_df_mod = preds_df.merge(system_preds_df, on="item_id", how="left", suffixes=('', '_sys'))
-    preds_df_mod = preds_df_mod[preds_df_mod["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
+    preds_df_mod = preds_df[preds_df["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
     preds_df_mod = preds_df_mod[preds_df_mod["topic_id"] < n_topics]
 
     if topic_vis_method == "median":
@@ -557,11 +556,12 @@ def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5):
         df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
 
     # Get system error
-
+    junk_topics = ["53_maiareficco_kallystas_dyisisitmanila_tractorsazi", "-1_dude_bullshit_fight_ain"]
+    df = df[~df["topic"].isin(junk_topics)] # Exclude known "junk topics"
 
     if topic_vis_method == "median" or topic_vis_method == "mean":
-        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["
-        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["
+        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
+        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
 
         df_under = df[df["error_type"] == "System is under-sensitive"]
         df_under = df_under.sort_values(by=["error_magnitude"], ascending=False).head(n) # surface largest errors first
@@ -577,17 +577,21 @@ def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5):
     elif topic_vis_method == "fp_fn":
         df_under = df.sort_values(by=["fn_proportion"], ascending=False).head(n)
         df_under = df_under[df_under["fn_proportion"] > 0]
+        if debug:
+            print(df_under[["topic", "fn_proportion"]])
         report_under = [get_empty_report(row["topic"], "System is under-sensitive") for _, row in df_under.iterrows()]
 
         df_over = df.sort_values(by=["fp_proportion"], ascending=False).head(n)
         df_over = df_over[df_over["fp_proportion"] > 0]
+        if debug:
+            print(df_over[["topic", "fp_proportion"]])
         report_over = [get_empty_report(row["topic"], "System is over-sensitive") for _, row in df_over.iterrows()]
 
         reports = (report_under + report_over)
         random.shuffle(reports)
     else:
         df = df.sort_values(by=[topic_vis_method], ascending=False).head(n * 2)
-        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["
+        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
         reports = [get_empty_report(row["topic"], row["error_type"]) for _, row in df.iterrows()]
 
     return reports
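
For reference, the `fp_proportion` and `fn_proportion` branches in `get_topic_errors` binarize both rating vectors at `threshold` and read false positives and negatives off scikit-learn's confusion matrix. A small self-contained sketch of that computation, with invented toy ratings:

import numpy as np
from sklearn.metrics import confusion_matrix

threshold = 2
y_true = np.array([0, 1, 3, 4, 2])  # user's predicted ratings, treated as ground truth
y_pred = np.array([3, 0, 3, 1, 2])  # system ratings under audit

# Binarize: 1 = toxic (rating >= threshold), 0 = non-toxic
y_true_bin = [0 if r < threshold else 1 for r in y_true]
y_pred_bin = [0 if r < threshold else 1 for r in y_pred]

tn, fp, fn, tp = confusion_matrix(y_true_bin, y_pred_bin).ravel()
total = float(len(y_true_bin))
print("fp_proportion:", fp / total)  # system flags content the user would not (over-sensitive)
print("fn_proportion:", fn / total)  # system misses content the user would flag (under-sensitive)

The try/except around `ravel()` in the route guards the degenerate case where all binarized labels fall in one class, so `confusion_matrix` collapses to a 1x1 matrix and unpacking four values fails.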