|
import os |
|
import re |
|
import json |
|
|
|
from tqdm import tqdm |
|
|
|
from config import config |
|
|
|
|
|
# Upper bound (exclusive) on the image indices accepted from a VLM response.
# NOTE(review): the original code hard-coded 8 in three places; presumably
# this is the number of frames packed into one concatenated image -- confirm.
_NUM_CANDIDATES = 8

# Same pattern the original code used to pull (optionally signed) numbers out
# of keys and free text; compiled once instead of on every call.
_NUMBER_RE = re.compile(r"[-+]?(?:\d*\.*\d+)")


def _clean_vlm_response(response):
    """Strip markdown code fences and patch single-quoted keys in *response*."""
    return (
        response.replace("```json\n", "")
        .replace("```", "")
        .replace("':", "\":")
        .replace("{'", "{\"")
        .replace("any image", "0")
        .replace("\n'", "\n\"")
    )


def _strip_image_prefix(text):
    """Remove the "image_" / "image" / "_" decorations around index numbers."""
    return text.replace("image_", "").replace("image", "").replace("_", "")


def _unique_in_range(indices):
    """Return the in-range indices from *indices*, first occurrence only, in order."""
    seen = set()
    kept = []
    for idx in indices:
        # Negative values used to slip through the old ``e < 8`` check and
        # silently index the frame lists from the end.
        if 0 <= idx < _NUM_CANDIDATES and idx not in seen:
            seen.add(idx)
            kept.append(idx)
    return kept


def _indices_from_json(cleaned):
    """Parse *cleaned* as a JSON object and read one index from each key.

    Raises ValueError (bad JSON or non-integer number), IndexError (key
    without a number) or AttributeError (JSON value is not an object).
    """
    parsed = json.loads(cleaned)
    # ``.keys()`` is deliberate: a non-dict JSON value must fail here so the
    # caller falls back to plain integer scanning.
    return _unique_in_range(
        int(_NUMBER_RE.findall(_strip_image_prefix(key))[0])
        for key in parsed.keys()
    )


def _indices_from_text(cleaned):
    """Collect every number found anywhere in *cleaned* as an index.

    Raises ValueError if a matched number is not a plain integer (e.g. "1.5").
    """
    return _unique_in_range(
        int(token) for token in _NUMBER_RE.findall(_strip_image_prefix(cleaned))
    )


def _fit_to_length(indices, target_len):
    """Pad (with the lowest unused indices) or truncate *indices* to *target_len*."""
    if len(indices) < target_len:
        chosen = set(indices)
        # Ascending order makes the padding deterministic instead of relying
        # on set iteration order.
        spare = [i for i in range(_NUM_CANDIDATES) if i not in chosen]
        indices = indices + spare[: target_len - len(indices)]
    else:
        indices = indices[:target_len]

    if len(indices) != target_len:
        # Explicit raise (not ``assert``) so the check survives ``python -O``.
        raise AssertionError(
            f"len(VLM_images_iter):{len(indices)} != kflen_group:{target_len}"
        )
    return indices


def _select_indices(response, q_uid, target_len):
    """Turn one raw VLM response into exactly *target_len* image indices."""
    if not isinstance(response, str):
        # Previously this case fell into the fallback with a stale or unbound
        # ``tmp`` and could silently parse the previous response.
        raise AssertionError(
            f"q_uid:{q_uid} has a non-string VLM response. concatimg:{response!r}"
        )

    cleaned = _clean_vlm_response(response)
    try:
        picked = _indices_from_json(cleaned)
    except (ValueError, IndexError, AttributeError, TypeError):
        # Fallback starts from scratch so partially collected JSON results
        # cannot be duplicated by the integer scan.
        try:
            picked = _indices_from_text(cleaned)
        except ValueError as err:
            # AssertionError is kept as the raised type for backward
            # compatibility with the original ``assert False``.
            raise AssertionError(
                f"q_uid:{q_uid} has a problem of jsonify. concatimg:{response}, tmp:{cleaned}"
            ) from err
        print(f"integer parsing was running at q_uid:{q_uid}, VLM_images_iter:{picked}")

    return _fit_to_length(picked, target_len)


def refine_answer():
    """Refine the VLM keyframe answers and write them out as JSON lines.

    Reads one JSON record per line from ``config.kf_answer_path``. For every
    record, each entry of ``output_VLM`` (a string, or a list whose first
    element is used) is parsed into image indices, first as JSON keys and,
    if that fails, by scanning the text for integers. The indices are padded
    or truncated to ``config.refine_kflen // config.refine_num_group`` items
    and used to look up ``kf_paths_VLM``, ``kf_timeline`` and
    ``kw_perconcat_clip``. The collected values are stored on the record as
    ``VLM_path``, ``VLM_timeline``, ``VLM_images`` and ``VLM_keyword``;
    ``kf_paths_VLM`` and ``kf_timeline`` are dropped, and the record is
    written as one line to ``config.refine_output_path``.

    Raises:
        AssertionError: a response cannot be parsed into indices, or the
            requested group length cannot be reached.
    """
    print("-------- Refine start --------")
    rawpath = config.kf_answer_path
    outpath = config.refine_output_path
    kflen_group = config.refine_kflen // config.refine_num_group

    # Load everything before opening the output so the input handle is closed
    # first; blank lines (e.g. a trailing newline) are skipped.
    with open(os.path.expanduser(rawpath), "r") as infile:
        videos = [json.loads(line) for line in infile if line.strip()]

    with open(outpath, "w") as outfile:
        for video_ in tqdm(videos):
            q_uid = video_['q_uid']
            kf_paths_VLM = video_['kf_paths_VLM']
            kf_timeline = video_['kf_timeline']
            kw_perconcat_clip = video_["kw_perconcat_clip"]

            VLM_path = []
            VLM_timeline = []
            VLM_images = []
            VLM_keyword = []

            for idx_concat, response in enumerate(video_['output_VLM']):
                if isinstance(response, list):
                    response = response[0]

                for e in _select_indices(response, q_uid, kflen_group):
                    VLM_path.append(kf_paths_VLM[idx_concat][e][0])
                    VLM_timeline.append(kf_timeline[idx_concat][e])
                    VLM_images.append(e)
                    VLM_keyword.append(kw_perconcat_clip[idx_concat][e][0])

            video_["VLM_path"] = VLM_path
            video_["VLM_timeline"] = VLM_timeline
            video_["VLM_images"] = VLM_images
            video_["VLM_keyword"] = VLM_keyword

            video_.pop("kf_paths_VLM", None)
            video_.pop("kf_timeline", None)
            outfile.write(json.dumps(video_) + "\n")

    print(f"outpath:{outpath}")
    print("-------- Refine done --------")
|
|