# JasonSmithSO's picture
# Upload 777 files
# 0034848 verified
"""
Copyright (c) Microsoft Corporation.
Licensed under the MIT license.
Basic operations for TSV files
"""
import os
import os.path as op
import json
import numpy as np
import base64
import cv2
from tqdm import tqdm
import yaml
from custom_mesh_graphormer.utils.miscellaneous import mkdir
from custom_mesh_graphormer.utils.tsv_file import TSVFile
def img_from_base64(imagestring):
    """Decode a base64-encoded image into an array.

    The string is base64-decoded, viewed as a ``uint8`` buffer and handed to
    ``cv2.imdecode`` in ``cv2.IMREAD_COLOR`` mode.

    Args:
        imagestring: Base64 text (or bytes) holding an encoded image.

    Returns:
        Whatever ``cv2.imdecode`` returns for the decoded buffer, or ``None``
        when any step raises ``ValueError`` (for example malformed base64).
    """
    try:
        encoded_pixels = np.frombuffer(base64.b64decode(imagestring), np.uint8)
        return cv2.imdecode(encoded_pixels, cv2.IMREAD_COLOR)
    except ValueError:
        return None
def load_linelist_file(linelist_file):
    """Read a line-list file: one integer per line.

    Args:
        linelist_file: Path of the file to read, or ``None``.

    Returns:
        A list with the integer parsed from every line (surrounding
        whitespace is stripped first), or ``None`` when ``linelist_file`` is
        ``None``.
    """
    if linelist_file is None:
        return None
    with open(linelist_file, 'r') as handle:
        return [int(entry.strip()) for entry in handle]
def tsv_writer(values, tsv_file, sep='\t'):
    """Write rows to a TSV file plus a companion ``.lineidx`` offset file.

    Every row is written as its fields joined by ``sep`` followed by a
    newline; ``bytes`` fields are decoded as UTF-8, all other fields go
    through ``str()``. For each row, the offset at which the row starts in
    the TSV file is written as one line of
    ``<tsv_file without extension>.lineidx``. Both files are first written
    under a ``.tmp`` suffix and only moved into place once every row has
    been written.

    Args:
        values: Iterable of rows, each row an iterable of fields. Neither
            the iterable nor any individual row may be ``None``.
        tsv_file: Destination path of the TSV file.
        sep: Field separator.

    Raises:
        AssertionError: If ``values`` or one of its rows is ``None``.
    """
    # Checked before touching the file system so a bad call leaves no
    # temporary files behind.
    assert values is not None
    mkdir(op.dirname(tsv_file))
    lineidx_file = op.splitext(tsv_file)[0] + '.lineidx'
    tsv_file_tmp = tsv_file + '.tmp'
    lineidx_file_tmp = lineidx_file + '.tmp'
    offset = 0
    # newline='\n' disables platform newline translation, so the bytes that
    # reach the disk are exactly the ones counted for the offsets below.
    with open(tsv_file_tmp, 'w', newline='\n') as fp, \
            open(lineidx_file_tmp, 'w') as fpidx:
        for value in values:
            assert value is not None
            fields = [
                v.decode('utf-8') if isinstance(v, bytes) else str(v)
                for v in value
            ]
            line = sep.join(fields) + '\n'
            fp.write(line)
            fpidx.write(str(offset) + '\n')
            # Count encoded bytes, not characters: the offsets are file
            # positions (presumably used for seek() by the TSV reader class,
            # which is not visible here), and a non-ASCII character occupies
            # more than one byte in most encodings.
            offset += len(line.encode(fp.encoding, fp.errors))
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename raises on Windows when the target already exists.
    os.replace(tsv_file_tmp, tsv_file)
    os.replace(lineidx_file_tmp, lineidx_file)
def tsv_reader(tsv_file, sep='\t'):
    """Lazily iterate over the rows of a TSV file.

    Args:
        tsv_file: Path of the file to read.
        sep: Field separator used to split each line.

    Yields:
        One list per line, containing the line's fields with leading and
        trailing whitespace (including the line terminator) stripped.
    """
    with open(tsv_file, 'r') as handle:
        for raw_line in handle:
            columns = raw_line.split(sep)
            yield list(map(str.strip, columns))
def config_save_file(tsv_file, save_file=None, append_str='.new.tsv'):
    """Pick the output path for a file derived from ``tsv_file``.

    Args:
        tsv_file: Path of the source file.
        save_file: Explicit output path; returned unchanged when it is not
            ``None``.
        append_str: Suffix appended to ``tsv_file`` with its extension
            removed when no explicit path is given.

    Returns:
        ``save_file`` if provided, otherwise the derived path.
    """
    if save_file is None:
        stem, _ext = op.splitext(tsv_file)
        return stem + append_str
    return save_file
def get_line_list(linelist_file=None, num_rows=None):
    """Return the row indices to use.

    Args:
        linelist_file: Path of a line-list file; takes precedence when it is
            not ``None`` and is read with ``load_linelist_file``.
        num_rows: Number of rows; used only when no line-list file is given.

    Returns:
        The indices loaded from ``linelist_file``, else ``[0, ..., num_rows - 1]``,
        else ``None`` when both arguments are ``None``.
    """
    if linelist_file is None:
        return None if num_rows is None else list(range(num_rows))
    return load_linelist_file(linelist_file)
def generate_hw_file(img_file, save_file=None):
    """Write a TSV listing the height and width of every image in ``img_file``.

    Each row of ``img_file`` is read with ``tsv_reader``; its first column is
    copied to the output and its last column is decoded with
    ``img_from_base64``. The output row is
    ``[first_column, '[{"height": H, "width": W}]']`` where ``H`` and ``W``
    are the first two entries of the decoded image's ``shape``.

    Args:
        img_file: Path of the input TSV file.
        save_file: Output path; defaults to ``img_file`` with its extension
            replaced by ``.hw.tsv``.

    Raises:
        ValueError: If the image of some row cannot be decoded.
    """
    rows = tsv_reader(img_file)

    def gen_rows():
        for i, row in tqdm(enumerate(rows)):
            img = img_from_base64(row[-1])
            if img is None:
                # img_from_base64 signals failure with None; report which row
                # is broken instead of failing on ``None.shape``.
                raise ValueError(
                    'cannot decode image in row {} (key {!r}) of {}'.format(
                        i, row[0], img_file))
            height, width = img.shape[0], img.shape[1]
            yield [row[0], json.dumps([{"height": height, "width": width}])]

    save_file = config_save_file(img_file, save_file, '.hw.tsv')
    tsv_writer(gen_rows(), save_file)
def generate_linelist_file(label_file, save_file=None, ignore_attrs=()):
    """Write the indices of the rows in ``label_file`` that carry usable labels.

    The second column of every row is parsed as JSON. A row is kept when its
    label list is non-empty, unless ``ignore_attrs`` is given and every label
    has a truthy value for at least one of those attributes (i.e. the image
    only has labels that are to be ignored).

    Args:
        label_file: Path of the label TSV file.
        save_file: Output path; defaults to ``label_file`` with its extension
            replaced by ``.linelist.tsv``.
        ignore_attrs: Names of label attributes that mark a label as ignored.
    """
    def only_ignored(labels):
        # Built as lists on purpose so every label/attribute is evaluated.
        flags = [
            any([lab[attr] for attr in ignore_attrs if attr in lab])
            for lab in labels
        ]
        return all(flags)

    kept = []
    for row_idx, row in tqdm(enumerate(tsv_reader(label_file))):
        labels = json.loads(row[1])
        if not labels:
            continue
        if ignore_attrs and only_ignored(labels):
            continue
        kept.append([row_idx])

    out_path = config_save_file(label_file, save_file, '.linelist.tsv')
    tsv_writer(kept, out_path)
def load_from_yaml_file(yaml_file):
    """Parse a YAML file and return the loaded object.

    Args:
        yaml_file: Path of the YAML file to read.

    Returns:
        The Python object produced by ``yaml.load`` for the file's content.
    """
    # yaml.CLoader is only present when PyYAML was built with the LibYAML
    # bindings; fall back to the pure-Python equivalent instead of failing
    # with AttributeError on installations without it.
    loader = getattr(yaml, 'CLoader', yaml.Loader)
    with open(yaml_file, 'r') as fp:
        # NOTE(review): Loader/CLoader can construct arbitrary Python objects
        # from tagged YAML. Only use this on trusted files; consider
        # SafeLoader/CSafeLoader if the configs never rely on Python tags.
        return yaml.load(fp, Loader=loader)
def find_file_path_in_yaml(fname, root):
    """Resolve ``fname`` either as given or relative to ``root``.

    Args:
        fname: File path to look for, or ``None``.
        root: Directory that ``fname`` is joined onto when it does not exist
            as given.

    Returns:
        ``fname`` if it is an existing file, otherwise ``root/fname`` if that
        is an existing file, or ``None`` when ``fname`` is ``None``.

    Raises:
        FileNotFoundError: If neither candidate is an existing file; the
            reported filename is ``root/fname``.
    """
    # errno is not among this module's top-level imports, so it is imported
    # here; without it the "not found" path died with NameError instead of
    # raising the intended FileNotFoundError.
    import errno

    if fname is None:
        return None
    if op.isfile(fname):
        return fname
    candidate = op.join(root, fname)
    if op.isfile(candidate):
        return candidate
    raise FileNotFoundError(
        errno.ENOENT, os.strerror(errno.ENOENT), candidate
    )