import argparse

import cv2 as cv
import numpy as np

# Parse an "x.y.z" version string into a comparable tuple, e.g. "4.10.0" -> (4, 10, 0).
opencv_python_version = lambda str_version: tuple(map(int, (str_version.split("."))))
assert opencv_python_version(cv.__version__) >= opencv_python_version("4.10.0"), \
    "Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"

from raft import Raft

parser = argparse.ArgumentParser(description='RAFT (https://github.com/princeton-vl/RAFT)')
parser.add_argument('--input1', '-i1', type=str,
                    help='Usage: Set input1 path to first image, omit if using camera or video.')
parser.add_argument('--input2', '-i2', type=str,
                    help='Usage: Set input2 path to second image, omit if using camera or video.')
parser.add_argument('--video', '-vid', type=str,
                    help='Usage: Set video path to desired input video, omit if using camera or two image inputs.')
parser.add_argument('--model', '-m', type=str, default='optical_flow_estimation_raft_2023aug.onnx',
                    help='Usage: Set model path, defaults to optical_flow_estimation_raft_2023aug.onnx.')
parser.add_argument('--save', '-s', action='store_true',
                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
parser.add_argument('--visual', '-vis', action='store_true',
                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
args = parser.parse_args()
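
# Example invocations (assuming this script is saved as demo.py; paths are placeholders):
#   python demo.py -i1 frame1.jpg -i2 frame2.jpg --save --visual   # image pair
#   python demo.py -vid input.mp4 --save                           # video file
#   python demo.py                                                 # default camera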

# Flow magnitudes above this threshold are treated as unknown/invalid.
UNKNOWN_FLOW_THRESH = 1e7


def make_color_wheel():
    """ Generate a color wheel according to the Middlebury color code.

    Returns:
        colorwheel (numpy.ndarray): Color wheel of shape [ncols, 3]
    """
    # Number of hue steps in each transition segment of the wheel:
    # red-yellow, yellow-green, green-cyan, cyan-blue, blue-magenta, magenta-red.
    RY = 15
    YG = 6
    GC = 4
    CB = 11
    BM = 13
    MR = 6

    ncols = RY + YG + GC + CB + BM + MR
    colorwheel = np.zeros([ncols, 3])
    col = 0

    # red to yellow
    colorwheel[0:RY, 0] = 255
    colorwheel[0:RY, 1] = np.floor(255 * np.arange(0, RY) / RY)
    col += RY

    # yellow to green
    colorwheel[col:col+YG, 0] = 255 - np.floor(255 * np.arange(0, YG) / YG)
    colorwheel[col:col+YG, 1] = 255
    col += YG

    # green to cyan
    colorwheel[col:col+GC, 1] = 255
    colorwheel[col:col+GC, 2] = np.floor(255 * np.arange(0, GC) / GC)
    col += GC

    # cyan to blue
    colorwheel[col:col+CB, 1] = 255 - np.floor(255 * np.arange(0, CB) / CB)
    colorwheel[col:col+CB, 2] = 255
    col += CB

    # blue to magenta
    colorwheel[col:col+BM, 2] = 255
    colorwheel[col:col+BM, 0] = np.floor(255 * np.arange(0, BM) / BM)
    col += BM

    # magenta to red
    colorwheel[col:col+MR, 2] = 255 - np.floor(255 * np.arange(0, MR) / MR)
    colorwheel[col:col+MR, 0] = 255

    return colorwheel


colorwheel = make_color_wheel()
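
# Sanity check: the six segments sum to 15 + 6 + 4 + 11 + 13 + 6 = 55 hue steps,
# one RGB row per step.
assert colorwheel.shape == (55, 3)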


def compute_color(u, v):
    """ Compute the optical flow color map.

    Args:
        u (numpy.ndarray): Optical flow horizontal map
        v (numpy.ndarray): Optical flow vertical map

    Returns:
        img (numpy.ndarray): Optical flow in color code
    """
    [h, w] = u.shape
    img = np.zeros([h, w, 3])
    nanIdx = np.isnan(u) | np.isnan(v)
    u[nanIdx] = 0
    v[nanIdx] = 0

    ncols = np.size(colorwheel, 0)

    # Flow magnitude and angle: the angle picks the hue, the magnitude the saturation.
    rad = np.sqrt(u**2 + v**2)
    a = np.arctan2(-v, -u) / np.pi

    # Map the angle in [-1, 1] to fractional wheel indices in [1, ncols].
    fk = (a + 1) / 2 * (ncols - 1) + 1
    k0 = np.floor(fk).astype(int)
    k1 = k0 + 1
    k1[k1 == ncols + 1] = 1
    f = fk - k0

    for i in range(0, np.size(colorwheel, 1)):
        tmp = colorwheel[:, i]
        # Linearly interpolate between the two nearest wheel colors.
        col0 = tmp[k0-1] / 255
        col1 = tmp[k1-1] / 255
        col = (1 - f) * col0 + f * col1

        # Increase saturation with radius inside the unit circle...
        idx = rad <= 1
        col[idx] = 1 - rad[idx] * (1 - col[idx])
        # ...and dim flows whose magnitude exceeds it.
        notidx = np.logical_not(idx)
        col[notidx] *= 0.75
        img[:, :, i] = np.uint8(np.floor(255 * col * (1 - nanIdx)))

    return img
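
# Illustrative example (not part of the pipeline): a uniform unit flow pointing
# right (u = 1, v = 0) lands on the red end of the wheel, so the patch renders
# pure red:
#   >>> u, v = np.ones((2, 2)), np.zeros((2, 2))
#   >>> compute_color(u, v)[0, 0]
#   array([255.,   0.,   0.])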


def flow_to_image(flow):
    """Convert flow into a Middlebury color code image.

    Args:
        flow (np.ndarray): The computed flow map

    Returns:
        (np.ndarray): Image corresponding to the flow map.
    """
    u = flow[:, :, 0]
    v = flow[:, :, 1]

    # Zero out unknown/invalid flow vectors before normalization.
    idxUnknown = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
    u[idxUnknown] = 0
    v[idxUnknown] = 0

    # Normalize by the largest magnitude so the full color wheel is used.
    rad = np.sqrt(u ** 2 + v ** 2)
    maxrad = max(-1, np.max(rad))
    u = u / (maxrad + np.finfo(float).eps)
    v = v / (maxrad + np.finfo(float).eps)

    img = compute_color(u, v)

    # Paint unknown-flow pixels black.
    idx = np.repeat(idxUnknown[:, :, np.newaxis], 3, axis=2)
    img[idx] = 0

    return np.uint8(img)
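
# Illustrative example: the result is an 8-bit RGB image the same size as the
# flow field. Because of the per-call normalization above, the coloring is
# relative to each frame's maximum motion, so hues encode direction while
# brightness only encodes relative, not absolute, speed.
#   >>> flow = np.random.randn(240, 320, 2)
#   >>> flow_to_image(flow).shape
#   (240, 320, 3)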


def draw_flow(flow_map, img_width, img_height):
    """Convert a flow map to an image.

    Args:
        flow_map (np.ndarray): The computed flow map
        img_width (int): The width of the first input image
        img_height (int): The height of the first input image

    Returns:
        (np.ndarray): Image corresponding to the flow map.
    """
    flow_img = flow_to_image(flow_map)

    # flow_to_image produces RGB; convert to BGR for OpenCV display and saving.
    flow_img = cv.cvtColor(flow_img, cv.COLOR_RGB2BGR)

    return cv.resize(flow_img, (img_width, img_height))
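
# Note (assumption about the Raft wrapper): the flow map returned by infer() may
# be at the model's fixed working resolution rather than the input resolution,
# hence the resize back to the source frame size above.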


def visualize(image1, image2, flow_img):
    """ Combine the two input images with the resulting flow image and display them together.

    Args:
        image1 (np.ndarray): The first input image.
        image2 (np.ndarray): The second input image.
        flow_img (np.ndarray): The output flow map drawn as an image

    Returns:
        combined_img (np.ndarray): The visualized result.
    """
    combined_img = np.hstack((image1, image2, flow_img))
    cv.namedWindow("Estimated flow", cv.WINDOW_NORMAL)
    cv.imshow("Estimated flow", combined_img)
    cv.waitKey(0)
    return combined_img
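
# Note: np.hstack requires all three images to have the same height (draw_flow
# already resizes the flow image to the inputs' size), and cv.waitKey(0) blocks
# until a key is pressed.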


if __name__ == '__main__':
    model = Raft(modelPath=args.model)

    # Two-image input: estimate flow for a single pair of frames.
    if args.input1 is not None and args.input2 is not None:
        image1 = cv.imread(args.input1)
        image2 = cv.imread(args.input2)
        img_height, img_width, img_channels = image1.shape

        result = model.infer(image1, image2)
        flow_image = draw_flow(result, img_width, img_height)

        if args.save:
            print('Results saved to result.jpg\n')
            cv.imwrite('result.jpg', flow_image)

        if args.visual:
            input_output_visualization = visualize(image1, image2, flow_image)

    # Video input: estimate flow between frames FLOW_FRAME_OFFSET apart.
    elif args.video is not None:
        cap = cv.VideoCapture(args.video)
        FLOW_FRAME_OFFSET = 3

        if args.visual:
            cv.namedWindow("Estimated flow", cv.WINDOW_NORMAL)

        frame_list = []
        img_array = []
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_list.append(frame)

            # Buffer frames until the oldest and newest are FLOW_FRAME_OFFSET apart.
            if len(frame_list) <= FLOW_FRAME_OFFSET:
                continue

            result = model.infer(frame_list[0], frame_list[-1])
            img_height, img_width, img_channels = frame_list[0].shape
            flow_img = draw_flow(result, img_width, img_height)

            # Blend the flow visualization over the older frame.
            alpha = 0.6
            combined_img = cv.addWeighted(frame_list[0], alpha, flow_img, 1 - alpha, 0)

            if args.visual:
                cv.imshow("Estimated flow", combined_img)
            img_array.append(combined_img)

            # Slide the window forward by one frame.
            frame_list.pop(0)

            if cv.waitKey(1) == ord('q'):
                break

        cap.release()

        if args.save:
            fourcc = cv.VideoWriter_fourcc(*'mp4v')
            height, width, layers = img_array[0].shape
            video = cv.VideoWriter('result.mp4', fourcc, 30.0, (width, height), isColor=True)
            for img in img_array:
                video.write(img)
            video.release()
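            # Sketch (assumption): result.mp4 is written at a fixed 30 fps; to
            # preserve the source timing instead, read the rate before
            # cap.release() above and pass it here, e.g.:
            #   fps = cap.get(cv.CAP_PROP_FPS) or 30.0
            #   video = cv.VideoWriter('result.mp4', fourcc, fps, (width, height), isColor=True)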

        cv.destroyAllWindows()

    # Camera input: estimate flow between consecutively grabbed frames.
    else:
        deviceId = 0
        cap = cv.VideoCapture(deviceId)
        w = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))

        tm = cv.TickMeter()
        while cv.waitKey(30) < 0:
            hasFrame1, frame1 = cap.read()
            hasFrame2, frame2 = cap.read()
            if not hasFrame1:
                print('First frame was not grabbed!')
                break
            if not hasFrame2:
                print('Second frame was not grabbed!')
                break

            # Time the inference only; drawing and display are excluded.
            tm.start()
            result = model.infer(frame1, frame2)
            tm.stop()
            result = draw_flow(result, w, h)

            # visualize() blocks until a key is pressed (cv.waitKey(0)).
            frame = visualize(frame1, frame2, result)

            tm.reset()