# File size: 9,320 Bytes

import argparse

import cv2 as cv
import numpy as np

# Check OpenCV version
def opencv_python_version(str_version):
    """Parse a dotted version string into a tuple of ints for comparison.

    Only the leading decimal digits of each dot-separated component are
    used, so suffixed builds such as "4.10.0-dev" parse as (4, 10, 0)
    instead of raising ValueError. Parsing stops at the first component
    that does not start with a digit.

    Args:
        str_version (str): Version string, e.g. cv.__version__.

    Returns:
        tuple: Integer version components, e.g. (4, 10, 0).
    """
    components = []
    for part in str_version.split("."):
        part = part.strip()
        # Count the leading ASCII digits of this component.
        end = 0
        while end < len(part) and part[end] in "0123456789":
            end += 1
        if end == 0:
            # Non-numeric component: nothing after it can be compared.
            break
        components.append(int(part[:end]))
    return tuple(components)
# Fail fast at import time when the installed OpenCV is older than 4.10.0.
# NOTE: this is an assert statement, so it is skipped when Python runs with -O.
assert opencv_python_version(cv.__version__) >= opencv_python_version("4.10.0"), \
       "Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"

from raft import Raft

# Command-line interface. The input source is chosen from these options:
# two still images, a video file, or (when neither is given) the default camera.
parser = argparse.ArgumentParser(
    description='RAFT (https://github.com/princeton-vl/RAFT)',
)

# --- Input selection -------------------------------------------------------
parser.add_argument(
    '--input1', '-i1',
    type=str,
    help='Usage: Set input1 path to first image, omit if using camera or video.',
)
parser.add_argument(
    '--input2', '-i2',
    type=str,
    help='Usage: Set input2 path to second image, omit if using camera or video.',
)
parser.add_argument(
    '--video', '-vid',
    type=str,
    help='Usage: Set video path to desired input video, omit if using camera or two image inputs.',
)

# --- Model -----------------------------------------------------------------
parser.add_argument(
    '--model', '-m',
    type=str,
    default='optical_flow_estimation_raft_2023aug.onnx',
    help='Usage: Set model path, defaults to optical_flow_estimation_raft_2023aug.onnx.',
)

# --- Output switches -------------------------------------------------------
parser.add_argument(
    '--save', '-s',
    action='store_true',
    help='Usage: Specify to save a file with results. Invalid in case of camera input.',
)
parser.add_argument(
    '--visual', '-vis',
    action='store_true',
    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.',
)

# Parsed at import time; the script body below reads the result from `args`.
args = parser.parse_args()

# Flow components whose absolute value exceeds this threshold are treated as
# unknown by flow_to_image: they are zeroed before normalisation and the
# corresponding pixels are painted black in the output image.
UNKNOWN_FLOW_THRESH = 1e7

def make_color_wheel():
    """Build the Middlebury color wheel used to color-code flow direction.

    The wheel is a sequence of six hue transitions. In each transition one
    RGB channel is held at 255 while a second channel ramps linearly (with
    flooring) up from 0 or down from 255; the third channel stays at 0.

    Returns:
        Color wheel(numpy.ndarray): Float array of shape (55, 3), one RGB
            row (values in 0-255) per wheel entry.
    """
    # (number of entries, channel held at 255, ramping channel, ramp rises?)
    transitions = (
        (15, 0, 1, True),    # RY: red     -> yellow
        (6, 1, 0, False),    # YG: yellow  -> green
        (4, 1, 2, True),     # GC: green   -> cyan
        (11, 2, 1, False),   # CB: cyan    -> blue
        (13, 2, 0, True),    # BM: blue    -> magenta
        (6, 0, 2, False),    # MR: magenta -> red
    )

    total_entries = sum(length for length, _, _, _ in transitions)
    wheel = np.zeros([total_entries, 3])

    start = 0
    for length, held_channel, ramp_channel, rising in transitions:
        stop = start + length
        ramp = np.floor(255 * np.arange(0, length) / length)

        wheel[start:stop, held_channel] = 255
        wheel[start:stop, ramp_channel] = ramp if rising else 255 - ramp

        start = stop

    return wheel

# Module-level color table, built once at import time and read by compute_color.
colorwheel = make_color_wheel()

def compute_color(u, v):
    """Compute optical flow color map.

    The flow direction selects a linearly interpolated entry of the
    module-level color wheel. For magnitudes up to 1 the color is blended
    towards white as the magnitude shrinks; larger magnitudes are dimmed.
    Pixels where either component is NaN come out black. The input arrays
    are not modified.

    Args:
        u(numpy.ndarray): Optical flow horizontal map (2-D)
        v(numpy.ndarray): Optical flow vertical map, same shape as u

    Returns:
        img (numpy.ndarray): Optical flow in color code, shape (h, w, 3).
            Values are floored and passed through uint8 before being stored
            in a float array.
    """
    height, width = u.shape
    img = np.zeros([height, width, 3])

    nan_mask = np.isnan(u) | np.isnan(v)
    # Zero the NaNs on private copies so the caller's arrays stay untouched.
    u = u.copy()
    v = v.copy()
    u[nan_mask] = 0
    v[nan_mask] = 0

    ncols = np.size(colorwheel, 0)

    rad = np.sqrt(u**2 + v**2)

    # Flow angle scaled to [-1, 1], then mapped onto a fractional 1-based
    # position on the wheel.
    angle = np.arctan2(-v, -u) / np.pi
    fk = (angle + 1) / 2 * (ncols - 1) + 1

    k0 = np.floor(fk).astype(int)
    k1 = k0 + 1
    k1[k1 == ncols + 1] = 1  # wrap past the last entry back to the first
    frac = fk - k0

    # These masks are identical for every channel, so build them only once.
    inside = rad <= 1
    outside = np.logical_not(inside)
    keep = 1 - nan_mask  # 0 where the input was NaN, 1 elsewhere

    for channel in range(np.size(colorwheel, 1)):
        wheel_column = colorwheel[:, channel]
        col0 = wheel_column[k0 - 1] / 255
        col1 = wheel_column[k1 - 1] / 255
        col = (1 - frac) * col0 + frac * col1

        # Magnitude <= 1: the smaller the flow, the closer to white.
        col[inside] = 1 - rad[inside] * (1 - col[inside])
        # Magnitude > 1: dim the color.
        col[outside] *= 0.75

        img[:, :, channel] = np.uint8(np.floor(255 * col * keep))

    return img

def flow_to_image(flow):
    """Convert flow into middlebury color code image

    The flow is normalised by its largest magnitude before being color
    coded. Pixels whose flow is unknown (a component is NaN or exceeds
    UNKNOWN_FLOW_THRESH in absolute value) are painted black. The input
    array is not modified.

    Args:
        flow (np.ndarray): The computed flow map; along the last axis,
            index 0 is the horizontal and index 1 the vertical component.

    Returns:
        (np.ndarray): Image corresponding to the flow map (uint8, 3 channels).
    """
    # Slicing gives views into `flow`; copy so that zeroing unknown pixels
    # below does not silently edit the caller's flow map.
    u = flow[:, :, 0].copy()
    v = flow[:, :, 1].copy()

    # NaN counts as unknown as well. Left in place it would turn the maximum
    # below into NaN and corrupt the normalisation of every other pixel.
    idx_unknown = (
        np.isnan(u)
        | np.isnan(v)
        | (np.abs(u) > UNKNOWN_FLOW_THRESH)
        | (np.abs(v) > UNKNOWN_FLOW_THRESH)
    )
    u[idx_unknown] = 0
    v[idx_unknown] = 0

    rad = np.sqrt(u ** 2 + v ** 2)
    # eps keeps the division well defined when the whole flow map is zero.
    scale = np.max(rad) + np.finfo(float).eps

    img = compute_color(u / scale, v / scale)

    # A 2-D boolean mask on the (h, w, 3) image blanks all three channels.
    img[idx_unknown] = 0

    return np.uint8(img)


def draw_flow(flow_map, img_width, img_height):
    """Render a flow map as a BGR image with the given size

    Args:
        flow_map (np.ndarray): The computed flow map
        img_width (int): Width of the returned image (the width of the first input photo)
        img_height (int): Height of the returned image (the height of the first input photo)

    Returns:
        (np.ndarray): Image corresponding to the flow map.
    """
    # Color-code the flow, then swap the channel order from RGB to BGR
    rgb_flow = flow_to_image(flow_map)
    bgr_flow = cv.cvtColor(rgb_flow, cv.COLOR_RGB2BGR)

    # Scale the flow image to the requested size; cv.resize takes (width, height)
    target_size = (img_width, img_height)
    return cv.resize(bgr_flow, target_size)


def visualize(image1, image2, flow_img):
    """
    Show the two input images and the flow image side by side in one window.

    The call blocks in cv.waitKey(0) until a key is pressed.

    Args:
        image1 (np.ndarray): The first input image.
        image2 (np.ndarray): The second input image.
        flow_img (np.ndarray): The output flow map drawn as an image

    Returns:
        combined_img (np.ndarray): The three images stacked horizontally.
    """
    window_title = "Estimated flow"

    # Left to right: first input, second input, flow visualisation
    panels = (image1, image2, flow_img)
    combined_img = np.hstack(panels)

    cv.namedWindow(window_title, cv.WINDOW_NORMAL)
    cv.imshow(window_title, combined_img)
    cv.waitKey(0)

    return combined_img


if __name__ == '__main__':
    # Script entry point. Three modes, selected by the command-line options:
    #   * --input1 and --input2: flow between two still images
    #   * --video: flow overlaid on frames of a video file
    #   * neither: live flow from the default camera
    from collections import deque  # local import: only the script body needs it

    # Instantiate RAFT
    model = Raft(modelPath=args.model)

    if args.input1 is not None and args.input2 is not None:
        # Read image
        image1 = cv.imread(args.input1)
        image2 = cv.imread(args.input2)
        # cv.imread returns None instead of raising when a file cannot be read
        if image1 is None:
            raise SystemExit('Could not read image: {}'.format(args.input1))
        if image2 is None:
            raise SystemExit('Could not read image: {}'.format(args.input2))
        img_height, img_width = image1.shape[:2]

        # Inference
        result = model.infer(image1, image2)

        # Create flow image based on the result flow map
        flow_image = draw_flow(result, img_width, img_height)

        # Save results if save is true
        if args.save:
            print('Results saved to result.jpg\n')
            cv.imwrite('result.jpg', flow_image)

        # Visualize results in a new window (blocks until a key is pressed)
        if args.visual:
            visualize(image1, image2, flow_image)

    elif args.video is not None:
        cap = cv.VideoCapture(args.video)
        FLOW_FRAME_OFFSET = 3 # Number of frame difference to estimate the optical flow

        if args.visual:
            cv.namedWindow("Estimated flow", cv.WINDOW_NORMAL)

        # Sliding window over the most recent frames. With maxlen set, the
        # oldest frame is dropped automatically, so the first and last entries
        # are always exactly FLOW_FRAME_OFFSET frames apart and memory use
        # stays bounded.
        frame_window = deque(maxlen=FLOW_FRAME_OFFSET + 1)
        # Opened lazily: the first output frame determines the video size, and
        # frames are written as they are produced instead of buffered in RAM.
        writer = None
        frame_num = 0
        try:
            while cap.isOpened():
                try:
                    # Read frame from the video
                    ret, frame = cap.read()
                except cv.error:
                    # Best effort: skip a frame OpenCV failed to read
                    continue
                if not ret:
                    break
                frame_window.append(frame)

                # Run inference only on every (FLOW_FRAME_OFFSET + 1)-th frame
                frame_num += 1
                if frame_num <= FLOW_FRAME_OFFSET:
                    continue
                frame_num = 0

                first_frame = frame_window[0]
                last_frame = frame_window[-1]
                result = model.infer(first_frame, last_frame)
                img_height, img_width = first_frame.shape[:2]
                flow_img = draw_flow(result, img_width, img_height)

                # Blend the flow colors over the earlier frame of the pair
                alpha = 0.6
                combined_img = cv.addWeighted(first_frame, alpha, flow_img, (1-alpha), 0)

                if args.save:
                    if writer is None:
                        fourcc = cv.VideoWriter_fourcc(*'mp4v')
                        height, width = combined_img.shape[:2]
                        writer = cv.VideoWriter('result.mp4', fourcc, 30.0, (width, height), isColor=True)
                    writer.write(combined_img)

                if args.visual:
                    cv.imshow("Estimated flow", combined_img)
                    # Press key q to stop
                    if cv.waitKey(1) == ord('q'):
                        break
        finally:
            # Release resources even if inference or drawing raised
            cap.release()
            if writer is not None:
                writer.release()
            cv.destroyAllWindows()

    else: # Omit input to call default camera
        deviceId = 0
        cap = cv.VideoCapture(deviceId)

        cv.namedWindow("Estimated flow", cv.WINDOW_NORMAL)
        tm = cv.TickMeter()
        try:
            # Runs until any key is pressed
            while cv.waitKey(30) < 0:
                hasFrame1, frame1 = cap.read()
                if not hasFrame1:
                    print('First frame was not grabbed!')
                    break

                hasFrame2, frame2 = cap.read()
                if not hasFrame2:
                    print('Second frame was not grabbed!')
                    break

                # Inference
                tm.start()
                result = model.infer(frame1, frame2)
                tm.stop()

                # Size the flow image from the actual frame so it can always be
                # stacked next to the inputs
                h, w = frame1.shape[:2]
                result = draw_flow(result, w, h)

                # Show inputs and flow side by side. This is done here rather
                # than via visualize(), which blocks on cv.waitKey(0) and would
                # stall the live feed on every frame.
                cv.imshow("Estimated flow", np.hstack((frame1, frame2, result)))

                tm.reset()
        finally:
            cap.release()
            cv.destroyAllWindows()