"""Baseline submission script for a synthetic-vs-pristine video detection task.

The script streams the test split from ``/tmp/data``, decodes a subsample of
frames from every video, scores it with ``Model`` and writes one row per
element to ``submission.csv``.
"""

import io
import os
import time
from typing import BinaryIO

import av
import numpy as np
import pandas as pd
import torch
import tqdm.auto as tqdm
from datasets import load_dataset

# Import your model and anything else you want.
# You can even install other packages included in your repo.
# However, during the evaluation the container will not have access to the
# internet, so you must include everything you need in your model repo.

# Alternative decoder based on torchcodec, kept for reference:
# from torchcodec.decoders import VideoDecoder
# def preprocess_v1(file_like):
#     file_like.seek(0)
#     decoder = VideoDecoder(file_like)
#     frames = decoder[0:-1:20]
#     frames = frames.float() / 255.0
#     return frames


def preprocess(file_like: BinaryIO, every: int = 10) -> torch.Tensor:
    """Decode a video and return every ``every``-th frame as a float tensor.

    Args:
        file_like: Seekable binary file-like object holding the encoded video
            (for example an ``io.BytesIO``). It is rewound before decoding.
        every: Frame sampling stride; frame indices 0, ``every``,
            ``2 * every``, ... are kept. Must be at least 1.

    Returns:
        The sampled frames stacked along a new leading dimension. Each frame
        is decoded as ``rgb24`` and permuted to channels-first. Values are
        cast to float but NOT rescaled, so they stay in the decoder's
        0-255 range.

    Raises:
        ValueError: If ``every`` is smaller than 1 or the video yields no
            frames.
    """
    if every < 1:
        raise ValueError(f"every must be >= 1, got {every}")

    file_like.seek(0)
    frames = []
    # The context manager closes the container (and its decoder state) even
    # when decoding fails part-way through the stream.
    with av.open(file_like) as container:
        for i, frame in enumerate(container.decode(video=0)):
            if i % every == 0:
                frame_array = frame.to_ndarray(format="rgb24")
                frame_tensor = (
                    torch.from_numpy(frame_array).permute(2, 0, 1).float()
                )
                frames.append(frame_tensor)

    if not frames:
        # torch.stack([]) would fail with an opaque message; be explicit.
        raise ValueError("video contains no decodable frames")

    return torch.stack(frames)


class Model(torch.nn.Module):
    """Placeholder detector that returns a random score per batch element.

    Replace this class with a real model. ``threshold`` is the decision
    boundary applied to the score by the evaluation loop below.
    """

    def __init__(self) -> None:
        super().__init__()
        # Unused by forward(); kept so the module owns at least one parameter.
        self.fc1 = torch.nn.Linear(10, 5)
        self.threshold = 0.0

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return one random normal score per element of the leading dim of ``x``."""
        return torch.randn(x.shape[0]).to(x.device)


# Load the dataset. It will be automatically downloaded to /tmp/data during
# evaluation.
DATASET_PATH = "/tmp/data"
dataset_remote = load_dataset(DATASET_PATH, split="test", streaming=True)

# Load your model. Fall back to the CPU when no GPU is visible so the script
# still runs (slowly) on CPU-only hosts instead of crashing.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = Model().to(device)
model.eval()  # inference only: disable dropout / batch-norm updates

# Iterate over the dataset.
out = []
for el in tqdm.tqdm(dataset_remote):
    # Each element is a dict.
    # el["video"]["bytes"] contains the bytes read from the raw file.
    # el["video"]["path"] contains the filename. This is just for reference
    # and you can't actually load it.
    # If you are using libraries that expect a file, you can use a BytesIO
    # object.
    try:
        file_like = io.BytesIO(el["video"]["bytes"])
        tensor = preprocess(file_like)

        with torch.no_grad():
            # Soft decision (such as a log-likelihood score):
            # a positive score corresponds to a synthetic prediction,
            # a negative score corresponds to a pristine prediction.
            score = model(tensor[None].to(device)).cpu().item()

        # A hard decision is required to be submitted, so pick a threshold.
        pred = "generated" if score > model.threshold else "pristine"

        # Append your prediction. "id" and "pred" are required. "score" will
        # not be used in scoring but we encourage you to include it; it is
        # used for analysis of the results.
        out.append(dict(id=el["id"], pred=pred, score=score))
    except Exception as e:
        # Best effort: keep going so a single bad video does not abort the
        # whole run. NOTE(review): the failed row carries only "id", so its
        # "pred" and "score" cells are empty in the CSV - confirm the scorer
        # accepts that.
        print(e)
        print("failed", el["id"])
        out.append(dict(id=el["id"]))

# Save the final result and that's it.
pd.DataFrame(out).to_csv("submission.csv", index=False)