Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,812 Bytes
56238f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import torch
import os
from torch.utils.data import Dataset
from torchvision.transforms import CenterCrop, Normalize, Resize
from torchvision.transforms.functional import to_tensor
from PIL import Image
# File-name suffixes that identify an image when scanning a directory.
EXTs = ['.png', '.jpg', '.jpeg', ".JPEG"]


def is_image_file(filename):
    """Return True if *filename* ends with one of the suffixes in ``EXTs``.

    The comparison ignores case, so ``photo.PNG`` and ``photo.Jpg`` are
    accepted just like ``photo.png``.
    """
    # str.endswith accepts a tuple, so every suffix is checked in one call.
    suffixes = tuple(ext.lower() for ext in EXTs)
    return filename.lower().endswith(suffixes)
class ImageText(Dataset):
    """Dataset of images paired with same-named ``.txt`` caption files.

    ``root`` is walked recursively. Every file accepted by ``is_image_file``
    that has a sibling file with the same base name and a ``.txt`` suffix
    contributes one sample; images without such a caption file are skipped.
    Caption texts are read once, at construction time.
    """

    def __init__(self, root, resolution):
        """Index image/caption pairs under *root* and build the transforms.

        Args:
            root: Directory to scan recursively.
            resolution: Target size, passed unchanged to both ``Resize`` and
                ``CenterCrop``.
        """
        super().__init__()
        self.image_paths = []
        self.texts = []
        for dirpath, dirnames, filenames in os.walk(root):
            # os.walk reports entries in arbitrary filesystem order. Sorting
            # the directory list in place (honoured by top-down walks) and
            # the file list keeps sample indices reproducible across runs.
            dirnames.sort()
            for filename in sorted(filenames):
                if not is_image_file(filename):
                    continue
                image_path = os.path.join(dirpath, filename)
                # Swap the final ".<ext>" of the image path for ".txt".
                text_path = image_path.rsplit(".", 1)[0] + ".txt"
                if not os.path.exists(text_path):
                    continue
                # Explicit encoding so captions decode the same everywhere
                # instead of depending on the platform's locale default.
                with open(text_path, 'r', encoding='utf-8') as f:
                    text = f.read()
                # Both lists grow together so index i refers to one pair.
                self.texts.append(text)
                self.image_paths.append(image_path)
        self.resize = Resize(resolution)
        self.center_crop = CenterCrop(resolution)
        # Per-channel (x - 0.5) / 0.5.
        self.normalize = Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    def __getitem__(self, idx: int):
        """Load, resize, crop and normalize the sample at position *idx*.

        Returns:
            A tuple ``(normalized_image, text, metadata)`` where ``metadata``
            is a dict with keys ``"image_path"``, ``"prompt"`` and
            ``"raw_image"`` (the tensor before normalization).

        Raises:
            IndexError: If *idx* is outside the indexed samples.
        """
        image_path = self.image_paths[idx]
        text = self.texts[idx]
        # convert() loads the pixel data and returns a new image, so the
        # underlying file can be released as soon as the block exits.
        with Image.open(image_path) as source_image:
            pil_image = source_image.convert('RGB')
        pil_image = self.resize(pil_image)
        pil_image = self.center_crop(pil_image)
        raw_image = to_tensor(pil_image)
        normalized_image = self.normalize(raw_image)
        metadata = {
            "image_path": image_path,
            "prompt": text,
            "raw_image": raw_image,
        }
        return normalized_image, text, metadata

    def __len__(self):
        """Return the number of indexed image/caption pairs."""
        return len(self.image_paths)
|