Skip to content
Snippets Groups Projects
Commit 1c8d03d9 authored by Jakob's avatar Jakob
Browse files

updated dataset to do preprocessing only once

parent 5b15111f
No related branches found
No related tags found
No related merge requests found
......@@ -68,7 +68,7 @@ if __name__ == '__main__':
validation_losses = []
print("creating train dataset and loader")
train_dataset = CocoDataSet("./data/train2014", "./data/labels/train2014", transform, 1)
train_dataset = CocoDataSet("./data/train2014", "./data/labels/train2014", transform, 0.2)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
shuffle=True, num_workers=0)
......
from torch.utils.data import Dataset
import torch
import os
from pathlib import Path
from skimage import io
from skimage.color import gray2rgb
from random import random
from random import random, sample
from utils import toMatrix
import math
from PIL import Image
......@@ -17,53 +18,35 @@ class CocoDataSet(Dataset):
self.transform = transform
img_dir = f"{Path().resolve()}\{img_dir}"
label_dir = f"{Path().resolve()}\{label_dir}"
self.person_id = 0
self.preprocess_data(img_dir, label_dir)
self.person_images = 0
self.non_person_images = 0
x = []
y = []
img_names = [file.stem for file in list(Path(img_dir).glob("*.jpg"))]
annotation_file_names = [file.stem for file in list(Path(label_dir).glob("*.txt"))]
# If both img and annotation file exists add to (x,y)
for img_name in list(set(img_names) & set(annotation_file_names)):
x.append(f"{img_dir}\{img_name}.jpg")
y.append(f"{label_dir}\{img_name}.txt")
self.x = [str(file) for file in list(Path(fr"{img_dir}\person").glob("*.jpg"))]
self.y = [str(file) for file in list(Path(fr"{label_dir}\person").glob("*.txt"))]
self.person_id = 0
# if 1.0 or above include all files
if percentage_without_person >= 1.0:
self.x.extend([str(file) for file in list(Path(fr"{img_dir}").glob("*.jpg"))])
self.y.extend([str(file) for file in list(Path(fr"{label_dir}").glob("*.txt"))])
self.x = []
self.y = []
# if above 0.0 (but below 1.0) take a random sample of files
elif percentage_without_person > 0.0:
x_without_person = [file.stem for file in list(Path(fr"{img_dir}").glob("*.jpg"))]
y_without_person = [file.stem for file in list(Path(fr"{label_dir}").glob("*.txt"))]
if percentage_without_person < 1.0:
for i, image, annotation_file in zip(range(len(x)),x,y):
print(f"{i}/{len(x)}", end="\r")
labels = self.read_annotation_file(annotation_file)
# with open(label_file) as file:
# labels = [line.strip(" \n").split(" ") for line in file.readlines()]
image_has_persons = self.person_id in [label[0] for label in labels]
# include if there is person in the image
if image_has_persons:
self.x.append(image)
self.y.append(annotation_file)
# self.y.append(persons)
self.person_images += 1
# include one element with prob. if no person is on the image
elif percentage_without_person > 0.0:
if percentage_without_person > random():
self.x.append(image)
self.y.append(annotation_file)
# self.y.append([]) # one element with label 0 as confidence
self.non_person_images += 1
else:
self.x = x
self.y = y
images_with_annotations = list(set(x_without_person) & set(y_without_person))
images_without_person_to_add = sample(images_with_annotations, math.floor(random() * len(images_with_annotations)))
for file_name in images_without_person_to_add:
self.x.append(fr"{img_dir}\{file_name}.jpg")
self.y.append(fr"{label_dir}\{file_name}.txt")
def __len__(self) -> int:
    """Return the number of entries currently stored in ``self.x``."""
    return len(self.x)
......@@ -111,6 +94,39 @@ class CocoDataSet(Dataset):
data = [[int(point[0]), *point[1:]] for point in data]
return data
def preprocess_data(self, img_dir, label_dir):
    """Move images that contain a person, and their labels, into ``person`` subfolders.

    For every ``*.jpg`` in ``img_dir`` that has a same-named ``*.txt`` in
    ``label_dir``, the annotation file is read with
    ``self.read_annotation_file``. If any label's first element equals
    ``self.person_id``, both files are moved into ``<img_dir>/person`` and
    ``<label_dir>/person`` respectively. All other files stay where they are.

    The method returns immediately when both ``person`` subfolders already
    exist, so the (slow) scan only happens once per dataset directory.

    Args:
        img_dir: Directory containing the ``*.jpg`` images.
        label_dir: Directory containing the ``*.txt`` annotation files.

    Raises:
        OSError: Propagated from creating the subfolders or moving a file
            (for example when ``img_dir`` or ``label_dir`` does not exist).
    """
    # os.path.join picks the platform separator; hard-coded "\" only works on
    # Windows and turns into part of the file name everywhere else.
    person_img_dir = os.path.join(img_dir, "person")
    person_label_dir = os.path.join(label_dir, "person")

    # Both subfolders being present is the marker for "already preprocessed".
    if os.path.isdir(person_img_dir) and os.path.isdir(person_label_dir):
        return

    # No parents=True on purpose: a missing dataset directory should fail loudly.
    Path(person_img_dir).mkdir(exist_ok=True)
    Path(person_label_dir).mkdir(exist_ok=True)

    img_names = {file.stem for file in Path(img_dir).glob("*.jpg")}
    annotation_file_names = {file.stem for file in Path(label_dir).glob("*.txt")}

    # Only images that also have an annotation file can be classified.
    # Sorted so the processing order (and progress output) is reproducible.
    images_with_annotations = sorted(img_names & annotation_file_names)
    total = len(images_with_annotations)

    for i, img_name in enumerate(images_with_annotations):
        image = os.path.join(img_dir, f"{img_name}.jpg")
        annotation_file = os.path.join(label_dir, f"{img_name}.txt")
        print(f"Preprocessing: {i}/{total}", end="\r")

        labels = self.read_annotation_file(annotation_file)
        image_has_persons = any(label[0] == self.person_id for label in labels)

        # move files if there is a person in the image
        if image_has_persons:
            os.rename(annotation_file, os.path.join(person_label_dir, f"{img_name}.txt"))
            os.rename(image, os.path.join(person_img_dir, f"{img_name}.jpg"))

    # Trailing spaces wipe the leftovers of the "\r" progress line.
    print("Done Preprocessing!", end=f"{' '*20}\n")
if __name__ == "__main__":
from PIL import Image
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment