语义分割丨PSPNet源码解析「网络训练」 (2)

日期：2021-06-07 栏目：程序人生 浏览：次

注意：为了让Image和Label对应，也要对Label作相应的预处理，具体过程详见代码。

import os
import os.path as osp
import random
import collections

import numpy as np
import torch
import torchvision
import cv2
from torch.utils import data


class CSDataSet(data.Dataset):
    """Cityscapes dataset that yields augmented (image, label) training samples.

    Each sample is read with OpenCV, its label ids are remapped to train ids,
    and it is then optionally rescaled, mean-subtracted, padded up to the crop
    size, randomly cropped and optionally mirrored. The label receives the
    same geometric transforms as the image so the two stay aligned.

    Example from the original author's notes: crop_size=(769, 769) and
    max_iters = num_steps * batch_size = 40000 * 8 = 320000.

    Args:
        root: Directory that the relative paths in the list file are joined to.
        list_path: Text file with one "<image_path> <label_path>" pair per
            line (whitespace separated). Blank lines are skipped.
        max_iters: If not None, the sample list is repeated
            ceil(max_iters / len(list)) times so that one pass over the
            dataset covers at least ``max_iters`` samples.
        crop_size: (height, width) of the returned crop.
        mean: Per-channel value subtracted from the image.
        scale: Whether to apply random rescaling.
        mirror: Whether to apply a random horizontal flip.
        ignore_label: Train id assigned to classes that are not evaluated and
            to padded label pixels.

    Raises:
        ValueError: If ``max_iters`` is given but the list file has no entries.
    """

    def __init__(self, root, list_path, max_iters=None, crop_size=(321, 321),
                 mean=(128, 128, 128), scale=True, mirror=True,
                 ignore_label=255):
        self.root = root
        self.list_path = list_path
        self.crop_h, self.crop_w = crop_size
        self.scale = scale
        self.ignore_label = ignore_label
        self.mean = mean
        self.is_mirror = mirror

        # Each entry is [image_path, label_path]. The file is read inside a
        # ``with`` block so the handle is closed deterministically, and blank
        # lines are skipped so they cannot break the unpacking below.
        with open(list_path) as list_file:
            self.img_ids = [
                line.strip().split() for line in list_file if line.strip()
            ]

        if max_iters is not None:
            if not self.img_ids:
                raise ValueError(
                    "Cannot repeat an empty sample list: {}".format(list_path)
                )
            # One sample is consumed per iteration, so repeat the list often
            # enough to cover max_iters, e.g. 2975 images and
            # max_iters=320000 -> 2975 * ceil(320000 / 2975) = 321300 entries.
            repeats = int(np.ceil(float(max_iters) / len(self.img_ids)))
            self.img_ids = self.img_ids * repeats

        # One dict per sample with the resolved image/label paths and a name.
        self.files = []
        for image_path, label_path in self.img_ids:
            # The name is the label file's basename without its extension.
            name = osp.splitext(osp.basename(label_path))[0]
            self.files.append({
                "img": osp.join(self.root, image_path),
                "label": osp.join(self.root, label_path),
                "name": name,
            })

        # Mapping from the official label ids to the 19 training ids; every
        # other id collapses to ignore_label.
        self.id_to_trainid = {
            -1: ignore_label, 0: ignore_label, 1: ignore_label,
            2: ignore_label, 3: ignore_label, 4: ignore_label,
            5: ignore_label, 6: ignore_label, 7: 0, 8: 1,
            9: ignore_label, 10: ignore_label, 11: 2, 12: 3, 13: 4,
            14: ignore_label, 15: ignore_label, 16: ignore_label, 17: 5,
            18: ignore_label, 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11,
            25: 12, 26: 13, 27: 14, 28: 15, 29: ignore_label,
            30: ignore_label, 31: 16, 32: 17, 33: 18,
        }
        print('{} images are loaded!'.format(len(self.img_ids)))

    def __len__(self):
        """Return the number of samples, including repeats from max_iters."""
        return len(self.files)

    def generate_scale_label(self, image, label):
        """Rescale image and label by one shared random factor.

        The factor is drawn from {0.7, 0.8, ..., 2.1}. The image uses linear
        interpolation; the label uses nearest-neighbour so that no new class
        values are invented between neighbouring pixels.

        Returns:
            The (image, label) pair after resizing.
        """
        f_scale = 0.7 + random.randint(0, 14) / 10.0
        image = cv2.resize(image, None, fx=f_scale, fy=f_scale,
                           interpolation=cv2.INTER_LINEAR)
        label = cv2.resize(label, None, fx=f_scale, fy=f_scale,
                           interpolation=cv2.INTER_NEAREST)
        return image, label

    def id2trainId(self, label, reverse=False):
        """Convert between official label ids and train ids.

        For example, id 33 corresponds to trainId 18.

        Args:
            label: Array of class values; it is not modified.
            reverse: If False, map id -> trainId. If True, map trainId -> id.

        Returns:
            A copy of ``label`` with the mapped values.
        """
        label_copy = label.copy()
        # Masks are always computed on the untouched input so that an already
        # remapped pixel can never be remapped a second time.
        if reverse:
            # NOTE(review): several ids share ignore_label, so the reverse
            # direction is lossy; for those pixels the last matching table
            # entry wins.
            for label_id, train_id in self.id_to_trainid.items():
                label_copy[label == train_id] = label_id
        else:
            for label_id, train_id in self.id_to_trainid.items():
                label_copy[label == label_id] = train_id
        return label_copy

    def __getitem__(self, index):
        """Load, augment and return the sample at ``index``.

        Returns:
            A tuple (image, label, size, name): ``image`` is a float32 array
            in channel-first layout cropped to (crop_h, crop_w); ``label`` is
            the matching float32 crop of train ids; ``size`` is the shape of
            the image as read from disk (before any augmentation); ``name``
            is the sample name.

        Raises:
            OSError: If the image or label file cannot be read.
        """
        datafiles = self.files[index]
        # Original author's note: Cityscapes frames are 1024 x 2048.
        image = cv2.imread(datafiles["img"], cv2.IMREAD_COLOR)
        label = cv2.imread(datafiles["label"], cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising, so
        # fail here with the offending path rather than deeper in the pipeline.
        if image is None:
            raise OSError("Cannot read image file: {}".format(datafiles["img"]))
        if label is None:
            raise OSError(
                "Cannot read label file: {}".format(datafiles["label"])
            )

        label = self.id2trainId(label)
        size = image.shape
        name = datafiles["name"]

        if self.scale:
            image, label = self.generate_scale_label(image, label)

        image = np.asarray(image, np.float32)
        image -= self.mean

        # If the (possibly rescaled) sample is smaller than the crop, pad the
        # bottom/right edges: zeros for the image, ignore_label for the label.
        img_h, img_w = label.shape
        pad_h = max(self.crop_h - img_h, 0)
        pad_w = max(self.crop_w - img_w, 0)
        if pad_h > 0 or pad_w > 0:
            img_pad = cv2.copyMakeBorder(image, 0, pad_h, 0, pad_w,
                                         cv2.BORDER_CONSTANT,
                                         value=(0.0, 0.0, 0.0))
            label_pad = cv2.copyMakeBorder(label, 0, pad_h, 0, pad_w,
                                           cv2.BORDER_CONSTANT,
                                           value=(self.ignore_label,))
        else:
            img_pad, label_pad = image, label

        # Pick a random top-left corner and cut the same window from both.
        img_h, img_w = label_pad.shape
        h_off = random.randint(0, img_h - self.crop_h)
        w_off = random.randint(0, img_w - self.crop_w)
        image = np.asarray(
            img_pad[h_off:h_off + self.crop_h, w_off:w_off + self.crop_w],
            np.float32,
        )
        label = np.asarray(
            label_pad[h_off:h_off + self.crop_h, w_off:w_off + self.crop_w],
            np.float32,
        )

        # HWC -> CHW.
        image = image.transpose((2, 0, 1))

        if self.is_mirror:
            # flip is 1 (keep) or -1 (reverse the width axis of both arrays).
            flip = np.random.choice(2) * 2 - 1
            image = image[:, :, ::flip]
            label = label[:, ::flip]

        # Copy so the returned arrays are contiguous, not negative-stride views.
        return image.copy(), label.copy(), np.array(size), name

转载注明出处：https://www.heiqu.com/wpszpx.html

语义分割丨PSPNet源码解析「网络训练」 (2)

相关推荐