Add files via upload
commit 3f6433f7ab (parent d0512f6a95)
@@ -0,0 +1,47 @@
# import the necessary packages
import torch
import os

# base path of the dataset
DATASET_PATH = os.path.join("dataset", "train")

# define the paths to the images and masks datasets
IMAGE_DATASET_PATH = os.path.join(DATASET_PATH, "images")
MASK_DATASET_PATH = os.path.join(DATASET_PATH, "masks")

# define the test split
TEST_SPLIT = 0.15

# determine the device to be used for training and evaluation
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# determine if we will be pinning memory during data loading
PIN_MEMORY = True if DEVICE == "cuda" else False

# define the number of channels in the input, the number of classes,
# and the number of levels in the U-Net model
NUM_CHANNELS = 1
NUM_CLASSES = 1
NUM_LEVELS = 3

# initialize the learning rate, the number of epochs to train for,
# and the batch size
INIT_LR = 0.001
NUM_EPOCHS = 40
BATCH_SIZE = 64

# define the input image dimensions
INPUT_IMAGE_WIDTH = 128
INPUT_IMAGE_HEIGHT = 128

# define the threshold to filter weak predictions
THRESHOLD = 0.5

# define the path to the base output directory
BASE_OUTPUT = "output"

# define the paths to the serialized model, the training plot,
# and the testing image paths file
MODEL_PATH = os.path.join(BASE_OUTPUT, "unet_tgs_salt.pth")
PLOT_PATH = os.path.join(BASE_OUTPUT, "plot.png")
TEST_PATHS = os.path.join(BASE_OUTPUT, "test_paths.txt")
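For orientation, here is a minimal sketch of how these constants are typically consumed when building the train/test split. This is an illustration, not part of this commit: train_test_split comes from scikit-learn, the "*.png" glob pattern is an assumption about the dataset layout, and the variable names are hypothetical.

# illustrative sketch only -- not part of this commit
import glob
from sklearn.model_selection import train_test_split

# collect sorted image and mask paths so the two lists stay aligned
imagePaths = sorted(glob.glob(os.path.join(IMAGE_DATASET_PATH, "*.png")))
maskPaths = sorted(glob.glob(os.path.join(MASK_DATASET_PATH, "*.png")))

# split both lists with the configured test fraction
split = train_test_split(imagePaths, maskPaths,
    test_size=TEST_SPLIT, random_state=42)
(trainImages, testImages) = split[:2]
(trainMasks, testMasks) = split[2:]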
@@ -0,0 +1,34 @@
# import the necessary packages
from torch.utils.data import Dataset
import cv2

class SegmentationDataset(Dataset):
    def __init__(self, imagePaths, maskPaths, transforms):
        # store the image and mask filepaths, and the augmentation
        # transforms
        self.imagePaths = imagePaths
        self.maskPaths = maskPaths
        self.transforms = transforms

    def __len__(self):
        # return the total number of samples contained in the dataset
        return len(self.imagePaths)

    def __getitem__(self, idx):
        # grab the image path at the current index
        imagePath = self.imagePaths[idx]

        # load the image from disk, swap its channels from BGR to RGB,
        # and read the associated mask from disk in grayscale mode
        image = cv2.imread(imagePath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.maskPaths[idx], cv2.IMREAD_GRAYSCALE)

        # check to see if we are applying any transformations
        if self.transforms is not None:
            # apply the transformations to both the image and its mask
            image = self.transforms(image)
            mask = self.transforms(mask)

        # return a tuple of the image and its mask
        return (image, mask)
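A usage sketch, again an assumption for illustration rather than part of this commit: wiring SegmentationDataset into a DataLoader with torchvision transforms. The same transform chain is applied to both the image and its mask, so only geometric and scaling operations belong in it. trainImages/trainMasks are the hypothetical path lists from the split sketch above, and config is assumed to be the configuration module from this commit.

# illustrative sketch only -- not part of this commit
from torch.utils.data import DataLoader
from torchvision import transforms

# resize everything to the configured input size and convert to tensors
tfms = transforms.Compose([transforms.ToPILImage(),
    transforms.Resize((config.INPUT_IMAGE_HEIGHT,
        config.INPUT_IMAGE_WIDTH)),
    transforms.ToTensor()])

# build the training dataset and loader from the config values
trainDS = SegmentationDataset(imagePaths=trainImages,
    maskPaths=trainMasks, transforms=tfms)
trainLoader = DataLoader(trainDS, shuffle=True,
    batch_size=config.BATCH_SIZE, pin_memory=config.PIN_MEMORY)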
@@ -0,0 +1,124 @@
# import the necessary packages
from . import config
from torch.nn import ConvTranspose2d
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import Module
from torch.nn import ModuleList
from torch.nn import ReLU
from torchvision.transforms import CenterCrop
from torch.nn import functional as F
import torch

class Block(Module):
    def __init__(self, inChannels, outChannels):
        super().__init__()
        # store the convolution and ReLU layers
        self.conv1 = Conv2d(inChannels, outChannels, 3)
        self.relu = ReLU()
        self.conv2 = Conv2d(outChannels, outChannels, 3)

    def forward(self, x):
        # apply CONV => RELU => CONV to the inputs and return the result
        return self.conv2(self.relu(self.conv1(x)))

class Encoder(Module):
    def __init__(self, channels=(3, 16, 32, 64)):
        super().__init__()
        # store the encoder blocks and the maxpooling layer
        self.encBlocks = ModuleList(
            [Block(channels[i], channels[i + 1])
                for i in range(len(channels) - 1)])
        self.pool = MaxPool2d(2)

    def forward(self, x):
        # initialize an empty list to store the intermediate outputs
        blockOutputs = []

        # loop through the encoder blocks
        for block in self.encBlocks:
            # pass the inputs through the current encoder block, store
            # the output, and then apply maxpooling to it
            x = block(x)
            blockOutputs.append(x)
            x = self.pool(x)

        # return the list containing the intermediate outputs
        return blockOutputs

class Decoder(Module):
    def __init__(self, channels=(64, 32, 16)):
        super().__init__()
        # initialize the number of channels, the upsampler blocks, and
        # the decoder blocks
        self.channels = channels
        self.upconvs = ModuleList(
            [ConvTranspose2d(channels[i], channels[i + 1], 2, 2)
                for i in range(len(channels) - 1)])
        self.dec_blocks = ModuleList(
            [Block(channels[i], channels[i + 1])
                for i in range(len(channels) - 1)])

    def forward(self, x, encFeatures):
        # loop through the number of channels
        for i in range(len(self.channels) - 1):
            # pass the inputs through the current upsampler block
            x = self.upconvs[i](x)

            # crop the current features from the encoder blocks,
            # concatenate them with the current upsampled features,
            # and pass the concatenated output through the current
            # decoder block
            encFeat = self.crop(encFeatures[i], x)
            x = torch.cat([x, encFeat], dim=1)
            x = self.dec_blocks[i](x)

        # return the final decoder output
        return x

    def crop(self, encFeatures, x):
        # grab the spatial dimensions of the inputs, and crop the
        # encoder features to match them (the unpadded convolutions
        # in Block shrink the feature maps, so the skip connections
        # must be center-cropped before concatenation)
        (_, _, H, W) = x.shape
        encFeatures = CenterCrop([H, W])(encFeatures)

        # return the cropped features
        return encFeatures

class UNet(Module):
    def __init__(self, encChannels=(3, 16, 32, 64),
            decChannels=(64, 32, 16),
            nbClasses=1, retainDim=True,
            outSize=(config.INPUT_IMAGE_HEIGHT, config.INPUT_IMAGE_WIDTH)):
        super().__init__()
        # initialize the encoder and decoder
        self.encoder = Encoder(encChannels)
        self.decoder = Decoder(decChannels)

        # initialize the regression head and store the class variables
        self.head = Conv2d(decChannels[-1], nbClasses, 1)
        self.retainDim = retainDim
        self.outSize = outSize

    def forward(self, x):
        # grab the features from the encoder
        encFeatures = self.encoder(x)

        # pass the encoder features through the decoder, making sure
        # their dimensions are suited for concatenation (the deepest
        # features go in first; the remaining ones serve as skips)
        decFeatures = self.decoder(encFeatures[::-1][0],
            encFeatures[::-1][1:])

        # pass the decoder features through the regression head to
        # obtain the segmentation mask
        segMap = self.head(decFeatures)

        # check to see if we are retaining the original output
        # dimensions and, if so, resize the output to match them
        if self.retainDim:
            segMap = F.interpolate(segMap, self.outSize)

        # return the segmentation map
        return segMap
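As a quick sanity check (illustrative, not part of this commit), a dummy batch can be pushed through the model. With the defaults above, the input must have 3 channels to match encChannels, and retainDim interpolates the output back to the configured 128x128 size:

# illustrative sketch only -- not part of this commit
model = UNet()
x = torch.randn(2, 3, config.INPUT_IMAGE_HEIGHT, config.INPUT_IMAGE_WIDTH)
with torch.no_grad():
    out = model(x)
print(out.shape)  # expected: torch.Size([2, 1, 128, 128])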