Combine k-fold cross-validation and data loader

I want to combine k-fold cross-validation with a DataLoader, but I don't know how to handle the data augmentation. With k-fold cross-validation, the training and validation sets change on every fold, so in my setup both end up with the same augmentation — yet normally the validation set should not be augmented at all. Below is my code:

class KD_Set(Dataset):
    """Minimal map-style Dataset pairing pre-loaded image tensors with labels.

    Args:
        a: indexable collection of images (e.g. a tensor of shape (N, C, H, W)).
        b: indexable collection of labels, aligned with ``a``.
    """

    def __init__(self, a, b):
        # Stored under the original attribute names for compatibility.
        self.imgs = a
        self.index = b

    def __len__(self):
        # Dataset size is the number of images held.
        return len(self.imgs)

    def __getitem__(self, index):
        sample = self.imgs[index]
        label = self.index[index]
        return sample, label

--------------------------------------------------------------------------

def load_data(train_data):
    """Materialize an iterable of (inputs, labels) batches into two flat tensors.

    Args:
        train_data: iterable yielding ``(batch_x, batch_y)`` tensor pairs,
            typically a DataLoader.

    Returns:
        Tuple ``(x, y)`` where ``x`` is every input batch concatenated along
        dim 0 and ``y`` the matching labels, so row ``i`` of ``x`` pairs with
        element ``i`` of ``y``.
    """
    xs = []
    ys = []
    for b_x, b_y in train_data:
        # Detach and move to CPU so the result carries no autograd graph
        # and no GPU memory.
        xs.append(b_x.detach().cpu())
        ys.append(b_y.detach().cpu())

    # Guard the empty-loader case: torch.cat([]) raises.
    if not xs:
        return torch.empty(0), torch.empty(0, dtype=torch.long)

    # BUG FIX: torch.cat flattens the batch dimension. The original
    # torch.tensor([item.numpy() ...]) stacked whole batches (adding a spurious
    # leading dim), failed when the final batch was smaller, and
    # torch.tensor([item for item in y]) errors on non-scalar label tensors.
    x = torch.cat(xs, dim=0)
    y = torch.cat(ys, dim=0)

    return x, y

--------------------------------------------------------------------------

def data_preprocess():
    """Build the augmented training dataset and a deterministic test DataLoader.

    Training images get random-resized-crop + horizontal-flip augmentation;
    test images get a deterministic resize + center crop. NOTE(review): the
    validation folds carved out of ``train_data`` by the caller will inherit
    the *training* augmentation, since both come from the same ImageFolder —
    ideally validation should use the test-style transforms instead.

    Returns:
        Tuple ``(test_data_loader, train_data, test_data)``.
    """
    # ImageNet channel statistics, shared by both pipelines.
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])

    train_data_transforms = transforms.Compose([
        transforms.RandomResizedCrop(224),   # random scale/aspect crop: augmentation
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    test_data_transforms = transforms.Compose([
        transforms.Resize(256),              # deterministic: resize then center-crop
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    train_data_dir = "./training_extend"
    train_data = ImageFolder(train_data_dir, transform=train_data_transforms)

    test_data_dir = "./test"
    test_data = ImageFolder(test_data_dir, transform=test_data_transforms)
    test_data_loader = Data.DataLoader(
        test_data,
        batch_size=256,
        # BUG FIX: never shuffle the test set — evaluation metrics should be
        # computed over a stable, reproducible ordering.
        shuffle=False,
        num_workers=0)

    return test_data_loader, train_data, test_data

--------------------------------------------------------------------------

def train(model, x, y):
    """Train ``model`` with stratified k-fold cross-validation over (x, y).

    Args:
        model: a torch.nn.Module producing class logits.
        x: input tensor, indexable by sample along dim 0.
        y: integer class labels aligned with ``x``.

    Returns:
        Tuple ``(num_epochs, n_splits, train_loss, val_loss, train_acc,
        val_acc)`` — the four lists hold one entry per (fold, epoch),
        flattened fold-major.

    Side effects:
        Saves the best-loss model of each fold to ``./vgg_k10_<k>.pkl`` and
        prints per-epoch progress.
    """
    since = time.time()
    num_epochs = 50
    learn_r = 0.003   # 0.0003 originally
    relr_f = 0.5      # ReduceLROnPlateau decay factor
    relr_p = 5        # ReduceLROnPlateau patience (epochs)
    k = 1             # 1-based fold counter, used in checkpoint names
    n_splits = 10

    train_loss = []
    train_acc = []
    val_loss = []
    val_acc = []

    # BUG FIX: snapshot the initial weights so every fold starts from the same
    # untrained state. Previously the model carried its weights over from the
    # previous fold, leaking training signal into later folds' validation sets.
    init_state = {name: t.clone() for name, t in model.state_dict().items()}

    cv = StratifiedKFold(n_splits=n_splits, shuffle=True)

    # NOTE: loop variables renamed from `train`/`val` — they shadowed this
    # function's own name.
    for i, (train_idx, val_idx) in enumerate(cv.split(x, y)):

        # Fresh weights (and, below, fresh optimizer state) for this fold.
        model.load_state_dict(init_state)

        # Wrap the fold's slices in Datasets.
        kdt = KD_Set(x[train_idx], y[train_idx])
        kdv = KD_Set(x[val_idx], y[val_idx])

        train_data_loader = Data.DataLoader(
            kdt,
            batch_size=64,
            shuffle=True,
            num_workers=0)

        # Validation order doesn't affect the metrics, so no shuffling.
        val_data_loader = Data.DataLoader(
            kdv,
            batch_size=64,
            shuffle=False,
            num_workers=0)

        min_loss = 10000.0  # best (lowest) validation loss seen this fold
        optimizer = torch.optim.Adam(model.parameters(), lr=learn_r)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', factor=relr_f, patience=relr_p, verbose=False)
        loss_func = nn.CrossEntropyLoss()

        for epoch in range(num_epochs):

            print('Epoch %s' % (epoch + 1))

            train_loss_epoch = 0.0
            train_corrects = 0
            val_loss_epoch = 0.0
            val_corrects = 0

            # ---- Training pass ----
            model.train()
            for t_x, t_y in train_data_loader:
                output = model(t_x)
                loss = loss_func(output, t_y)
                pre_lab = torch.argmax(output, 1)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Weight by batch size so the epoch average is per-sample.
                train_loss_epoch += loss.item() * t_x.size(0)
                train_corrects += torch.sum(pre_lab == t_y)

            # ---- Validation pass ----
            model.eval()
            # BUG FIX: evaluate under no_grad — the original built (and kept)
            # autograd graphs for every validation batch.
            with torch.no_grad():
                for v_x, v_y in val_data_loader:
                    output = model(v_x)
                    loss = loss_func(output, v_y)
                    pre_lab = torch.argmax(output, 1)
                    val_loss_epoch += loss.item() * v_x.size(0)
                    val_corrects += torch.sum(pre_lab == v_y)

            # Record per-sample averages for this epoch.
            train_loss.append(train_loss_epoch / len(train_idx))
            train_acc.append(train_corrects.double() / len(train_idx))
            val_loss.append(val_loss_epoch / len(val_idx))
            val_acc.append(val_corrects.double() / len(val_idx))

            # BUG FIX: the scheduler was created but never stepped, so the
            # learning rate never actually decayed. ReduceLROnPlateau must be
            # fed the monitored metric each epoch.
            scheduler.step(val_loss[-1])

            print('{} Train Loss: {:.4f} Train Acc: {:.4f}'.format(
                epoch + 1, train_loss[-1], train_acc[-1]))
            print('{} Val Loss: {:.4f} Val Acc: {:.4f}'.format(
                epoch + 1, val_loss[-1], val_acc[-1]))
            time_use = time.time() - since
            print('Train and Val Complete in {:.0f}m {:.0f}s'.format(
                time_use // 60, time_use % 60))

            # Checkpoint whenever this fold's validation loss improves.
            if val_loss[-1] < min_loss:
                min_loss = val_loss[-1]
                torch.save(model, "./vgg_k10_" + str(k) + '.pkl')
                print("Model for Division %s Saved" % (k))

        if k < n_splits:
            print('------Now in %sth Division------' % (k))
        k += 1

    return num_epochs, n_splits, train_loss, val_loss, train_acc, val_acc

Source: Python Questions

LEAVE A COMMENT