Why is a simple Binary classification failing in a feedforward neural network?

I am new to PyTorch. I was trying to build a binary classifier on the Kepler dataset. The following is my dataset class.

class KeplerDataset(Dataset):
    """Kepler objects of interest served as ``(features, label)`` tensor pairs.

    Rows are taken from the module-level ``df_numeric`` frame. Column 0 of the
    selected rows becomes the label and the remaining columns the features.
    """

    def __init__(self, test=False):
        """Select the labelled or the unlabelled rows of ``df_numeric``.

        Args:
            test: When false (the default), keep the rows whose
                ``koi_disposition`` is 0 or 1. When true, keep every other
                row instead.
        """
        # The raw CSV is only stored on the instance; the tensors below are
        # built from df_numeric, not from this frame.
        self.dataframe_orig = pd.read_csv(koi_cumm_path)

        # Build the label mask once and negate it for the test split.
        is_labelled = (df_numeric.koi_disposition == 1) | (
            df_numeric.koi_disposition == 0
        )
        selected = df_numeric[~is_labelled] if test else df_numeric[is_labelled]
        self.data = selected.values

        # NOTE(review): assumes koi_disposition is the first column of
        # df_numeric and that all columns are numeric - confirm upstream.
        self.X_data = torch.FloatTensor(self.data[:, 1:])
        self.y_data = torch.FloatTensor(self.data[:, 0])

    def __len__(self):
        """Return the number of selected rows."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X_data[index], self.y_data[index]

Here, I created a custom classifier class with two hidden layers and a single output unit that produces the sigmoid probability of a sample being in class 1 (planet).

class KOIClassifier(nn.Module):
    """Feedforward classifier: two 32-unit ReLU layers, then a sigmoid output."""

    def __init__(self, input_dim, out_dim):
        """Create the three linear layers.

        Args:
            input_dim: Number of input features per sample.
            out_dim: Number of output units.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_dim, 32)
        self.linear2 = nn.Linear(32, 32)
        self.linear3 = nn.Linear(32, out_dim)

    def forward(self, xb):
        """Return the sigmoid activations of the output layer for batch ``xb``."""
        hidden = F.relu(self.linear1(xb))
        hidden = F.relu(self.linear2(hidden))
        logits = self.linear3(hidden)
        return torch.sigmoid(logits)

I then created a train_model function to optimize the loss using SGD.

def train_model(X, y, n_epochs=100, lr=0.001):
    """Fit the module-level ``model`` to one batch with SGD and BCE loss.

    Args:
        X: Feature batch passed to ``model``.
        y: Target batch of 0/1 labels; reshaped to the shape of the model
            output before the loss is computed.
        n_epochs: Number of gradient steps taken on this batch.
        lr: Learning rate handed to ``torch.optim.SGD``.

    Returns:
        A list holding the loss value computed before each gradient step.
    """
    criterion = nn.BCELoss()
    optim = torch.optim.SGD(model.parameters(), lr=lr)
    step_losses = []
    for _ in range(n_epochs):
        # Call the module rather than .forward() so registered hooks run.
        y_pred = model(X)
        # BCELoss requires input and target of the same shape. The model
        # output carries a trailing out_dim axis that the targets lack.
        loss = criterion(y_pred, y.reshape(y_pred.shape))
        step_losses.append(loss.item())
        optim.zero_grad()
        loss.backward()
        optim.step()
    # Return the history so the caller's append() stores losses, not None.
    return step_losses


losses = []
for X, y in train_loader:
    losses.append(train_model(X, y))

But after performing the optimization over the train_loader, when I try predicting on the train_loader itself, the predicted values are much worse than I expected.

# Inspect the predictions for the first training batch only.
for features, y in train_loader:
    # KOIClassifier as shown defines forward() but no predict(), so the
    # module is called directly. no_grad() skips gradient tracking, which
    # is not needed for inference.
    with torch.no_grad():
        y_pred = model(features)
    break

y_pred
# > tensor([[4.5436e-02],
#           [1.5024e-02],
#           [2.2579e-01],
#           [4.2279e-01],
#           [6.0811e-02],
#           .....

Why is my model not working properly? Is it a problem with the dataset, or am I doing something wrong in implementing the neural net? I will link my Kaggle notebook because more context might be helpful. Please help.

Latest Images

Trending Articles

Latest Images