Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
MATF_Hackathon_2021/
.ipynb_checkpoints/
.vscode/
__pycache__
__pycache__
train/
45 changes: 45 additions & 0 deletions DAE/data_loaders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
import os
import pandas as pd
import numpy as np
from numba import jit

def addNoise(data, noise_prob=0.2):
    """Randomly corrupt entries of a 2-D array by setting them to -1 (in place).

    Each element is independently replaced with the sentinel value -1 with
    probability ``noise_prob``; the default 0.2 reproduces the original
    ``rand > 0.8`` threshold.  Used to build the noisy inputs for the
    denoising autoencoder.

    Args:
        data: 2-D numpy array; modified in place.
        noise_prob: probability that any given entry is masked out.

    Returns:
        The same array object with roughly ``noise_prob`` of entries set to -1.
    """
    # NOTE: mutates `data` in place; callers needing the clean version must
    # pass a copy (AEDataset already does).  Debug print removed.
    mask = np.random.rand(data.shape[0], data.shape[1]) > (1.0 - noise_prob)
    data[mask] = -1
    return data


class AEDataset(Dataset):
    """Autoencoder dataset: noisy rows as inputs, clean rows as targets.

    Each sample is a pair ``[noisy_row, clean_row]``; the noisy input comes
    from ``addNoise`` applied to a copy of the data so the original rows
    survive as reconstruction targets.
    """

    def __init__(self, data, transform=None, target_transform=None, device='cuda'):
        """Build the tensor pairs.

        Args:
            data: 2-D numpy array of samples (rows).
            transform: optional callable applied to the noisy input at access
                time (None for a basic AE, extra noising for a DAE).
            target_transform: accepted for API symmetry but always ignored —
                the target must stay the clean row.
            device: torch device for the tensors (default 'cuda' preserves the
                original behaviour; pass 'cpu' for CPU-only runs).
        """
        # Corrupt a copy so the clean rows remain available as targets.
        data_copy = addNoise(np.copy(data))
        self.X = [torch.tensor(row).float().to(device) for row in data_copy]
        self.y = [torch.tensor(row).float().to(device) for row in data]

        self.transform = transform
        # Deliberately pinned to None: targets are the uncorrupted rows.
        self.target_transform = None

    def __len__(self):
        # X and y are built from the same array, so they must stay in sync.
        assert len(self.X) == len(self.y)
        return len(self.X)

    def __getitem__(self, idx):
        inp = self.X[idx]
        target = self.y[idx]

        if self.transform:
            inp = self.transform(inp)

        # target_transform is forced to None in __init__; reaching this means
        # the invariant was broken elsewhere.  (Was `assert(False)`, which is
        # silently stripped under `python -O`.)
        if self.target_transform:
            raise RuntimeError("target_transform must be None for AEDataset")

        return [inp, target]
34 changes: 34 additions & 0 deletions DAE/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import numpy as np
from torch import nn
from torch.functional import F

class DAE(nn.Module):
    """Denoising autoencoder: input_dim -> 32 -> 16 -> latent_dim and back.

    The encoder compresses the input to ``latent_dim`` features; the decoder
    mirrors it back to ``input_dim``.  ``forward`` returns either the detached
    latent code (``encode=True``) or the full reconstruction.
    """

    def __init__(self, input_dim, latent_dim):
        super().__init__()
        # Encoder: input_dim -> 32 -> 16 -> latent_dim
        self.encoder_layer1 = nn.Linear(input_dim, 32)
        self.encoder_layer2 = nn.Linear(32, 16)
        self.encoder_layer3 = nn.Linear(16, latent_dim)

        # Decoder mirrors the encoder: latent_dim -> 16 -> 32 -> input_dim
        self.decoder_layer1 = nn.Linear(latent_dim, 16)
        self.decoder_layer2 = nn.Linear(16, 32)
        self.decoder_layer3 = nn.Linear(32, input_dim)

    def encoder(self, x):
        """Map input to a non-negative latent code (ReLU on every layer)."""
        h1 = F.relu(self.encoder_layer1(x))
        h2 = F.relu(self.encoder_layer2(h1))
        return F.relu(self.encoder_layer3(h2))

    def decoder(self, latent):
        """Map a latent code back to input space (linear final layer)."""
        h1 = F.relu(self.decoder_layer1(latent))
        h2 = F.relu(self.decoder_layer2(h1))
        return self.decoder_layer3(h2)

    def forward(self, x, encode=False):
        """Reconstruct ``x`` or, with ``encode=True``, return its latent code.

        The latent is detached on the encode path so downstream models cannot
        backpropagate into a frozen encoder.  ``encode`` now defaults to
        False so the module can be called like a plain autoencoder; callers
        that pass the flag explicitly are unaffected.
        """
        latent = self.encoder(x)

        # Encoding-only path: detach to stop gradients flowing back here.
        if encode:
            return latent.detach()

        # Training path: full reconstruction with gradients enabled.
        return self.decoder(latent)
157 changes: 157 additions & 0 deletions DAE/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import torch
from torch.utils.data import DataLoader
from torch import nn,optim
from torch.autograd import Variable as V
from torch.functional import F
import pandas as pd
import numpy as np
from torch.optim.lr_scheduler import LambdaLR
from model import DAE
from data_loaders import AEDataset, addNoise

def train_loop(data_loader, model, loss_fn, optimizer):
model.train()

size = len(data_loader.dataset)
for batch, (X, y) in enumerate(data_loader):
pred = model(X, encode = False)
loss = loss_fn(pred, y)

# reset gradients to avoid accumulation
optimizer.zero_grad()
loss.backward()
optimizer.step()

if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

def valid_loop(data_loader, model, loss_fn):
model.eval()
size = len(data_loader.dataset)
valid_loss = 0

losses = []
with torch.no_grad():
for batch, (X, y) in enumerate(data_loader):
pred = model(X, encode = False)
loss = loss_fn(pred, y).item()
losses.append(loss)

print(f"Validation avg loss: {sum(losses) / len(losses):>8f} \n")
with torch.no_grad():
(X, y) = next(iter(data_loader))
pred = model(X, encode = False)
print(y[0])
print(pred[0])
return sum(losses) / len(losses)

def test(data_loader, model, loss_fn):
model.eval()
size = len(data_loader.dataset)
test_loss = 0

with torch.no_grad():
for X, y in data_loader:
pred = model(X, encode = False)
test_loss += loss_fn(pred, y).item()

test_loss /= size
print(f"Test avg loss: {test_loss:>8f} \n")

def find_min_max(in_data):
    """Return per-column (min, max) float64 arrays, ignoring NaN entries."""
    # nanmin/nanmax along axis 0 give one value per column in a single
    # vectorized pass; cast keeps the original float64 output dtype.
    column_mins = np.nanmin(in_data, axis=0).astype(np.float64)
    column_maxs = np.nanmax(in_data, axis=0).astype(np.float64)
    return column_mins, column_maxs

def apply_min_max(in_data, min_values, max_values):
    """Min-max normalize each column to [0, 1], clipping out-of-range values.

    Columns with zero range are only shifted (no division).  Values above the
    column max map to 1.0 and values below the column min map to 0.0.
    """
    out_data = in_data.copy()

    for col_idx in range(in_data.shape[1]):
        column = in_data[:, col_idx]
        span = max_values[col_idx] - min_values[col_idx]
        above = column > max_values[col_idx]
        below = column < min_values[col_idx]

        shifted = column - min_values[col_idx]
        out_data[:, col_idx] = shifted / span if span != 0 else shifted

        # Clip out-of-range entries to the ends of the unit interval.
        np.place(out_data[:, col_idx], above, 1.0)
        np.place(out_data[:, col_idx], below, 0.0)
    return out_data

if __name__ == "__main__":
    import os

    # Training hyper-parameters.
    epochs = 5000
    batch_size = 128

    input_size = 34
    latent_size = 8

    model = DAE(input_size, latent_size)
    model.to('cuda')
    torch.backends.cudnn.benchmark = True

    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    DECAY = 0.95
    # lr is multiplied by DECAY^k after the k-th scheduler.step() call
    # (invoked every 200 epochs below).
    scheduler = LambdaLR(optimizer, lr_lambda = lambda t : DECAY**t)

    data = pd.read_csv("model/training_test_data.csv")
    # BUG FIX: DataFrame.sample returns a new frame; the original call
    # discarded the result, so the data was never actually shuffled
    # before splitting into train/validation/test.
    data = data.sample(frac = 1, random_state = 200)
    data = data.to_numpy()
    size = data.shape[0]

    # 70/20/10 train/validation/test split.
    training_data = data[: int(0.7*size)]
    validation_data = data[int(0.7*size) : int(0.9*size)]
    test_data = data[int(0.9*size) :]

    # Train data: weather columns only (assumes cols 5..-8 are weather —
    # TODO confirm against the CSV schema), drop rows containing NaNs, then
    # min-max scale with statistics computed on the training split.
    weather_train = training_data[:, 5:-7]
    weather_train = weather_train[~np.isnan(weather_train).any(axis=1)]
    min_values, max_values = find_min_max(weather_train)
    weather_train = apply_min_max(weather_train, min_values, max_values)

    # Validation data: scaled with the *training* min/max to avoid leakage.
    weather_valid = validation_data[:, 5:-7]
    weather_valid = weather_valid[~np.isnan(weather_valid).any(axis=1)]
    weather_valid = apply_min_max(weather_valid, min_values, max_values)

    # Test data: same treatment.
    weather_test = test_data[:, 5:-7]
    weather_test = weather_test[~np.isnan(weather_test).any(axis=1)]
    weather_test = apply_min_max(weather_test, min_values, max_values)

    training_dataset = AEDataset(weather_train)
    training_loader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)

    validation_dataset = AEDataset(weather_valid)
    validation_loader = DataLoader(validation_dataset, batch_size=batch_size)

    test_dataset = AEDataset(weather_test)
    test_loader = DataLoader(test_dataset)

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs("DAE/weather_train", exist_ok=True)

    valid_losses = []
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        train_loop(training_loader, model, loss_fn, optimizer)
        val_loss = valid_loop(validation_loader, model, loss_fn)
        print(optimizer.param_groups[0]['lr'])
        torch.save(model, "DAE/weather_train/model" + str(t) + ".pth")
        valid_losses.append(val_loss)

        # Decay the learning rate once every 200 epochs.
        if t % 200 == 0:
            scheduler.step()

    np.savetxt("DAE/weather_train/valid.txt", np.array(valid_losses))
    test(test_loader, model, loss_fn)
33 changes: 33 additions & 0 deletions MainModel/dae_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import numpy as np
from torch import nn
from torch.functional import F

class DAE(nn.Module):
    """Denoising autoencoder: input_dim -> 32 -> 16 -> latent_dim and back.

    Duplicate of DAE/model.py so MainModel can unpickle saved encoders.
    ``forward`` returns either the detached latent code (``encode=True``)
    or the full reconstruction.
    """

    def __init__(self, input_dim, latent_dim):
        super().__init__()
        # Encoder: input_dim -> 32 -> 16 -> latent_dim
        self.encoder_layer1 = nn.Linear(input_dim, 32)
        self.encoder_layer2 = nn.Linear(32, 16)
        self.encoder_layer3 = nn.Linear(16, latent_dim)

        # Decoder mirrors the encoder: latent_dim -> 16 -> 32 -> input_dim
        self.decoder_layer1 = nn.Linear(latent_dim, 16)
        self.decoder_layer2 = nn.Linear(16, 32)
        self.decoder_layer3 = nn.Linear(32, input_dim)

    def encoder(self, x):
        """Map input to a non-negative latent code (ReLU on every layer)."""
        h1 = F.relu(self.encoder_layer1(x))
        h2 = F.relu(self.encoder_layer2(h1))
        return F.relu(self.encoder_layer3(h2))

    def decoder(self, latent):
        """Map a latent code back to input space (linear final layer)."""
        h1 = F.relu(self.decoder_layer1(latent))
        h2 = F.relu(self.decoder_layer2(h1))
        return self.decoder_layer3(h2)

    def forward(self, x, encode=False):
        """Reconstruct ``x`` or, with ``encode=True``, return its latent code.

        The latent is detached on the encode path so downstream models cannot
        backpropagate into the frozen encoder.  ``encode`` defaults to False
        so the module can be called like a plain autoencoder; callers passing
        the flag explicitly are unaffected.
        """
        latent = self.encoder(x)
        # Encoding-only path: detach to stop gradients flowing back here.
        if encode:
            return latent.detach()

        # Training path: full reconstruction with gradients enabled.
        return self.decoder(latent)
36 changes: 36 additions & 0 deletions MainModel/data_loaders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
import os
import pandas as pd
import numpy as np
from numba import jit

class MDataset(Dataset):
    """Supervised dataset: all columns but the last are features, last is the target."""

    def __init__(self, data, transform=None, target_transform=None, device='cuda'):
        """Split each row into (features, target) tensors on ``device``.

        Args:
            data: 2-D numpy array; the last column holds the target value.
            transform: optional callable applied to the features at access time.
            target_transform: accepted for API symmetry but always ignored.
            device: torch device for the tensors (default 'cuda' preserves the
                original behaviour; pass 'cpu' for CPU-only runs).
        """
        self.X = [torch.tensor(row[:-1]).float().to(device) for row in data]
        self.y = [torch.tensor(row[-1:]).float().to(device) for row in data]

        self.transform = transform
        # Deliberately pinned to None: targets are used as-is.
        self.target_transform = None

    def __len__(self):
        # X and y are built from the same array, so they must stay in sync.
        assert len(self.X) == len(self.y)
        return len(self.X)

    def __getitem__(self, idx):
        inp = self.X[idx]
        target = self.y[idx]

        if self.transform:
            inp = self.transform(inp)

        # target_transform is forced to None in __init__; reaching this means
        # the invariant was broken elsewhere.  (Was `assert(False)`, which is
        # silently stripped under `python -O`.)
        if self.target_transform:
            raise RuntimeError("target_transform must be None for MDataset")

        return [inp, target]
25 changes: 25 additions & 0 deletions MainModel/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import numpy as np
from torch import nn
import torch
from torch.functional import F
from dae_model import DAE

class MainModel(nn.Module):
    """Regression head on top of two pre-trained encoders.

    The weather and pollution slices of the input are encoded separately
    (``encode=True`` asks the encoders for their latent codes), concatenated
    with the first two raw columns, and passed through a small MLP producing
    a single output value.
    """

    def __init__(self, weather_encoder, pollution_encoder):
        super().__init__()
        self.weather_encoder = weather_encoder
        self.pollution_encoder = pollution_encoder

        # 18 inputs — presumably weather latent + pollution latent + the 2
        # leading raw columns; confirm against the encoders' latent dims.
        self.layer1 = nn.Linear(18, 16)
        self.layer2 = nn.Linear(16, 8)
        self.layer3 = nn.Linear(8, 1)

    def forward(self, x):
        # Columns 2..-8 go to the weather encoder, the last 7 to the
        # pollution encoder; columns 0-1 are fed through unencoded.
        weather_code = self.weather_encoder(x[:, 2:-7], encode=True)
        pollution_code = self.pollution_encoder(x[:, -7:], encode=True)
        combined = torch.cat([weather_code, pollution_code, x[:, 0:2]], dim=1)

        hidden = F.relu(self.layer1(combined))
        hidden = F.relu(self.layer2(hidden))
        return self.layer3(hidden)
Binary file added MainModel/penc.pth
Binary file not shown.
Binary file added MainModel/pred_model.pth
Binary file not shown.
Loading