728x90
글씨가 잘 보이지 않는다면 블로그 하단에 테마를 변경하시거나 아래 사이트에서 확인하시면 됩니다.
https://github.com/JeongWooYeol/Deep-Learning-Pytorch/blob/main/2%EC%9E%A5.ipynb
2.2.1 텐서 다루기¶
In [8]:
import torch
print(torch.tensor([[1,2],[3,4]]))
print('------------------------')
#print(torch.tensor([[1,2],[3,4]], device="cuda:0")) #GPU가 없다면 오류가 발생하므로 주석 처리하였습니다.
print('------------------------')
print(torch.tensor([[1,2],[3,4]], dtype=torch.float64))
tensor([[1, 2],
[3, 4]])
------------------------
------------------------
tensor([[1., 2.],
[3., 4.]], dtype=torch.float64)
In [9]:
temp = torch.tensor([[1,2],[3,4]])
print(temp.numpy())
print('------------------------')
#temp = torch.tensor([[1,2],[3,4]], device="cuda:0") #GPU가 없다면 오류가 발생하므로 주석 처리하였습니다.
temp = torch.tensor([[1,2],[3,4]], device="cpu:0")
print(temp.to("cpu").numpy())
[[1 2]
[3 4]]
------------------------
[[1 2]
[3 4]]
In [10]:
temp = torch.FloatTensor([1, 2, 3, 4, 5, 6, 7])
print(temp[0], temp[1], temp[-1])
print('------------------------')
print(temp[2:5], temp[4:-1])
tensor(1.) tensor(2.) tensor(7.)
------------------------
tensor([3., 4., 5.]) tensor([5., 6.])
In [11]:
v = torch.tensor([1, 2, 3])
w = torch.tensor([3, 4, 6])
print(w - v)
tensor([2, 2, 3])
In [12]:
temp = torch.tensor([
[1, 2], [3, 4]
])
print(temp.shape)
print('------------------------')
print(temp.view(4,1))
print('------------------------')
print(temp.view(-1))
print('------------------------')
print(temp.view(1, -1))
print('------------------------')
print(temp.view(-1, 1))
torch.Size([2, 2])
------------------------
tensor([[1],
[2],
[3],
[4]])
------------------------
tensor([1, 2, 3, 4])
------------------------
tensor([[1, 2, 3, 4]])
------------------------
tensor([[1],
[2],
[3],
[4]])
In [13]:
import torch
import torchmetrics
metric = torchmetrics.Accuracy()
print(torch.randn(10, 5)) # 가우시안 분포를 갖는 랜덤한 수 10 x 5 만큼 생성
n_batches = 10
for i in range(n_batches):
preds = torch.randn(10, 5).softmax(dim = -1)
target = torch.randint(5, (10,)) # 0 ~ 4 사이의 정수 10개 생성
acc = metric(preds, target)
print(f"Accuracy on batch {i}: {acc}")
acc = metric.compute()
print(f"Accuracy on all data: {acc}")
tensor([[-0.2095, 1.2581, -0.3453, 0.6943, 0.6771],
[-0.3742, -0.0177, -1.3701, -1.0506, -0.4190],
[-1.2103, -0.9815, -0.6881, 1.0028, 1.7338],
[ 0.0335, 0.3642, -0.2073, 0.3253, -0.6061],
[ 0.6011, -1.9420, -1.2126, -0.4270, -0.0037],
[-1.1644, 0.9407, -0.8218, 1.6110, 0.3960],
[ 0.2882, -0.8448, 0.7214, -0.2357, -0.4199],
[ 0.0367, 1.6615, -0.9399, 0.2168, -0.8942],
[ 1.0685, 0.5933, 1.3627, -0.2034, 0.0077],
[ 1.1227, -0.3337, 0.0856, 1.9073, -0.8779]])
Accuracy on batch 0: 0.5
Accuracy on batch 1: 0.20000000298023224
Accuracy on batch 2: 0.10000000149011612
Accuracy on batch 3: 0.20000000298023224
Accuracy on batch 4: 0.30000001192092896
Accuracy on batch 5: 0.20000000298023224
Accuracy on batch 6: 0.20000000298023224
Accuracy on batch 7: 0.20000000298023224
Accuracy on batch 8: 0.10000000149011612
Accuracy on batch 9: 0.10000000149011612
Accuracy on all data: 0.20999999344348907
In [ ]:
import pandas as pd
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
class CustomDataset(Dataset):
def __init__(self, csv_file):
self.label = pd.read_csv(csv_file)
def __len__(self):
return len(self.label)
def __getitem(self, idx):
sample = torch.tensor(self.label.illoc[idx, 0:3]).int
label = torch.tensor(self.label.iloc[idx, 3]).int()
return sample, label
tensor_dataset = CustomDataset('./data/covtype.csv')
dataset = DataLoader(tensor_dataset, batcj_size = 4, shuffle = True)
In [14]:
import torchvision.transforms as transforms
mnist_transform = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, ), (1.0, ))
])
from torchvision.datasets import MNIST
import requests
download_root = './data/MNIST_DATASET'
train_dataset = MNIST(download_root, transform = mnist_transform, train = True, download = True)
valid_dataset = MNIST(download_root, transform = mnist_transform, train = False, download = True)
test_dataset = MNIST(download_root, transform = mnist_transform, train = False, download = True)
In [18]:
import torch.nn as nn
class MLP(nn.Module):
def __init__(self):
super(MLP, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(in_channels = 3, out_channels = 64, kernel_size = 5),
nn.ReLU(inplace = True),
nn.MaxPool2d(2))
self.layer2 = nn.Sequential(
nn.Conv2d(in_channels = 64, out_channels = 30, kernel_size = 5),
nn.ReLU(inplace = True),
nn.MaxPool2d(2))
self.layer3 = nn.Sequential(
nn.Linear(in_features = 30 * 5 * 5, out_features = 10, bias = True),
nn.ReLU(inplace = True))
def forward(self, x):
x = self.layer1(x)
x = self.layer2(x)
x = x.view(x.shape[0], -1)
x = self.layer3(x)
return x
model = MLP()
In [ ]:
from torch.optim import optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer = optimizer, lr_lambda = lambda epoch: 0.95 ** epoch)
dataloader = train_dataset
for epoch in range(1, 101):
for x, y in dataloader:
optimizer.zero_grad()
loss_fn(model(x), y).backward()
optimizer.step()
scheduler.step()
In [ ]:
import torch
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('./data/tensorboard')
num_epochs = 100
for epoch in range(num_epochs):
model.train()
batch_loss = 0.0
for i, (x, y) in enumerate(dataloader):
x, y = x.to(device).float(), y.to(device).float()
outputs = model(x)
loss = criterion(outputs, y)
writer.add_scalar("Loss", loss, epoch)
optimizer.zero_grad()
loss.backward()
optimizer.step()
writer.close()
2.4 파이토치 코드 맛보기¶
In [ ]:
import torch
import torch.nn as nn
import numpy as np # 벡터 및 행렬 연산에서 매우 편리한 기능을 제공하는 파이썬 라이브러리 패키지
import pandas as pd # 데이터 처리를 위해 널리 사용되는 파이썬 라이브러리 패키지
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [5]:
dataset = pd.read_csv(r'./data/car_evaluation.csv') # pandas를 통해 읽는다. 그리고 경로 조심!!
In [6]:
dataset.head(n = 10) # 위에서부터 10개
Out[6]:
price | maint | doors | persons | lug_capacity | safety | output | |
---|---|---|---|---|---|---|---|
0 | vhigh | vhigh | 2 | 2 | small | low | unacc |
1 | vhigh | vhigh | 2 | 2 | small | med | unacc |
2 | vhigh | vhigh | 2 | 2 | small | high | unacc |
3 | vhigh | vhigh | 2 | 2 | med | low | unacc |
4 | vhigh | vhigh | 2 | 2 | med | med | unacc |
5 | vhigh | vhigh | 2 | 2 | med | high | unacc |
6 | vhigh | vhigh | 2 | 2 | big | low | unacc |
7 | vhigh | vhigh | 2 | 2 | big | med | unacc |
8 | vhigh | vhigh | 2 | 2 | big | high | unacc |
9 | vhigh | vhigh | 2 | 4 | small | low | unacc |
In [47]:
dataset.tail(n = 10) # 아래에서부터 10개
Out[47]:
price | maint | doors | persons | lug_capacity | safety | output | |
---|---|---|---|---|---|---|---|
1718 | low | low | 5more | 4 | big | high | vgood |
1719 | low | low | 5more | more | small | low | unacc |
1720 | low | low | 5more | more | small | med | acc |
1721 | low | low | 5more | more | small | high | good |
1722 | low | low | 5more | more | med | low | unacc |
1723 | low | low | 5more | more | med | med | good |
1724 | low | low | 5more | more | med | high | vgood |
1725 | low | low | 5more | more | big | low | unacc |
1726 | low | low | 5more | more | big | med | good |
1727 | low | low | 5more | more | big | high | vgood |
In [48]:
fig_size = plt.rcParams["figure.figsize"] # rcParams = runtime configuration Parameters
# 위와 같이 하면 그래프 그리는 설정을 변경할 수 있다.
fig_size[0] = 8 # 가로 길이
fig_size[1] = 6 # 세로 길이
plt.rcParams["figure.figsize"] = fig_size
dataset.output.value_counts().plot(kind='pie', autopct='%0.05f%%', colors=['lightblue', 'lightgreen', 'orange', 'pink'], explode=(0.05, 0.05, 0.05,0.05))
# value_counts()는 카테고리별 개수를 세는 역할
Out[48]:
<AxesSubplot:ylabel='output'>
In [53]:
categorical_columns = ['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety'] # 데이터 예제의 카테고리 목록들
In [54]:
for category in categorical_columns:
dataset[category] = dataset[category].astype('category') # astype을 사용해 데이터를 범주형으로 변환
In [60]:
price = dataset['price'].cat.codes.values
print(price)
maint = dataset['maint'].cat.codes.values
print(maint)
doors = dataset['doors'].cat.codes.values
print(doors)
persons = dataset['persons'].cat.codes.values
print(persons)
lug_capacity = dataset['lug_capacity'].cat.codes.values
print(lug_capacity)
safety = dataset['safety'].cat.codes.values
print(safety)
categorical_data = np.stack([price, maint, doors, persons, lug_capacity, safety], 1)
categorical_data[:10]
[3 3 3 ... 1 1 1]
[3 3 3 ... 1 1 1]
[0 0 0 ... 3 3 3]
[0 0 0 ... 2 2 2]
[2 2 2 ... 0 0 0]
[1 2 0 ... 1 2 0]
Out[60]:
array([[3, 3, 0, 0, 2, 1],
[3, 3, 0, 0, 2, 2],
[3, 3, 0, 0, 2, 0],
[3, 3, 0, 0, 1, 1],
[3, 3, 0, 0, 1, 2],
[3, 3, 0, 0, 1, 0],
[3, 3, 0, 0, 0, 1],
[3, 3, 0, 0, 0, 2],
[3, 3, 0, 0, 0, 0],
[3, 3, 0, 1, 2, 1]], dtype=int8)
In [9]:
categorical_data = torch.tensor(categorical_data, dtype=torch.int64)
categorical_data[:10]
Out[9]:
tensor([[3, 3, 0, 0, 2, 1],
[3, 3, 0, 0, 2, 2],
[3, 3, 0, 0, 2, 0],
[3, 3, 0, 0, 1, 1],
[3, 3, 0, 0, 1, 2],
[3, 3, 0, 0, 1, 0],
[3, 3, 0, 0, 0, 1],
[3, 3, 0, 0, 0, 2],
[3, 3, 0, 0, 0, 0],
[3, 3, 0, 1, 2, 1]])
In [101]:
outputs = pd.get_dummies(dataset.output) # 가변수로 만들어 주는 함수(0, 1로 표현되도록)
print(outputs, outputs.shape)
outputs = outputs.values
print(outputs, outputs.shape)
outputs = torch.tensor(outputs).flatten()
print(categorical_data.shape) # 얘는 output으로 따로 데이터를 표현하기 전에 original 값에 대한 정보여서
# (1728 x 6)
print(outputs.shape)
# 1728 x 4를 1차원 배열로 펴준 모습
acc good unacc vgood
0 0 0 1 0
1 0 0 1 0
2 0 0 1 0
3 0 0 1 0
4 0 0 1 0
... ... ... ... ...
1723 0 1 0 0
1724 0 0 0 1
1725 0 0 1 0
1726 0 1 0 0
1727 0 0 0 1
[1728 rows x 4 columns] (1728, 4)
[[0 0 1 0]
[0 0 1 0]
[0 0 1 0]
...
[0 0 1 0]
[0 1 0 0]
[0 0 0 1]] (1728, 4)
(1728, 6)
torch.Size([6912])
In [236]:
import pandas as pd
import numpy as np
data ={
'gender' : ['male', 'female', 'male'],
'weight' : [72, 55, 68],
'nation' : ['Japan', 'Korea', 'Australia']
}
df = pd.DataFrame(data)
print(df)
print(pd.get_dummies(df))
print(pd.get_dummies(df.nation))
gender weight nation
0 male 72 Japan
1 female 55 Korea
2 male 68 Australia
weight gender_female gender_male nation_Australia nation_Japan \
0 72 0 1 0 1
1 55 1 0 0 0
2 68 0 1 1 0
nation_Korea
0 0
1 1
2 0
Australia Japan Korea
0 0 1 0
1 0 0 1
2 1 0 0
In [247]:
categorical_column_sizes = [len(dataset[column].cat.categories) for column in categorical_columns]
print(categorical_column_sizes)
categorical_embedding_sizes = [(col_size, min(50, (col_size+1)//2)) for col_size in categorical_column_sizes]
print(categorical_embedding_sizes)
[4, 4, 4, 3, 3, 3]
[(4, 2), (4, 2), (4, 2), (3, 2), (3, 2), (3, 2)]
In [295]:
total_records = 1728
test_records = int(total_records * .2)
categorical_train_data = categorical_data[:total_records-test_records]
categorical_test_data = categorical_data[total_records-test_records:total_records]
train_outputs = outputs[:total_records - test_records]
test_outputs = outputs[total_records-test_records:total_records]
In [266]:
print(len(categorical_train_data))
print(len(train_outputs))
print(len(categorical_test_data))
print(len(test_outputs))
1383
1383
345
345
In [267]:
class Model(nn.Module):
def __init__(self, embedding_size, output_size, layers, p=0.4):
super().__init__()
self.all_embeddings = nn.ModuleList([nn.Embedding(ni, nf) for ni, nf in embedding_size])
self.embedding_dropout = nn.Dropout(p)
all_layers = []
num_categorical_cols = sum((nf for ni, nf in embedding_size))
input_size = num_categorical_cols
for i in layers:
all_layers.append(nn.Linear(input_size, i))
all_layers.append(nn.ReLU(inplace=True))
all_layers.append(nn.BatchNorm1d(i))
all_layers.append(nn.Dropout(p))
input_size = i
all_layers.append(nn.Linear(layers[-1], output_size))
self.layers = nn.Sequential(*all_layers)
def forward(self, x_categorical):
embeddings = []
for i,e in enumerate(self.all_embeddings):
embeddings.append(e(x_categorical[ : , i]))
x = torch.cat(embeddings, 1)
x = self.embedding_dropout(x)
x = self.layers(x)
return x
In [268]:
model = Model(categorical_embedding_sizes, 4, [200,100,50], p=0.4)
print(model)
Model(
(all_embeddings): ModuleList(
(0): Embedding(4, 2)
(1): Embedding(4, 2)
(2): Embedding(4, 2)
(3): Embedding(3, 2)
(4): Embedding(3, 2)
(5): Embedding(3, 2)
)
(embedding_dropout): Dropout(p=0.4, inplace=False)
(layers): Sequential(
(0): Linear(in_features=12, out_features=200, bias=True)
(1): ReLU(inplace=True)
(2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): Dropout(p=0.4, inplace=False)
(4): Linear(in_features=200, out_features=100, bias=True)
(5): ReLU(inplace=True)
(6): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): Dropout(p=0.4, inplace=False)
(8): Linear(in_features=100, out_features=50, bias=True)
(9): ReLU(inplace=True)
(10): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): Dropout(p=0.4, inplace=False)
(12): Linear(in_features=50, out_features=4, bias=True)
)
)
In [269]:
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
In [270]:
if torch.cuda.is_available():
device = torch.device('cuda')
else:
device = torch.device('cpu')
device
Out[270]:
device(type='cuda')
In [296]:
epochs = 500
aggregated_losses = []
train_outputs = train_outputs.to(device = device, dtype=torch.int64)
print(train_outputs.dtype)
categorical_train_data = torch.Tensor(categorical_train_data).long()
categorical_train_data = categorical_train_data.to(device)
model = model.to(device)
for i in range(epochs):
i += 1
y_pred = model(categorical_train_data)
single_loss = loss_function(y_pred, train_outputs)
aggregated_losses.append(single_loss)
if i%25 == 1:
print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')
optimizer.zero_grad()
single_loss.backward()
optimizer.step()
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')
torch.int64
epoch: 1 loss: 1.50740862
epoch: 26 loss: 1.32828343
epoch: 51 loss: 1.27542818
epoch: 76 loss: 1.15993237
epoch: 101 loss: 1.05491567
epoch: 126 loss: 0.90227455
epoch: 151 loss: 0.80448341
epoch: 176 loss: 0.74190193
epoch: 201 loss: 0.68707675
epoch: 226 loss: 0.65051717
epoch: 251 loss: 0.63140100
epoch: 276 loss: 0.61486077
epoch: 301 loss: 0.60089266
epoch: 326 loss: 0.60259157
epoch: 351 loss: 0.59290385
epoch: 376 loss: 0.59279698
epoch: 401 loss: 0.57534498
epoch: 426 loss: 0.57585049
epoch: 451 loss: 0.57348573
epoch: 476 loss: 0.57816023
epoch: 500 loss: 0.5778155923
In [309]:
test_outputs = test_outputs.to(device = device, dtype = torch.int64)
with torch.no_grad():
y_val = model(torch.Tensor(categorical_test_data).to(device = device, dtype = torch.long))
loss = loss_function(y_val, test_outputs)
print(f'Loss: {loss:.8f}')
Loss: 0.58028483
In [310]:
print(y_val[:5])
tensor([[ 2.0192, 1.2106, -2.6477, -2.5275],
[ 2.0297, 1.0864, -2.7678, -2.5864],
[ 2.2745, 1.3213, -2.9151, -2.8978],
[ 2.2103, 1.4020, -2.4808, -2.1627],
[ 3.1314, 1.8246, -3.3417, -2.9252]], device='cuda:0')
In [312]:
y_val = y_val.to('cpu')
y_val = np.argmax(y_val, axis=1)
print(y_val[:5])
tensor([0, 0, 0, 0, 0])
In [314]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
test_outputs = test_outputs.to('cpu')
print(confusion_matrix(test_outputs,y_val))
print(classification_report(test_outputs,y_val))
print(accuracy_score(test_outputs, y_val))
[[259 0]
[ 86 0]]
precision recall f1-score support
0 0.75 1.00 0.86 259
1 0.00 0.00 0.00 86
accuracy 0.75 345
macro avg 0.38 0.50 0.43 345
weighted avg 0.56 0.75 0.64 345
0.7507246376811594
C:\ProgramData\Miniconda3\envs\yolov5\lib\site-packages\sklearn\metrics\_classification.py:1334: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\ProgramData\Miniconda3\envs\yolov5\lib\site-packages\sklearn\metrics\_classification.py:1334: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
C:\ProgramData\Miniconda3\envs\yolov5\lib\site-packages\sklearn\metrics\_classification.py:1334: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
In [ ]:
728x90
'Deep Learning(강의 및 책) > Pytorch' 카테고리의 다른 글
[Pytorch] Deep Learning Pytorch 11. 클러스터링(Clustering) (0) | 2023.01.16 |
---|---|
[Pytorch] Deep Learning Pytorch 8. 성능 최적화 (0) | 2023.01.15 |
[Pytorch] Deep Learning Pytorch 6. 합성곱신경망 2 (0) | 2023.01.12 |
[Pytorch] Deep Learning Pytorch 5. 합성곱 신경망Ⅰ (0) | 2022.09.25 |
[Pytorch] Deep Learning Pytorch 3. 머신러닝 핵심 알고리즘 (0) | 2022.09.10 |