import torch
import torch.nn as nn
import torch.nn.functional as F


class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()  # Must call this first

        # ----- LAYER 1: First convolution -----
        # What it does: looks for simple patterns (edges, corners)
        self.conv1 = nn.Conv2d(
            in_channels=1,   # Input has 1 channel (grayscale)
            out_channels=4,  # We want 4 different filters
            kernel_size=3,   # Each filter is 3x3 pixels
            padding=1        # Add a 1-pixel border to keep the same size
        )
        # After this layer: [batch, 1, 28, 28] -> [batch, 4, 28, 28]

        # ----- POOLING LAYER -----
        # What it does: shrinks the feature maps, keeping only the strongest responses
        self.pool = nn.MaxPool2d(
            kernel_size=2,  # Look at 2x2 blocks
            stride=2        # Move 2 pixels each time
        )
        # Takes the maximum value from each 2x2 block (downsamples)
        # After pooling: [batch, 4, 28, 28] -> [batch, 4, 14, 14]

        # ----- LAYER 2: Second convolution -----
        # What it does: looks for more complex patterns (combinations of edges)
        self.conv2 = nn.Conv2d(
            in_channels=4,   # Input has 4 channels (from the previous layer)
            out_channels=8,  # We want 8 different filters
            kernel_size=3,   # Each filter is 3x3
            padding=1        # Keep the same size
        )
        # After this layer: [batch, 4, 14, 14] -> [batch, 8, 14, 14]
        # After pooling:    [batch, 8, 14, 14] -> [batch, 8, 7, 7]

        # ----- FLATTEN (not really a layer, just a reshape) -----
        # Converts the 3D feature maps to a 1D vector:
        # [batch, 8, 7, 7] -> [batch, 8 * 7 * 7] = [batch, 392]

        # ----- FULLY CONNECTED LAYER 1 -----
        # What it does: combines all features to make decisions
        self.fc1 = nn.Linear(
            in_features=8 * 7 * 7,  # 392 features from the conv layers
            out_features=64         # 64 neurons in this layer
        )

        # ----- FINAL CLASSIFICATION LAYER -----
        # What it does: decides which digit it is (0-9)
        self.fc2 = nn.Linear(
            in_features=64,   # From the previous layer
            out_features=10   # 10 classes (digits 0-9)
        )

    def forward(self, x):
        """Defines HOW data flows through the network."""
        # x starts as: [batch_size, 1, 28, 28]

        # Step 1: first convolution + activation + pooling
        x = self.pool(F.relu(self.conv1(x)))
        # ReLU zeroes out negative values and keeps positives:
        # Example: ReLU([-2, 0, 5]) = [0, 0, 5]

        # Step 2: second convolution + activation + pooling
        x = self.pool(F.relu(self.conv2(x)))

        # Step 3: flatten (3D -> 1D)
        x = x.view(-1, 8 * 7 * 7)  # -1 means "infer the batch size automatically"

        # Step 4: first fully connected layer + activation
        x = F.relu(self.fc1(x))

        # Step 5: final classification (no activation here - will add softmax later)
        x = self.fc2(x)
        return x
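To sanity-check the shape bookkeeping in the comments above, here is a minimal smoke test. The batch size of 16 and the random input are illustrative assumptions, not part of the original code; it just pushes a fake batch of 28x28 grayscale images through the untrained network and confirms the output shape.

# Minimal smoke test (assumes the imports and SimpleCNN class defined above)
model = SimpleCNN()

# A fake batch of 16 grayscale 28x28 images (MNIST-sized input)
dummy = torch.randn(16, 1, 28, 28)

logits = model(dummy)
print(logits.shape)  # torch.Size([16, 10]) -- one raw score per digit class

# Conv output size check: out = (in + 2*padding - kernel) / stride + 1
# conv1: (28 + 2*1 - 3) / 1 + 1 = 28  -> padding=1 really does keep 28x28
# pool:  28 / 2 = 14, then 14 / 2 = 7 -> hence fc1's 8 * 7 * 7 = 392 inputs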
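The forward pass ends with raw logits ("will add softmax later") because the usual PyTorch classification setup pairs logits with nn.CrossEntropyLoss, which applies log-softmax internally. A short sketch of that pairing, using random placeholder labels rather than real MNIST targets:

# CrossEntropyLoss expects raw logits, so forward() needs no final activation
criterion = nn.CrossEntropyLoss()
targets = torch.randint(0, 10, (16,))  # placeholder labels in [0, 9]
loss = criterion(logits, targets)
print(loss.item())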