CNN

Bases: Module

Source code in engines/contentFilterEngine/nn_based_algorithms/cnn.py

import torch
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self, input_dim, num_classes, emb_dim=128, kernel_sizes=[3, 4, 5], num_filters=100, dropout=0.5):
        """
        Initialize the CNN model for classification.

        Args:
            input_dim (int): Dimension of the input features.
            num_classes (int): Number of output classes.
            emb_dim (int): Embedding dimension.
            kernel_sizes (list): List of kernel sizes for convolution.
            num_filters (int): Number of filters per kernel size.
            dropout (float): Dropout rate.
        """
        super(CNN, self).__init__()
        self.embedding = nn.Linear(input_dim, emb_dim)

        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=emb_dim, out_channels=num_filters, kernel_size=k)
            for k in kernel_sizes
        ])

        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)

    def forward(self, x):
        """
        Forward pass of the CNN model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, input_dim).

        Returns:
            torch.Tensor: Output logits of shape (batch_size, num_classes).
        """
        x = self.embedding(x)  # (batch_size, emb_dim)
        x = x.unsqueeze(2)  # (batch_size, emb_dim, 1)

        # Determine the required padding based on the largest kernel size
        max_kernel_size = max([conv.kernel_size[0] for conv in self.convs])
        pad_size = (max_kernel_size // 2, max_kernel_size // 2)
        x = torch.nn.functional.pad(x, pad_size)  # (batch_size, emb_dim, padded_length)

        # Apply convolution and activation
        conv_out = [torch.relu(conv(x)) for conv in self.convs]  # List of (batch_size, num_filters, L)

        # Apply max pooling over the time dimension
        pooled = [torch.max(feature_map, dim=2)[0] for feature_map in conv_out]  # List of (batch_size, num_filters)

        # Concatenate pooled features
        concat = torch.cat(pooled, dim=1)  # (batch_size, num_filters * len(kernel_sizes))

        # Apply dropout
        drop = self.dropout(concat)

        # Final fully connected layer
        out = self.fc(drop)  # (batch_size, num_classes)

        return out
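
A minimal usage sketch (the feature dimension, class count, and batch size below are illustrative, not taken from the source):

import torch

model = CNN(input_dim=300, num_classes=5)
batch = torch.randn(32, 300)           # 32 samples, each a 300-dim feature vector
logits = model(batch)                  # (32, 5)
probs = torch.softmax(logits, dim=1)   # per-sample class probabilities

Dropout is active in training mode; call model.eval() before inference for deterministic outputs.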

__init__(input_dim, num_classes, emb_dim=128, kernel_sizes=[3, 4, 5], num_filters=100, dropout=0.5)

Initialize the CNN model for classification.

Parameters:

    Name          Type    Description                               Default
    ----          ----    -----------                               -------
    input_dim     int     Dimension of the input features.          required
    num_classes   int     Number of output classes.                 required
    emb_dim       int     Embedding dimension.                      128
    kernel_sizes  list    List of kernel sizes for convolution.     [3, 4, 5]
    num_filters   int     Number of filters per kernel size.        100
    dropout       float   Dropout rate.                             0.5
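
The classifier input width is num_filters * len(kernel_sizes), since each kernel size contributes one max-pooled feature vector. A configuration sketch (all values illustrative):

model = CNN(
    input_dim=512,
    num_classes=3,
    emb_dim=64,
    kernel_sizes=[2, 3],
    num_filters=50,
    dropout=0.3,
)
print(model.fc)  # Linear(in_features=100, out_features=3, bias=True): 50 filters x 2 kernel sizes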

forward(x)

Forward pass of the CNN model.

Parameters:

    Name   Type           Description                                      Default
    ----   ----           -----------                                      -------
    x      torch.Tensor   Input tensor of shape (batch_size, input_dim).   required

Returns:

    torch.Tensor: Output logits of shape (batch_size, num_classes).
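
Tracing the intermediate shapes makes the pipeline concrete; a sketch using the default hyperparameters (batch size and input_dim are illustrative):

import torch

model = CNN(input_dim=300, num_classes=5)       # emb_dim=128, kernel_sizes=[3, 4, 5], num_filters=100
x = torch.randn(8, 300)

emb = model.embedding(x).unsqueeze(2)           # (8, 128, 1)
padded = torch.nn.functional.pad(emb, (2, 2))   # (8, 128, 5): 5 // 2 = 2 zeros on each side
conv_out = [torch.relu(conv(padded)) for conv in model.convs]
# kernel 3 -> (8, 100, 3), kernel 4 -> (8, 100, 2), kernel 5 -> (8, 100, 1)
pooled = [fm.max(dim=2)[0] for fm in conv_out]  # three tensors of shape (8, 100)
features = torch.cat(pooled, dim=1)             # (8, 300)
logits = model.fc(model.dropout(features))      # (8, 5), same shape as model(x)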
