DSSM

DSSM

Bases: Module

Source code in engines/contentFilterEngine/nn_based_algorithms/DSSM.py
import torch
import torch.nn as nn
import torch.nn.functional as F


class DSSM(nn.Module):
    def __init__(self, 
                 vocab_size, 
                 embedding_dim, 
                 hidden_dims=[256, 128],
                 dropout=0.5):
        """
        Initialize the DSSM model.

        Args:
            vocab_size (int): Size of the vocabulary for text encoding.
            embedding_dim (int): Dimension of word embeddings.
            hidden_dims (list): List of hidden layer dimensions.
            dropout (float): Dropout rate.
        """
        super(DSSM, self).__init__()

        # Text Embedding Layer
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim, padding_idx=0)

        # Fully Connected Layers
        layers = []
        input_dim = embedding_dim
        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(input_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            input_dim = hidden_dim
        self.fc = nn.Sequential(*layers)

        # Output Embedding Layer
        self.output = nn.Linear(input_dim, hidden_dims[-1])  # Final embedding projection

    def forward(self, text):
        """
        Forward pass of the DSSM model.

        Args:
            text (torch.Tensor): Input text tensor of shape (batch_size, seq_length).

        Returns:
            torch.Tensor: Semantic embeddings of shape (batch_size, hidden_dims[-1]).
        """
        # Text Embedding
        embedded = self.embedding(text)  # (batch_size, seq_length, embedding_dim)
        embedded = torch.mean(embedded, dim=1)  # (batch_size, embedding_dim)

        # Fully Connected Layers
        features = self.fc(embedded)  # (batch_size, hidden_dims[-1])

        # Output Embedding
        output = self.output(features)  # (batch_size, hidden_dims[-1])

        # Normalize embeddings
        output = F.normalize(output, p=2, dim=1)

        return output
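
A minimal usage sketch (not taken from the project's own examples): it instantiates DSSM with illustrative hyperparameters and runs a forward pass on random token IDs. The import path mirrors the source file location shown above and may need adjusting to how the package is installed.

import torch

from engines.contentFilterEngine.nn_based_algorithms.DSSM import DSSM

# Illustrative hyperparameters; adjust vocab_size and embedding_dim to your data.
model = DSSM(vocab_size=10000, embedding_dim=300, hidden_dims=[256, 128], dropout=0.5)
model.eval()

# A batch of 4 padded sequences of 20 token IDs (index 0 is reserved for padding).
token_ids = torch.randint(1, 10000, (4, 20))

with torch.no_grad():
    embeddings = model(token_ids)

print(embeddings.shape)  # torch.Size([4, 128]) -- rows are L2-normalized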

__init__(vocab_size, embedding_dim, hidden_dims=[256, 128], dropout=0.5)

Initialize the DSSM model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| vocab_size | int | Size of the vocabulary for text encoding. | required |
| embedding_dim | int | Dimension of word embeddings. | required |
| hidden_dims | list | List of hidden layer dimensions. | [256, 128] |
| dropout | float | Dropout rate. | 0.5 |
Source code in engines/contentFilterEngine/nn_based_algorithms/DSSM.py
def __init__(self, 
             vocab_size, 
             embedding_dim, 
             hidden_dims=[256, 128],
             dropout=0.5):
    """
    Initialize the DSSM model.

    Args:
        vocab_size (int): Size of the vocabulary for text encoding.
        embedding_dim (int): Dimension of word embeddings.
        hidden_dims (list): List of hidden layer dimensions.
        dropout (float): Dropout rate.
    """
    super(DSSM, self).__init__()

    # Text Embedding Layer
    self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim, padding_idx=0)

    # Fully Connected Layers
    layers = []
    input_dim = embedding_dim
    for hidden_dim in hidden_dims:
        layers.append(nn.Linear(input_dim, hidden_dim))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout))
        input_dim = hidden_dim
    self.fc = nn.Sequential(*layers)

    # Output Embedding Layer
    self.output = nn.Linear(input_dim, hidden_dims[-1])  # Final embedding projection
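
As a sketch of how hidden_dims controls the depth of the fully connected stack (the dimensions below are only an example), each entry adds a Linear/ReLU/Dropout block, and the final projection keeps the size of the last hidden layer:

from engines.contentFilterEngine.nn_based_algorithms.DSSM import DSSM

model = DSSM(vocab_size=5000, embedding_dim=128, hidden_dims=[512, 256, 64], dropout=0.3)

# model.fc stacks Linear(128 -> 512), ReLU, Dropout, Linear(512 -> 256), ReLU,
# Dropout, Linear(256 -> 64), ReLU, Dropout; model.output then maps 64 -> 64.
print(model.fc)
print(model.output)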

forward(text)

Forward pass of the DSSM model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| text | torch.Tensor | Input text tensor of shape (batch_size, seq_length). | required |

Returns:

| Type | Description |
| --- | --- |
| torch.Tensor | Semantic embeddings of shape (batch_size, hidden_dims[-1]). |

Source code in engines/contentFilterEngine/nn_based_algorithms/DSSM.py
def forward(self, text):
    """
    Forward pass of the DSSM model.

    Args:
        text (torch.Tensor): Input text tensor of shape (batch_size, seq_length).

    Returns:
        torch.Tensor: Semantic embeddings of shape (batch_size, hidden_dims[-1]).
    """
    # Text Embedding
    embedded = self.embedding(text)  # (batch_size, seq_length, embedding_dim)
    embedded = torch.mean(embedded, dim=1)  # (batch_size, embedding_dim)

    # Fully Connected Layers
    features = self.fc(embedded)  # (batch_size, hidden_dims[-1])

    # Output Embedding
    output = self.output(features)  # (batch_size, hidden_dims[-1])

    # Normalize embeddings
    output = F.normalize(output, p=2, dim=1)

    return output
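
Because the embeddings returned by forward are L2-normalized, the dot product between two outputs equals their cosine similarity, which is the usual way DSSM-style models score query/item pairs. A sketch under the same assumptions as above (random token IDs stand in for a real tokenizer):

import torch

from engines.contentFilterEngine.nn_based_algorithms.DSSM import DSSM

model = DSSM(vocab_size=10000, embedding_dim=300)
model.eval()

query_ids = torch.randint(1, 10000, (1, 20))  # one query sequence
item_ids = torch.randint(1, 10000, (5, 20))   # five candidate item sequences

with torch.no_grad():
    query_vec = model(query_ids)  # (1, 128)
    item_vecs = model(item_ids)   # (5, 128)

# Unit-length vectors: dot product == cosine similarity.
scores = query_vec @ item_vecs.t()  # (1, 5)
best_item = scores.argmax(dim=1)
print(scores, best_item)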