| 26 | """ |
| 27 | |
def __init__(self, dataset, config):
    """Create the embedding layers, the output projection and dropout.

    A token embedding is always built. Token n-gram, keyword and topic
    embeddings are built only when the configuration asks for them.

    Args:
        dataset: Supplies the vocabulary maps (``token_map``,
            ``token_ngram_map``, ``keyword_map``, ``topic_map``), the
            ``label_map`` and the ``VOCAB_PADDING`` index.
        config: Model configuration; its ``feature``, ``embedding`` and
            ``train`` sections are read here.

    Raises:
        AssertionError: If "token" is not listed in
            ``config.feature.feature_names``.
    """
    super(FastText, self).__init__()
    self.config = config

    feature_cfg = self.config.feature
    embedding_cfg = config.embedding
    assert "token" in feature_cfg.feature_names

    embedding_dim = embedding_cfg.dimension
    # Keyword arguments that every embedding layer below has in common.
    shared_kwargs = dict(
        padding_idx=dataset.VOCAB_PADDING,
        mode=EmbeddingProcessType.SUM,
        dropout=0,
        init_type=embedding_cfg.initializer,
        low=-embedding_cfg.uniform_bound,
        high=embedding_cfg.uniform_bound,
        std=embedding_cfg.random_stddev,
        activation_type=ActivationType.NONE,
    )

    self.token_embedding = Embedding(
        dataset.token_map, embedding_dim, cDataset.DOC_TOKEN, config,
        pretrained_embedding_file=feature_cfg.token_pretrained_file,
        **shared_kwargs)

    # The n-gram embedding is only created when token_ngram exceeds 1.
    if feature_cfg.token_ngram > 1:
        self.token_ngram_embedding = Embedding(
            dataset.token_ngram_map, embedding_dim,
            cDataset.DOC_TOKEN_NGRAM, config,
            **shared_kwargs)

    enabled_features = feature_cfg.feature_names
    if "keyword" in enabled_features:
        self.keyword_embedding = Embedding(
            dataset.keyword_map, embedding_dim, cDataset.DOC_KEYWORD, config,
            pretrained_embedding_file=feature_cfg.keyword_pretrained_file,
            **shared_kwargs)

    if "topic" in enabled_features:
        self.topic_embedding = Embedding(
            dataset.topic_map, embedding_dim, cDataset.DOC_TOPIC, config,
            **shared_kwargs)

    # Output layer: one unit per entry in the dataset's label map.
    num_labels = len(dataset.label_map)
    self.linear = torch.nn.Linear(embedding_dim, num_labels)
    self.dropout = torch.nn.Dropout(p=config.train.hidden_layer_dropout)