| 43 | |
| 44 | |
def Build_Model_RCNN_Text(word_index, embeddings_index, nclasses, MAX_SEQUENCE_LENGTH=500, EMBEDDING_DIM=100):
    """Build and compile a recurrent-convolutional text classifier.

    The network is: an embedding layer initialised from ``embeddings_index``,
    dropout, four Conv1D + MaxPooling1D stages, four stacked LSTM layers, a
    1024-unit ReLU dense layer and a softmax output with ``nclasses`` units.

    Args:
        word_index: Mapping from word to the integer row it occupies in the
            embedding matrix. The matrix has ``len(word_index) + 1`` rows.
        embeddings_index: Mapping from word to its pretrained vector; only
            ``.get(word)`` is used, and ``None`` means "no vector available".
        nclasses: Number of units in the output layer.
        MAX_SEQUENCE_LENGTH: Passed as ``input_length`` of the embedding layer.
        EMBEDDING_DIM: Width of every embedding vector.

    Returns:
        The ``Sequential`` model, compiled with the
        ``sparse_categorical_crossentropy`` loss, the ``adam`` optimizer and
        the ``accuracy`` metric.

    Raises:
        ValueError: If a pretrained vector's length differs from
            ``EMBEDDING_DIM``.
    """
    kernel_size = 2
    filters = 256
    pool_size = 2
    lstm_units = 256
    conv_stages = 4
    stacked_lstm_layers = 4

    embedding_matrix = _build_embedding_matrix(word_index, embeddings_index, EMBEDDING_DIM)

    model = Sequential()
    model.add(Embedding(len(word_index) + 1,
                        EMBEDDING_DIM,
                        weights=[embedding_matrix],
                        input_length=MAX_SEQUENCE_LENGTH,
                        trainable=True))
    model.add(Dropout(0.25))

    for _ in range(conv_stages):
        model.add(Conv1D(filters, kernel_size, activation='relu'))
        model.add(MaxPooling1D(pool_size=pool_size))

    # Every LSTM except the last returns the full sequence so that the next
    # recurrent layer receives one input per time step.
    for _ in range(stacked_lstm_layers - 1):
        model.add(LSTM(lstm_units, return_sequences=True, recurrent_dropout=0.2))
    model.add(LSTM(lstm_units, recurrent_dropout=0.2))

    model.add(Dense(1024, activation='relu'))
    model.add(Dense(nclasses))
    model.add(Activation('softmax'))

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model


def _build_embedding_matrix(word_index, embeddings_index, embedding_dim):
    """Return the initial embedding weights, one row per word index."""
    # Rows start out with values from np.random.random; words that have no
    # pretrained vector keep that random initialisation (they are NOT zeroed).
    embedding_matrix = np.random.random((len(word_index) + 1, embedding_dim))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is None:
            continue
        if len(embedding_vector) != embedding_dim:
            # Raise instead of print + exit(): a library function should not
            # terminate the interpreter, and callers can handle ValueError.
            raise ValueError(
                "could not broadcast input array from shape {} into shape {}. "
                "Please make sure your EMBEDDING_DIM is equal to the dimension "
                "of the embedding vectors file (e.g. GloVe).".format(
                    len(embedding_vector), embedding_dim))
        embedding_matrix[i] = embedding_vector
    return embedding_matrix
| 95 | |
# Load the 'train' and 'test' subsets through fetch_20newsgroups, in that order.
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(subset=split) for split in ('train', 'test')
)