(source_sents,target_sents)
| 23 | |
| 24 | |
def create_data(source_sents, target_sents):
    """Encode parallel sentences as zero-padded index matrices.

    Each sentence gets " </S>" appended, is split on whitespace, and every
    token is looked up in the vocabulary dicts returned by
    ``load_de_vocab()`` (source side) and ``load_en_vocab()`` (target side).
    Tokens missing from a vocabulary map to index 1. A sentence pair is kept
    only if both encoded sequences are at most ``hp.maxlen`` tokens long.

    Args:
        source_sents: Iterable of source sentence strings.
        target_sents: Iterable of target sentence strings, paired with
            ``source_sents`` via ``zip`` (surplus items on the longer side
            are ignored).

    Returns:
        A tuple ``(X, Y, Sources, Targets)`` where ``X`` and ``Y`` are
        ``np.int32`` arrays of shape ``(num_kept, hp.maxlen)`` holding the
        encoded sequences right-padded with 0, and ``Sources`` / ``Targets``
        are lists of the kept raw sentences in the same row order.
    """
    # Only the word -> index direction is needed here.
    de2idx, _ = load_de_vocab()
    en2idx, _ = load_en_vocab()

    x_list, y_list, Sources, Targets = [], [], [], []
    for source_sent, target_sent in zip(source_sents, target_sents):
        # 1: OOV, </S>: End of Text
        x = [de2idx.get(word, 1) for word in (source_sent + u" </S>").split()]
        y = [en2idx.get(word, 1) for word in (target_sent + u" </S>").split()]

        # Drop the whole pair if either side would not fit in a row.
        if max(len(x), len(y)) <= hp.maxlen:
            x_list.append(x)
            y_list.append(y)
            Sources.append(source_sent)
            Targets.append(target_sent)

    # Pad: the matrices start out all-zero, so writing each sequence into the
    # front of its row leaves the remainder as 0 padding. This replaces the
    # former np.lib.pad call, which is unavailable in NumPy >= 2.0.
    X = np.zeros([len(x_list), hp.maxlen], np.int32)
    Y = np.zeros([len(y_list), hp.maxlen], np.int32)

    for i, (x, y) in enumerate(zip(x_list, y_list)):
        X[i, :len(x)] = x
        Y[i, :len(y)] = y
    return X, Y, Sources, Targets
| 48 | |
| 49 | |
| 50 |
no test coverage detected