MCPcopy
hub / github.com/wb14123/seq2seq-couplet / _init_reader

Method _init_reader

reader.py:115–141  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

113
114
def _init_reader(self):
    """Load the parallel input/target files into ``self.data``.

    Reads ``self.input_file`` line by line and pairs each line with the
    next line read from ``self.target_file``. Each line is decoded as
    UTF-8, split on single spaces (empty tokens dropped), and truncated to
    ``self.max_len - 1`` tokens when it has ``self.max_len`` or more.
    ``self.end_token`` is appended to both sequences, and ``'<s>'`` is
    prepended to the target sequence. Both token lists are passed through
    ``encode_text`` with ``self.vocab_indices``.

    Side effects:
        Replaces ``self.data`` with a list of dicts holding the keys
        ``in_seq``, ``in_seq_len``, ``target_seq`` and ``target_seq_len``,
        and sets ``self.data_pos`` to the number of records loaded.

    Raises:
        OSError: if either file cannot be opened.
        UnicodeDecodeError: if a line is not valid UTF-8.
    """
    def read_words(raw_line):
        # Strip only a genuine trailing newline. A blind ``[:-1]`` would
        # eat the last real character of a final line that has no
        # terminating newline.
        text = raw_line.decode('utf-8')
        if text.endswith('\n'):
            text = text[:-1]
        words = [word for word in text.split(' ') if word != '']
        # Leave room for the end token that the caller appends.
        if len(words) >= self.max_len:
            words = words[:self.max_len - 1]
        return words

    self.data = []
    # The context manager closes both files even if decoding or encoding
    # raises part-way through the loop.
    with open(self.input_file, 'rb') as input_f, \
            open(self.target_file, 'rb') as target_f:
        for input_raw in input_f:
            # If the target file is shorter, readline() returns b'' and the
            # target sequence degenerates to just '<s>' plus the end token.
            target_raw = target_f.readline()

            input_words = read_words(input_raw)
            input_words.append(self.end_token)

            target_words = ['<s>'] + read_words(target_raw)
            target_words.append(self.end_token)

            in_seq = encode_text(input_words, self.vocab_indices)
            target_seq = encode_text(target_words, self.vocab_indices)
            self.data.append({
                'in_seq': in_seq,
                'in_seq_len': len(in_seq),
                'target_seq': target_seq,
                # One less than the encoded length.
                # NOTE(review): presumably because the leading '<s>' is
                # not counted as a prediction step — confirm with consumer.
                'target_seq_len': len(target_seq) - 1,
            })
    self.data_pos = len(self.data)

Callers 1

__init__Method · 0.95

Calls 1

encode_textFunction · 0.85

Tested by

no test coverage detected