Method _init_reader

reader.py:115–141 · view source on GitHub ↗

(self)

Source from the content-addressed store, hash-verified

113
114
def _init_reader(self):
    """Load the parallel input/target files into ``self.data``.

    Reads ``self.input_file`` and ``self.target_file`` line by line in
    lockstep (line *i* of one pairs with line *i* of the other), splits
    each line on single spaces, truncates to fewer than ``self.max_len``
    words, and encodes the words with ``encode_text`` using
    ``self.vocab_indices``.

    Each entry appended to ``self.data`` is a dict with keys:
      * ``'in_seq'``         -- encoded input words followed by
                                ``self.end_token``
      * ``'in_seq_len'``     -- ``len(in_seq)``
      * ``'target_seq'``     -- encoded ``'<s>'`` + target words +
                                ``self.end_token``
      * ``'target_seq_len'`` -- ``len(target_seq) - 1``

    Iteration is driven by the input file. If the target file has fewer
    lines, the missing target lines are treated as empty; extra target
    lines are ignored.

    Raises:
        OSError: if either file cannot be opened.
        UnicodeDecodeError: if a line is not valid UTF-8.
    """
    def read_words(raw_line):
        # Strip only real line terminators. Slicing off the last character
        # unconditionally would eat a genuine character when the final
        # line of the file has no trailing newline, and would leave a
        # stray '\r' on the last word of CRLF-terminated files.
        text = raw_line.decode('utf-8').rstrip('\r\n')
        words = [w for w in text.split(' ') if w != '']
        # Keep at most max_len - 1 words so there is room for the end
        # token appended by the caller.
        if len(words) >= self.max_len:
            words = words[:self.max_len - 1]
        return words

    self.data = []
    # Context managers guarantee both handles are closed even if decoding
    # or encoding raises part-way through the files.
    with open(self.input_file, 'rb') as input_f, \
            open(self.target_file, 'rb') as target_f:
        for input_line in input_f:
            input_words = read_words(input_line)
            input_words.append(self.end_token)

            # readline() returns b'' once the target file is exhausted,
            # which yields an empty target sentence rather than an error.
            target_words = ['<s>'] + read_words(target_f.readline())
            target_words.append(self.end_token)

            in_seq = encode_text(input_words, self.vocab_indices)
            target_seq = encode_text(target_words, self.vocab_indices)
            self.data.append({
                'in_seq': in_seq,
                'in_seq_len': len(in_seq),
                'target_seq': target_seq,
                # NOTE(review): one less than the full length, presumably
                # because decoder input/output are the same sequence
                # shifted by one token -- confirm against the consumer.
                'target_seq_len': len(target_seq) - 1,
            })
    # NOTE(review): the cursor starts at the end of the data, presumably so
    # the first read triggers a reset/shuffle elsewhere -- confirm.
    self.data_pos = len(self.data)

__init__ · Method · 0.95

encode_text · Function · 0.85

no test coverage detected