MCPcopy Index your code
hub / github.com/Turing-Project/WriteGPT / load_predict_gen_vector

Function load_predict_gen_vector

LanguageNetwork/BERT/sentence_encoder.py:22–55  ·  view source on GitHub ↗
(path, arg)

Source from the content-addressed store, hash-verified

20
21
def load_predict_gen_vector(path, arg):
    """Return the summary sentence and its vector for every document.

    If a cached result exists at ``arg.tmp`` (as reported by ``check_dir``)
    it is loaded and returned directly. Otherwise the result is built from
    the two text files ``path + '.origin'`` and ``path + '.candidate'``,
    which are consumed in lockstep (entry ``i`` of one pairs with entry
    ``i`` of the other), and then stored at ``arg.tmp`` for the next call.

    Args:
        path: Common prefix of the ``.origin`` and ``.candidate`` files.
        arg: Object exposing ``tmp``, the location of the cached result.

    Returns:
        On a cache miss, a dict mapping each document id to
        ``[sent_abs, abs_vector]``. On a cache hit, whatever
        ``load_variable(arg.tmp)`` returns (presumably the same structure
        written by an earlier run -- TODO confirm).

    Raises:
        IndexError: If an origin entry has no tab-separated second field,
            or if the candidate file has fewer entries than the origin file.
    """
    if check_dir(arg.tmp):
        # Cache hit: the text files are not needed, so skip reading them.
        return load_variable(arg.tmp)

    # The last element returned by load_txt_data is dropped in both cases;
    # presumably a trailing empty entry -- TODO confirm against load_txt_data.
    print('Loading origin text')
    content_data = load_txt_data(path + '.origin', origin=True)[:-1]
    print('Loading abstract text')
    abs_data = load_txt_data(path + '.candidate', origin=True)[:-1]
    print('Loading finished')

    res = {}
    progress = tqdm(content_data, desc="gen sentence vector")
    for i, content_line in enumerate(progress):
        # Each origin entry is "<doc id>\t<sentence>"; quotes are stripped
        # from the id and spaces from the sentence.
        content_raw = content_line.split('\t')
        doc_id = content_raw[0].replace('"', '')
        sentence = content_raw[1].replace(' ', '')

        # Candidate segments are separated by '<q>'; join them with commas.
        sent_abs = abs_data[i].replace(' ', '').replace('<q>', ',')
        if 'CANNOTPREDICT' in sentence:
            sent_abs = 'CAN NOT PREDICT'
        if not sent_abs:
            # Empty candidate: fall back to the origin sentence itself.
            sent_abs = sentence

        abs_vector = gen_sentence_vector_use_third_party_func(sent_abs)
        res[doc_id] = [sent_abs, abs_vector]

    save_variable(res, arg.tmp)
    return res
56
57
58def _pad(data, pad_id, width=-1):

Callers 1

Calls 5

load_txt_dataFunction · 0.90
check_dirFunction · 0.90
load_variableFunction · 0.90
save_variableFunction · 0.90

Tested by

no test coverage detected