(path, arg)
| 20 | |
| 21 | |
def load_predict_gen_vector(path, arg):
    """Return per-document predicted abstracts together with their sentence vectors.

    Two text files are read: ``path + '.origin'`` (one tab-separated record
    per line; the first field is the document id, the second the source
    sentence) and ``path + '.candidate'`` (the predicted abstract for the
    line at the same position, with ``<q>`` separating abstract sentences).

    If ``check_dir(arg.tmp)`` is truthy, the result is loaded from
    ``arg.tmp`` via ``load_variable`` and nothing is recomputed. Otherwise
    the mapping is built line by line and written to ``arg.tmp`` with
    ``save_variable``.

    Args:
        path: Common prefix of the ``.origin`` and ``.candidate`` files.
        arg: Object exposing a ``tmp`` attribute naming the cache location.

    Returns:
        When computed here, a dict mapping each document id to
        ``[abstract_text, abstract_vector]``. When loaded from the cache,
        whatever ``load_variable(arg.tmp)`` returns.

    Raises:
        IndexError: If an origin line has no tab-separated second field, or
            if the candidate file has fewer usable lines than the origin file.
    """
    content_path = path + '.origin'
    abs_path = path + '.candidate'
    print('Loading origin text')
    # The last loaded element is discarded for both files
    # (presumably a trailing empty line -- TODO confirm against load_txt_data).
    content_data = load_txt_data(content_path, origin=True)[:-1]
    print('Loading abstract text')
    abs_data = load_txt_data(abs_path, origin=True)[:-1]

    print('Loading finished')

    res = {}
    if check_dir(arg.tmp):
        # A previously saved result exists (as reported by check_dir); reuse it.
        res = load_variable(arg.tmp)
    else:
        progress = tqdm(content_data, desc="gen sentence vector")
        for index, content_line in enumerate(progress):
            fields = content_line.split('\t')
            doc_id = fields[0].replace('"', '')
            sentence = fields[1].replace(' ', '')

            # Candidate lines are matched to origin lines by position.
            abs_raw = abs_data[index]
            sent_abs = ','.join(abs_raw.replace(' ', '').split('<q>'))

            if 'CANNOTPREDICT' in sentence:
                sent_abs = 'CAN NOT PREDICT'
            if not sent_abs:
                # Empty prediction: fall back to the source sentence itself.
                sent_abs = sentence

            abs_vector = gen_sentence_vector_use_third_party_func(sent_abs)
            res[doc_id] = [sent_abs, abs_vector]
        save_variable(res, arg.tmp)
    return res
| 56 | |
| 57 | |
| 58 | def _pad(data, pad_id, width=-1): |
no test coverage detected