(p, lower)
| 21 | |
| 22 | |
def load_json(p, lower):
    """Load a tokenized JSON document and split it into source and target.

    The file at ``p`` must hold a JSON object with a ``'sentences'`` list.
    Each sentence has a ``'tokens'`` list whose items carry a ``'word'``
    string. A sentence whose first token is ``'@highlight'`` acts as a
    marker: it is dropped, and the one sentence that follows it is collected
    as a target sentence. Every other sentence is collected as a source
    sentence.

    Args:
        p: Path of the JSON file to read (decoded as UTF-8).
        lower: If true, lowercase every token before any other processing.

    Returns:
        A ``(source, tgt)`` tuple. Each element is a list of token lists,
        obtained by joining a sentence's tokens with spaces, passing the
        string through ``clean`` and splitting it on whitespace again.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If an expected key (``'sentences'``, ``'tokens'``,
            ``'word'``) is missing.
    """
    source = []
    tgt = []
    after_highlight = False

    # Read the whole document inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(p, 'r', encoding='UTF-8') as f:
        sentences = json.load(f)['sentences']

    for sent in sentences:
        tokens = [t['word'] for t in sent['tokens']]
        if lower:
            tokens = [t.lower() for t in tokens]
        if not tokens:
            # A sentence without tokens carries no text and would make the
            # tokens[0] check below raise IndexError; skip it.
            continue
        if tokens[0] == '@highlight':
            # Marker sentence: remember that the next sentence is a target.
            after_highlight = True
            continue
        if after_highlight:
            tgt.append(tokens)
            # Only the single sentence right after the marker is a target.
            after_highlight = False
        else:
            source.append(tokens)

    # `clean` is defined elsewhere in this file; it is applied to the
    # space-joined sentence, and the result is re-tokenized on whitespace.
    source = [clean(' '.join(sent)).split() for sent in source]
    tgt = [clean(' '.join(sent)).split() for sent in tgt]
    return source, tgt
| 43 | |
| 44 | |
| 45 | def cal_rouge(evaluated_ngrams, reference_ngrams): |
no test coverage detected