(self)
| 59 | self.assertEqual(specials, specials_expected) |
| 60 | |
def test_transform_pipe(self):
    """Chain ``prefix`` and ``filtertoolong`` in a TransformPipe and check it.

    The test builds both transforms, combines them with
    ``TransformPipe.build_from``, applies the pipe to a four-token example
    and checks that the example is dropped (``apply`` returns ``None``),
    that statistics were recorded, and that ``report()`` clears them.
    """
    # 1. Init first transform in the pipe
    prefix_cls = get_transforms_cls(["prefix"])["prefix"]
    # NOTE(review): the per-corpus keys are nested under ``trainset`` here
    # because the pipe is later applied with corpus_name="trainset" --
    # confirm the nesting against the prefix transform's expectations.
    corpora = yaml.safe_load(
        """
        trainset:
            path_src: data/src-train.txt
            path_tgt: data/tgt-train.txt
            transforms: [prefix, filtertoolong]
            weight: 1
            src_prefix: "⦅_pf_src⦆"
            tgt_prefix: "⦅_pf_tgt⦆"
        """
    )
    prefix_opt = Namespace(data=corpora, seed=-1)
    prefix_transform = prefix_cls(prefix_opt)
    prefix_transform.warm_up()

    # 2. Init second transform in the pipe
    filter_cls = get_transforms_cls(["filtertoolong"])["filtertoolong"]
    filter_opt = Namespace(src_seq_length=4, tgt_seq_length=4)
    filter_transform = filter_cls(filter_opt)

    # 3. Sequentially combine them into a transform pipe
    transform_pipe = TransformPipe.build_from([prefix_transform, filter_transform])
    ex = {
        "src": ["Hello", ",", "world", "."],
        "tgt": ["Bonjour", "le", "monde", "."],
    }

    # 4. Apply the transform pipe to a copy so ``ex`` itself stays untouched
    ex_after = transform_pipe.apply(copy.deepcopy(ex), corpus_name="trainset")

    # 5. The example after the pipe exceeds the length limit, thus is filtered
    self.assertIsNone(ex_after)

    # 6. Transform statistics are registered (here for filtertoolong)
    # assertGreater reports the actual count on failure, unlike assertTrue.
    self.assertGreater(len(transform_pipe.statistics.observables), 0)
    msg = transform_pipe.statistics.report()
    self.assertIsNotNone(msg)

    # 7. After report, statistics become empty as a fresh start
    self.assertEqual(len(transform_pipe.statistics.observables), 0)
| 98 | |
| 99 | |
| 100 | class TestMiscTransform(unittest.TestCase): |
nothing calls this directly
no test coverage detected