| 286 | |
| 287 | |
def test_pickle_size():
    """Guard the pickled footprint of Alias, Task and List objects.

    Many of these objects get serialized, which drives both memory usage and
    serialization runtime performance. Smaller pickles are better, but every
    bound below was determined empirically. Inspecting a payload with
    ``pickletools.dis`` is useful to debug memoization and serialization by
    value.
    """

    def pickled_size(obj):
        # Number of bytes pickle emits for ``obj`` with the default protocol.
        return len(pickle.dumps(obj))

    alias_a = Alias("a", "b")
    # A single alias cannot be shrunk to nothing
    assert pickled_size(alias_a) < 55
    alias_b = Alias("b", "c")
    # ...but most of that should be overhead that gets memoized
    assert pickled_size((alias_a, alias_b)) <= 70

    # Pickle should be able to memoize the repeated object. On py3.10 that
    # costs 2 additional bytes
    assert pickled_size((alias_a, alias_b, alias_b)) <= (
        pickled_size((alias_a, alias_b)) + 10
    )

    t1 = Task("key-1", func, "a", "b")
    assert pickled_size(t1) < 120

    t2 = Task("key-2", func, TaskRef("key-1"), "c")
    assert pickled_size(t2) < 140

    assert pickled_size((t1, t2)) < 170

    task_list = List(t1, t2)
    assert pickled_size(task_list) <= 272

    # Wrap the list one level deeper on every iteration and record how large
    # the pickle is at each nesting level.
    nested = List(t1, t2)
    nested_sizes = []
    for _ in range(20):
        nested = List(nested, t1)
        nested_sizes.append(pickled_size(nested))

    # Size difference between each pair of consecutive nesting levels
    increments = {
        current - previous
        for previous, current in zip(nested_sizes, nested_sizes[1:])
    }
    # If this breaks, something cannot be memoized. That's very concerning
    assert len(increments) == 1
    # If this goes up, that's not great but not a disaster
    assert increments.pop() <= 32
| 331 | |
| 332 | |
| 333 | def test_tokenize(): |