Function test_pickle_size

dask/tests/test_task_spec.py:288–330 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

286
287
def test_pickle_size():
    """Guard against regressions in the pickled size of graph primitives.

    Many of these objects get serialized, so their pickle size drives both
    memory usage and serialization runtime. Smaller is better, but every
    threshold below was determined empirically. When a bound breaks,
    inspecting the payload with ``pickletools.dis`` helps to debug
    memoization and serialization by value.
    """

    def nbytes(obj):
        # Size in bytes of the pickled representation of ``obj``.
        return len(pickle.dumps(obj))

    alias_a = Alias("a", "b")
    # Some payload is unavoidable, even for a single alias
    assert nbytes(alias_a) < 55
    alias_b = Alias("b", "c")
    # ...but the bulk of it should be overhead that gets memoized
    assert nbytes((alias_a, alias_b)) <= 70

    # A repeated object should be memoized by pickle. On py3.10 that costs
    # 2 additional bytes
    assert nbytes((alias_a, alias_b, alias_b)) <= nbytes((alias_a, alias_b)) + 10

    t1 = Task("key-1", func, "a", "b")
    assert nbytes(t1) < 120

    t2 = Task("key-2", func, TaskRef("key-1"), "c")
    assert nbytes(t2) < 140

    assert nbytes((t1, t2)) < 170

    assert nbytes(List(t1, t2)) <= 272

    # Wrap the list one level deeper per iteration and record how large the
    # pickle is at each nesting depth.
    nested = List(t1, t2)
    pickled_sizes = []
    for _ in range(20):
        nested = List(nested, t1)
        pickled_sizes.append(nbytes(nested))

    # Distinct byte deltas between consecutive nesting depths
    deltas = {
        current - previous
        for previous, current in zip(pickled_sizes, pickled_sizes[1:])
    }
    # If this breaks, something cannot be memoized. That's very concerning
    assert len(deltas) == 1
    # If this goes up, that's not great but not a disaster
    (per_level_growth,) = deltas
    assert per_level_growth <= 32
331
332
333	def test_tokenize():

nothing calls this directly

AliasClass · 0.90

TaskClass · 0.90

TaskRefClass · 0.90

ListClass · 0.90

setClass · 0.85

popMethod · 0.80

no test coverage detected

searching dependent graphs…