MCPcopy Index your code
hub / github.com/dask/dask / RearrangeByColumn

Class RearrangeByColumn

dask/dataframe/dask_expr/_shuffle.py:239–293  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

237
238
class RearrangeByColumn(ShuffleBase):
    """Shuffle expression lowered into partition assignment plus ``Shuffle``.

    Lowering adds a helper ``"_partitions"`` column through
    ``AssignPartitioningIndex``, shuffles on that column, and then selects
    the helper columns away again.
    """

    def _lower(self):
        """Rewrite this expression as assignment, shuffle and projection.

        Returns
        -------
        The ``Shuffle`` expression, indexed down to the first column of the
        partition-assigned frame when the frame has ``ndim == 1``, and
        otherwise to every column except the helper columns
        ``"_partitions"`` and ``"_partitions_0"``.

        Raises
        ------
        ValueError
            If ``index_shuffle`` is falsy and ``partitioning_index`` is not
            a ``str``, a ``list`` or an ``Expr``.
        """
        source = self.frame
        partitioning_index = self.partitioning_index
        n_out = self.npartitions_out
        by_index = self.index_shuffle

        # A single column name is handled as a one-element list.
        if isinstance(partitioning_index, str):
            partitioning_index = [partitioning_index]

        # Type validation and column substitution are both skipped when
        # shuffling by index.
        if not by_index:
            if not isinstance(partitioning_index, (list, Expr)):
                raise ValueError(
                    f"{type(partitioning_index)} not a supported type for partitioning_index"
                )

            if not isinstance(partitioning_index, Expr):
                missing = [
                    name
                    for name in partitioning_index
                    if name not in source.columns
                ]
                # Exactly one requested name is not a column of the frame:
                # expose the frame's index as a temporary "_partitions_0"
                # column and partition on that column instead.
                # NOTE(review): presumably the missing name refers to the
                # index -- confirm against callers. Any other number of
                # missing names is passed through unchanged.
                if len(missing) == 1:
                    source = Assign(source, "_partitions_0", source.index)
                    # Copy first so the list held by this expression is not
                    # mutated in place.
                    partitioning_index = partitioning_index.copy()
                    slot = partitioning_index.index(missing[0])
                    partitioning_index[slot] = "_partitions_0"

        # Add the "_partitions" column that the shuffle is keyed on.
        with_partitions = AssignPartitioningIndex(
            source,
            partitioning_index,
            "_partitions",
            n_out,
            source._meta,
            by_index,
        )

        # Shuffle on the freshly assigned column.
        result = Shuffle(
            with_partitions,
            "_partitions",
            n_out,
            self.ignore_index,
            self.method,
            self.options,
            original_partitioning_index=self._partitioning_index,
        )

        # One-dimensional input: select the first column of the
        # partition-assigned frame to get back to a series.
        if source.ndim == 1:
            return result[with_partitions.columns[0]]

        # Otherwise strip the helper columns before returning.
        helper_columns = ("_partitions", "_partitions_0")
        keep = [col for col in result.columns if col not in helper_columns]
        return result[keep]
294
295
296class SimpleShuffle(PartitionsFiltered, Shuffle):

Callers 5

_lowerMethod · 0.90
_lowerMethod · 0.90
_lowerMethod · 0.90
shuffleMethod · 0.90
_lowerMethod · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…