MCPcopy
hub / github.com/dask/dask / merge_chunk

Function merge_chunk

dask/dataframe/multi.py:77–144  ·  view source on GitHub ↗
(
    lhs,
    *args,
    result_meta,
    **kwargs,
)

Source from the content-addressed store, hash-verified

75
76
77def merge_chunk(
78 lhs,
79 *args,
80 result_meta,
81 **kwargs,
82):
83 rhs, *args = args
84 left_index = kwargs.get("left_index", False)
85 right_index = kwargs.get("right_index", False)
86 empty_index_dtype = result_meta.index.dtype
87 categorical_columns = result_meta.select_dtypes(include="category").columns
88
89 if categorical_columns is not None:
90 for col in categorical_columns:
91 left = None
92 right = None
93
94 if col in lhs:
95 left = lhs[col]
96 elif col == kwargs.get("right_on") and left_index:
97 if isinstance(lhs.index.dtype, pd.CategoricalDtype):
98 left = lhs.index
99
100 if col in rhs:
101 right = rhs[col]
102 elif col == kwargs.get("left_on") and right_index:
103 if isinstance(rhs.index.dtype, pd.CategoricalDtype):
104 right = rhs.index
105
106 dtype = "category"
107 if left is not None and right is not None:
108 dtype = methods.union_categoricals(
109 [left.astype("category"), right.astype("category")]
110 ).dtype
111
112 if left is not None:
113 if isinstance(left, pd.Index):
114 lhs.index = left.astype(dtype)
115 else:
116 lhs = lhs.assign(**{col: left.astype(dtype)})
117 if right is not None:
118 if isinstance(right, pd.Index):
119 rhs.index = right.astype(dtype)
120 else:
121 rhs = rhs.assign(**{col: right.astype(dtype)})
122
123 if len(args) and args[0] == "leftsemi" or kwargs.get("how") == "leftsemi":
124 if isinstance(rhs, (pd.DataFrame, pd.Series)):
125 # otherwise it's cudf
126 rhs = rhs.drop_duplicates()
127 if len(args):
128 args[0] = "inner"
129 else:
130 kwargs["how"] = "inner"
131 out = lhs.merge(rhs, *args, **kwargs)
132
133 # Workaround for pandas bug where if the left frame of a merge operation is
134 # empty, the resulting dataframe can have columns in the wrong order.

Callers 1

_merge_chunk_wrapper — Function · 0.85

Calls 6

select_dtypes — Method · 0.80
assign — Method · 0.80
get — Method · 0.45
astype — Method · 0.45
drop_duplicates — Method · 0.45
merge — Method · 0.45

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…