(
lhs,
*args,
result_meta,
**kwargs,
)
| 75 | |
| 76 | |
| 77 | def merge_chunk( |
| 78 | lhs, |
| 79 | *args, |
| 80 | result_meta, |
| 81 | **kwargs, |
| 82 | ): |
| 83 | rhs, *args = args |
| 84 | left_index = kwargs.get("left_index", False) |
| 85 | right_index = kwargs.get("right_index", False) |
| 86 | empty_index_dtype = result_meta.index.dtype |
| 87 | categorical_columns = result_meta.select_dtypes(include="category").columns |
| 88 | |
| 89 | if categorical_columns is not None: |
| 90 | for col in categorical_columns: |
| 91 | left = None |
| 92 | right = None |
| 93 | |
| 94 | if col in lhs: |
| 95 | left = lhs[col] |
| 96 | elif col == kwargs.get("right_on") and left_index: |
| 97 | if isinstance(lhs.index.dtype, pd.CategoricalDtype): |
| 98 | left = lhs.index |
| 99 | |
| 100 | if col in rhs: |
| 101 | right = rhs[col] |
| 102 | elif col == kwargs.get("left_on") and right_index: |
| 103 | if isinstance(rhs.index.dtype, pd.CategoricalDtype): |
| 104 | right = rhs.index |
| 105 | |
| 106 | dtype = "category" |
| 107 | if left is not None and right is not None: |
| 108 | dtype = methods.union_categoricals( |
| 109 | [left.astype("category"), right.astype("category")] |
| 110 | ).dtype |
| 111 | |
| 112 | if left is not None: |
| 113 | if isinstance(left, pd.Index): |
| 114 | lhs.index = left.astype(dtype) |
| 115 | else: |
| 116 | lhs = lhs.assign(**{col: left.astype(dtype)}) |
| 117 | if right is not None: |
| 118 | if isinstance(right, pd.Index): |
| 119 | rhs.index = right.astype(dtype) |
| 120 | else: |
| 121 | rhs = rhs.assign(**{col: right.astype(dtype)}) |
| 122 | |
| 123 | if len(args) and args[0] == "leftsemi" or kwargs.get("how") == "leftsemi": |
| 124 | if isinstance(rhs, (pd.DataFrame, pd.Series)): |
| 125 | # otherwise it's cudf |
| 126 | rhs = rhs.drop_duplicates() |
| 127 | if len(args): |
| 128 | args[0] = "inner" |
| 129 | else: |
| 130 | kwargs["how"] = "inner" |
| 131 | out = lhs.merge(rhs, *args, **kwargs) |
| 132 | |
| 133 | # Workaround for pandas bug where if the left frame of a merge operation is |
| 134 | # empty, the resulting dataframe can have columns in the wrong order. |
no test coverage detected
searching dependent graphs…