Method drop_duplicates

dvc/compare.py:226–274 · view source on GitHub ↗

(  # noqa: C901
        self,
        axis: str = "rows",
        subset: Optional[Iterable[str]] = None,
        ignore_empty: bool = True,
    )

Source from the content-addressed store, hash-verified

224	self.drop(*to_drop)
225
def drop_duplicates(  # noqa: C901
    self,
    axis: str = "rows",
    subset: Optional[Iterable[str]] = None,
    ignore_empty: bool = True,
):
    """Remove redundant columns or repeated rows from the table in place.

    With ``axis="cols"``, a column is dropped (via ``self.drop``) when the
    string forms of its cells contain exactly one distinct value. When
    ``ignore_empty`` is true, ``self._fill_value`` is not counted as a
    value for this check.

    With ``axis="rows"``, the first occurrence of each row is kept and
    later equal rows are removed by rebuilding every column as a new
    ``Column``.

    Args:
        axis: Either ``"rows"`` or ``"cols"``.
        subset: Column names to restrict the operation to. For ``"cols"``
            only these columns are candidates for dropping; for ``"rows"``
            only these columns take part in the row comparison. ``None``
            or an empty collection means "all columns". The iterable is
            read exactly once, so one-shot iterables such as generators
            are safe; a bare string is treated as a single column name.
        ignore_empty: Only used for ``axis="cols"``; see above.

    Raises:
        ValueError: If ``axis`` is neither ``"rows"`` nor ``"cols"``.
    """
    if axis not in ["rows", "cols"]:
        raise ValueError(
            f"Invalid 'axis' value {axis}.Choose one of ['rows', 'cols']"
        )

    # Materialize ``subset`` once: it is consulted for every column (and,
    # for rows, every cell), which would exhaust a generator after the
    # first few membership tests and silently skip the remaining names.
    if subset is None:
        wanted: set[str] = set()
    elif isinstance(subset, str):
        wanted = {subset}
    else:
        wanted = set(subset)

    # Column names are stable for the duration of the scan, so look them
    # up once instead of once per cell.
    names = self.keys()

    if axis == "cols":
        cols_to_drop: list[str] = []
        for name, col in zip(names, self.columns):
            if wanted and name not in wanted:
                continue
            # Cast to str because Text is not hashable error
            unique_vals = {str(x) for x in col}
            if ignore_empty and self._fill_value in unique_vals:
                unique_vals -= {self._fill_value}
            if len(unique_vals) == 1:
                cols_to_drop.append(name)
        self.drop(*cols_to_drop)

    elif axis == "rows":
        # Kept as a list (linear lookup) on purpose: cells may be
        # unhashable (see the Text note above), so rows cannot go in a set.
        unique_rows: list[tuple] = []
        # Row indices are plain ints, so a set gives O(1) membership when
        # the columns are rebuilt below.
        rows_to_drop: set[int] = set()
        for n_row, row in enumerate(self):
            if wanted:
                row_key = tuple(
                    cell for name, cell in zip(names, row) if name in wanted
                )
            else:
                row_key = tuple(row)

            if row_key in unique_rows:
                rows_to_drop.add(n_row)
            else:
                unique_rows.append(row_key)

        for name in names:
            self._columns[name] = Column(
                [
                    x
                    for n, x in enumerate(self._columns[name])
                    if n not in rows_to_drop
                ]
            )
275
276
277	def _normalize_float(val: float, precision: int):

test_drop_duplicatesFunction · 0.95

test_drop_duplicates_rich_textFunction · 0.95

test_drop_duplicates_subsetFunction · 0.95

test_drop_duplicates_invalid_axisFunction · 0.95

show_experimentsFunction · 0.80

keysMethod · 0.95

dropMethod · 0.95

ColumnClass · 0.85

appendMethod · 0.80

test_drop_duplicatesFunction · 0.76

test_drop_duplicates_rich_textFunction · 0.76

test_drop_duplicates_subsetFunction · 0.76

test_drop_duplicates_invalid_axisFunction · 0.76