Source code for src.dfencoder.dataframe

import pandas as pd
import numpy as np

class EncoderDataFrame(pd.DataFrame):
    """A ``pandas.DataFrame`` subclass that adds swap-noise corruption."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``pandas.DataFrame``."""
        super().__init__(*args, **kwargs)

    def swap(self, likelihood=.15):
        """
        Return a randomly "swapped" copy of the data.

        For every column, ``round(len(self) * likelihood)`` source cells
        are drawn at random (with replacement) and their values are
        written into the same number of randomly drawn destination cells
        of that same column. Values therefore never move between columns.

        The calling frame is left untouched. The result has the same
        shape, column labels and dtypes as the caller, but a fresh
        default (0..n-1) index.

        Randomness comes from the global ``numpy.random`` state.
        """
        tot_rows = len(self)
        n_swaps = int(round(tot_rows * likelihood))
        col_idx = np.arange(len(self.columns))

        def draw_rows():
            # One random row index per (swap, column) cell.
            return np.random.randint(
                0, tot_rows, size=(n_swaps, len(col_idx))
            )

        # ``.values`` may be a view onto the frame's own storage (and is
        # read-only under pandas Copy-on-Write), so take an explicit copy
        # before writing into it.
        new_mat = self.values.copy()

        if n_swaps:
            # ``col_idx`` has shape (n_cols,) and broadcasts against the
            # (n_swaps, n_cols) row indices, pairing each drawn row with
            # its own column.
            src_rows = draw_rows()
            to_place = new_mat[src_rows, col_idx]
            dst_rows = draw_rows()
            new_mat[dst_rows, col_idx] = to_place

        # A mixed-dtype frame yields an object matrix; cast every column
        # back to the dtype it had in the caller.
        dtypes = dict(zip(self.columns, self.dtypes))
        result = EncoderDataFrame(columns=self.columns, data=new_mat)
        return result.astype(dtypes)