############################################################################## ##### AUTOVIZ_MAIN PERFORMS AUTO VISUALIZATION OF ANY DATA USING MATPLOTLIB ## ##############################################################################
(self, filename: str or pd.DataFrame, sep=',', dep_var='', header=0, verbose=0,
lowess=False, chart_format='svg', max_rows_analyzed=150000,
max_cols_analyzed=30, save_plot_dir=None)
| 242 | return dft |
| 243 | |
| 244 | def AutoViz_Main(self, filename: str or pd.DataFrame, sep=',', dep_var='', header=0, verbose=0, |
| 245 | lowess=False, chart_format='svg', max_rows_analyzed=150000, |
| 246 | max_cols_analyzed=30, save_plot_dir=None): |
| 247 | """ |
| 248 | ############################################################################## |
| 249 | ##### AUTOVIZ_MAIN PERFORMS AUTO VISUALIZATION OF ANY DATA USING MATPLOTLIB ## |
| 250 | ############################################################################## |
| 251 | """ |
| 252 | ######### create a directory to save all plots generated by autoviz ############ |
| 253 | ############ This is where you save the figures in a target directory ####### |
| 254 | target_dir = 'AutoViz' |
| 255 | |
| 256 | if dep_var is not None: |
| 257 | if isinstance(dep_var, list): |
| 258 | target_dir = dep_var[0] |
| 259 | elif isinstance(dep_var, str): |
| 260 | if dep_var != '': |
| 261 | target_dir = copy.deepcopy(dep_var) |
| 262 | if save_plot_dir is None: |
| 263 | mk_dir = os.path.join(".", "AutoViz_Plots") |
| 264 | else: |
| 265 | mk_dir = copy.deepcopy(save_plot_dir) |
| 266 | if verbose == 2 and not os.path.isdir(mk_dir): |
| 267 | os.mkdir(mk_dir) |
| 268 | mk_dir = os.path.join(mk_dir, target_dir) |
| 269 | if verbose == 2 and not os.path.isdir(mk_dir): |
| 270 | os.mkdir(mk_dir) |
| 271 | ############ Start the clock here and classify variables in data set first ######## |
| 272 | start_time = time.time() |
| 273 | |
| 274 | (dft, dep_var, id_cols, bool_vars, cats, continuous_vars, discrete_string_vars, date_vars, classes, |
| 275 | problem_type, selected_cols) = classify_print_vars(filename, sep, max_rows_analyzed, max_cols_analyzed, |
| 276 | dep_var, header, verbose) |
| 277 | |
| 278 | ########### This is where we perform data quality checks on data ################ |
| 279 | if verbose >= 1: |
| 280 | print('To fix these data quality issues in the dataset, import FixDQ from autoviz...') |
| 281 | #### Run the Data Cleaning suggestions report now ############ |
| 282 | |
| 283 | if dep_var is not None: |
| 284 | if isinstance(dep_var, list): |
| 285 | remaining_vars = left_subtract(list(dft), dep_var) |
| 286 | if len(remaining_vars) == len(list(dft)): |
| 287 | print('depVar %s not found in given dataset. Please check your input and try again' % dep_var) |
| 288 | return dft |
| 289 | ### run the data cleaning report with a multi-label list of targets ## |
| 290 | data_cleaning_suggestions(dft, target=dep_var) |
| 291 | else: |
| 292 | ### run the data cleaning report with a single-label target ## |
| 293 | data_cleaning_suggestions(dft, target=dep_var) |
| 294 | else: |
| 295 | ### run data cleaning report with no target #### |
| 296 | data_cleaning_suggestions(dft, target='') |
| 297 | |
| 298 | ##### This is where we start plotting different kinds of charts depending on dependent variables |
| 299 | if dep_var is None or dep_var == '': |
| 300 | ##### This is when No dependent Variable is given ####### |
| 301 | if len(continuous_vars) > 1: |
no test coverage detected