(dft, cat_bools, conti, verbose, chart_format, problem_type, dep=None, classes=None, mk_dir=None)
| 699 | |
| 700 | |
| 701 | def draw_distplot(dft, cat_bools, conti, verbose, chart_format, problem_type, dep=None, classes=None, mk_dir=None): |
| 702 | cats = find_remove_duplicates(cat_bools) ### first make sure there are no duplicates in this ### |
| 703 | copy_cats = copy.deepcopy(cats) |
| 704 | conti = copy.deepcopy(conti) |
| 705 | plot_name = 'Dist_Plots' |
| 706 | #### Since we are making changes to dft and classes, we will be making copies of it here |
| 707 | conti = list(set(conti)) |
| 708 | dft = dft[:] |
| 709 | classes = copy.deepcopy(classes) |
| 710 | colors = cycle('brycgkbyrcmgkbyrcmgkbyrcmgkbyr') |
| 711 | imgdata_list = list() |
| 712 | width_size = 15 #### this is to control the width of chart as well as number of categories to display |
| 713 | height_size = 5 |
| 714 | gap = 0.4 #### This controls the space between rows ###### |
| 715 | |
| 716 | if dep is None or dep == '' or problem_type == 'Regression': |
| 717 | image_count = 0 |
| 718 | ######### This is for cases where there is No Target or Dependent Variable ######## |
| 719 | if problem_type == 'Regression': |
| 720 | if isinstance(dep, list): |
| 721 | conti += dep |
| 722 | else: |
| 723 | conti += [dep] |
| 724 | ### Be very careful with the next line. we have used the plural "subplots" ## |
| 725 | ## In this case, you have ax as an array and you have to use (row,col) to get each ax! |
| 726 | ########## This is where you insert the logic for distplots ############## |
| 727 | # sns.color_palette("Set1") |
| 728 | sns.set_palette("Set1") |
| 729 | ##### First draw all the numeric variables in row after row ############# |
| 730 | if len(conti) > 0: |
| 731 | cols = 3 |
| 732 | rows = len(conti) |
| 733 | fig, axes = plt.subplots(rows, cols, figsize=(width_size, rows * height_size)) |
| 734 | fig.subplots_adjust(hspace=gap) ### This controls the space between rows |
| 735 | k = 1 |
| 736 | binsize = 30 |
| 737 | for each_conti in conti: |
| 738 | color1 = next(colors) |
| 739 | ax1 = plt.subplot(rows, cols, k) |
| 740 | dft[each_conti].hist( |
| 741 | bins=binsize, |
| 742 | # sns.histplot(dft[each_conti], |
| 743 | # kde=False, |
| 744 | # kde=True, stat="density", linewidth=0, |
| 745 | ax=ax1, color=color1) |
| 746 | k += 1 |
| 747 | ax2 = plt.subplot(rows, cols, k) |
| 748 | sns.boxplot(dft[each_conti], ax=ax2, color=color1) |
| 749 | k += 1 |
| 750 | ax3 = plt.subplot(rows, cols, k) |
| 751 | probplot(dft[each_conti], plot=ax3) |
| 752 | k += 1 |
| 753 | skew_val = round(dft[each_conti].skew(), 1) |
| 754 | ax2.set_yticklabels([]) |
| 755 | ax2.set_yticks([]) |
| 756 | ax1.set_title(each_conti + " | Distplot", fontsize=9) |
| 757 | ax2.set_title(each_conti + " | Boxplot", fontsize=9) |
| 758 | ax3.set_title(each_conti + " | Probability Plot - Skew: " + str(skew_val), fontsize=9) |
no test coverage detected