class: center, middle, inverse, title-slide .title[ # Variable types and visualization types ] .subtitle[ ## STAT 4380 ] .author[ ### Dr. Katie Fitzgerald ] --- layout: true <div class="my-footer"> <span> <a href="https://nova-stat-4380.netlify.app" target="_blank">nova-stat-4380.netlify.app</a> </span> </div> --- --- class: middle # ggplot ❤️ 🏐 --- ## Data: Volleyball NCAA women's volleyball season-level statistics for the 2022-2023 season. .pull-left-narrow[ <img src="img/vball.jpeg" width="80%" style="display: block; margin: auto;" /> ] .pull-right-wide[ ``` r library(tidyverse) volleyball <- read_csv("./data/volleyball_ncaa_div1_2022_23_clean.csv") glimpse(volleyball) ``` ``` ## Rows: 334 ## Columns: 15 ## $ team <chr> "Lafayette", "Delaware St.", "Yale… ## $ conference <chr> "Patriot", "MEAC", "Ivy League", "… ## $ region <chr> "East", "Southeast", "East", "Sout… ## $ aces_per_set <dbl> 2.33, 2.20, 2.15, 2.15, 2.03, 1.98… ## $ assists_per_set <dbl> 11.01, 11.45, 12.60, 10.56, 11.61,… ## $ team_attacks_per_set <dbl> 34.54, 29.98, 35.39, 32.52, 34.10,… ## $ blocks_per_set <dbl> 1.31, 2.17, 1.82, 1.81, 1.83, 2.39… ## $ digs_per_set <dbl> 13.60, 12.58, 15.29, 14.22, 14.27,… ## $ hitting_pctg <dbl> 0.180, 0.250, 0.242, 0.194, 0.201,… ## $ kills_per_set <dbl> 11.93, 12.12, 13.90, 11.54, 12.40,… ## $ opp_hitting_pctg <dbl> 0.227, 0.137, 0.155, 0.170, 0.188,… ## $ w <dbl> 8, 24, 23, 23, 18, 17, 19, 16, 18,… ## $ l <dbl> 15, 7, 3, 11, 13, 13, 13, 13, 13, … ## $ win_pctg <dbl> 0.348, 0.774, 0.885, 0.676, 0.581,… ## $ winning_season <chr> "no", "yes", "yes", "yes", "yes", … ``` ] --- ## Number of variables involved - Univariate data analysis - distribution of single variable - Bivariate data analysis - relationship between two variables - Multivariate data analysis - relationship between many variables at once, usually focusing on the relationship between two while conditioning on others --- ## Types of variables - **Numerical variables** can be classified as **continuous** 
or **discrete** based on whether the variable can take on any value within an interval or only distinct, countable values (such as non-negative whole numbers), respectively. - If the variable is **categorical**, we can determine if it is **ordinal** based on whether or not the levels have a natural ordering.  --- class: middle # Visualizing 1 numeric variable --- ## Describing shapes of numerical distributions - shape: - skewness: right-skewed, left-skewed, symmetric (skew is to the side of the longer tail) - modality: unimodal, bimodal, multimodal, uniform - center: mean (`mean`), median (`median`), mode (not always useful) - spread: range (`range`), standard deviation (`sd`), inter-quartile range (`IQR`) - unusual observations --- # 1 numeric variable: histogram .panelset[ .panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-4-1.png" width="50%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_histogram(color = "white") ``` ] ] --- ## Histograms and binwidth .panelset[ .panel[.panel-name[binwidth = 0.1] ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_histogram(color = "white", binwidth = 0.1) ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-5-1.png" width="50%" style="display: block; margin: auto;" /> ] .panel[.panel-name[binwidth = 0.5] ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_histogram(color = "white", binwidth = 0.5) ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-6-1.png" width="50%" style="display: block; margin: auto;" /> ] .panel[.panel-name[binwidth = 1] ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_histogram(color = "white", binwidth = 1) ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-7-1.png" width="50%" style="display: block; margin: auto;" /> ] ] --- ## 1 numeric variable: Density plot ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_density() ``` <img 
src="04-viz-types_files/figure-html/unnamed-chunk-8-1.png" width="60%" style="display: block; margin: auto;" /> --- ## Density plots and adjusting bandwidth .panelset[ .panel[.panel-name[adjust = 0.5] ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_density(adjust = 0.5) ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-9-1.png" width="50%" style="display: block; margin: auto;" /> ] .panel[.panel-name[adjust = 1] ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_density(adjust = 1) # default bandwidth ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-10-1.png" width="50%" style="display: block; margin: auto;" /> ] .panel[.panel-name[adjust = 2] ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_density(adjust = 2) ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-11-1.png" width="50%" style="display: block; margin: auto;" /> ] ] --- ## 1 numeric variable: Box plot ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_boxplot() ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-12-1.png" width="60%" style="display: block; margin: auto;" /> --- ## Customizing box plots .panelset[ .panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-13-1.png" width="60%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_boxplot() + labs( x = "Kills per set", y = NULL, title = "Kills per set is left-skewed with a median around 12.5" ) + * theme( * axis.ticks.y = element_blank(), * axis.text.y = element_blank() * ) ``` ] ] --- class: middle # Visualizing 1 numeric + 1 categorical variable --- # Faceted histograms .panelset[ .panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-14-1.png" width="50%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(x = kills_per_set)) + geom_histogram(color = "white", binwidth = 1) + facet_wrap(~ region) ``` ] ] --- ## 
Overlapping densities .panelset[ .panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-15-1.png" width="50%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(x = kills_per_set, fill = region, color = region)) + geom_density(alpha = 0.3) ``` ] ] --- ## Ridge plots .panelset[ .panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-16-1.png" width="50%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(x = kills_per_set, y = region)) + geom_density_ridges() ``` ] ] --- ## Side-by-side Box plots ``` r ggplot(volleyball, aes(x = kills_per_set, y = region)) + geom_boxplot() ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-17-1.png" width="60%" style="display: block; margin: auto;" /> --- ## Side-by-side violin plots ``` r ggplot(volleyball, aes(x = kills_per_set, y = region)) + geom_violin() ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-18-1.png" width="60%" style="display: block; margin: auto;" /> --- class: middle # Visualizing 1 categorical variable --- # 1 categorical variable: bar plot .panelset[ .panel[.panel-name[color] ``` r ggplot(volleyball, aes(y = conference)) + geom_bar(color = "navyblue") ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-19-1.png" width="50%" style="display: block; margin: auto;" /> ] .panel[.panel-name[fill] ``` r ggplot(volleyball, aes(y = conference)) + geom_bar(fill = "navyblue") ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-20-1.png" width="50%" style="display: block; margin: auto;" /> ] .panel[.panel-name[both] ``` r ggplot(volleyball, aes(y = conference)) + geom_bar(fill = "navyblue", color = "pink") ``` <img src="04-viz-types_files/figure-html/unnamed-chunk-21-1.png" width="50%" style="display: block; margin: auto;" /> ] ] --- class: middle # 2 categorical variables --- # 2 categorical variables: stacked bar plot .panelset[ 
.panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-22-1.png" width="70%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(y = conference, fill = winning_season)) + geom_bar() ``` ] ] --- # 2 categorical: standardized bar plot .panelset[ .panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-23-1.png" width="70%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(y = conference, fill = winning_season)) + geom_bar(position = "fill") + labs(x = "proportion") ``` ] ] --- class: middle # 2 numeric variables --- # 2 numeric variables: scatterplot .panelset[ .panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-24-1.png" width="70%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(x = digs_per_set, y = kills_per_set)) + geom_point() ``` ] ] --- # 2 numeric variables: hexplot .panelset[ .panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-25-1.png" width="70%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(x = digs_per_set, y = kills_per_set)) + geom_hex() ``` ] ] --- # More than 2: scatterplot w/ color .panelset[ .panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-26-1.png" width="70%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(x = hitting_pctg, y = opp_hitting_pctg, color = win_pctg)) + geom_point() ``` ``` ## Warning: Removed 2 rows containing missing values or values outside the ## scale range (`geom_point()`). 
``` ] ] --- # More than 2: faceted scatterplot w/ color .panelset[ .panel[.panel-name[Plot] <img src="04-viz-types_files/figure-html/unnamed-chunk-27-1.png" width="70%" style="display: block; margin: auto;" /> ] .panel[.panel-name[Code] ``` r ggplot(volleyball, aes(x = hitting_pctg, y = opp_hitting_pctg, color = win_pctg)) + geom_point() + facet_wrap(~region) ``` ``` ## Warning: Removed 2 rows containing missing values or values outside the ## scale range (`geom_point()`). ``` ] ] --- # And many more! - [Directory of visualizations](https://clauswilke.com/dataviz/directory-of-visualizations.html) - [The R Graph Gallery](https://r-graph-gallery.com)