CustomerVisualizations.R 4.53 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
#=======================================================================================
#
# File:        CustomerVisualizations.R
# Author:      Dave Langer
# Description: This code illustrates R visualizaions used in the "Introduction to R 
#              Visualization with Power BI " Meetup dated 03/15/2017. More details on 
#              the Meetup are available at:
#
#                 https://www.meetup.com/Data-Science-Dojo-Toronto/events/237952698/
#
#              The code in this file leverages data from Microsoft's Wide World
#              Importers sample Data Warehouse available at:
#
#                 https://github.com/Microsoft/sql-server-samples/releases/tag/wide-world-importers-v1.0
#
# NOTE - This file is provided "As-Is" and no warranty regardings its contents are
#        offered nor implied. USE AT YOUR OWN RISK!
#
#=======================================================================================


# Uncomment and run these lines of code to install required packages
#install.packages("dplyr")
#install.packages("lubridate")
#install.packages("ggplot2")
#install.packages("scales")
#install.packages("qcc")


# NOTE - Change your working directory as needed
load("CustomerData.RData")


# Preprocessing to make dataset look like Power BI
library(dplyr)
library(lubridate)
dataset <- dataset %>% 
  mutate(Year = year(dataset$OrderDate),
         Month = month(dataset$OrderDate, label = TRUE))


#=============================================================================
#
# Visualization #1 - Aggregaed dynamic bar charts by Customer Category
#
#=============================================================================

library(dplyr)
library(ggplot2)
library(scales)


# Get total revenue by Buying Group, Supplier Category and Customer Catetory
customer.categories <- dataset %>%
  group_by(BuyingGroupName, SupplierCategoryName, CustomerCategoryName) %>%
  summarize(TotalRevenue = sum(LineTotal))

# Aggregate data across all supplier categories
all.suppliers <- dataset %>%
  group_by(BuyingGroupName, CustomerCategoryName) %>%
  summarize(TotalRevenue = sum(LineTotal))
all.suppliers$SupplierCategoryName <- "All Suppliers"

# Add aggregated data
customer.categories <- rbind(customer.categories,
                             all.suppliers)


# Format visualization title string dynamically
title.str.1 <- paste("Total Revenue for",
                     dataset$Year[1],
                     "by Buying Group and Supplier/Customer Categories for",
                     nrow(dataset),
                     "Rows of Data",
                     sep = " ")


# Plot 
ggplot(customer.categories, aes(x = CustomerCategoryName, y = TotalRevenue, fill = BuyingGroupName)) +
  theme_bw() +
  coord_flip() +
  facet_grid(BuyingGroupName ~ SupplierCategoryName) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = comma) +
  theme(text = element_text(size = 18),
        axis.text.x = element_text(size = 12, angle=90, hjust=1)) +
  labs(x = "Customer Category",
       y = "Total Revenue",
       fill = "Buying Group",
       title = title.str.1)







#=============================================================================
#
# Visualization #2 - Aggregated Process Behavior Charts
#
#=============================================================================


# Add artificial filtering for example
dataset <- dataset %>%
  filter(is.na(BuyingGroupName) & 
         (Year == 2013 | Year == 2014))


# Power BI code starts here
library(dplyr)
library(qcc)

# Grab year variables
Year1 <- min(dataset$Year)
Year2 <- max(dataset$Year)

# Accumulate totals
totals <- dataset %>%
  filter(Year == Year1| Year == Year2 ) %>%
  mutate(Month = substr(Month, 1, 3),
         MonthNum =  match(Month, month.abb)) %>%
  group_by(Year, MonthNum, Month) %>%
  summarize(TotalRevenue = sum(LineTotal)) %>%
  mutate(Label = paste(Month, Year, sep = "-")) %>%
  arrange(Year, MonthNum)

# Make labels pretty with dummy vars
Revenue.Group.1 <- totals$TotalRevenue[1:12]
Revenue.Group.2 <- totals$TotalRevenue[13:24]

title.str <- paste("Process Behavior Chart - ", Year1, " and ", Year2, " ",
                   dataset$CustomerCategoryName[1], " Total Revenue for Buying Group '",
                   dataset$BuyingGroupName[1], "'", sep = "")

# Plot
blank.super.qcc <- qcc(Revenue.Group.1, type = "xbar.one",
                       newdata = Revenue.Group.2,
                       labels = totals$Label[1:12], 
                       newlabels = totals$Label[13:24],
                       title = title.str,
                       ylab = "Total Revenue", xlab = "Month-Year")