## Final Version


#install 
# pip install plotly==5.4.0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import plotly.express as px
import plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.arima_model import ARIMA

#pip install progressbar
from progressbar import *

from sklearn.preprocessing import LabelEncoder

from math import sqrt

import plotly.graph_objects as go
import plotly.express as px
import plotly as py

pd.set_option('display.max_rows',50)
pd.set_option('display.max_columns',150)
import matplotlib.pyplot as plt

#pip install datetime
from datetime import datetime as dt


#graph design
#pip install dash
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objects as go


from pandas.io import gbq
df_totalsale = gbq.read_gbq('SELECT date,extract(year from date) as year,ROUND(SUM(sale_dollars),2) as sale_dollars FROM bigquery-public-data.iowa_liquor_sales.sales where extract(year from date)>2016 group by 1,2', project_id='iowa-liquor-retail-sale', reauth='true')


import calendar
df_totalsale['year'] = df_totalsale['date'].apply(lambda x: x.strftime('%Y')).astype(int)
df_totalsale['year_month'] = df_totalsale['date'].apply(lambda x: x.strftime('%Y-%m'))
df_totalsale['month'] = df_totalsale['date'].apply(lambda x: x.strftime('%m')).astype(int)
df_totalsale['month_'] = df_totalsale['month'].apply(lambda x: calendar.month_name[x])


import plotly.express as px
dfg = df_totalsale[['date','sale_dollars']]
dfg = dfg.groupby(pd.Grouper(key='date',freq='M')).sum().reset_index()
dfg.rename(columns = {"sale_dollars": "sum"}, inplace=True)

fig = px.line(dfg, x="date", y="sum"
              ,hover_data={"date": "|%B %d, %Y"}
              ,markers=True
              ,color_discrete_sequence=px.colors.diverging.PRGn
             ,template = "plotly_white"
             )
fig.update_layout(
    title="Total Sale Over Time(2016-2020)"
    ,xaxis_title="date"
    ,yaxis_title="sale_dollars"

)

fig.update_xaxes(
    dtick="M1",
    tickformat="%b\n%Y",
    ticklabelmode="period")


fig.show()


#import seaborn as sns
import calendar

# given some dataframe, perform groupby and reset the index
dfg = df_totalsale.groupby(['year', 'month']).agg({'sale_dollars': sum}).reset_index()
dfg['month'] = dfg['month'].apply(lambda x: calendar.month_name[x])


import plotly.express as px
dfg = df_totalsale[['year','month','month_','sale_dollars']]
dfg = dfg.groupby(['year','month','month_']).sum().reset_index()
dfg.rename(columns = {"sale_dollars": "sum"}, inplace=True)

fig = px.line(dfg, x="month_", y="sum"
              ,color='year'
              ,markers=True
              ,color_discrete_sequence=px.colors.diverging.PRGn
             ,template = "plotly_white"

             )
fig.update_layout(
    title="Total Sale Over Time(2016-2020)"
    ,xaxis_title="month"
    ,yaxis_title="sale_dollars"

)

fig.update_xaxes(
    dtick="M1",
    ticklabelmode="period")

fig.show()


#manipulation data for waterfall chart


from pandas.io import gbq
MoM_Data = df_totalsale.loc[df_totalsale['year'] > 2020]
MoM_Data = MoM_Data[['year_month','sale_dollars']]
MoM_Data = MoM_Data.groupby(['year_month']).sum().reset_index()
MoM_Data.rename(columns = {"sale_dollars": "sum"}, inplace=True)
MoM_Data = MoM_Data.sort_values(by=['year_month'], ascending=True)
MoM_Data = MoM_Data.head(13)


MoM_Data['Last_Month'] = np.roll(MoM_Data['sum'],1)
# MoM_Data['MoMGrowth'] = (MoM_Data['sum']/MoM_Data['Last_Month'])-1

MoM_Data['MoMGrowth_amount'] = np.select(
    [
        MoM_Data['year_month'] == '2021-01'
        ,MoM_Data['year_month'] == '2022-01'
    ], 
    [
        MoM_Data['sum']
        ,0
    ], 
    default= MoM_Data['sum'] - MoM_Data['Last_Month']
).round(-4)

MoM_Data['MoMGrowth_text'] = np.select(
    [
        MoM_Data['year_month'] == '2021-01'
        ,MoM_Data['year_month'] == '2022-01'
    ], 
    [
        MoM_Data['sum']
        ,MoM_Data['sum']
    ], 
    default= MoM_Data['sum'] - MoM_Data['Last_Month']
).round(-4)


import plotly.graph_objects as go

text  = list(MoM_Data['MoMGrowth_text'].values.flatten())
text = [element /1000000 for element in text]

fig = go.Figure(go.Waterfall(
    name = "20", orientation = "v",
    measure = ["relative", "relative", "relative","relative","relative", "relative", "relative", "relative","relative", "relative", "relative", "relative","total"],
    x = list(MoM_Data['year_month'].values.flatten()),
    textposition = "outside",
    text = text,
    y = list(MoM_Data['MoMGrowth_amount'].values.flatten()),
    connector = {"line":{"color":"rgb(63, 63, 63)"}},
))

fig.update_layout(
        title = "Monthly Sales Change(2021-2022)",
        showlegend = False,
#       uniformtext_minsize=9, uniformtext_mode='hide'
#       width=850, height=600,
)

fig.show()


from pandas.io import gbq
df = gbq.read_gbq('SELECT date,extract(year from date) as year,category,ROUND(SUM(sale_dollars),2) as sale_dollars FROM bigquery-public-data.iowa_liquor_sales.sales  where extract(year from date) > 2019 and extract(year from date) < 2022 group by 1,2,3', project_id='iowa-liquor-retail-sale', reauth='true')


df['year'] = df['date'].apply(lambda x: x.strftime('%Y')).astype(int)


#Aggregate the sale by year
Data = df[['date','year','sale_dollars']]

Data['month'] = Data['date'].apply(lambda x: x.strftime('%m')).astype(int)
# Data['month'] = Data['month'].apply(lambda x: calendar.month_name[x])

Data = Data.groupby(['year','month']).sum().reset_index()
Data.rename(columns = {"sale_dollars": "sum"}, inplace=True)
Data = Data.sort_values(by=['month','year'], ascending=True)


DataPivot = Data.pivot(*Data).reset_index()
Data['last_sum'] = np.roll(Data['sum'],1)
Data['last_month'] = np.roll(Data['month'],1)


#calculate the grow by month 
Data.loc[Data['month'] == Data['last_month'], 'text%'] =  (((Data['sum']/Data['last_sum'])-1)*100)
Data.loc[Data['month'] == Data['last_month'], 'grow'] =  Data['sum'] - Data['last_sum']
Data = Data.loc[Data['year'] == 2021]
Data


dff = df.groupby(['year'])['sale_dollars'].sum().reset_index()
ly = dff.iat[0,1]
cy = dff.iat[-1,1]


y = list(Data['grow'].values.flatten())
y.insert(0,ly)
y.insert(len(y),cy)


import plotly.graph_objects as go

text  = list(Data['text%'].values.flatten())
text = [round(element,2) for element in text]
text = [str(int)+'%' for int in text]
text.insert(0, "")
text.insert(len(text),"" )
text

fig = go.Figure(go.Waterfall(
    name = "20", orientation = "v",
    measure = ["relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"total"],
    x = [['Total'
            ,'Month'
            ,'Month'
            ,'Month'
            ,'Month'
            ,'Month'
            ,'Month'
            ,'Month'
            ,'Month'
            ,'Month'
            ,'Month'
            ,'Month'
            ,'Month'
          , 'Total '],
            ['2020'
             ,'Jan'
             ,'Feb'
             ,'Mar'
             ,'Apr'
             ,'May'
             ,'Jun'
             ,'Jul'
             ,'Aug'
             ,'Sep'
             ,'Oct'
             ,'Nov'
             ,'Dec'
             ,'2021']],
    textposition = "outside",
    text = text,
    y = y,
    connector = {"line":{"color":"rgb(63, 63, 63)"}},
))

fig.update_layout(
        title = "YoY Liquor Sales Growth from 2020 to 2021",
        showlegend = False,
      width=950, height=700
	  )

fig.show()


#YoY Sale Change by category


#Aggregate the sale of liquor category group
Cat_Data = df[['year','category_group','sale_dollars']]
Cat_Data = Cat_Data.groupby(['year','category_group']).sum().reset_index()
Cat_Data.rename(columns = {"sale_dollars": "sum"}, inplace=True)
Cat_Data = Cat_Data.sort_values(by=['category_group','year'], ascending=True)


Cat_Data.head()


Cat_DataPivot = Cat_Data.pivot(*Cat_Data).reset_index()
Cat_DataPivot


Cat_Data['last_sum'] = np.roll(Cat_Data['sum'],1)
Cat_Data['last_category'] = np.roll(Cat_Data['category_group'],1)


Cat_Data.head()


#calculate the grow by category 
Cat_Data.loc[Cat_Data['category_group'] == Cat_Data['last_category'], 'text%'] =  (((Cat_Data['sum']/Cat_Data['last_sum'])-1)*100)
Cat_Data.loc[Cat_Data['category_group'] == Cat_Data['last_category'], 'grow'] =  Cat_Data['sum'] - Cat_Data['last_sum']


Cat_Data = Cat_Data.loc[Cat_Data['year'] == 2021]
Cat_Data


dff = df.groupby(['year'])['sale_dollars'].sum().reset_index()
ly = dff.iat[0,1]
cy = dff.iat[-1,1]


y = list(Cat_Data['grow'].values.flatten())
y.insert(0,ly)
y.insert(len(y),cy)


import plotly.graph_objects as go

text  = list(Cat_Data['text%'].values.flatten())
text = [round(element,2) for element in text]
text = [str(int)+'%' for int in text]
text.insert(0, "")
text.insert(len(text),"" )
text

fig = go.Figure(go.Waterfall(
    name = "20", orientation = "v",
    measure = ["relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"relative"
               ,"total"],
    x = [['Total'
          ,'Distilled Drinks'
          ,'Distilled Drinks'
          ,'Distilled Drinks'
          ,'Distilled Drinks'
          ,'Distilled Drinks'
          ,'Distilled Drinks'
          ,'Distilled Drinks'
          ,'Distilled Drinks'
          ,'Distilled Drinks'
          ,'Undistilled Drinks'
          ,'Undistilled Drinks'
          , 'Total '],
            ['2020'
             ,'whiskies'
             ,'mezcal/tequila'
             ,'vodka'
             ,'gins'
             ,'brandies'
             ,'rum'
             ,'cocktails/rtd'
             ,'liqueur/schnapps'
             ,'spirits'
             ,'temporary'
             ,'specialOrderItems'
             ,'2021']],
    textposition = "outside",
    text = text,
    y = y,
    connector = {"line":{"color":"rgb(63, 63, 63)"}},
))

fig.update_layout(
        title = "Liquor Sales Growth from 2020 to 2021 - YoY",
        showlegend = False,
#       uniformtext_minsize=9, uniformtext_mode='hide'
      width=950, height=700
    ,yaxis_range=[ly-10000000,cy+4000000]
)

fig.show()

Column Name	Description	Data Type
Invoice/Item Number	Concatenated invoice and line number associated with the liquor order.	Plain Text
Date	Date of Order	Date/Time
Store Number	Unique number assigned to the store who ordered the liquor.	Plain Text
Store Name	Name of store who ordered the liquor.	Plain Text
City	City where the store who ordered the liquor is located	Plain Text
Zip Code	Zip Code where the store who ordered the liquor is located	Plain Text
Store Location	Location of store who ordered the liquor.	Plain Text
County Number	Iowa county number for the county where store who ordered the liquor is located	Plain Text
County	County where the store who ordered the liquor is located	Plain Text
Category	Category code associated with the liquor ordered	Plain Text
Category Name	Category of the liquor ordered	Plain Text
Vendor Number	The vendor number of the company for the brand of liquor ordered	Plain Text
Vendor Name	The vendor name of the company for the brand of liquor ordered	Plain Text
Item Number	Item number for the individual liquor product ordered.	Plain Text
Item Description	Description of the individual liquor product ordered.	Plain Text
Pack	The number of bottles in a case for the liquor ordered	Number
Bottle Volume (ml)	Volume of each liquor bottle ordered in milliliters.	Number
State Bottle Cost	The amount that Alcoholic Beverages Division paid for each bottle of liquor ordered	Number
State Bottle Retail	The amount the store paid for each bottle of liquor ordered	Number
Bottles Sold	The amount the store paid for each bottle of liquor ordered	Number
Sale (Dollars)	Total cost of liquor order (number of bottles multiplied by the state bottle retail)	Number
Volume Sold (Liters)	Total volume of liquor ordered in liters. (i.e. (Bottle Volume (ml) x Bottles Sold)/1,000)	Number
Volume Sold (Gallons	Total volume of liquor ordered in gallons. (i.e. (Bottle Volume (ml) x Bottles Sold)/3785.411784)	Number

	date	year	category	sale_dollars
0	2020-05-12	2020	None	949.91
1	2020-07-10	2020	1090000.0	5401.44
2	2020-01-02	2020	1011800.0	9878.40
3	2020-03-13	2020	None	5488.02
4	2020-03-10	2020	None	579.82
5	2020-01-08	2020	None	693.00
6	2020-01-15	2020	None	1198.29
7	2020-01-13	2020	None	1157.13
8	2020-01-07	2020	None	849.75
9	2020-03-17	2020	None	1735.02
10	2020-01-15	2020	1011800.0	2499.00
11	2020-01-06	2020	None	752.04
12	2020-04-15	2020	None	14401.02

	year	month	sum	last_sum	last_month	text%	grow
12	2021	1	28665659.86	24794498.26	1	15.612986	3871161.60
13	2021	2	29617805.95	25720303.90	2	15.153406	3897502.05
14	2021	3	36654828.86	31210733.11	3	17.443024	5444095.75
15	2021	4	34734921.83	31987264.68	4	8.589847	2747657.15
16	2021	5	33659554.94	32081114.18	5	4.920156	1578440.76
17	2021	6	38890929.25	36007843.00	6	8.006829	2883086.25
18	2021	7	35176467.62	34272654.20	7	2.637127	903813.42
19	2021	8	36501798.74	33722408.12	8	8.241970	2779390.62
20	2021	9	35837061.02	36780839.35	9	-2.565951	-943778.33
21	2021	10	38085762.36	37465199.10	10	1.656373	620563.26
22	2021	11	38007977.91	31823796.14	11	19.432571	6184181.77
23	2021	12	42290767.06	40796477.62	12	3.662790	1494289.44

	year	category_group	sum
0	2020	101	1.279009e+08
11	2021	101	1.372200e+08
1	2020	102	2.700948e+07
12	2021	102	3.501522e+07
2	2020	103	8.898958e+07

category_group	year	101	102	103	104	105	106	107	108	109	170	190
0	2020	1.279009e+08	27009475.86	88989576.10	8839500.41	22117633.01	39348069.31	8970354.95	57777522.11	4473234.15	8903823.43	2149764.51
1	2021	1.372200e+08	35015218.98	92319665.76	9738124.79	21211310.82	41092021.45	8692335.02	65200272.90	5937210.76	10173754.73	1523594.60

Iowa Liquor Sales¶

Introduction¶

Goal¶

Column Descriptions (source)¶

Import libraries¶

Data Preparation (Download the data from the google BigQuery)¶

Q1. What is the overall sales trend?¶

Total Sales Over Time¶

Muliple line chart (Total Sale Over Time)¶

Q2. What is the Monthly growth?¶

Q3. What is the YoY growth?¶

Q4. What is the YoY growth by liquor category?¶

	year	category_group	sum	last_sum	last_category
0	2020	101	1.279009e+08	1.523595e+06	190
11	2021	101	1.372200e+08	1.279009e+08	101
1	2020	102	2.700948e+07	1.372200e+08	101
12	2021	102	3.501522e+07	2.700948e+07	102
2	2020	103	8.898958e+07	3.501522e+07	102