Portfolio

Vendor Data Analysis
Portfolio project
Effective inventory and sales management are critical for optimizing profitability in the retails and wholesale industry. Companies need to ensure that they are not incurring losses due to inefficient pricing, poor inventory turnover or vendor dependency. The goal of this analysis is to:
CLIENT
NA
DESIGNER
NA
CLIENT
NA
WEBSITE
NA
Questions to be answered.
- Identify underperforming brands that require promotion or pricing adjustment
- Determine top vendors contributing to ales and gross profit
- Analyze the impact of bulk purchasing on unit costs
- Access inventory turnover to reduce holding costs and improve efficiency.
- Investigate the profitability variance between high-performing and low-performing vendors.
Findings
- PurchasePrice has weak correlation with TotalSalesDolars (-0.01) and GrossProfit (0.016), suggesting that price variations do not significantly impact sales revenue or profit.
- Strong correlation between total purchase quantity and total sales quantity (0.99) confirming efficient inventory turnover.
- Negative correlation between profit margin and total sales price (-0.179) suggests taht as sales price increases, margins descrease, possibly due to competitive pricing pressures.
- StockTurnover has weak negative correlations with both GrossProfit (0.038) and ProfitMargin (-0.055) indicating that faster turnover doesn’t necessarily result in higher profitability.
- vendor buying in bulk (large order size) get the lowest unit price ($10.78 per unit) meaning higher margins if they can manage inventory efficiently.
- The price difference between small and large orders is substantial (-72% reduction in unit cost)
- This suggests that bulk pricing strategies successfully encourse vendors to purchase in largervolumes, leading to higher overall sales despite lower per-unit revenue.
Analysis Files
import sqlite3
import pandas as pd
import logging
from loading_data import ingest_db
logging.basicConfig(
filename = 'log/get_vendor_summary.log',
level = logging.DEBUG,
format = "%(asctime)s - %(levelname)s - %(message)s",
filemode ='a'
)
def create_vendor_summary(conn):
"""This functino will merge the different tables to get the overall vendor summary and adding new columns in the resultant data"""
vendor_sales_summary = pd.read_sql_query("""with FreightSummary as(
select VendorNumber,
SUM(Freight) as FreightCost
From Vendor_invoice
Group by VendorNumber
),
PurchaseSummary as(
Select
p.VendorNumber,
p.VendorName,
p.Brand,
p.Description,
P.PurchasePrice,
pp.Price as ActualPrice,
pp.Volume,
SUM(p.Quantity) as TotalPurchaseQuantity,
SUM(p.Dollars) as TotalPurchaseDollars
FROM purchases p
JOIN purchase_prices pp
ON p.Brand = pp.Brand
WHERE p.PurchasePrice > 0
Group by p.VendorNumber, p.VendorName, p.Brand, p.Description, p.PurchasePrice, pp.Price, pp.Volume
),
SalesSummary as(
Select
VendorNo,
Brand,
SUM(SalesQuantity) AS TotalSalesQuantity,
SUM(SalesDollars) AS TotalSalesDollars,
SUM(SalesPrice) AS TotalSalesPrice,
SUM(ExciseTax) as TotalExciseTax
From sales
Group By VendorNo, Brand
)
select
ps.VendorNumber,
ps.VendorName,
ps.Brand,
ps.Description,
ps.PurchasePrice,
ps.ActualPrice,
ps.Volume,
ps.TotalPurchaseQuantity,
ps.TotalPurchaseDollars,
ss.TotalSalesQuantity,
ss.TotalSalesDollars,
ss.TotalSalesPrice,
ss.TotalExciseTax,
fs.FreightCost
FROM PurchaseSummary ps
LEFT JOIN SalesSummary ss
ON ps.VendorNumber = ss.VendorNo
and ps.Brand = ss.Brand
LEFT JOIN FreightSummary fs
ON ps.VendorNumber = fs.VendorNumber
order by ps.TotalPurchaseDollars DESC
""", conn)
return vendor_sales_summary
def clean_data(df):
"""This functino will clean the data"""
# changing the datatype to float
df['Volume'] = df['Volume'].astype('float64')
# filling missing value with 0
df.fillna(0, inplace = True)
# removing spaces from the categorical columns
df['VendorName'] = df['VendorName'].str.strip()
df['Description'] = df['Description'].str.strip()
# Creating columns for better analysis
df['GrossProfit'] = df['TotalSalesDollars']-df['TotalPurchaseDollars']
df['ProfitMargin'] = (df['GrossProfit']/df['TotalSalesDollars'])*100
df['StockTurnover'] = df['TotalSalesQuantity']/df['TotalPurchaseQuantity']
df['SalestoPurchaseRation'] = df['TotalSalesDollars']/df['TotalPurchaseDollars']
return df
if __name__ == '__main__':
conn = sqlite3.connect('inventory.db')
logging.info('Creating vendor summary table....')
summary_df = create_vendor_summary(conn)
logging.info(summary_df.head())
logging.info('cleaning data....')
clean_df = clean_data(summary_df)
logging.info(clean_df.head())
logging.info('ingesting data....')
ingest_db(clean_df, 'vendor_sales_summary', conn)
logging.info('completed')
import pandas as pd
import os
from sqlalchemy import create_engine
import logging
import time
logging.basicConfig(
filename='logs/.log',
level=logging.DEBUG,
format='%(asctime)s - %(levelname)s - %(message)s',
filemode='a'
)
engine = create_engine("sqlite:///inventory.db")
def ingest_db(df, table_name, engine):
'''This function will ingest data into database'''
df.to_sql(table_name, engine, if_exists='replace', index=False)
def load_raw_data():
'''This function will ingest data into database'''
start = time.time()
for file in os.listdir("data"):
if ".csv" in file:
df = pd.read_csv("data/"+file)
logging.info("Ingesting {file}...".format(file=file))
ingest_db(df, file[:-4], engine)
end = time.time()
total_time = (end-start)/60
logging.info("Finished ingesting data")
logging.info("Total time: {:.2f} minutes".format(total_time))
if __name__ == "__main__":
load_raw_data()