Data Manipulation
Load and preview data
# Load a CSV file into a pandas DataFrame
import pandas as pd
df = pd.read_csv("data.csv")
# Preview the first 5 rows (5 is also head()'s default)
df.head(n=5)
✏️ Rename columns
# Map each existing column name to its replacement, then rename in place
column_renames = {'oldName': 'new_name'}
df.rename(columns=column_renames, inplace=True)
Filter data
# Keep only the rows whose 'value' is greater than 10 (boolean mask via .loc)
filtered = df.loc[df['value'] > 10]
Group and aggregate
# Mean of 'value' within each 'category' group
grouped = (
    df
    .groupby('category')['value']
    .mean()
)
Merge datasets
# Inner-join df1 with df2 on their shared 'id' column
merged = df1.merge(df2, on='id', how='inner')
Convert to datetime
# Convert a string column to datetime format.
# The converted values are written back to the same 'date' column, so the
# original string values are replaced.
df['date'] = pd.to_datetime(df['date'])
🧼 Fill missing values
# Replace missing values in a column with 'Unknown'.
# Assign the result back rather than calling fillna(..., inplace=True) on the
# selected column: that chained in-place form is deprecated in pandas 2.x and
# no longer updates df under copy-on-write (the default from pandas 3.0).
df['column'] = df['column'].fillna('Unknown')
Statistical Reference
T-test: compare two independent groups
from scipy.stats import ttest_ind
# Compare means between group_a and group_b with an independent two-sample t-test.
# NOTE(review): group_a and group_b are not defined in this snippet; presumably
# each is a numeric sample for one group. Confirm before running.
ttest_ind(group_a, group_b)
🧮 Chi-square test: categorical relationships
from scipy.stats import chi2_contingency
import pandas as pd
# Cross-tabulate the two categorical variables into a contingency table,
# then run the chi-square test on that table
contingency_table = pd.crosstab(df['group'], df['outcome'])
chi2_contingency(contingency_table)
Correlation matrix
# Show pairwise correlations between all numeric columns.
# numeric_only=True skips non-numeric columns; since pandas 2.0 the default is
# False, so a DataFrame containing e.g. string columns would raise an error.
df.corr(numeric_only=True)
Linear regression with statsmodels
import statsmodels.api as sm
# Target (dependent) variable
y = df['target']
# Independent variables, with a constant column added via add_constant
X = sm.add_constant(df[['var1', 'var2']])
# Fit an ordinary least squares model and print its full summary
model = sm.OLS(y, X).fit()
print(model.summary())
Confidence interval for a mean
import numpy as np
import scipy.stats as st
# 95% confidence interval for the mean of `data`, from the t distribution
sample_mean = np.mean(data)
standard_error = st.sem(data)
degrees_of_freedom = len(data) - 1
ci = st.t.interval(0.95, degrees_of_freedom, loc=sample_mean, scale=standard_error)
Marketing Calculations
💡 Conversion Rate
# Conversion rate: conversions divided by visitors
visitors = 5_000
conversions = 125
conversion_rate = conversions / visitors  # 0.025, i.e. 2.5%
💰 Cost Per Acquisition (CPA)
# Cost per acquisition: total spend divided by total acquisitions
acquisitions = 300
spend = 15_000
cpa = spend / acquisitions  # 50.0
Customer Lifetime Value (CLV)
# CLV = avg purchase × purchase frequency × avg lifespan
avg_purchase = 75
purchase_freq = 6
avg_customer_lifespan = 3
clv = (
    avg_purchase
    * purchase_freq
    * avg_customer_lifespan
)  # 1350
📬 Email Open Rate
# Open rate: opened emails divided by delivered emails
delivered = 1_000
opened = 400
open_rate = opened / delivered  # 0.4, i.e. 40%
Marketing Lift
# Lift: the treatment's gain in conversion over control, relative to control
control = 0.08
treatment = 0.11
absolute_gain = treatment - control
lift = absolute_gain / control  # about 0.375, i.e. a 37.5% relative lift
💬 Want to Contribute?
Have a go-to snippet or a favorite stat trick? Drop me a line.
