Unsurprisingly, math fits better in my head as code. Here are some statistics functions I’ve written to better understand.
from statistics import NormalDist
from math import sqrt
from scipy.stats import t
def get_zscore_for_datapoint(x,mean,stddev):
return (x-mean)/stddev
def prob_between_zscore(z1,z2):
return NormalDist().cdf(z1) - NormalDist().cdf(z2)
def get_value_from_percentile(percentile, mean, stddev):
return NormalDist().inv_cdf(percentile) * stddev + mean
def get_critical_t_value(df, confidence):
# ppf = percent point function (inverse of cdf)
# On the calculator, this is "Inverse t"
return t.ppf(half_confidence_tail(confidence), df)
def half_confidence_tail(confidence):
# TODO handle .95 in addition to 95
alpha_level = (100-confidence)/2
return .01 * (100-alpha_level)
def get_confidence_interval(mean, stddev, n, confidence):
x = mean
m = margin_of_error(stddev, n, confidence)
return (x-m, x+m)
def t_critical_value(alpha,n):
"Note: You need to split alpha in half if it's a double tail test"
# This is the same as t-inverse.
return t.ppf(1-alpha,n-1)
def test_statistic(u, u0, s, n):
return (u-u0)/(s/sqrt(n))
## UNSURE about what's below.
def get_sample_size_from_confidence(confidence, mean, stddev, margin_of_error):
"Probably should round up as well"
z = NormalDist().inv_cdf(half_confidence_tail(confidence))
return (z*stddev/ margin_of_error)**2
def margin_of_error(stddev, n, confidence):
if n < 30:
# t-statistic
df = n-1
crit_t = get_critical_t_value(df, confidence)
margin_of_error = crit_t * stddev / sqrt(n)
else:
# z-statistic
z = NormalDist().inv_cdf(half_confidence_tail(confidence))
margin_of_error = z*stddev/sqrt(n)
return margin_of_error
def in_critical_region(critical_value, n, alpha):
# TODO: I'm not certain < is always correct?
if n < 30:
return t.cdf(critical_value, n-1) < alpha
else:
return NormalDist().cdf(critical_value) < alpha
a,b = get_confidence_interval(25.1, 12.2, 49, 99)
assert (round(a,1), round(b,1)) == (20.4, 29.8)
def calc_pvalue():
"Nah. Use the t-test or z-test on your calculator"
def stddev(l):
mean = sum(l)/len(l)
return sqrt(sum([(x-mean)**2 for x in l])/(len(l)-1))