Welch's t-test
Integration using Simpson's Rule
Perhaps "inspired by C example" may be more accurate. Gamma subroutine from Gamma function task.
sub Γ(\z) {
constant g = 9;
z < .5 ?? π / sin(π × z) / Γ(1 - z) !!
τ.sqrt × (z + g - 1/2)**(z - 1/2) ×
exp(-(z + g - 1/2)) ×
[+] <
1.000000000000000174663
5716.400188274341379136
-14815.30426768413909044
14291.49277657478554025
-6348.160217641458813289
1301.608286058321874105
-108.1767053514369634679
2.605696505611755827729
-0.7423452510201416151527e-2
0.5384136432509564062961e-7
-0.4023533141268236372067e-8
> Z× 1, |map 1/(z + *), 0..*
}
sub p-value (@A, @B) {
return 1 if @A <= 1 or @B <= 1;
my $a-mean = @A.sum / @A;
my $b-mean = @B.sum / @B;
my $a-variance = @A.map( { ($a-mean - $_)² } ).sum / (@A - 1);
my $b-variance = @B.map( { ($b-mean - $_)² } ).sum / (@B - 1);
return 1 unless $a-variance && $b-variance;
my \Welchs-𝒕-statistic = ($a-mean - $b-mean)/($a-variance/@A + $b-variance/@B).sqrt;
my $DoF = ($a-variance / @A + $b-variance / @B)² /
(($a-variance² / (@A³ - @A²)) + ($b-variance² / (@B³ - @B²)));
my $sa = $DoF / 2 - 1;
my $x = $DoF / (Welchs-𝒕-statistic² + $DoF);
my $N = 65355;
my $h = $x / $N;
my ( $sum1, $sum2 );
for ^$N »×» $h -> $i {
$sum1 += (($i + $h / 2) ** $sa) / (1 - ($i + $h / 2)).sqrt;
$sum2 += $i ** $sa / (1 - $i).sqrt;
}
(($h / 6) × ( $x ** $sa / (1 - $x).sqrt + 4 × $sum1 + 2 × $sum2)) /
( Γ($sa + 1) × π.sqrt / Γ($sa + 1.5) );
}
# Testing
for (
[<27.5 21.0 19.0 23.6 17.0 17.9 16.9 20.1 21.9 22.6 23.1 19.6 19.0 21.7 21.4>],
[<27.1 22.0 20.8 23.4 23.4 23.5 25.8 22.0 24.8 20.2 21.9 22.1 22.9 20.5 24.4>],
[<17.2 20.9 22.6 18.1 21.7 21.4 23.5 24.2 14.7 21.8>],
[<21.5 22.8 21.0 23.0 21.6 23.6 22.5 20.7 23.4 21.8 20.7 21.7 21.5 22.5 23.6 21.5 22.5 23.5 21.5 21.8>],
[<19.8 20.4 19.6 17.8 18.5 18.9 18.3 18.9 19.5 22.0>],
[<28.2 26.6 20.1 23.3 25.2 22.1 17.7 27.6 20.6 13.7 23.2 17.5 20.6 18.0 23.9 21.6 24.3 20.4 24.0 13.2>],
[<30.02 29.99 30.11 29.97 30.01 29.99>],
[<29.89 29.93 29.72 29.98 30.02 29.98>],
[<3.0 4.0 1.0 2.1>],
[<490.2 340.0 433.9>]
) -> @left, @right { say p-value @left, @right }
Output:
0.0213780014628669
0.148841696605328
0.0359722710297969
0.0907733242856673
0.010751534033393
Using Burkhardt's 'incomplete beta'
This uses the Soper reduction formula to evaluate the integral, which converges much more quickly than Simpson's formula.
sub lgamma ( Num(Real) \n --> Num ){
use NativeCall;
sub lgamma (num64 --> num64) is native {}
lgamma( n )
}
sub p-value (@a, @b) {
return 1 if @a.elems | @b.elems ≤ 1;
my $mean1 = @a.sum / @a.elems;
my $mean2 = @b.sum / @b.elems;
return 1 if $mean1 == $mean2;
my $variance1 = sum (@a «-» $mean1) X**2;
my $variance2 = sum (@b «-» $mean2) X**2;
return 1 if $variance1 | $variance2 == 0;
$variance1 /= @a.elems - 1;
$variance2 /= @b.elems - 1;
my $Welchs-𝒕-statistic = ($mean1-$mean2)/sqrt($variance1/@a.elems+$variance2/@b.elems);
my $DoF = ($variance1/@a.elems + $variance2/@b.elems)² /
(($variance1 × $variance1)/(@a.elems × @a.elems × (@a.elems-1)) +
($variance2 × $variance2)/(@b.elems × @b.elems × (@b.elems-1))
);
my $A = $DoF / 2;
my $value = $DoF / ($Welchs-𝒕-statistic² + $DoF);
return $value if $A | $value ≤ 0 or $value ≥ 1;
# from here, translation of John Burkhardt's C
my $beta = lgamma($A) + 0.57236494292470009 - lgamma($A+0.5); # constant is logΓ(.5), more precise than 'lgamma' routine
my $eps = 10**-15;
my $psq = $A + 0.5;
my $cx = 1 - $value;
my ($xx,$pp,$qq,$indx);
if $A < $psq × $value { ($xx, $cx, $pp, $qq, $indx) = $cx, $value, 0.5, $A, 1 }
else { ($xx, $pp, $qq, $indx) = $value, $A, 0.5, 0 }
my $term = my $ai = $value = 1;
my $ns = floor $qq + $cx × $psq;
# Soper reduction formula
my $qq-ai = $qq - $ai;
my $rx = $ns == 0 ?? $xx !! $xx / $cx;
loop {
$term ×= $qq-ai × $rx / ($pp + $ai);
$value += $term;
$qq-ai = $term.abs;
if $qq-ai ≤ $eps & $eps×$value {
$value = $value × ($pp × $xx.log + ($qq - 1) × $cx.log - $beta).exp / $pp;
$value = 1 - $value if $indx;
last
}
$ai++;
$ns--;
if $ns ≥ 0 {
$qq-ai = $qq - $ai;
$rx = $xx if $ns == 0;
} else {
$qq-ai = $psq;
$psq += 1;
}
}
$value
}
my $error = 0;
my @answers = (
0.021378001462867,
0.148841696605327,
0.0359722710297968,
0.090773324285671,
0.0107515611497845,
0.00339907162713746,
0.52726574965384,
0.545266866977794,
);
for (
[<27.5 21.0 19.0 23.6 17.0 17.9 16.9 20.1 21.9 22.6 23.1 19.6 19.0 21.7 21.4>],
[<27.1 22.0 20.8 23.4 23.4 23.5 25.8 22.0 24.8 20.2 21.9 22.1 22.9 20.5 24.4>],
[<17.2 20.9 22.6 18.1 21.7 21.4 23.5 24.2 14.7 21.8>],
[<21.5 22.8 21.0 23.0 21.6 23.6 22.5 20.7 23.4 21.8 20.7 21.7 21.5 22.5 23.6 21.5 22.5 23.5 21.5 21.8>],
[<19.8 20.4 19.6 17.8 18.5 18.9 18.3 18.9 19.5 22.0>],
[<28.2 26.6 20.1 23.3 25.2 22.1 17.7 27.6 20.6 13.7 23.2 17.5 20.6 18.0 23.9 21.6 24.3 20.4 24.0 13.2>],
[<30.02 29.99 30.11 29.97 30.01 29.99>],
[<29.89 29.93 29.72 29.98 30.02 29.98>],
[<3.0 4.0 1.0 2.1>],
[<490.2 340.0 433.9>],
[<0.010268 0.000167 0.000167>],
[<0.159258 0.136278 0.122389>],
[<1.0/15 10.0/62.0>],
[<1.0/10 2/50.0>],
[<9/23.0 21/45.0 0/38.0>],
[<0/44.0 42/94.0 0/22.0>],
) -> @left, @right {
my $p-value = p-value @left, @right;
printf("p-value = %.14g\n",$p-value);
$error += abs($p-value - shift @answers);
}
printf("cumulative error is %g\n", $error);
Output:
p-value = 0.021378001462867
p-value = 0.14884169660533
p-value = 0.035972271029797
p-value = 0.090773324285667
p-value = 0.010751561149784
p-value = 0.0033990716271375
p-value = 0.52726574965384
p-value = 0.54526686697779
cumulative error is 5.30131e-15
Last updated