changelog shortlog tags changeset files revisions annotate raw

scripts/statistics/tests/anova.m

changeset 10289: 4b124317dc38
parent:1bf0ce0930be
author: John W. Eaton <jwe@octave.org>
date: Tue Feb 09 20:58:55 2010 -0500 (70 minutes ago)
permissions: -rw-r--r--
description: base_properties::set_children: account for hidden children
1## Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2005, 2006,
2## 2007, 2009 Kurt Hornik
3##
4## This file is part of Octave.
5##
6## Octave is free software; you can redistribute it and/or modify it
7## under the terms of the GNU General Public License as published by
8## the Free Software Foundation; either version 3 of the License, or (at
9## your option) any later version.
10##
11## Octave is distributed in the hope that it will be useful, but
12## WITHOUT ANY WARRANTY; without even the implied warranty of
13## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14## General Public License for more details.
15##
16## You should have received a copy of the GNU General Public License
17## along with Octave; see the file COPYING. If not, see
18## <http://www.gnu.org/licenses/>.
19
20## -*- texinfo -*-
21## @deftypefn {Function File} {[@var{pval}, @var{f}, @var{df_b}, @var{df_w}] =} anova (@var{y}, @var{g})
22## Perform a one-way analysis of variance (ANOVA). The goal is to test
23## whether the population means of data taken from @var{k} different
24## groups are all equal.
25##
26## Data may be given in a single vector @var{y} with groups specified by
27## a corresponding vector of group labels @var{g} (e.g., numbers from 1
28## to @var{k}). This is the general form which does not impose any
29## restriction on the number of data in each group or the group labels.
30##
31## If @var{y} is a matrix and @var{g} is omitted, each column of @var{y}
32## is treated as a group. This form is only appropriate for balanced
33## ANOVA in which the numbers of samples from each group are all equal.
34##
35## Under the null hypothesis that all group means are equal, the statistic
36## @var{f} follows an F distribution with @var{df_b} and @var{df_w} degrees of freedom.
37##
38## The p-value (1 minus the CDF of this distribution at @var{f}) is
39## returned in @var{pval}.
40##
41## If no output argument is given, the standard one-way ANOVA table is
42## printed.
43## @end deftypefn
44
45## Author: KH <Kurt.Hornik@wu-wien.ac.at>
46## Description: One-way analysis of variance (ANOVA)
47
function [pval, f, df_b, df_w] = anova (y, g)

  ## One-way ANOVA.  The total sum of squares about the grand mean (SST)
  ## is split into a between-groups part (SSB) and a within-groups part
  ## (SSW); the ratio of their mean squares is the F statistic.
  ##
  ## Inputs:
  ##   y  data; a matrix whose columns are the groups when g is omitted,
  ##      otherwise a vector of observations
  ##   g  optional vector of group labels, same length as y
  ## Outputs:
  ##   pval  1 minus the F CDF evaluated at f
  ##   f     ratio of between-group to within-group mean square
  ##   df_b  between-groups degrees of freedom (k - 1)
  ##   df_w  within-groups degrees of freedom (n - k)
  ## When called with no output argument the ANOVA table is printed.

  if ((nargin < 1) || (nargin > 2))
    print_usage ();
  elseif (nargin == 1)
    ## Balanced layout: each column of y is one group, so every group
    ## has the same number of observations (the row count of y).
    if (isvector (y))
      error ("anova: for `anova (y)', y must not be a vector");
    endif
    [group_count, k] = size (y);
    n = group_count * k;
    group_mean = mean (y);
  else
    ## General layout: y holds the observations, g the matching labels.
    if (! isvector (y))
      error ("anova: for `anova (y, g)', y must be a vector");
    endif
    n = length (y);
    if (! isvector (g) || (length (g) != n))
      error ("anova: g must be a vector of the same length as y");
    endif

    ## Distinct labels, in sorted order.
    group_label = unique (g);
    k = numel (group_label);
    if (k == 1)
      error ("anova: there should be at least 2 groups");
    endif

    ## Per-group sample size and mean, kept as 1-by-k row vectors so
    ## that they combine element-wise in the SSB formula below.
    group_count = zeros (1, k);
    group_mean = zeros (1, k);
    for j = 1 : k
      v = y(g == group_label(j));
      group_count(j) = numel (v);
      group_mean(j) = mean (v);
    endfor
  endif

  total_mean = mean (y(:));
  SSB = sum (group_count .* (group_mean - total_mean) .^ 2);
  SST = sumsq (reshape (y, n, 1) - total_mean);
  SSW = SST - SSB;
  ## SSW is a sum of squares and cannot be negative in exact arithmetic.
  ## Rounding in the subtraction can still leave a tiny negative value
  ## when the within-group variation is zero, which would flip the sign
  ## of f and turn the p-value into 1.  An explicit comparison is used
  ## (rather than max) so that a NaN result is left untouched.
  if (SSW < 0)
    SSW = 0;
  endif
  df_b = k - 1;
  df_w = n - k;
  v_b = SSB / df_b;
  v_w = SSW / df_w;
  f = v_b / v_w;
  pval = 1 - f_cdf (f, df_b, df_w);

  if (nargout == 0)
    ## This eventually needs to be done more cleanly ...
    printf ("\n");
    printf ("One-way ANOVA Table:\n");
    printf ("\n");
    printf ("Source of Variation Sum of Squares df Empirical Var\n");
    printf ("*********************************************************\n");
    printf ("Between Groups %15.4f %4d %13.4f\n", SSB, df_b, v_b);
    printf ("Within Groups %15.4f %4d %13.4f\n", SSW, df_w, v_w);
    printf ("---------------------------------------------------------\n");
    printf ("Total %15.4f %4d\n", SST, n - 1);
    printf ("\n");
    printf ("Test Statistic f %15.4f\n", f);
    printf ("p-value %15.4f\n", pval);
    printf ("\n");
  endif

endfunction