changelog shortlog tags changeset files revisions annotate raw

scripts/general/accumarray.m

changeset 10289: 4b124317dc38
parent:703038d648f1
author: John W. Eaton <jwe@octave.org>
date: Tue Feb 09 20:58:55 2010 -0500 (51 minutes ago)
permissions: -rw-r--r--
description: base_properties::set_children: account for hidden children
1## Copyright (C) 2007, 2008, 2009 David Bateman
2## Copyright (C) 2009, 2010 VZLU Prague
3##
4## This file is part of Octave.
5##
6## Octave is free software; you can redistribute it and/or modify it
7## under the terms of the GNU General Public License as published by
8## the Free Software Foundation; either version 3 of the License, or (at
9## your option) any later version.
10##
11## Octave is distributed in the hope that it will be useful, but
12## WITHOUT ANY WARRANTY; without even the implied warranty of
13## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14## General Public License for more details.
15##
16## You should have received a copy of the GNU General Public License
17## along with Octave; see the file COPYING. If not, see
18## <http://www.gnu.org/licenses/>.
19
20## -*- texinfo -*-
21## @deftypefn {Function File} {} accumarray (@var{subs}, @var{vals}, @var{sz}, @var{func}, @var{fillval}, @var{issparse})
22## @deftypefnx {Function File} {} accumarray (@var{csubs}, @var{vals}, @dots{})
23##
24## Create an array by accumulating the elements of a vector into the
25## positions defined by their subscripts. The subscripts are defined by
26## the rows of the matrix @var{subs} and the values by @var{vals}. Each row
27## of @var{subs} corresponds to one of the values in @var{vals}.
28##
29## The size of the matrix will be determined by the subscripts themselves.
30## However, if @var{sz} is defined it determines the matrix size. The length
31## of @var{sz} must correspond to the number of columns in @var{subs}.
32##
33## The default action of @code{accumarray} is to sum the elements with the
34## same subscripts. This behavior can be modified by defining the @var{func}
35## function. This should be a function or function handle that accepts a
36## column vector and returns a scalar. The result of the function should not
37## depend on the order of the subscripts.
38##
39## The elements of the returned array that have no subscripts associated with
40## them are set to zero. Defining @var{fillval} to some other value allows
41## these values to be defined.
42##
43## By default @code{accumarray} returns a full matrix. If @var{issparse} is
44## logically true, then a sparse matrix is returned instead.
45##
46## An example of the use of @code{accumarray} is:
47##
48## @example
49## @group
50## accumarray ([1,1,1;2,1,2;2,3,2;2,1,2;2,3,2], 101:105)
51## @result{} ans(:,:,1) = [101, 0, 0; 0, 0, 0]
52## ans(:,:,2) = [0, 0, 0; 206, 0, 208]
53## @end group
54## @end example
55##
56## The complexity in the non-sparse case is generally O(M+N), where N is the number of
57## subscripts and M is the maximum subscript (linearized in the multidimensional case).
58## If @var{func} is one of @code{@@sum} (default), @code{@@max}, @code{@@min}
59## or @code{@@(x) @{x@}}, an optimized code path is used.
60## Note that for a general reduction function the interpreter overhead can play a
61## major part, so it may be more efficient to make multiple accumarray calls and
62## compute the results in a vectorized manner.
63## @end deftypefn
64
function A = accumarray (subs, val, sz = [], func = [], fillval = [], isspar = [])

  ## Accumulate the entries of VAL into the positions of A addressed by SUBS.
  ##
  ##   subs    - matrix with one subscript tuple per row, or a cell array
  ##             holding one subscript vector per dimension
  ##   val     - values to accumulate (a single value is reused for every
  ##             subscript)
  ##   sz      - size of the result; [] derives it from the subscripts
  ##   func    - reduction applied to values sharing a subscript; [] means sum
  ##   fillval - value stored where no subscript points; [] means 0
  ##   isspar  - true to build a sparse result; [] means false
  ##
  ## Errors are raised for a wrong argument count, a non-zero fillval or more
  ## than two subscript columns in the sparse case, non-numeric sparse values,
  ## a SZ whose length does not match the subscripts, and invalid indices.

  if (nargin < 2 || nargin > 6)
    print_usage ();
  endif

  ## Normalize the subscripts and count the subscript dimensions.  The count
  ## lives in "ndim" so that the built-in function "ndims" is not shadowed.
  if (iscell (subs))
    subs = cellfun (@(x) x(:), subs, "UniformOutput", false);
    ndim = numel (subs);
    if (ndim == 1)
      subs = subs{1};
    endif
  else
    ndim = columns (subs);
  endif

  if (isempty (fillval))
    fillval = 0;
  endif

  if (isempty (isspar))
    isspar = false;
  endif

  ## Recognize the reductions that have a dedicated fast path.  The handle is
  ## identified through its name rather than with "==", because comparing
  ## function handles with "==" is not supported by every interpreter version.
  ## func2str may or may not prefix a simple handle with "@", so strip it.
  fname = "";
  if (isa (func, "function_handle"))
    fname = func2str (func);
    if (! isempty (fname) && fname(1) == "@")
      fname = fname(2:end);
    endif
  endif
  is_sum = isempty (func) || strcmp (fname, "sum");
  is_max = strcmp (fname, "max");
  is_min = strcmp (fname, "min");

  if (isspar)

    ## Sparse case.  Avoid linearizing the subscripts, because it could overflow.

    if (fillval != 0)
      error ("accumarray: fillval must be zero in the sparse case");
    endif

    ## Ensure subscripts are a two-column matrix.
    if (iscell (subs))
      subs = [subs{:}];
    endif

    ## Validate dimensions.
    if (ndim == 1)
      subs(:,2) = 1;
    elseif (ndim != 2)
      error ("accumarray: in the sparse case, needs 1 or 2 subscripts");
    endif

    ## Sparse accumulation works on doubles.  The converted values must be
    ## stored back into VAL, which is what the code below actually uses.
    if (isnumeric (val) || islogical (val))
      val = double (val);
    else
      error ("accumarray: in the sparse case, values must be numeric or logical");
    endif

    if (! is_sum)

      ## Reduce values.  This is not needed if we're about to sum them,
      ## because "sparse" can do that.

      ## Sort indices.
      [subs, idx] = sortrows (subs);
      n = rows (subs);
      ## Identify runs: jdx holds the last row of every group of equal subscripts.
      jdx = find (any (diff (subs, 1, 1), 2));
      jdx = [jdx; n];

      ## Reuse a single value for every subscript, as the full case does.
      if (numel (val) == 1)
        val = val(ones (1, n), 1);
      else
        val = val(:);
      endif

      val = cellfun (func, mat2cell (val(idx), diff ([0; jdx])));
      subs = subs(jdx, :);
      mode = "unique";
    else
      mode = "sum";
    endif

    ## Form the sparse matrix.
    if (isempty (sz))
      A = sparse (subs(:,1), subs(:,2), val, mode);
    elseif (length (sz) == 2)
      A = sparse (subs(:,1), subs(:,2), val, sz(1), sz(2), mode);
    else
      error ("accumarray: dimensions mismatch");
    endif

  else

    ## Linearize subscripts.
    if (ndim > 1)
      if (isempty (sz))
        if (iscell (subs))
          sz = cellfun (@max, subs);
        else
          sz = max (subs, [], 1);
        endif
      elseif (ndim != length (sz))
        error ("accumarray: dimensions mismatch");
      endif

      ## Convert multidimensional subscripts: one cell per dimension.
      if (! iscell (subs))
        subs = num2cell (subs, 1);
      endif
      subs = sub2ind (sz, subs{:}); # creates index cache
    elseif (! isempty (sz) && length (sz) < 2)
      error ("accumarray: needs at least 2 dimensions");
    elseif (! isindex (subs)) # creates index cache
      error ("accumarray: indices must be positive integers");
    endif

    ## Some built-in reductions handled efficiently.

    if (is_sum)
      ## Fast summation.
      if (isempty (sz))
        A = __accumarray_sum__ (subs, val);
      else
        A = __accumarray_sum__ (subs, val, prod (sz));
        ## Set proper shape.
        A = reshape (A, sz);
      endif

      ## Unreferenced positions are zero after summation; overwrite them
      ## when a different fill value was requested.
      if (fillval != 0)
        mask = true (size (A));
        mask(subs) = false;
        A(mask) = fillval;
      endif
    elseif (is_max)
      ## Fast maximization.  "zero" is the seed passed to the helper and is
      ## what unreferenced positions hold afterwards.

      if (isinteger (val))
        zero = intmin (class (val));
      elseif (islogical (val))
        zero = false;
      elseif (fillval == 0 && all (val(:) >= 0))
        ## This is a common case - fillval is zero, all numbers nonnegative.
        zero = 0;
      else
        zero = NaN; # Neutral value.
      endif

      if (isempty (sz))
        A = __accumarray_max__ (subs, val, zero);
      else
        A = __accumarray_max__ (subs, val, zero, prod (sz));
        A = reshape (A, sz);
      endif

      ## Replace the seed by the fill value unless they already agree.
      ## NaN != NaN is true, so two NaNs must be recognized explicitly.
      if (fillval != zero && ! (isnan (fillval) && isnan (zero)))
        mask = true (size (A));
        mask(subs) = false;
        A(mask) = fillval;
      endif
    elseif (is_min)
      ## Fast minimization.  "zero" plays the same seed role as above.

      if (isinteger (val))
        zero = intmax (class (val));
      elseif (islogical (val))
        zero = true;
      else
        zero = NaN; # Neutral value.
      endif

      if (isempty (sz))
        A = __accumarray_min__ (subs, val, zero);
      else
        A = __accumarray_min__ (subs, val, zero, prod (sz));
        A = reshape (A, sz);
      endif

      ## Replace the seed by the fill value unless they already agree.
      if (fillval != zero && ! (isnan (fillval) && isnan (zero)))
        mask = true (size (A));
        mask(subs) = false;
        A(mask) = fillval;
      endif
    else

      ## The general case.  Reduce values.
      n = rows (subs);
      if (numel (val) == 1)
        val = val(ones (1, n), 1);
      else
        val = val(:);
      endif

      ## Sort indices.
      [subs, idx] = sort (subs);
      ## Identify runs: jdx holds the last position of every group of equal
      ## linear indices.
      jdx = find (subs(1:n-1) != subs(2:n));
      jdx = [jdx; n];
      val = mat2cell (val(idx), diff ([0; jdx]));
      ## Optimize the case when function is @(x) {x}, i.e. we just want to
      ## collect the values to cells.
      persistent simple_cell_str = func2str (@(x) {x});
      if (! strcmp (func2str (func), simple_cell_str))
        val = cellfun (func, val);
      endif
      subs = subs(jdx);

      ## Construct matrix of fillvals.
      if (iscell (val))
        A = cell (sz);
      elseif (fillval == 0)
        A = zeros (sz, class (val));
      else
        A = repmat (fillval, sz);
      endif

      ## Set the reduced values.
      A(subs) = val;
    endif
  endif
endfunction
274
## Argument validation: a single argument is rejected by the nargin check.
275%!error (accumarray (1:5))
## One subscript row (three columns) paired with two values must raise an error.
276%!error (accumarray ([1,2,3],1:2))
## Column of 1-D subscripts, default summation; index 3 is never referenced
## and therefore stays at the default fill value 0.
277%!assert (accumarray ([1;2;4;2;4],101:105), [101;206;0;208])
## Three subscript columns produce a 3-D result.
278%!assert (accumarray ([1,1,1;2,1,2;2,3,2;2,1,2;2,3,2],101:105),cat(3, [101,0,0;0,0,0],[0,0,0;206,0,208]))
## Same subscripts reduced by an anonymous function (the general code path).
279%!assert (accumarray ([1,1,1;2,1,2;2,3,2;2,1,2;2,3,2],101:105,[],@(x)sin(sum(x))),sin(cat(3, [101,0,0;0,0,0],[0,0,0;206,0,208])))
## Subscripts supplied as a cell array with one vector per dimension.
280%!assert (accumarray ({[1 3 3 2 3 1 2 2 3 3 1 2],[3 4 2 1 4 3 4 2 2 4 3 4],[1 1 2 2 1 1 2 1 1 1 2 2]},101:112),cat(3,[0,0,207,0;0,108,0,0;0,109,0,317],[0,0,111,0;104,0,0,219;0,103,0,0]))
## @max with an explicit size and NaN as the fill value.
281%!assert (accumarray ([1,1;2,1;2,3;2,1;2,3],101:105,[2,4],@max,NaN),[101,NaN,NaN,NaN;104,NaN,105,NaN])
## Sparse output with a non-sum reduction (@prod).
282%!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2 4],@prod,0,true),sparse([1,2,2],[1,1,3],[101,10608,10815],2,4))
## A scalar value is reused for every subscript, so the result counts hits.
283%!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],1,[2,4]), [1,0,0,0;2,0,2,0])
## A reduction returning logicals yields a logical result.
284%!assert (accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2,4],@(x)length(x)>1),[false,false,false,false;true,false,true,false])
## The @(x){x} reduction collects the values of each position into a cell.
285%!test
286%! A = accumarray ([1 1; 2 1; 2 3; 2 1; 2 3],101:105,[2,4],@(x){x});
287%! assert (A{2},[102;104])
## Random data: passing @max directly must agree with the anonymous wrapper
## @(x) max (x), which goes through the general reduction path.
288%!test
289%! subs = ceil (rand (2000, 3)*10);
290%! val = rand (2000, 1);
291%! assert (accumarray (subs, val, [], @max), accumarray (subs, val, [], @(x) max (x)));
## Random data: the same comparison for @min, with an explicit size and NaN fill.
292%!test
293%! subs = ceil (rand (2000, 1)*100);
294%! val = rand (2000, 1);
295%! assert (accumarray (subs, val, [100, 1], @min, NaN), accumarray (subs, val, [100, 1], @(x) min (x), NaN));
## Random data: cell-array and matrix subscripts must agree in the sparse case.
296%!test
297%! subs = ceil (rand (2000, 2)*30);
298%! subsc = num2cell (subs, 1);
299%! val = rand (2000, 1);
300%! assert (accumarray (subsc, val, [], [], 0, true), accumarray (subs, val, [], [], 0, true));
## Random data: cell-array and matrix subscripts must agree for @max.
301%!test
302%! subs = ceil (rand (2000, 3)*10);
303%! subsc = num2cell (subs, 1);
304%! val = rand (2000, 1);
305%! assert (accumarray (subsc, val, [], @max), accumarray (subs, val, [], @max));
306
307