sage-main

changeset 10142:b39c832e2eca

Many massive speedups for matrix_dense_modn.
author Robert Bradshaw <robertwb@math.washington.edu>
date Thu Aug 07 19:39:23 2008 -0700 (19 months ago)
parents 67f547159a56
children 87209fef487b
files sage/matrix/matrix_dense.pyx sage/matrix/matrix_modn_dense.pyx sage/matrix/matrix_window_modn_dense.pyx
line diff
1.1 --- a/sage/matrix/matrix_dense.pyx Sat Aug 02 16:50:33 2008 -0700 1.2 +++ b/sage/matrix/matrix_dense.pyx Thu Aug 07 19:39:23 2008 -0700 1.3 @@ -66,9 +66,8 @@ 1.4 1.5 v = self._list() 1.6 cdef Py_ssize_t i 1.7 - cdef long h 1.8 - h = 0 1.9 - n = 1 1.10 + cdef long h = 0 1.11 + 1.12 for i from 0 <= i < len(v): 1.13 h = h ^ (i * hash(v[i])) 1.14
2.1 --- a/sage/matrix/matrix_modn_dense.pyx Sat Aug 02 16:50:33 2008 -0700 2.2 +++ b/sage/matrix/matrix_modn_dense.pyx Thu Aug 07 19:39:23 2008 -0700 2.3 @@ -86,7 +86,11 @@ 2.4 include "../ext/cdefs.pxi" 2.5 include '../ext/stdsage.pxi' 2.6 include '../ext/random.pxi' 2.7 - 2.8 + 2.9 +cdef extern from *: 2.10 + int memcmp(void* a, void* b, long n) 2.11 + void* memcpy(void* dst, void* src, long n) 2.12 + 2.13 import sage.ext.multi_modular 2.14 cimport sage.ext.arith 2.15 import sage.ext.arith 2.16 @@ -125,7 +129,7 @@ 2.17 2.18 from sage.misc.misc import verbose, get_verbose, cputime 2.19 2.20 -from sage.rings.integer import Integer 2.21 +from sage.rings.integer cimport Integer 2.22 2.23 from sage.structure.element cimport ModuleElement, RingElement, Element, Vector 2.24 2.25 @@ -135,6 +139,12 @@ 2.26 ai = arith_int() 2.27 ################ 2.28 2.29 + 2.30 + 2.31 +cdef long num = 1 2.32 +cdef bint little_endian = (<char*>(&num))[0] 2.33 + 2.34 + 2.35 ############################################################################## 2.36 # Copyright (C) 2004,2005,2006 William Stein <wstein@gmail.com> 2.37 # Distributed under the terms of the GNU General Public License (GPL) 2.38 @@ -156,12 +166,12 @@ 2.39 2.40 matrix_dense.Matrix_dense.__init__(self, parent) 2.41 2.42 - cdef mod_int p 2.43 + cdef long p 2.44 p = self._base_ring.characteristic() 2.45 self.p = p 2.46 if p >= MAX_MODULUS: 2.47 raise OverflowError, "p (=%s) must be < %s"%(p, MAX_MODULUS) 2.48 - self.gather = MOD_INT_OVERFLOW/(p*p) 2.49 + self.gather = MOD_INT_OVERFLOW/<mod_int>(p*p) 2.50 2.51 _sig_on 2.52 self._entries = <mod_int *> sage_malloc(sizeof(mod_int)*self._nrows*self._ncols) 2.53 @@ -189,11 +199,17 @@ 2.54 sage_free(self._matrix) 2.55 2.56 def __init__(self, parent, entries, copy, coerce): 2.57 + """ 2.58 + TESTS: 2.59 + sage: matrix(GF(7), 2, 2, [-1, int(-2), GF(7)(-3), 1/4]) 2.60 + [6 5] 2.61 + [4 2] 2.62 + """ 2.63 2.64 cdef mod_int e 2.65 cdef Py_ssize_t i, j, k 2.66 cdef mod_int *v 2.67 - cdef mod_int p 2.68 + cdef long p 2.69 p = self._base_ring.characteristic() 2.70 2.71 R = self.base_ring() 2.72 @@ -220,25 +236,32 @@ 2.73 2.74 k = 0 2.75 cdef mod_int n 2.76 + cdef long tmp 2.77 2.78 - if coerce: 2.79 - for i from 0 <= i < self._nrows: 2.80 - if PyErr_CheckSignals(): raise KeyboardInterrupt 2.81 - v = self._matrix[i] 2.82 - for j from 0 <= j < self._ncols: 2.83 + for i from 0 <= i < self._nrows: 2.84 + if PyErr_CheckSignals(): raise KeyboardInterrupt 2.85 + v = self._matrix[i] 2.86 + for j from 0 <= j < self._ncols: 2.87 + x = entries[k] 2.88 + if PY_TYPE_CHECK_EXACT(x, int): 2.89 + tmp = (<long>x) % p 2.90 + v[j] = tmp + (tmp<0)*p 2.91 + elif PY_TYPE_CHECK_EXACT(x, IntegerMod_int) and (<IntegerMod_int>x)._parent is R: 2.92 + v[j] = (<IntegerMod_int>x).ivalue 2.93 + elif PY_TYPE_CHECK_EXACT(x, Integer): 2.94 + if coerce: 2.95 + v[j] = mpz_fdiv_ui((<Integer>x).value, p) 2.96 + else: 2.97 + v[j] = mpz_get_ui((<Integer>x).value) 2.98 + elif coerce: 2.99 v[j] = R(entries[k]) 2.100 - k = k + 1 2.101 - else: 2.102 - for i from 0 <= i < self._nrows: 2.103 - if PyErr_CheckSignals(): raise KeyboardInterrupt 2.104 - v = self._matrix[i] 2.105 - for j from 0 <= j < self._ncols: 2.106 - v[j] = int(entries[k]) 2.107 - k = k + 1 2.108 - 2.109 + else: 2.110 + v[j] = <long>(entries[k]) 2.111 + k = k + 1 2.112 2.113 def __richcmp__(Matrix_modn_dense self, right, int op): # always need for mysterious reasons. 2.114 return self._richcmp(right, op) 2.115 + 2.116 def __hash__(self): 2.117 return self._hash() 2.118 2.119 @@ -256,11 +279,11 @@ 2.120 # LEVEL 2 functionality 2.121 # * cdef _pickle 2.122 # * cdef _unpickle 2.123 - # * cdef _add_c_impl 2.124 + # x * cdef _add_c_impl 2.125 # * cdef _mul_c_impl 2.126 - # * cdef _matrix_times_matrix_c_impl 2.127 - # * cdef _cmp_c_impl 2.128 - # * __neg__ 2.129 + # x * cdef _matrix_times_matrix_c_impl 2.130 + # x * cdef _cmp_c_impl 2.131 + # x * __neg__ 2.132 # * __invert__ 2.133 # x * __copy__ 2.134 # * _multiply_classical 2.135 @@ -272,11 +295,269 @@ 2.136 # cdef ModuleElement _add_c_impl(self, ModuleElement right): 2.137 # cdef _mul_c_impl(self, Matrix right): 2.138 # cdef int _cmp_c_impl(self, Matrix right) except -2: 2.139 - # def __neg__(self): 2.140 # def __invert__(self): 2.141 # def _multiply_classical(left, matrix.Matrix _right): 2.142 # def _list(self): 2.143 # def _dict(self): 2.144 + 2.145 + def _pickle(self): 2.146 + """ 2.147 + Utility function for pickling. 2.148 + 2.149 + If the prime is small enough to fit in a byte, then it is stored as a 2.150 + contiguous string of bytes (to save space). Otherwise, memcpy is used 2.151 + to copy the raw data in the platforms native format. Any byte-swapping 2.152 + or word size difference is taken care of in unpickling (optimizing for 2.153 + unpickling on the same platform things were pickled on). 2.154 + 2.155 + The upcoming buffer protocol would be useful to not have to do any copying. 2.156 + 2.157 + EXAMPLES: 2.158 + sage: m = matrix(Integers(128), 3, 3, [ord(c) for c in "Hi there!"]); m 2.159 + [ 72 105 32] 2.160 + [116 104 101] 2.161 + [114 101 33] 2.162 + sage: m._pickle() 2.163 + ((1, True, 'Hi there!'), 10) 2.164 + """ 2.165 + cdef Py_ssize_t i, j 2.166 + cdef unsigned char* ss 2.167 + cdef unsigned char* row_ss 2.168 + cdef long word_size 2.169 + cdef mod_int *row_self 2.170 + 2.171 + if self.p <= 0xFF: 2.172 + word_size = sizeof(char) 2.173 + else: 2.174 + word_size = sizeof(mod_int) 2.175 + 2.176 + s = PyString_FromStringAndSize(NULL, word_size * self._nrows * self._ncols) 2.177 + ss = <unsigned char*><char *>s 2.178 + 2.179 + _sig_on 2.180 + if word_size == sizeof(char): 2.181 + for i from 0 <= i < self._nrows: 2.182 + row_self = self._matrix[i] 2.183 + row_ss = &ss[i*self._ncols] 2.184 + for j from 0<= j < self._ncols: 2.185 + row_ss[j] = row_self[j] 2.186 + else: 2.187 + for i from 0 <= i < self._nrows: 2.188 + memcpy(&ss[i*sizeof(mod_int)*self._ncols], self._matrix[i], sizeof(mod_int) * self._ncols) 2.189 + _sig_off 2.190 + 2.191 + return (word_size, little_endian, s), 10 2.192 + 2.193 + 2.194 + def _unpickle(self, data, int version): 2.195 + """ 2.196 + TESTS: 2.197 + 2.198 + Test for char-sized modulus: 2.199 + sage: A = random_matrix(GF(7), 5, 9) 2.200 + sage: data, version = A._pickle() 2.201 + sage: B = A.parent()(0) 2.202 + sage: B._unpickle(data, version) 2.203 + sage: B == A 2.204 + True 2.205 + 2.206 + And for larger modulus: 2.207 + sage: A = random_matrix(GF(1009), 51, 5) 2.208 + sage: data, version = A._pickle() 2.209 + sage: B = A.parent()(0) 2.210 + sage: B._unpickle(data, version) 2.211 + sage: B == A 2.212 + True 2.213 + 2.214 + Now test all the bit-packing options: 2.215 + sage: A = matrix(Integers(1000), 2, 2) 2.216 + sage: A._unpickle((1, True, '\x01\x02\xFF\x00'), 10) 2.217 + sage: A 2.218 + [ 1 2] 2.219 + [255 0] 2.220 + 2.221 + sage: A = matrix(Integers(1000), 1, 2) 2.222 + sage: A._unpickle((4, True, '\x02\x01\x00\x00\x01\x00\x00\x00'), 10) 2.223 + sage: A 2.224 + [258 1] 2.225 + sage: A._unpickle((4, False, '\x00\x00\x02\x01\x00\x00\x01\x03'), 10) 2.226 + sage: A 2.227 + [513 259] 2.228 + sage: A._unpickle((8, True, '\x03\x01\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00'), 10) 2.229 + sage: A 2.230 + [259 5] 2.231 + sage: A._unpickle((8, False, '\x00\x00\x00\x00\x00\x00\x02\x08\x00\x00\x00\x00\x00\x00\x01\x04'), 10) 2.232 + sage: A 2.233 + [520 260] 2.234 + 2.235 + Now make sure it works in context: 2.236 + sage: A = random_matrix(Integers(33), 31, 31) 2.237 + sage: loads(dumps(A)) == A 2.238 + True 2.239 + sage: A = random_matrix(Integers(3333), 31, 31) 2.240 + sage: loads(dumps(A)) == A 2.241 + True 2.242 + """ 2.243 + 2.244 + if version < 10: 2.245 + return matrix_dense.Matrix_dense._unpickle(self, data, version) 2.246 + 2.247 + cdef Py_ssize_t i, j 2.248 + cdef unsigned char* ss 2.249 + cdef unsigned char* row_ss 2.250 + cdef long word_size 2.251 + cdef mod_int *row_self 2.252 + cdef bint little_endian_data 2.253 + cdef unsigned char* udata 2.254 + 2.255 + if version == 10: 2.256 + word_size, little_endian_data, s = data 2.257 + ss = <unsigned char*><char *>s 2.258 + 2.259 + _sig_on 2.260 + if word_size == sizeof(char): 2.261 + for i from 0 <= i < self._nrows: 2.262 + row_self = self._matrix[i] 2.263 + row_ss = &ss[i*self._ncols] 2.264 + for j from 0<= j < self._ncols: 2.265 + row_self[j] = row_ss[j] 2.266 + 2.267 + elif word_size == sizeof(mod_int) and little_endian == little_endian_data: 2.268 + for i from 0 <= i < self._nrows: 2.269 + memcpy(self._matrix[i], &ss[i*sizeof(mod_int)*self._ncols], sizeof(mod_int) * self._ncols) 2.270 + 2.271 + # Note that mod_int is at least 32 bits, and never stores more than 32 bits of info 2.272 + elif little_endian_data: 2.273 + for i from 0 <= i < self._nrows: 2.274 + row_self = self._matrix[i] 2.275 + for j from 0<= j < self._ncols: 2.276 + udata = &ss[(i*self._ncols+j)*word_size] 2.277 + row_self[j] = ((udata[0]) + 2.278 + (udata[1] << 8) + 2.279 + (udata[2] << 16) + 2.280 + (udata[3] << 24)) 2.281 + 2.282 + else: 2.283 + for i from 0 <= i < self._nrows: 2.284 + row_self = self._matrix[i] 2.285 + for j from 0<= j < self._ncols: 2.286 + udata = &ss[(i*self._ncols+j)*word_size] 2.287 + row_self[j] = ((udata[word_size-1]) + 2.288 + (udata[word_size-2] << 8) + 2.289 + (udata[word_size-3] << 16) + 2.290 + (udata[word_size-4] << 24)) 2.291 + 2.292 + _sig_off 2.293 + 2.294 + else: 2.295 + raise RuntimeError, "unknown matrix version" 2.296 + 2.297 + 2.298 + cdef long _hash(self) except -1: 2.299 + """ 2.300 + TESTS: 2.301 + sage: a = random_matrix(GF(11), 5, 5, range(25)) 2.302 + sage: a.set_immutable() 2.303 + sage: hash(a) #random 2.304 + 216 2.305 + sage: b = a.change_ring(ZZ) 2.306 + sage: b.set_immutable() 2.307 + sage: hash(b) == hash(a) 2.308 + True 2.309 + """ 2.310 + x = self.fetch('hash') 2.311 + if not x is None: return x 2.312 + 2.313 + if not self._mutability._is_immutable: 2.314 + raise TypeError, "mutable matrices are unhashable" 2.315 + 2.316 + cdef Py_ssize_t i 2.317 + cdef long h = 0, n = 0 2.318 + 2.319 + _sig_on 2.320 + for i from 0 <= i < self._nrows: 2.321 + for j from 0<= j < self._ncols: 2.322 + h ^= n * self._matrix[i][j] 2.323 + n += 1 2.324 + _sig_off 2.325 + 2.326 + self.cache('hash', h) 2.327 + return h 2.328 + 2.329 + 2.330 + def __neg__(self): 2.331 + """ 2.332 + EXAMPLES: 2.333 + sage: m = matrix(GF(19), 3, 3, range(9)); m 2.334 + [0 1 2] 2.335 + [3 4 5] 2.336 + [6 7 8] 2.337 + sage: -m 2.338 + [19 18 17] 2.339 + [16 15 14] 2.340 + [13 12 11] 2.341 + """ 2.342 + cdef Py_ssize_t i,j 2.343 + cdef Matrix_modn_dense M 2.344 + cdef mod_int p = self.p 2.345 + 2.346 + M = Matrix_modn_dense.__new__(Matrix_modn_dense, self._parent,None,None,None) 2.347 + M.p = p 2.348 + 2.349 + _sig_on 2.350 + cdef mod_int *row_self, *row_ans 2.351 + for i from 0 <= i < self._nrows: 2.352 + row_self = self._matrix[i] 2.353 + row_ans = M._matrix[i] 2.354 + for j from 0<= j < self._ncols: 2.355 + row_ans[j] = p - row_self[j] 2.356 + _sig_off 2.357 + return M 2.358 + 2.359 + 2.360 + cdef ModuleElement _lmul_c_impl(self, RingElement right): 2.361 + """ 2.362 + EXAMPLES: 2.363 + sage: a = random_matrix(Integers(60), 400, 500) 2.364 + sage: 3*a + 9*a == 12*a 2.365 + True 2.366 + """ 2.367 + return self._rmul_c_impl(right) 2.368 + 2.369 + cdef ModuleElement _rmul_c_impl(self, RingElement left): 2.370 + """ 2.371 + EXAMPLES: 2.372 + sage: a = matrix(GF(101), 3, 3, range(9)); a 2.373 + [0 1 2] 2.374 + [3 4 5] 2.375 + [6 7 8] 2.376 + sage: a * 5 2.377 + [ 0 5 10] 2.378 + [15 20 25] 2.379 + [30 35 40] 2.380 + sage: a * 50 2.381 + [ 0 50 100] 2.382 + [ 49 99 48] 2.383 + [ 98 47 97] 2.384 + """ 2.385 + cdef Py_ssize_t i,j 2.386 + cdef Matrix_modn_dense M 2.387 + cdef mod_int p = self.p 2.388 + cdef mod_int a = left 2.389 + 2.390 + M = Matrix_modn_dense.__new__(Matrix_modn_dense, self._parent,None,None,None) 2.391 + M.p = p 2.392 + 2.393 + _sig_on 2.394 + cdef mod_int *row_self, *row_ans 2.395 + for i from 0 <= i < self._nrows: 2.396 + row_self = self._matrix[i] 2.397 + row_ans = M._matrix[i] 2.398 + for j from 0<= j < self._ncols: 2.399 + row_ans[j] = (a*row_self[j]) % p 2.400 + _sig_off 2.401 + return M 2.402 + 2.403 def __copy__(self): 2.404 cdef Matrix_modn_dense A 2.405 A = Matrix_modn_dense.__new__(Matrix_modn_dense, self._parent, 2.406 @@ -312,11 +593,12 @@ 2.407 """ 2.408 2.409 cdef Py_ssize_t i,j 2.410 - cdef long k 2.411 - cdef Matrix_modn_dense M 2.412 + cdef mod_int k, p 2.413 + cdef Matrix_modn_dense M 2.414 2.415 M = Matrix_modn_dense.__new__(Matrix_modn_dense, self._parent,None,None,None) 2.416 Matrix.__init__(M, self._parent) 2.417 + p = self.p 2.418 2.419 _sig_on 2.420 cdef mod_int *row_self, *row_right, *row_ans 2.421 @@ -325,15 +607,92 @@ 2.422 row_right = (<Matrix_modn_dense> right)._matrix[i] 2.423 row_ans = M._matrix[i] 2.424 for j from 0<= j < self._ncols: 2.425 - k = (row_self[j]+row_right[j]) 2.426 - # division is really slow, so we normalize only when it is necessary 2.427 - if k>=self.p: 2.428 - row_ans[j]=k-self.p 2.429 - else: 2.430 - row_ans[j]=k 2.431 - 2.432 + k = row_self[j] + row_right[j] 2.433 + row_ans[j] = k - (k >= p) * p 2.434 _sig_off 2.435 - return M; 2.436 + return M 2.437 + 2.438 + 2.439 + cdef ModuleElement _sub_c_impl(self, ModuleElement right): 2.440 + """ 2.441 + Subtract two dense matrices over Z/nZ 2.442 + 2.443 + EXAMPLES: 2.444 + sage: a = matrix(GF(11), 3, 3, range(9)); a 2.445 + [0 1 2] 2.446 + [3 4 5] 2.447 + [6 7 8] 2.448 + sage: a - 4 2.449 + [7 1 2] 2.450 + [3 0 5] 2.451 + [6 7 4] 2.452 + sage: a - matrix(GF(11), 3, 3, range(1, 19, 2)) 2.453 + [10 9 8] 2.454 + [ 7 6 5] 2.455 + [ 4 3 2] 2.456 + """ 2.457 + 2.458 + cdef Py_ssize_t i,j 2.459 + cdef mod_int k, p 2.460 + cdef Matrix_modn_dense M 2.461 + 2.462 + M = Matrix_modn_dense.__new__(Matrix_modn_dense, self._parent,None,None,None) 2.463 + Matrix.__init__(M, self._parent) 2.464 + p = self.p 2.465 + 2.466 + _sig_on 2.467 + cdef mod_int *row_self, *row_right, *row_ans 2.468 + for i from 0 <= i < self._nrows: 2.469 + row_self = self._matrix[i] 2.470 + row_right = (<Matrix_modn_dense> right)._matrix[i] 2.471 + row_ans = M._matrix[i] 2.472 + for j from 0<= j < self._ncols: 2.473 + k = p + row_self[j] - row_right[j] 2.474 + row_ans[j] = k - (k >= p) * p 2.475 + _sig_off 2.476 + return M 2.477 + 2.478 + 2.479 + cdef int _cmp_c_impl(self, Element right) except -2: 2.480 + """ 2.481 + Compare two dense matrices over Z/nZ 2.482 + 2.483 + EXAMPLES: 2.484 + sage: a = matrix(GF(17), 4, range(3, 83, 5)); a 2.485 + [ 3 8 13 1] 2.486 + [ 6 11 16 4] 2.487 + [ 9 14 2 7] 2.488 + [12 0 5 10] 2.489 + sage: a == a 2.490 + True 2.491 + sage: b = a - 3; b 2.492 + [ 0 8 13 1] 2.493 + [ 6 8 16 4] 2.494 + [ 9 14 16 7] 2.495 + [12 0 5 7] 2.496 + sage: b < a 2.497 + True 2.498 + sage: b > a 2.499 + False 2.500 + sage: b == a 2.501 + False 2.502 + sage: b + 3 == a 2.503 + True 2.504 + """ 2.505 + 2.506 + cdef Py_ssize_t i 2.507 + cdef int cmp 2.508 + 2.509 + _sig_on 2.510 + cdef mod_int *row_self, *row_right 2.511 + for i from 0 <= i < self._nrows: 2.512 + row_self = self._matrix[i] 2.513 + row_right = (<Matrix_modn_dense> right)._matrix[i] 2.514 + cmp = memcmp(row_self, row_right, sizeof(mod_int)*self._ncols) 2.515 + if cmp: 2.516 + return cmp 2.517 + _sig_off 2.518 + return 0 2.519 2.520 2.521 cdef Matrix _matrix_times_matrix_c_impl(self, Matrix right): 2.522 @@ -399,7 +758,7 @@ 2.523 2.524 ######################################################################## 2.525 # LEVEL 3 functionality (Optional) 2.526 - # * cdef _sub_c_impl 2.527 + # x * cdef _sub_c_impl 2.528 # * __deepcopy__ 2.529 # * __invert__ 2.530 # * Matrix windows -- only if you need strassen for that base
3.1 --- a/sage/matrix/matrix_window_modn_dense.pyx Sat Aug 02 16:50:33 2008 -0700 3.2 +++ b/sage/matrix/matrix_window_modn_dense.pyx Thu Aug 07 19:39:23 2008 -0700 3.3 @@ -1,3 +1,12 @@ 3.4 +""" 3.5 +TESTS: 3.6 + sage: a = random_matrix(GF(11), 30, 40) 3.7 + sage: b = random_matrix(GF(11), 40, 53) 3.8 + sage: a._multiply_strassen(b, 7) == a.change_ring(ZZ)*b.change_ring(ZZ) 3.9 + True 3.10 +""" 3.11 + 3.12 + 3.13 include "../ext/cdefs.pxi" 3.14 include "../ext/stdsage.pxi" 3.15 3.16 @@ -40,7 +49,7 @@ 3.17 3.18 cdef add(self, MatrixWindow A): 3.19 cdef Py_ssize_t i, j 3.20 - cdef mod_int p 3.21 + cdef mod_int k, p 3.22 cdef mod_int* self_row 3.23 cdef mod_int* A_row 3.24 if self._nrows != A._nrows or self._ncols != A._ncols: 3.25 @@ -50,13 +59,12 @@ 3.26 self_row = ( <Matrix_modn_dense> self._matrix )._matrix[i + self._row] + self._col 3.27 A_row = ( <Matrix_modn_dense> A._matrix )._matrix[i + A._row] + A._col 3.28 for j from 0 <= j < self._ncols: 3.29 - self_row[j] += A_row[j] 3.30 - if self_row[j] >= p: 3.31 - self_row[j] -= p 3.32 + k = self_row[j] + A_row[j] 3.33 + self_row[j] = k - (k >= p) * p 3.34 3.35 cdef subtract(self, MatrixWindow A): 3.36 cdef Py_ssize_t i, j 3.37 - cdef mod_int p 3.38 + cdef mod_int k, p 3.39 cdef mod_int* self_row 3.40 cdef mod_int* A_row 3.41 if self._nrows != A._nrows or self._ncols != A._ncols: 3.42 @@ -66,14 +74,12 @@ 3.43 self_row = ( <Matrix_modn_dense> self._matrix )._matrix[i + self._row] + self._col 3.44 A_row = ( <Matrix_modn_dense> A._matrix )._matrix[i + A._row] + A._col 3.45 for j from 0 <= j < self._ncols: 3.46 - if self_row[j] >= A_row[j]: 3.47 - self_row[j] -= A_row[j] 3.48 - else: 3.49 - self_row[j] += p - A_row[j] 3.50 + k = p + self_row[j] - A_row[j] 3.51 + self_row[j] = k - (k >= p) * p 3.52 3.53 cdef set_to_sum(self, MatrixWindow A, MatrixWindow B): 3.54 cdef Py_ssize_t i, j 3.55 - cdef mod_int p 3.56 + cdef mod_int k, p 3.57 cdef mod_int* self_row 3.58 cdef mod_int* A_row 3.59 cdef mod_int* B_row 3.60 @@ -87,13 +93,12 @@ 3.61 A_row = ( <Matrix_modn_dense> A._matrix )._matrix[i + A._row] + A._col 3.62 B_row = ( <Matrix_modn_dense> B._matrix )._matrix[i + B._row] + B._col 3.63 for j from 0 <= j < self._ncols: 3.64 - self_row[j] = A_row[j] + B_row[j] 3.65 - if self_row[j] >= p: 3.66 - self_row[j] -= p 3.67 + k = A_row[j] + B_row[j] 3.68 + self_row[j] = k - (k >= p) * p 3.69 3.70 cdef set_to_diff(self, MatrixWindow A, MatrixWindow B): 3.71 cdef Py_ssize_t i, j 3.72 - cdef mod_int p 3.73 + cdef mod_int k, p 3.74 cdef mod_int* self_row 3.75 cdef mod_int* A_row 3.76 cdef mod_int* B_row 3.77 @@ -107,10 +112,8 @@ 3.78 A_row = ( <Matrix_modn_dense> A._matrix )._matrix[i + A._row] + A._col 3.79 B_row = ( <Matrix_modn_dense> B._matrix )._matrix[i + B._row] + B._col 3.80 for j from 0 <= j < self._ncols: 3.81 - if A_row[j] > B_row[j]: 3.82 - self_row[j] = A_row[j] - B_row[j] 3.83 - else: 3.84 - self_row[j] = p + A_row[j] - B_row[j] 3.85 + k = p + A_row[j] - B_row[j] 3.86 + self_row[j] = k - (k >= p) * p 3.87 3.88 cdef set_to_prod(self, MatrixWindow A, MatrixWindow B): 3.89 cdef Py_ssize_t i, j, k, gather, top, A_ncols 3.90 @@ -133,12 +136,12 @@ 3.91 self_row = ( <Matrix_modn_dense> self._matrix )._matrix[i + self._row] + self._col 3.92 A_row = ( <Matrix_modn_dense> A._matrix )._matrix[i + A._row] + A._col 3.93 k = 0 3.94 - for j from 1 <= j < B._ncols: 3.95 + for j from 0 <= j < B._ncols: 3.96 self_row[j] = (A_row[k] * B_matrix_off[k][B._col+j]) % p 3.97 for k from 1 <= k < A._ncols: 3.98 A_i_k = A_row[k] 3.99 B_row_k = B_matrix_off[k] + B._col 3.100 - for j from 1 <= j < B._ncols: 3.101 + for j from 0 <= j < B._ncols: 3.102 self_row[j] = (self_row[j] + A_i_k * B_row_k[j]) % p 3.103 3.104 else: