sage-main
changeset 10142:b39c832e2eca
Many massive speedups for matrix_dense_modn.
| author | Robert Bradshaw <robertwb@math.washington.edu> |
|---|---|
| date | Thu Aug 07 19:39:23 2008 -0700 (19 months ago) |
| parents | 67f547159a56 |
| children | 87209fef487b |
| files | sage/matrix/matrix_dense.pyx sage/matrix/matrix_modn_dense.pyx sage/matrix/matrix_window_modn_dense.pyx |
line diff
1.1 --- a/sage/matrix/matrix_dense.pyx Sat Aug 02 16:50:33 2008 -0700
1.2 +++ b/sage/matrix/matrix_dense.pyx Thu Aug 07 19:39:23 2008 -0700
1.3 @@ -66,9 +66,8 @@
1.4
1.5 v = self._list()
1.6 cdef Py_ssize_t i
1.7 - cdef long h
1.8 - h = 0
1.9 - n = 1
1.10 + cdef long h = 0
1.11 +
1.12 for i from 0 <= i < len(v):
1.13 h = h ^ (i * hash(v[i]))
1.14
2.1 --- a/sage/matrix/matrix_modn_dense.pyx Sat Aug 02 16:50:33 2008 -0700
2.2 +++ b/sage/matrix/matrix_modn_dense.pyx Thu Aug 07 19:39:23 2008 -0700
2.3 @@ -86,7 +86,11 @@
2.4 include "../ext/cdefs.pxi"
2.5 include '../ext/stdsage.pxi'
2.6 include '../ext/random.pxi'
2.7 -
2.8 +
2.9 +cdef extern from *:
2.10 + int memcmp(void* a, void* b, long n)
2.11 + void* memcpy(void* dst, void* src, long n)
2.12 +
2.13 import sage.ext.multi_modular
2.14 cimport sage.ext.arith
2.15 import sage.ext.arith
2.16 @@ -125,7 +129,7 @@
2.17
2.18 from sage.misc.misc import verbose, get_verbose, cputime
2.19
2.20 -from sage.rings.integer import Integer
2.21 +from sage.rings.integer cimport Integer
2.22
2.23 from sage.structure.element cimport ModuleElement, RingElement, Element, Vector
2.24
2.25 @@ -135,6 +139,12 @@
2.26 ai = arith_int()
2.27 ################
2.28
2.29 +
2.30 +
2.31 +cdef long num = 1
2.32 +cdef bint little_endian = (<char*>(&num))[0]
2.33 +
2.34 +
2.35 ##############################################################################
2.36 # Copyright (C) 2004,2005,2006 William Stein <wstein@gmail.com>
2.37 # Distributed under the terms of the GNU General Public License (GPL)
2.38 @@ -156,12 +166,12 @@
2.39
2.40 matrix_dense.Matrix_dense.__init__(self, parent)
2.41
2.42 - cdef mod_int p
2.43 + cdef long p
2.44 p = self._base_ring.characteristic()
2.45 self.p = p
2.46 if p >= MAX_MODULUS:
2.47 raise OverflowError, "p (=%s) must be < %s"%(p, MAX_MODULUS)
2.48 - self.gather = MOD_INT_OVERFLOW/(p*p)
2.49 + self.gather = MOD_INT_OVERFLOW/<mod_int>(p*p)
2.50
2.51 _sig_on
2.52 self._entries = <mod_int *> sage_malloc(sizeof(mod_int)*self._nrows*self._ncols)
2.53 @@ -189,11 +199,17 @@
2.54 sage_free(self._matrix)
2.55
2.56 def __init__(self, parent, entries, copy, coerce):
2.57 + """
2.58 + TESTS:
2.59 + sage: matrix(GF(7), 2, 2, [-1, int(-2), GF(7)(-3), 1/4])
2.60 + [6 5]
2.61 + [4 2]
2.62 + """
2.63
2.64 cdef mod_int e
2.65 cdef Py_ssize_t i, j, k
2.66 cdef mod_int *v
2.67 - cdef mod_int p
2.68 + cdef long p
2.69 p = self._base_ring.characteristic()
2.70
2.71 R = self.base_ring()
2.72 @@ -220,25 +236,32 @@
2.73
2.74 k = 0
2.75 cdef mod_int n
2.76 + cdef long tmp
2.77
2.78 - if coerce:
2.79 - for i from 0 <= i < self._nrows:
2.80 - if PyErr_CheckSignals(): raise KeyboardInterrupt
2.81 - v = self._matrix[i]
2.82 - for j from 0 <= j < self._ncols:
2.83 + for i from 0 <= i < self._nrows:
2.84 + if PyErr_CheckSignals(): raise KeyboardInterrupt
2.85 + v = self._matrix[i]
2.86 + for j from 0 <= j < self._ncols:
2.87 + x = entries[k]
2.88 + if PY_TYPE_CHECK_EXACT(x, int):
2.89 + tmp = (<long>x) % p
2.90 + v[j] = tmp + (tmp<0)*p
2.91 + elif PY_TYPE_CHECK_EXACT(x, IntegerMod_int) and (<IntegerMod_int>x)._parent is R:
2.92 + v[j] = (<IntegerMod_int>x).ivalue
2.93 + elif PY_TYPE_CHECK_EXACT(x, Integer):
2.94 + if coerce:
2.95 + v[j] = mpz_fdiv_ui((<Integer>x).value, p)
2.96 + else:
2.97 + v[j] = mpz_get_ui((<Integer>x).value)
2.98 + elif coerce:
2.99 v[j] = R(entries[k])
2.100 - k = k + 1
2.101 - else:
2.102 - for i from 0 <= i < self._nrows:
2.103 - if PyErr_CheckSignals(): raise KeyboardInterrupt
2.104 - v = self._matrix[i]
2.105 - for j from 0 <= j < self._ncols:
2.106 - v[j] = int(entries[k])
2.107 - k = k + 1
2.108 -
2.109 + else:
2.110 + v[j] = <long>(entries[k])
2.111 + k = k + 1
2.112
2.113 def __richcmp__(Matrix_modn_dense self, right, int op): # always need for mysterious reasons.
2.114 return self._richcmp(right, op)
2.115 +
2.116 def __hash__(self):
2.117 return self._hash()
2.118
2.119 @@ -256,11 +279,11 @@
2.120 # LEVEL 2 functionality
2.121 # * cdef _pickle
2.122 # * cdef _unpickle
2.123 - # * cdef _add_c_impl
2.124 + # x * cdef _add_c_impl
2.125 # * cdef _mul_c_impl
2.126 - # * cdef _matrix_times_matrix_c_impl
2.127 - # * cdef _cmp_c_impl
2.128 - # * __neg__
2.129 + # x * cdef _matrix_times_matrix_c_impl
2.130 + # x * cdef _cmp_c_impl
2.131 + # x * __neg__
2.132 # * __invert__
2.133 # x * __copy__
2.134 # * _multiply_classical
2.135 @@ -272,11 +295,269 @@
2.136 # cdef ModuleElement _add_c_impl(self, ModuleElement right):
2.137 # cdef _mul_c_impl(self, Matrix right):
2.138 # cdef int _cmp_c_impl(self, Matrix right) except -2:
2.139 - # def __neg__(self):
2.140 # def __invert__(self):
2.141 # def _multiply_classical(left, matrix.Matrix _right):
2.142 # def _list(self):
2.143 # def _dict(self):
2.144 +
2.145 + def _pickle(self):
2.146 + """
2.147 + Utility function for pickling.
2.148 +
2.149 + If the prime is small enough to fit in a byte, then it is stored as a
2.150 + contiguous string of bytes (to save space). Otherwise, memcpy is used
2.151 + to copy the raw data in the platforms native format. Any byte-swapping
2.152 + or word size difference is taken care of in unpickling (optimizing for
2.153 + unpickling on the same platform things were pickled on).
2.154 +
2.155 + The upcoming buffer protocol would be useful to not have to do any copying.
2.156 +
2.157 + EXAMPLES:
2.158 + sage: m = matrix(Integers(128), 3, 3, [ord(c) for c in "Hi there!"]); m
2.159 + [ 72 105 32]
2.160 + [116 104 101]
2.161 + [114 101 33]
2.162 + sage: m._pickle()
2.163 + ((1, True, 'Hi there!'), 10)
2.164 + """
2.165 + cdef Py_ssize_t i, j
2.166 + cdef unsigned char* ss
2.167 + cdef unsigned char* row_ss
2.168 + cdef long word_size
2.169 + cdef mod_int *row_self
2.170 +
2.171 + if self.p <= 0xFF:
2.172 + word_size = sizeof(char)
2.173 + else:
2.174 + word_size = sizeof(mod_int)
2.175 +
2.176 + s = PyString_FromStringAndSize(NULL, word_size * self._nrows * self._ncols)
2.177 + ss = <unsigned char*><char *>s
2.178 +
2.179 + _sig_on
2.180 + if word_size == sizeof(char):
2.181 + for i from 0 <= i < self._nrows:
2.182 + row_self = self._matrix[i]
2.183 + row_ss = &ss[i*self._ncols]
2.184 + for j from 0<= j < self._ncols:
2.185 + row_ss[j] = row_self[j]
2.186 + else:
2.187 + for i from 0 <= i < self._nrows:
2.188 + memcpy(&ss[i*sizeof(mod_int)*self._ncols], self._matrix[i], sizeof(mod_int) * self._ncols)
2.189 + _sig_off
2.190 +
2.191 + return (word_size, little_endian, s), 10
2.192 +
2.193 +
2.194 + def _unpickle(self, data, int version):
2.195 + """
2.196 + TESTS:
2.197 +
2.198 + Test for char-sized modulus:
2.199 + sage: A = random_matrix(GF(7), 5, 9)
2.200 + sage: data, version = A._pickle()
2.201 + sage: B = A.parent()(0)
2.202 + sage: B._unpickle(data, version)
2.203 + sage: B == A
2.204 + True
2.205 +
2.206 + And for larger modulus:
2.207 + sage: A = random_matrix(GF(1009), 51, 5)
2.208 + sage: data, version = A._pickle()
2.209 + sage: B = A.parent()(0)
2.210 + sage: B._unpickle(data, version)
2.211 + sage: B == A
2.212 + True
2.213 +
2.214 + Now test all the bit-packing options:
2.215 + sage: A = matrix(Integers(1000), 2, 2)
2.216 + sage: A._unpickle((1, True, '\x01\x02\xFF\x00'), 10)
2.217 + sage: A
2.218 + [ 1 2]
2.219 + [255 0]
2.220 +
2.221 + sage: A = matrix(Integers(1000), 1, 2)
2.222 + sage: A._unpickle((4, True, '\x02\x01\x00\x00\x01\x00\x00\x00'), 10)
2.223 + sage: A
2.224 + [258 1]
2.225 + sage: A._unpickle((4, False, '\x00\x00\x02\x01\x00\x00\x01\x03'), 10)
2.226 + sage: A
2.227 + [513 259]
2.228 + sage: A._unpickle((8, True, '\x03\x01\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00'), 10)
2.229 + sage: A
2.230 + [259 5]
2.231 + sage: A._unpickle((8, False, '\x00\x00\x00\x00\x00\x00\x02\x08\x00\x00\x00\x00\x00\x00\x01\x04'), 10)
2.232 + sage: A
2.233 + [520 260]
2.234 +
2.235 + Now make sure it works in context:
2.236 + sage: A = random_matrix(Integers(33), 31, 31)
2.237 + sage: loads(dumps(A)) == A
2.238 + True
2.239 + sage: A = random_matrix(Integers(3333), 31, 31)
2.240 + sage: loads(dumps(A)) == A
2.241 + True
2.242 + """
2.243 +
2.244 + if version < 10:
2.245 + return matrix_dense.Matrix_dense._unpickle(self, data, version)
2.246 +
2.247 + cdef Py_ssize_t i, j
2.248 + cdef unsigned char* ss
2.249 + cdef unsigned char* row_ss
2.250 + cdef long word_size
2.251 + cdef mod_int *row_self
2.252 + cdef bint little_endian_data
2.253 + cdef unsigned char* udata
2.254 +
2.255 + if version == 10:
2.256 + word_size, little_endian_data, s = data
2.257 + ss = <unsigned char*><char *>s
2.258 +
2.259 + _sig_on
2.260 + if word_size == sizeof(char):
2.261 + for i from 0 <= i < self._nrows:
2.262 + row_self = self._matrix[i]
2.263 + row_ss = &ss[i*self._ncols]
2.264 + for j from 0<= j < self._ncols:
2.265 + row_self[j] = row_ss[j]
2.266 +
2.267 + elif word_size == sizeof(mod_int) and little_endian == little_endian_data:
2.268 + for i from 0 <= i < self._nrows:
2.269 + memcpy(self._matrix[i], &ss[i*sizeof(mod_int)*self._ncols], sizeof(mod_int) * self._ncols)
2.270 +
2.271 + # Note that mod_int is at least 32 bits, and never stores more than 32 bits of info
2.272 + elif little_endian_data:
2.273 + for i from 0 <= i < self._nrows:
2.274 + row_self = self._matrix[i]
2.275 + for j from 0<= j < self._ncols:
2.276 + udata = &ss[(i*self._ncols+j)*word_size]
2.277 + row_self[j] = ((udata[0]) +
2.278 + (udata[1] << 8) +
2.279 + (udata[2] << 16) +
2.280 + (udata[3] << 24))
2.281 +
2.282 + else:
2.283 + for i from 0 <= i < self._nrows:
2.284 + row_self = self._matrix[i]
2.285 + for j from 0<= j < self._ncols:
2.286 + udata = &ss[(i*self._ncols+j)*word_size]
2.287 + row_self[j] = ((udata[word_size-1]) +
2.288 + (udata[word_size-2] << 8) +
2.289 + (udata[word_size-3] << 16) +
2.290 + (udata[word_size-4] << 24))
2.291 +
2.292 + _sig_off
2.293 +
2.294 + else:
2.295 + raise RuntimeError, "unknown matrix version"
2.296 +
2.297 +
2.298 + cdef long _hash(self) except -1:
2.299 + """
2.300 + TESTS:
2.301 + sage: a = random_matrix(GF(11), 5, 5, range(25))
2.302 + sage: a.set_immutable()
2.303 + sage: hash(a) #random
2.304 + 216
2.305 + sage: b = a.change_ring(ZZ)
2.306 + sage: b.set_immutable()
2.307 + sage: hash(b) == hash(a)
2.308 + True
2.309 + """
2.310 + x = self.fetch('hash')
2.311 + if not x is None: return x
2.312 +
2.313 + if not self._mutability._is_immutable:
2.314 + raise TypeError, "mutable matrices are unhashable"
2.315 +
2.316 + cdef Py_ssize_t i
2.317 + cdef long h = 0, n = 0
2.318 +
2.319 + _sig_on
2.320 + for i from 0 <= i < self._nrows:
2.321 + for j from 0<= j < self._ncols:
2.322 + h ^= n * self._matrix[i][j]
2.323 + n += 1
2.324 + _sig_off
2.325 +
2.326 + self.cache('hash', h)
2.327 + return h
2.328 +
2.329 +
2.330 + def __neg__(self):
2.331 + """
2.332 + EXAMPLES:
2.333 + sage: m = matrix(GF(19), 3, 3, range(9)); m
2.334 + [0 1 2]
2.335 + [3 4 5]
2.336 + [6 7 8]
2.337 + sage: -m
2.338 + [19 18 17]
2.339 + [16 15 14]
2.340 + [13 12 11]
2.341 + """
2.342 + cdef Py_ssize_t i,j
2.343 + cdef Matrix_modn_dense M
2.344 + cdef mod_int p = self.p
2.345 +
2.346 + M = Matrix_modn_dense.__new__(Matrix_modn_dense, self._parent,None,None,None)
2.347 + M.p = p
2.348 +
2.349 + _sig_on
2.350 + cdef mod_int *row_self, *row_ans
2.351 + for i from 0 <= i < self._nrows:
2.352 + row_self = self._matrix[i]
2.353 + row_ans = M._matrix[i]
2.354 + for j from 0<= j < self._ncols:
2.355 + row_ans[j] = p - row_self[j]
2.356 + _sig_off
2.357 + return M
2.358 +
2.359 +
2.360 + cdef ModuleElement _lmul_c_impl(self, RingElement right):
2.361 + """
2.362 + EXAMPLES:
2.363 + sage: a = random_matrix(Integers(60), 400, 500)
2.364 + sage: 3*a + 9*a == 12*a
2.365 + True
2.366 + """
2.367 + return self._rmul_c_impl(right)
2.368 +
2.369 + cdef ModuleElement _rmul_c_impl(self, RingElement left):
2.370 + """
2.371 + EXAMPLES:
2.372 + sage: a = matrix(GF(101), 3, 3, range(9)); a
2.373 + [0 1 2]
2.374 + [3 4 5]
2.375 + [6 7 8]
2.376 + sage: a * 5
2.377 + [ 0 5 10]
2.378 + [15 20 25]
2.379 + [30 35 40]
2.380 + sage: a * 50
2.381 + [ 0 50 100]
2.382 + [ 49 99 48]
2.383 + [ 98 47 97]
2.384 + """
2.385 + cdef Py_ssize_t i,j
2.386 + cdef Matrix_modn_dense M
2.387 + cdef mod_int p = self.p
2.388 + cdef mod_int a = left
2.389 +
2.390 + M = Matrix_modn_dense.__new__(Matrix_modn_dense, self._parent,None,None,None)
2.391 + M.p = p
2.392 +
2.393 + _sig_on
2.394 + cdef mod_int *row_self, *row_ans
2.395 + for i from 0 <= i < self._nrows:
2.396 + row_self = self._matrix[i]
2.397 + row_ans = M._matrix[i]
2.398 + for j from 0<= j < self._ncols:
2.399 + row_ans[j] = (a*row_self[j]) % p
2.400 + _sig_off
2.401 + return M
2.402 +
2.403 def __copy__(self):
2.404 cdef Matrix_modn_dense A
2.405 A = Matrix_modn_dense.__new__(Matrix_modn_dense, self._parent,
2.406 @@ -312,11 +593,12 @@
2.407 """
2.408
2.409 cdef Py_ssize_t i,j
2.410 - cdef long k
2.411 - cdef Matrix_modn_dense M
2.412 + cdef mod_int k, p
2.413 + cdef Matrix_modn_dense M
2.414
2.415 M = Matrix_modn_dense.__new__(Matrix_modn_dense, self._parent,None,None,None)
2.416 Matrix.__init__(M, self._parent)
2.417 + p = self.p
2.418
2.419 _sig_on
2.420 cdef mod_int *row_self, *row_right, *row_ans
2.421 @@ -325,15 +607,92 @@
2.422 row_right = (<Matrix_modn_dense> right)._matrix[i]
2.423 row_ans = M._matrix[i]
2.424 for j from 0<= j < self._ncols:
2.425 - k = (row_self[j]+row_right[j])
2.426 - # division is really slow, so we normalize only when it is necessary
2.427 - if k>=self.p:
2.428 - row_ans[j]=k-self.p
2.429 - else:
2.430 - row_ans[j]=k
2.431 -
2.432 + k = row_self[j] + row_right[j]
2.433 + row_ans[j] = k - (k >= p) * p
2.434 _sig_off
2.435 - return M;
2.436 + return M
2.437 +
2.438 +
2.439 + cdef ModuleElement _sub_c_impl(self, ModuleElement right):
2.440 + """
2.441 + Subtract two dense matrices over Z/nZ
2.442 +
2.443 + EXAMPLES:
2.444 + sage: a = matrix(GF(11), 3, 3, range(9)); a
2.445 + [0 1 2]
2.446 + [3 4 5]
2.447 + [6 7 8]
2.448 + sage: a - 4
2.449 + [7 1 2]
2.450 + [3 0 5]
2.451 + [6 7 4]
2.452 + sage: a - matrix(GF(11), 3, 3, range(1, 19, 2))
2.453 + [10 9 8]
2.454 + [ 7 6 5]
2.455 + [ 4 3 2]
2.456 + """
2.457 +
2.458 + cdef Py_ssize_t i,j
2.459 + cdef mod_int k, p
2.460 + cdef Matrix_modn_dense M
2.461 +
2.462 + M = Matrix_modn_dense.__new__(Matrix_modn_dense, self._parent,None,None,None)
2.463 + Matrix.__init__(M, self._parent)
2.464 + p = self.p
2.465 +
2.466 + _sig_on
2.467 + cdef mod_int *row_self, *row_right, *row_ans
2.468 + for i from 0 <= i < self._nrows:
2.469 + row_self = self._matrix[i]
2.470 + row_right = (<Matrix_modn_dense> right)._matrix[i]
2.471 + row_ans = M._matrix[i]
2.472 + for j from 0<= j < self._ncols:
2.473 + k = p + row_self[j] - row_right[j]
2.474 + row_ans[j] = k - (k >= p) * p
2.475 + _sig_off
2.476 + return M
2.477 +
2.478 +
2.479 + cdef int _cmp_c_impl(self, Element right) except -2:
2.480 + """
2.481 + Compare two dense matrices over Z/nZ
2.482 +
2.483 + EXAMPLES:
2.484 + sage: a = matrix(GF(17), 4, range(3, 83, 5)); a
2.485 + [ 3 8 13 1]
2.486 + [ 6 11 16 4]
2.487 + [ 9 14 2 7]
2.488 + [12 0 5 10]
2.489 + sage: a == a
2.490 + True
2.491 + sage: b = a - 3; b
2.492 + [ 0 8 13 1]
2.493 + [ 6 8 16 4]
2.494 + [ 9 14 16 7]
2.495 + [12 0 5 7]
2.496 + sage: b < a
2.497 + True
2.498 + sage: b > a
2.499 + False
2.500 + sage: b == a
2.501 + False
2.502 + sage: b + 3 == a
2.503 + True
2.504 + """
2.505 +
2.506 + cdef Py_ssize_t i
2.507 + cdef int cmp
2.508 +
2.509 + _sig_on
2.510 + cdef mod_int *row_self, *row_right
2.511 + for i from 0 <= i < self._nrows:
2.512 + row_self = self._matrix[i]
2.513 + row_right = (<Matrix_modn_dense> right)._matrix[i]
2.514 + cmp = memcmp(row_self, row_right, sizeof(mod_int)*self._ncols)
2.515 + if cmp:
2.516 + return cmp
2.517 + _sig_off
2.518 + return 0
2.519
2.520
2.521 cdef Matrix _matrix_times_matrix_c_impl(self, Matrix right):
2.522 @@ -399,7 +758,7 @@
2.523
2.524 ########################################################################
2.525 # LEVEL 3 functionality (Optional)
2.526 - # * cdef _sub_c_impl
2.527 + # x * cdef _sub_c_impl
2.528 # * __deepcopy__
2.529 # * __invert__
2.530 # * Matrix windows -- only if you need strassen for that base
3.1 --- a/sage/matrix/matrix_window_modn_dense.pyx Sat Aug 02 16:50:33 2008 -0700
3.2 +++ b/sage/matrix/matrix_window_modn_dense.pyx Thu Aug 07 19:39:23 2008 -0700
3.3 @@ -1,3 +1,12 @@
3.4 +"""
3.5 +TESTS:
3.6 + sage: a = random_matrix(GF(11), 30, 40)
3.7 + sage: b = random_matrix(GF(11), 40, 53)
3.8 + sage: a._multiply_strassen(b, 7) == a.change_ring(ZZ)*b.change_ring(ZZ)
3.9 + True
3.10 +"""
3.11 +
3.12 +
3.13 include "../ext/cdefs.pxi"
3.14 include "../ext/stdsage.pxi"
3.15
3.16 @@ -40,7 +49,7 @@
3.17
3.18 cdef add(self, MatrixWindow A):
3.19 cdef Py_ssize_t i, j
3.20 - cdef mod_int p
3.21 + cdef mod_int k, p
3.22 cdef mod_int* self_row
3.23 cdef mod_int* A_row
3.24 if self._nrows != A._nrows or self._ncols != A._ncols:
3.25 @@ -50,13 +59,12 @@
3.26 self_row = ( <Matrix_modn_dense> self._matrix )._matrix[i + self._row] + self._col
3.27 A_row = ( <Matrix_modn_dense> A._matrix )._matrix[i + A._row] + A._col
3.28 for j from 0 <= j < self._ncols:
3.29 - self_row[j] += A_row[j]
3.30 - if self_row[j] >= p:
3.31 - self_row[j] -= p
3.32 + k = self_row[j] + A_row[j]
3.33 + self_row[j] = k - (k >= p) * p
3.34
3.35 cdef subtract(self, MatrixWindow A):
3.36 cdef Py_ssize_t i, j
3.37 - cdef mod_int p
3.38 + cdef mod_int k, p
3.39 cdef mod_int* self_row
3.40 cdef mod_int* A_row
3.41 if self._nrows != A._nrows or self._ncols != A._ncols:
3.42 @@ -66,14 +74,12 @@
3.43 self_row = ( <Matrix_modn_dense> self._matrix )._matrix[i + self._row] + self._col
3.44 A_row = ( <Matrix_modn_dense> A._matrix )._matrix[i + A._row] + A._col
3.45 for j from 0 <= j < self._ncols:
3.46 - if self_row[j] >= A_row[j]:
3.47 - self_row[j] -= A_row[j]
3.48 - else:
3.49 - self_row[j] += p - A_row[j]
3.50 + k = p + self_row[j] - A_row[j]
3.51 + self_row[j] = k - (k >= p) * p
3.52
3.53 cdef set_to_sum(self, MatrixWindow A, MatrixWindow B):
3.54 cdef Py_ssize_t i, j
3.55 - cdef mod_int p
3.56 + cdef mod_int k, p
3.57 cdef mod_int* self_row
3.58 cdef mod_int* A_row
3.59 cdef mod_int* B_row
3.60 @@ -87,13 +93,12 @@
3.61 A_row = ( <Matrix_modn_dense> A._matrix )._matrix[i + A._row] + A._col
3.62 B_row = ( <Matrix_modn_dense> B._matrix )._matrix[i + B._row] + B._col
3.63 for j from 0 <= j < self._ncols:
3.64 - self_row[j] = A_row[j] + B_row[j]
3.65 - if self_row[j] >= p:
3.66 - self_row[j] -= p
3.67 + k = A_row[j] + B_row[j]
3.68 + self_row[j] = k - (k >= p) * p
3.69
3.70 cdef set_to_diff(self, MatrixWindow A, MatrixWindow B):
3.71 cdef Py_ssize_t i, j
3.72 - cdef mod_int p
3.73 + cdef mod_int k, p
3.74 cdef mod_int* self_row
3.75 cdef mod_int* A_row
3.76 cdef mod_int* B_row
3.77 @@ -107,10 +112,8 @@
3.78 A_row = ( <Matrix_modn_dense> A._matrix )._matrix[i + A._row] + A._col
3.79 B_row = ( <Matrix_modn_dense> B._matrix )._matrix[i + B._row] + B._col
3.80 for j from 0 <= j < self._ncols:
3.81 - if A_row[j] > B_row[j]:
3.82 - self_row[j] = A_row[j] - B_row[j]
3.83 - else:
3.84 - self_row[j] = p + A_row[j] - B_row[j]
3.85 + k = p + A_row[j] - B_row[j]
3.86 + self_row[j] = k - (k >= p) * p
3.87
3.88 cdef set_to_prod(self, MatrixWindow A, MatrixWindow B):
3.89 cdef Py_ssize_t i, j, k, gather, top, A_ncols
3.90 @@ -133,12 +136,12 @@
3.91 self_row = ( <Matrix_modn_dense> self._matrix )._matrix[i + self._row] + self._col
3.92 A_row = ( <Matrix_modn_dense> A._matrix )._matrix[i + A._row] + A._col
3.93 k = 0
3.94 - for j from 1 <= j < B._ncols:
3.95 + for j from 0 <= j < B._ncols:
3.96 self_row[j] = (A_row[k] * B_matrix_off[k][B._col+j]) % p
3.97 for k from 1 <= k < A._ncols:
3.98 A_i_k = A_row[k]
3.99 B_row_k = B_matrix_off[k] + B._col
3.100 - for j from 1 <= j < B._ncols:
3.101 + for j from 0 <= j < B._ncols:
3.102 self_row[j] = (self_row[j] + A_i_k * B_row_k[j]) % p
3.103
3.104 else:
