Line data Source code
1 : #include "tommath_private.h"
2 : #ifdef BN_S_MP_SQR_FAST_C
3 : /* LibTomMath, multiple-precision integer library -- Tom St Denis */
4 : /* SPDX-License-Identifier: Unlicense */
5 :
6 : /* the jist of squaring...
7 : * you do like mult except the offset of the tmpx [one that
8 : * starts closer to zero] can't equal the offset of tmpy.
9 : * So basically you set up iy like before then you min it with
10 : * (ty-tx) so that it never happens. You double all those
11 : * you add in the inner loop
12 :
13 : After that loop you do the squares and add them in.
14 : */
15 :
16 236608 : mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b)
17 : {
18 : int olduse, pa, ix, iz;
19 : mp_digit W[MP_WARRAY], *tmpx;
20 : mp_word W1;
21 : mp_err err;
22 :
23 : /* grow the destination as required */
24 236608 : pa = a->used + a->used;
25 236608 : if (b->alloc < pa) {
26 535 : if ((err = mp_grow(b, pa)) != MP_OKAY) {
27 0 : return err;
28 : }
29 : }
30 :
31 : /* number of output digits to produce */
32 236608 : W1 = 0;
33 14193344 : for (ix = 0; ix < pa; ix++) {
34 : int tx, ty, iy;
35 : mp_word _W;
36 : mp_digit *tmpy;
37 :
38 : /* clear counter */
39 13956736 : _W = 0;
40 :
41 : /* get offsets into the two bignums */
42 13956736 : ty = MP_MIN(a->used-1, ix);
43 13956736 : tx = ix - ty;
44 :
45 : /* setup temp aliases */
46 13956736 : tmpx = a->dp + tx;
47 13956736 : tmpy = a->dp + ty;
48 :
49 : /* this is the number of times the loop will iterrate, essentially
50 : while (tx++ < a->used && ty-- >= 0) { ... }
51 : */
52 13956736 : iy = MP_MIN(a->used-tx, ty+1);
53 :
54 : /* now for squaring tx can never equal ty
55 : * we halve the distance since they approach at a rate of 2x
56 : * and we have to round because odd cases need to be executed
57 : */
58 13956736 : iy = MP_MIN(iy, ((ty-tx)+1)>>1);
59 :
60 : /* execute loop */
61 122159713 : for (iz = 0; iz < iy; iz++) {
62 108202977 : _W += (mp_word)*tmpx++ * (mp_word)*tmpy--;
63 : }
64 :
65 : /* double the inner product and add carry */
66 13956736 : _W = _W + _W + W1;
67 :
68 : /* even columns have the square term in them */
69 13956736 : if (((unsigned)ix & 1u) == 0u) {
70 6978368 : _W += (mp_word)a->dp[ix>>1] * (mp_word)a->dp[ix>>1];
71 : }
72 :
73 : /* store it */
74 13956736 : W[ix] = (mp_digit)_W & MP_MASK;
75 :
76 : /* make next carry */
77 13956736 : W1 = _W >> (mp_word)MP_DIGIT_BIT;
78 : }
79 :
80 : /* setup dest */
81 236608 : olduse = b->used;
82 236608 : b->used = a->used+a->used;
83 :
84 : {
85 : mp_digit *tmpb;
86 236608 : tmpb = b->dp;
87 14193344 : for (ix = 0; ix < pa; ix++) {
88 13956736 : *tmpb++ = W[ix] & MP_MASK;
89 : }
90 :
91 : /* clear unused digits [that existed in the old copy of c] */
92 236608 : MP_ZERO_DIGITS(tmpb, olduse - ix);
93 : }
94 236608 : mp_clamp(b);
95 236608 : return MP_OKAY;
96 : }
97 : #endif
|