LCOV - code coverage report
Current view: top level - third_party/openssl/crypto/bn - bn_asm.c (source / functions) Hit Total Coverage
Test: tmp.zDYK9MVh93 Lines: 309 354 87.3 %
Date: 2015-10-10 Functions: 9 11 81.8 %

          Line data    Source code
       1             : /* crypto/bn/bn_asm.c */
       2             : /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
       3             :  * All rights reserved.
       4             :  *
       5             :  * This package is an SSL implementation written
       6             :  * by Eric Young (eay@cryptsoft.com).
       7             :  * The implementation was written so as to conform with Netscapes SSL.
       8             :  *
       9             :  * This library is free for commercial and non-commercial use as long as
      10             :  * the following conditions are aheared to.  The following conditions
      11             :  * apply to all code found in this distribution, be it the RC4, RSA,
      12             :  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
      13             :  * included with this distribution is covered by the same copyright terms
      14             :  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
      15             :  *
      16             :  * Copyright remains Eric Young's, and as such any Copyright notices in
      17             :  * the code are not to be removed.
      18             :  * If this package is used in a product, Eric Young should be given attribution
      19             :  * as the author of the parts of the library used.
      20             :  * This can be in the form of a textual message at program startup or
      21             :  * in documentation (online or textual) provided with the package.
      22             :  *
      23             :  * Redistribution and use in source and binary forms, with or without
      24             :  * modification, are permitted provided that the following conditions
      25             :  * are met:
      26             :  * 1. Redistributions of source code must retain the copyright
      27             :  *    notice, this list of conditions and the following disclaimer.
      28             :  * 2. Redistributions in binary form must reproduce the above copyright
      29             :  *    notice, this list of conditions and the following disclaimer in the
      30             :  *    documentation and/or other materials provided with the distribution.
      31             :  * 3. All advertising materials mentioning features or use of this software
      32             :  *    must display the following acknowledgement:
      33             :  *    "This product includes cryptographic software written by
      34             :  *     Eric Young (eay@cryptsoft.com)"
      35             :  *    The word 'cryptographic' can be left out if the rouines from the library
      36             :  *    being used are not cryptographic related :-).
      37             :  * 4. If you include any Windows specific code (or a derivative thereof) from
      38             :  *    the apps directory (application code) you must include an acknowledgement:
      39             :  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
      40             :  *
      41             :  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
      42             :  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      43             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      44             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
      45             :  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      46             :  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      47             :  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      48             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      49             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      50             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      51             :  * SUCH DAMAGE.
      52             :  *
      53             :  * The licence and distribution terms for any publically available version or
      54             :  * derivative of this code cannot be changed.  i.e. this code cannot simply be
      55             :  * copied and put under another distribution licence
      56             :  * [including the GNU Public Licence.]
      57             :  */
      58             : 
      59             : #ifndef BN_DEBUG
      60             : # undef NDEBUG                  /* avoid conflicting definitions */
      61             : # define NDEBUG
      62             : #endif
      63             : 
      64             : #include <stdio.h>
      65             : #include <assert.h>
      66             : #include "cryptlib.h"
      67             : #include "bn_lcl.h"
      68             : 
      69             : #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
      70             : 
      71             : BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
      72             :                           BN_ULONG w)
      73             : {
      74             :     BN_ULONG c1 = 0;
      75             : 
      76             :     assert(num >= 0);
      77             :     if (num <= 0)
      78             :         return (c1);
      79             : 
      80             : # ifndef OPENSSL_SMALL_FOOTPRINT
      81             :     while (num & ~3) {
      82             :         mul_add(rp[0], ap[0], w, c1);
      83             :         mul_add(rp[1], ap[1], w, c1);
      84             :         mul_add(rp[2], ap[2], w, c1);
      85             :         mul_add(rp[3], ap[3], w, c1);
      86             :         ap += 4;
      87             :         rp += 4;
      88             :         num -= 4;
      89             :     }
      90             : # endif
      91             :     while (num) {
      92             :         mul_add(rp[0], ap[0], w, c1);
      93             :         ap++;
      94             :         rp++;
      95             :         num--;
      96             :     }
      97             : 
      98             :     return (c1);
      99             : }
     100             : 
     101             : BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
     102             : {
     103             :     BN_ULONG c1 = 0;
     104             : 
     105             :     assert(num >= 0);
     106             :     if (num <= 0)
     107             :         return (c1);
     108             : 
     109             : # ifndef OPENSSL_SMALL_FOOTPRINT
     110             :     while (num & ~3) {
     111             :         mul(rp[0], ap[0], w, c1);
     112             :         mul(rp[1], ap[1], w, c1);
     113             :         mul(rp[2], ap[2], w, c1);
     114             :         mul(rp[3], ap[3], w, c1);
     115             :         ap += 4;
     116             :         rp += 4;
     117             :         num -= 4;
     118             :     }
     119             : # endif
     120             :     while (num) {
     121             :         mul(rp[0], ap[0], w, c1);
     122             :         ap++;
     123             :         rp++;
     124             :         num--;
     125             :     }
     126             :     return (c1);
     127             : }
     128             : 
     129             : void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
     130             : {
     131             :     assert(n >= 0);
     132             :     if (n <= 0)
     133             :         return;
     134             : 
     135             : # ifndef OPENSSL_SMALL_FOOTPRINT
     136             :     while (n & ~3) {
     137             :         sqr(r[0], r[1], a[0]);
     138             :         sqr(r[2], r[3], a[1]);
     139             :         sqr(r[4], r[5], a[2]);
     140             :         sqr(r[6], r[7], a[3]);
     141             :         a += 4;
     142             :         r += 8;
     143             :         n -= 4;
     144             :     }
     145             : # endif
     146             :     while (n) {
     147             :         sqr(r[0], r[1], a[0]);
     148             :         a++;
     149             :         r += 2;
     150             :         n--;
     151             :     }
     152             : }
     153             : 
     154             : #else                           /* !(defined(BN_LLONG) ||
     155             :                                  * defined(BN_UMULT_HIGH)) */
     156             : 
     157    14923735 : BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
     158             :                           BN_ULONG w)
     159             : {
     160             :     BN_ULONG c = 0;
     161             :     BN_ULONG bl, bh;
     162             : 
     163             :     assert(num >= 0);
     164    14923735 :     if (num <= 0)
     165             :         return ((BN_ULONG)0);
     166             : 
     167    14923735 :     bl = LBITS(w);
     168    14923735 :     bh = HBITS(w);
     169             : 
     170             : # ifndef OPENSSL_SMALL_FOOTPRINT
     171    50267349 :     while (num & ~3) {
     172    20419879 :         mul_add(rp[0], ap[0], bl, bh, c);
     173    20419879 :         mul_add(rp[1], ap[1], bl, bh, c);
     174    20419879 :         mul_add(rp[2], ap[2], bl, bh, c);
     175    20419879 :         mul_add(rp[3], ap[3], bl, bh, c);
     176    20419879 :         ap += 4;
     177    20419879 :         rp += 4;
     178    20419879 :         num -= 4;
     179             :     }
     180             : # endif
     181    14923735 :     while (num) {
     182           0 :         mul_add(rp[0], ap[0], bl, bh, c);
     183           0 :         ap++;
     184           0 :         rp++;
     185           0 :         num--;
     186             :     }
     187             :     return (c);
     188             : }
     189             : 
     190     4113527 : BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
     191             : {
     192             :     BN_ULONG carry = 0;
     193             :     BN_ULONG bl, bh;
     194             : 
     195             :     assert(num >= 0);
     196     4113527 :     if (num <= 0)
     197             :         return ((BN_ULONG)0);
     198             : 
     199     4113527 :     bl = LBITS(w);
     200     4113527 :     bh = HBITS(w);
     201             : 
     202             : # ifndef OPENSSL_SMALL_FOOTPRINT
     203    12887834 :     while (num & ~3) {
     204     4660780 :         mul(rp[0], ap[0], bl, bh, carry);
     205     4660780 :         mul(rp[1], ap[1], bl, bh, carry);
     206     4660780 :         mul(rp[2], ap[2], bl, bh, carry);
     207     4660780 :         mul(rp[3], ap[3], bl, bh, carry);
     208     4660780 :         ap += 4;
     209     4660780 :         rp += 4;
     210     4660780 :         num -= 4;
     211             :     }
     212             : # endif
     213     5053338 :     while (num) {
     214      939811 :         mul(rp[0], ap[0], bl, bh, carry);
     215      939811 :         ap++;
     216      939811 :         rp++;
     217      939811 :         num--;
     218             :     }
     219             :     return (carry);
     220             : }
     221             : 
     222        2352 : void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
     223             : {
     224             :     assert(n >= 0);
     225        2352 :     if (n <= 0)
     226        2352 :         return;
     227             : 
     228             : # ifndef OPENSSL_SMALL_FOOTPRINT
     229        2352 :     while (n & ~3) {
     230           0 :         sqr64(r[0], r[1], a[0]);
     231           0 :         sqr64(r[2], r[3], a[1]);
     232           0 :         sqr64(r[4], r[5], a[2]);
     233           0 :         sqr64(r[6], r[7], a[3]);
     234           0 :         a += 4;
     235           0 :         r += 8;
     236           0 :         n -= 4;
     237             :     }
     238             : # endif
     239        4704 :     while (n) {
     240        2352 :         sqr64(r[0], r[1], a[0]);
     241        2352 :         a++;
     242        2352 :         r += 2;
     243        2352 :         n--;
     244             :     }
     245             : }
     246             : 
     247             : #endif                          /* !(defined(BN_LLONG) ||
     248             :                                  * defined(BN_UMULT_HIGH)) */
     249             : 
     250             : #if defined(BN_LLONG) && defined(BN_DIV2W)
     251             : 
     252             : BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
     253             : {
     254             :     return ((BN_ULONG)(((((BN_ULLONG) h) << BN_BITS2) | l) / (BN_ULLONG) d));
     255             : }
     256             : 
     257             : #else
     258             : 
     259             : /* Divide h,l by d and return the result. */
     260             : /* I need to test this some more :-( */
     261      438492 : BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
     262             : {
     263             :     BN_ULONG dh, dl, q, ret = 0, th, tl, t;
     264             :     int i, count = 2;
     265             : 
     266      438492 :     if (d == 0)
     267             :         return (BN_MASK2);
     268             : 
     269      438492 :     i = BN_num_bits_word(d);
     270             :     assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
     271             : 
     272      438492 :     i = BN_BITS2 - i;
     273      438492 :     if (h >= d)
     274           0 :         h -= d;
     275             : 
     276      438492 :     if (i) {
     277           0 :         d <<= i;
     278           0 :         h = (h << i) | (l >> (BN_BITS2 - i));
     279           0 :         l <<= i;
     280             :     }
     281      438492 :     dh = (d & BN_MASK2h) >> BN_BITS4;
     282      438492 :     dl = (d & BN_MASK2l);
     283             :     for (;;) {
     284      876984 :         if ((h >> BN_BITS4) == dh)
     285             :             q = BN_MASK2l;
     286             :         else
     287      876984 :             q = h / dh;
     288             : 
     289      876984 :         th = q * dh;
     290      876984 :         tl = dl * q;
     291             :         for (;;) {
     292      892060 :             t = h - th;
     293     1778168 :             if ((t & BN_MASK2h) ||
     294      886108 :                 ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4))))
     295             :                 break;
     296       15076 :             q--;
     297       15076 :             th -= dh;
     298       15076 :             tl -= dl;
     299       15076 :         }
     300      876984 :         t = (tl >> BN_BITS4);
     301      876984 :         tl = (tl << BN_BITS4) & BN_MASK2h;
     302      876984 :         th += t;
     303             : 
     304      876984 :         if (l < tl)
     305      161253 :             th++;
     306      876984 :         l -= tl;
     307      876984 :         if (h < th) {
     308           0 :             h += d;
     309           0 :             q--;
     310             :         }
     311      876984 :         h -= th;
     312             : 
     313      876984 :         if (--count == 0)
     314             :             break;
     315             : 
     316      438492 :         ret = q << BN_BITS4;
     317      438492 :         h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
     318      438492 :         l = (l & BN_MASK2l) << BN_BITS4;
     319      438492 :     }
     320      438492 :     ret |= q;
     321      438492 :     return (ret);
     322             : }
     323             : #endif                          /* !defined(BN_LLONG) && defined(BN_DIV2W) */
     324             : 
     325             : #ifdef BN_LLONG
     326             : BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
     327             :                       int n)
     328             : {
     329             :     BN_ULLONG ll = 0;
     330             : 
     331             :     assert(n >= 0);
     332             :     if (n <= 0)
     333             :         return ((BN_ULONG)0);
     334             : 
     335             : # ifndef OPENSSL_SMALL_FOOTPRINT
     336             :     while (n & ~3) {
     337             :         ll += (BN_ULLONG) a[0] + b[0];
     338             :         r[0] = (BN_ULONG)ll & BN_MASK2;
     339             :         ll >>= BN_BITS2;
     340             :         ll += (BN_ULLONG) a[1] + b[1];
     341             :         r[1] = (BN_ULONG)ll & BN_MASK2;
     342             :         ll >>= BN_BITS2;
     343             :         ll += (BN_ULLONG) a[2] + b[2];
     344             :         r[2] = (BN_ULONG)ll & BN_MASK2;
     345             :         ll >>= BN_BITS2;
     346             :         ll += (BN_ULLONG) a[3] + b[3];
     347             :         r[3] = (BN_ULONG)ll & BN_MASK2;
     348             :         ll >>= BN_BITS2;
     349             :         a += 4;
     350             :         b += 4;
     351             :         r += 4;
     352             :         n -= 4;
     353             :     }
     354             : # endif
     355             :     while (n) {
     356             :         ll += (BN_ULLONG) a[0] + b[0];
     357             :         r[0] = (BN_ULONG)ll & BN_MASK2;
     358             :         ll >>= BN_BITS2;
     359             :         a++;
     360             :         b++;
     361             :         r++;
     362             :         n--;
     363             :     }
     364             :     return ((BN_ULONG)ll);
     365             : }
     366             : #else                           /* !BN_LLONG */
     367     6945203 : BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
     368             :                       int n)
     369             : {
     370             :     BN_ULONG c, l, t;
     371             : 
     372             :     assert(n >= 0);
     373     6945203 :     if (n <= 0)
     374             :         return ((BN_ULONG)0);
     375             : 
     376             :     c = 0;
     377             : # ifndef OPENSSL_SMALL_FOOTPRINT
     378    13890946 :     while (n & ~3) {
     379     6953255 :         t = a[0];
     380     6953255 :         t = (t + c) & BN_MASK2;
     381     6953255 :         c = (t < c);
     382     6953255 :         l = (t + b[0]) & BN_MASK2;
     383     6953255 :         c += (l < t);
     384     6953255 :         r[0] = l;
     385     6953255 :         t = a[1];
     386     6953255 :         t = (t + c) & BN_MASK2;
     387     6953255 :         c = (t < c);
     388     6953255 :         l = (t + b[1]) & BN_MASK2;
     389     6953255 :         c += (l < t);
     390     6953255 :         r[1] = l;
     391     6953255 :         t = a[2];
     392     6953255 :         t = (t + c) & BN_MASK2;
     393     6953255 :         c = (t < c);
     394     6953255 :         l = (t + b[2]) & BN_MASK2;
     395     6953255 :         c += (l < t);
     396     6953255 :         r[2] = l;
     397     6953255 :         t = a[3];
     398     6953255 :         t = (t + c) & BN_MASK2;
     399     6953255 :         c = (t < c);
     400     6953255 :         l = (t + b[3]) & BN_MASK2;
     401     6953255 :         c += (l < t);
     402     6953255 :         r[3] = l;
     403     6953255 :         a += 4;
     404     6953255 :         b += 4;
     405     6953255 :         r += 4;
     406     6953255 :         n -= 4;
     407             :     }
     408             : # endif
     409     7762131 :     while (n) {
     410      824440 :         t = a[0];
     411      824440 :         t = (t + c) & BN_MASK2;
     412      824440 :         c = (t < c);
     413      824440 :         l = (t + b[0]) & BN_MASK2;
     414      824440 :         c += (l < t);
     415      824440 :         r[0] = l;
     416      824440 :         a++;
     417      824440 :         b++;
     418      824440 :         r++;
     419      824440 :         n--;
     420             :     }
     421             :     return ((BN_ULONG)c);
     422             : }
     423             : #endif                          /* !BN_LLONG */
     424             : 
     425     8210651 : BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
     426             :                       int n)
     427             : {
     428             :     BN_ULONG t1, t2;
     429             :     int c = 0;
     430             : 
     431             :     assert(n >= 0);
     432     8210651 :     if (n <= 0)
     433             :         return ((BN_ULONG)0);
     434             : 
     435             : #ifndef OPENSSL_SMALL_FOOTPRINT
     436    17620434 :     while (n & ~3) {
     437     9409783 :         t1 = a[0];
     438     9409783 :         t2 = b[0];
     439     9409783 :         r[0] = (t1 - t2 - c) & BN_MASK2;
     440     9409783 :         if (t1 != t2)
     441     9366578 :             c = (t1 < t2);
     442     9409783 :         t1 = a[1];
     443     9409783 :         t2 = b[1];
     444     9409783 :         r[1] = (t1 - t2 - c) & BN_MASK2;
     445     9409783 :         if (t1 != t2)
     446     9409740 :             c = (t1 < t2);
     447     9409783 :         t1 = a[2];
     448     9409783 :         t2 = b[2];
     449     9409783 :         r[2] = (t1 - t2 - c) & BN_MASK2;
     450     9409783 :         if (t1 != t2)
     451     9407328 :             c = (t1 < t2);
     452     9409783 :         t1 = a[3];
     453     9409783 :         t2 = b[3];
     454     9409783 :         r[3] = (t1 - t2 - c) & BN_MASK2;
     455     9409783 :         if (t1 != t2)
     456     9336473 :             c = (t1 < t2);
     457     9409783 :         a += 4;
     458     9409783 :         b += 4;
     459     9409783 :         r += 4;
     460     9409783 :         n -= 4;
     461             :     }
     462             : #endif
     463     8909877 :     while (n) {
     464      699226 :         t1 = a[0];
     465      699226 :         t2 = b[0];
     466      699226 :         r[0] = (t1 - t2 - c) & BN_MASK2;
     467      699226 :         if (t1 != t2)
     468      437005 :             c = (t1 < t2);
     469      699226 :         a++;
     470      699226 :         b++;
     471      699226 :         r++;
     472      699226 :         n--;
     473             :     }
     474     8210651 :     return (c);
     475             : }
     476             : 
     477             : #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
     478             : 
     479             : # undef bn_mul_comba8
     480             : # undef bn_mul_comba4
     481             : # undef bn_sqr_comba8
     482             : # undef bn_sqr_comba4
     483             : 
     484             : /* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
     485             : /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
     486             : /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
     487             : /*
     488             :  * sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number
     489             :  * c=(c2,c1,c0)
     490             :  */
     491             : 
     492             : # ifdef BN_LLONG
     493             : /*
     494             :  * Keep in mind that additions to multiplication result can not
     495             :  * overflow, because its high half cannot be all-ones.
     496             :  */
     497             : #  define mul_add_c(a,b,c0,c1,c2)       do {    \
     498             :         BN_ULONG hi;                            \
     499             :         BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
     500             :         t += c0;                /* no carry */  \
     501             :         c0 = (BN_ULONG)Lw(t);                   \
     502             :         hi = (BN_ULONG)Hw(t);                   \
     503             :         c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
     504             :         } while(0)
     505             : 
     506             : #  define mul_add_c2(a,b,c0,c1,c2)      do {    \
     507             :         BN_ULONG hi;                            \
     508             :         BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
     509             :         BN_ULLONG tt = t+c0;    /* no carry */  \
     510             :         c0 = (BN_ULONG)Lw(tt);                  \
     511             :         hi = (BN_ULONG)Hw(tt);                  \
     512             :         c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
     513             :         t += c0;                /* no carry */  \
     514             :         c0 = (BN_ULONG)Lw(t);                   \
     515             :         hi = (BN_ULONG)Hw(t);                   \
     516             :         c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
     517             :         } while(0)
     518             : 
     519             : #  define sqr_add_c(a,i,c0,c1,c2)       do {    \
     520             :         BN_ULONG hi;                            \
     521             :         BN_ULLONG t = (BN_ULLONG)a[i]*a[i];     \
     522             :         t += c0;                /* no carry */  \
     523             :         c0 = (BN_ULONG)Lw(t);                   \
     524             :         hi = (BN_ULONG)Hw(t);                   \
     525             :         c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
     526             :         } while(0)
     527             : 
     528             : #  define sqr_add_c2(a,i,j,c0,c1,c2) \
     529             :         mul_add_c2((a)[i],(a)[j],c0,c1,c2)
     530             : 
     531             : # elif defined(BN_UMULT_LOHI)
     532             : /*
     533             :  * Keep in mind that additions to hi can not overflow, because
     534             :  * the high word of a multiplication result cannot be all-ones.
     535             :  */
     536             : #  define mul_add_c(a,b,c0,c1,c2)       do {    \
     537             :         BN_ULONG ta = (a), tb = (b);            \
     538             :         BN_ULONG lo, hi;                        \
     539             :         BN_UMULT_LOHI(lo,hi,ta,tb);             \
     540             :         c0 += lo; hi += (c0<lo)?1:0;            \
     541             :         c1 += hi; c2 += (c1<hi)?1:0;            \
     542             :         } while(0)
     543             : 
     544             : #  define mul_add_c2(a,b,c0,c1,c2)      do {    \
     545             :         BN_ULONG ta = (a), tb = (b);            \
     546             :         BN_ULONG lo, hi, tt;                    \
     547             :         BN_UMULT_LOHI(lo,hi,ta,tb);             \
     548             :         c0 += lo; tt = hi+((c0<lo)?1:0);        \
     549             :         c1 += tt; c2 += (c1<tt)?1:0;            \
     550             :         c0 += lo; hi += (c0<lo)?1:0;            \
     551             :         c1 += hi; c2 += (c1<hi)?1:0;            \
     552             :         } while(0)
     553             : 
     554             : #  define sqr_add_c(a,i,c0,c1,c2)       do {    \
     555             :         BN_ULONG ta = (a)[i];                   \
     556             :         BN_ULONG lo, hi;                        \
     557             :         BN_UMULT_LOHI(lo,hi,ta,ta);             \
     558             :         c0 += lo; hi += (c0<lo)?1:0;            \
     559             :         c1 += hi; c2 += (c1<hi)?1:0;            \
     560             :         } while(0)
     561             : 
     562             : #  define sqr_add_c2(a,i,j,c0,c1,c2)    \
     563             :         mul_add_c2((a)[i],(a)[j],c0,c1,c2)
     564             : 
     565             : # elif defined(BN_UMULT_HIGH)
     566             : /*
     567             :  * Keep in mind that additions to hi can not overflow, because
     568             :  * the high word of a multiplication result cannot be all-ones.
     569             :  */
     570             : #  define mul_add_c(a,b,c0,c1,c2)       do {    \
     571             :         BN_ULONG ta = (a), tb = (b);            \
     572             :         BN_ULONG lo = ta * tb;                  \
     573             :         BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
     574             :         c0 += lo; hi += (c0<lo)?1:0;            \
     575             :         c1 += hi; c2 += (c1<hi)?1:0;            \
     576             :         } while(0)
     577             : 
     578             : #  define mul_add_c2(a,b,c0,c1,c2)      do {    \
     579             :         BN_ULONG ta = (a), tb = (b), tt;        \
     580             :         BN_ULONG lo = ta * tb;                  \
     581             :         BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
     582             :         c0 += lo; tt = hi + ((c0<lo)?1:0);      \
     583             :         c1 += tt; c2 += (c1<tt)?1:0;            \
     584             :         c0 += lo; hi += (c0<lo)?1:0;            \
     585             :         c1 += hi; c2 += (c1<hi)?1:0;            \
     586             :         } while(0)
     587             : 
     588             : #  define sqr_add_c(a,i,c0,c1,c2)       do {    \
     589             :         BN_ULONG ta = (a)[i];                   \
     590             :         BN_ULONG lo = ta * ta;                  \
     591             :         BN_ULONG hi = BN_UMULT_HIGH(ta,ta);     \
     592             :         c0 += lo; hi += (c0<lo)?1:0;            \
     593             :         c1 += hi; c2 += (c1<hi)?1:0;            \
     594             :         } while(0)
     595             : 
     596             : #  define sqr_add_c2(a,i,j,c0,c1,c2)      \
     597             :         mul_add_c2((a)[i],(a)[j],c0,c1,c2)
     598             : 
     599             : # else                          /* !BN_LLONG */
     600             : /*
     601             :  * Keep in mind that additions to hi can not overflow, because
     602             :  * the high word of a multiplication result cannot be all-ones.
     603             :  */
     604             : #  define mul_add_c(a,b,c0,c1,c2)       do {    \
     605             :         BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
     606             :         BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
     607             :         mul64(lo,hi,bl,bh);                     \
     608             :         c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
     609             :         c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
     610             :         } while(0)
     611             : 
     612             : #  define mul_add_c2(a,b,c0,c1,c2)      do {    \
     613             :         BN_ULONG tt;                            \
     614             :         BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
     615             :         BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
     616             :         mul64(lo,hi,bl,bh);                     \
     617             :         tt = hi;                                \
     618             :         c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
     619             :         c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
     620             :         c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
     621             :         c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
     622             :         } while(0)
     623             : 
     624             : #  define sqr_add_c(a,i,c0,c1,c2)       do {    \
     625             :         BN_ULONG lo, hi;                        \
     626             :         sqr64(lo,hi,(a)[i]);                    \
     627             :         c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
     628             :         c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
     629             :         } while(0)
     630             : 
     631             : #  define sqr_add_c2(a,i,j,c0,c1,c2) \
     632             :         mul_add_c2((a)[i],(a)[j],c0,c1,c2)
     633             : # endif                         /* !BN_LLONG */
     634             : 
     635      120131 : void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
     636             : {
     637             :     BN_ULONG c1, c2, c3;
     638             : 
     639             :     c1 = 0;
     640             :     c2 = 0;
     641             :     c3 = 0;
     642      120131 :     mul_add_c(a[0], b[0], c1, c2, c3);
     643      120131 :     r[0] = c1;
     644             :     c1 = 0;
     645      120131 :     mul_add_c(a[0], b[1], c2, c3, c1);
     646      120131 :     mul_add_c(a[1], b[0], c2, c3, c1);
     647      120131 :     r[1] = c2;
     648             :     c2 = 0;
     649      120131 :     mul_add_c(a[2], b[0], c3, c1, c2);
     650      120131 :     mul_add_c(a[1], b[1], c3, c1, c2);
     651      120131 :     mul_add_c(a[0], b[2], c3, c1, c2);
     652      120131 :     r[2] = c3;
     653             :     c3 = 0;
     654      120131 :     mul_add_c(a[0], b[3], c1, c2, c3);
     655      120131 :     mul_add_c(a[1], b[2], c1, c2, c3);
     656      120131 :     mul_add_c(a[2], b[1], c1, c2, c3);
     657      120131 :     mul_add_c(a[3], b[0], c1, c2, c3);
     658      120131 :     r[3] = c1;
     659             :     c1 = 0;
     660      120131 :     mul_add_c(a[4], b[0], c2, c3, c1);
     661      120131 :     mul_add_c(a[3], b[1], c2, c3, c1);
     662      120131 :     mul_add_c(a[2], b[2], c2, c3, c1);
     663      120131 :     mul_add_c(a[1], b[3], c2, c3, c1);
     664      120131 :     mul_add_c(a[0], b[4], c2, c3, c1);
     665      120131 :     r[4] = c2;
     666             :     c2 = 0;
     667      120131 :     mul_add_c(a[0], b[5], c3, c1, c2);
     668      120131 :     mul_add_c(a[1], b[4], c3, c1, c2);
     669      120131 :     mul_add_c(a[2], b[3], c3, c1, c2);
     670      120131 :     mul_add_c(a[3], b[2], c3, c1, c2);
     671      120131 :     mul_add_c(a[4], b[1], c3, c1, c2);
     672      120131 :     mul_add_c(a[5], b[0], c3, c1, c2);
     673      120131 :     r[5] = c3;
     674             :     c3 = 0;
     675      120131 :     mul_add_c(a[6], b[0], c1, c2, c3);
     676      120131 :     mul_add_c(a[5], b[1], c1, c2, c3);
     677      120131 :     mul_add_c(a[4], b[2], c1, c2, c3);
     678      120131 :     mul_add_c(a[3], b[3], c1, c2, c3);
     679      120131 :     mul_add_c(a[2], b[4], c1, c2, c3);
     680      120131 :     mul_add_c(a[1], b[5], c1, c2, c3);
     681      120131 :     mul_add_c(a[0], b[6], c1, c2, c3);
     682      120131 :     r[6] = c1;
     683             :     c1 = 0;
     684      120131 :     mul_add_c(a[0], b[7], c2, c3, c1);
     685      120131 :     mul_add_c(a[1], b[6], c2, c3, c1);
     686      120131 :     mul_add_c(a[2], b[5], c2, c3, c1);
     687      120131 :     mul_add_c(a[3], b[4], c2, c3, c1);
     688      120131 :     mul_add_c(a[4], b[3], c2, c3, c1);
     689      120131 :     mul_add_c(a[5], b[2], c2, c3, c1);
     690      120131 :     mul_add_c(a[6], b[1], c2, c3, c1);
     691      120131 :     mul_add_c(a[7], b[0], c2, c3, c1);
     692      120131 :     r[7] = c2;
     693             :     c2 = 0;
     694      120131 :     mul_add_c(a[7], b[1], c3, c1, c2);
     695      120131 :     mul_add_c(a[6], b[2], c3, c1, c2);
     696      120131 :     mul_add_c(a[5], b[3], c3, c1, c2);
     697      120131 :     mul_add_c(a[4], b[4], c3, c1, c2);
     698      120131 :     mul_add_c(a[3], b[5], c3, c1, c2);
     699      120131 :     mul_add_c(a[2], b[6], c3, c1, c2);
     700      120131 :     mul_add_c(a[1], b[7], c3, c1, c2);
     701      120131 :     r[8] = c3;
     702             :     c3 = 0;
     703      120131 :     mul_add_c(a[2], b[7], c1, c2, c3);
     704      120131 :     mul_add_c(a[3], b[6], c1, c2, c3);
     705      120131 :     mul_add_c(a[4], b[5], c1, c2, c3);
     706      120131 :     mul_add_c(a[5], b[4], c1, c2, c3);
     707      120131 :     mul_add_c(a[6], b[3], c1, c2, c3);
     708      120131 :     mul_add_c(a[7], b[2], c1, c2, c3);
     709      120131 :     r[9] = c1;
     710             :     c1 = 0;
     711      120131 :     mul_add_c(a[7], b[3], c2, c3, c1);
     712      120131 :     mul_add_c(a[6], b[4], c2, c3, c1);
     713      120131 :     mul_add_c(a[5], b[5], c2, c3, c1);
     714      120131 :     mul_add_c(a[4], b[6], c2, c3, c1);
     715      120131 :     mul_add_c(a[3], b[7], c2, c3, c1);
     716      120131 :     r[10] = c2;
     717             :     c2 = 0;
     718      120131 :     mul_add_c(a[4], b[7], c3, c1, c2);
     719      120131 :     mul_add_c(a[5], b[6], c3, c1, c2);
     720      120131 :     mul_add_c(a[6], b[5], c3, c1, c2);
     721      120131 :     mul_add_c(a[7], b[4], c3, c1, c2);
     722      120131 :     r[11] = c3;
     723             :     c3 = 0;
     724      120131 :     mul_add_c(a[7], b[5], c1, c2, c3);
     725      120131 :     mul_add_c(a[6], b[6], c1, c2, c3);
     726      120131 :     mul_add_c(a[5], b[7], c1, c2, c3);
     727      120131 :     r[12] = c1;
     728             :     c1 = 0;
     729      120131 :     mul_add_c(a[6], b[7], c2, c3, c1);
     730      120131 :     mul_add_c(a[7], b[6], c2, c3, c1);
     731      120131 :     r[13] = c2;
     732             :     c2 = 0;
     733      120131 :     mul_add_c(a[7], b[7], c3, c1, c2);
     734      120131 :     r[14] = c3;
     735      120131 :     r[15] = c1;
     736      120131 : }
     737             : 
     738           0 : void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
     739             : {
     740             :     BN_ULONG c1, c2, c3;
     741             : 
     742             :     c1 = 0;
     743             :     c2 = 0;
     744             :     c3 = 0;
     745           0 :     mul_add_c(a[0], b[0], c1, c2, c3);
     746           0 :     r[0] = c1;
     747             :     c1 = 0;
     748           0 :     mul_add_c(a[0], b[1], c2, c3, c1);
     749           0 :     mul_add_c(a[1], b[0], c2, c3, c1);
     750           0 :     r[1] = c2;
     751             :     c2 = 0;
     752           0 :     mul_add_c(a[2], b[0], c3, c1, c2);
     753           0 :     mul_add_c(a[1], b[1], c3, c1, c2);
     754           0 :     mul_add_c(a[0], b[2], c3, c1, c2);
     755           0 :     r[2] = c3;
     756             :     c3 = 0;
     757           0 :     mul_add_c(a[0], b[3], c1, c2, c3);
     758           0 :     mul_add_c(a[1], b[2], c1, c2, c3);
     759           0 :     mul_add_c(a[2], b[1], c1, c2, c3);
     760           0 :     mul_add_c(a[3], b[0], c1, c2, c3);
     761           0 :     r[3] = c1;
     762             :     c1 = 0;
     763           0 :     mul_add_c(a[3], b[1], c2, c3, c1);
     764           0 :     mul_add_c(a[2], b[2], c2, c3, c1);
     765           0 :     mul_add_c(a[1], b[3], c2, c3, c1);
     766           0 :     r[4] = c2;
     767             :     c2 = 0;
     768           0 :     mul_add_c(a[2], b[3], c3, c1, c2);
     769           0 :     mul_add_c(a[3], b[2], c3, c1, c2);
     770           0 :     r[5] = c3;
     771             :     c3 = 0;
     772           0 :     mul_add_c(a[3], b[3], c1, c2, c3);
     773           0 :     r[6] = c1;
     774           0 :     r[7] = c2;
     775           0 : }
     776             : 
     777      476645 : void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
     778             : {
     779             :     BN_ULONG c1, c2, c3;
     780             : 
     781             :     c1 = 0;
     782             :     c2 = 0;
     783             :     c3 = 0;
     784      476645 :     sqr_add_c(a, 0, c1, c2, c3);
     785      476645 :     r[0] = c1;
     786             :     c1 = 0;
     787      476645 :     sqr_add_c2(a, 1, 0, c2, c3, c1);
     788      476645 :     r[1] = c2;
     789             :     c2 = 0;
     790      476645 :     sqr_add_c(a, 1, c3, c1, c2);
     791      476645 :     sqr_add_c2(a, 2, 0, c3, c1, c2);
     792      476645 :     r[2] = c3;
     793             :     c3 = 0;
     794      476645 :     sqr_add_c2(a, 3, 0, c1, c2, c3);
     795      476645 :     sqr_add_c2(a, 2, 1, c1, c2, c3);
     796      476645 :     r[3] = c1;
     797             :     c1 = 0;
     798      476645 :     sqr_add_c(a, 2, c2, c3, c1);
     799      476645 :     sqr_add_c2(a, 3, 1, c2, c3, c1);
     800      476645 :     sqr_add_c2(a, 4, 0, c2, c3, c1);
     801      476645 :     r[4] = c2;
     802             :     c2 = 0;
     803      476645 :     sqr_add_c2(a, 5, 0, c3, c1, c2);
     804      476645 :     sqr_add_c2(a, 4, 1, c3, c1, c2);
     805      476645 :     sqr_add_c2(a, 3, 2, c3, c1, c2);
     806      476645 :     r[5] = c3;
     807             :     c3 = 0;
     808      476645 :     sqr_add_c(a, 3, c1, c2, c3);
     809      476645 :     sqr_add_c2(a, 4, 2, c1, c2, c3);
     810      476645 :     sqr_add_c2(a, 5, 1, c1, c2, c3);
     811      476645 :     sqr_add_c2(a, 6, 0, c1, c2, c3);
     812      476645 :     r[6] = c1;
     813             :     c1 = 0;
     814      476645 :     sqr_add_c2(a, 7, 0, c2, c3, c1);
     815      476645 :     sqr_add_c2(a, 6, 1, c2, c3, c1);
     816      476645 :     sqr_add_c2(a, 5, 2, c2, c3, c1);
     817      476645 :     sqr_add_c2(a, 4, 3, c2, c3, c1);
     818      476645 :     r[7] = c2;
     819             :     c2 = 0;
     820      476645 :     sqr_add_c(a, 4, c3, c1, c2);
     821      476645 :     sqr_add_c2(a, 5, 3, c3, c1, c2);
     822      476645 :     sqr_add_c2(a, 6, 2, c3, c1, c2);
     823      476645 :     sqr_add_c2(a, 7, 1, c3, c1, c2);
     824      476645 :     r[8] = c3;
     825             :     c3 = 0;
     826      476645 :     sqr_add_c2(a, 7, 2, c1, c2, c3);
     827      476645 :     sqr_add_c2(a, 6, 3, c1, c2, c3);
     828      476645 :     sqr_add_c2(a, 5, 4, c1, c2, c3);
     829      476645 :     r[9] = c1;
     830             :     c1 = 0;
     831      476645 :     sqr_add_c(a, 5, c2, c3, c1);
     832      476645 :     sqr_add_c2(a, 6, 4, c2, c3, c1);
     833      476645 :     sqr_add_c2(a, 7, 3, c2, c3, c1);
     834      476645 :     r[10] = c2;
     835             :     c2 = 0;
     836      476645 :     sqr_add_c2(a, 7, 4, c3, c1, c2);
     837      476645 :     sqr_add_c2(a, 6, 5, c3, c1, c2);
     838      476645 :     r[11] = c3;
     839             :     c3 = 0;
     840      476645 :     sqr_add_c(a, 6, c1, c2, c3);
     841      476645 :     sqr_add_c2(a, 7, 5, c1, c2, c3);
     842      476645 :     r[12] = c1;
     843             :     c1 = 0;
     844      476645 :     sqr_add_c2(a, 7, 6, c2, c3, c1);
     845      476645 :     r[13] = c2;
     846             :     c2 = 0;
     847      476645 :     sqr_add_c(a, 7, c3, c1, c2);
     848      476645 :     r[14] = c3;
     849      476645 :     r[15] = c1;
     850      476645 : }
     851             : 
     852     2788319 : void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
     853             : {
     854             :     BN_ULONG c1, c2, c3;
     855             : 
     856             :     c1 = 0;
     857             :     c2 = 0;
     858             :     c3 = 0;
     859     2788319 :     sqr_add_c(a, 0, c1, c2, c3);
     860     2788319 :     r[0] = c1;
     861             :     c1 = 0;
     862     2788319 :     sqr_add_c2(a, 1, 0, c2, c3, c1);
     863     2788319 :     r[1] = c2;
     864             :     c2 = 0;
     865     2788319 :     sqr_add_c(a, 1, c3, c1, c2);
     866     2788319 :     sqr_add_c2(a, 2, 0, c3, c1, c2);
     867     2788319 :     r[2] = c3;
     868             :     c3 = 0;
     869     2788319 :     sqr_add_c2(a, 3, 0, c1, c2, c3);
     870     2788319 :     sqr_add_c2(a, 2, 1, c1, c2, c3);
     871     2788319 :     r[3] = c1;
     872             :     c1 = 0;
     873     2788319 :     sqr_add_c(a, 2, c2, c3, c1);
     874     2788319 :     sqr_add_c2(a, 3, 1, c2, c3, c1);
     875     2788319 :     r[4] = c2;
     876             :     c2 = 0;
     877     2788319 :     sqr_add_c2(a, 3, 2, c3, c1, c2);
     878     2788319 :     r[5] = c3;
     879             :     c3 = 0;
     880     2788319 :     sqr_add_c(a, 3, c1, c2, c3);
     881     2788319 :     r[6] = c1;
     882     2788319 :     r[7] = c2;
     883     2788319 : }
     884             : 
     885             : # ifdef OPENSSL_NO_ASM
     886             : #  ifdef OPENSSL_BN_ASM_MONT
     887             : #   include <alloca.h>
     888             : /*
     889             :  * This is essentially reference implementation, which may or may not
     890             :  * result in performance improvement. E.g. on IA-32 this routine was
     891             :  * observed to give 40% faster rsa1024 private key operations and 10%
     892             :  * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
     893             :  * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
     894             :  * reference implementation, one to be used as starting point for
     895             :  * platform-specific assembler. Mentioned numbers apply to compiler
     896             :  * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
     897             :  * can vary not only from platform to platform, but even for compiler
     898             :  * versions. Assembler vs. assembler improvement coefficients can
     899             :  * [and are known to] differ and are to be documented elsewhere.
     900             :  */
     901             : int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
     902             :                 const BN_ULONG *np, const BN_ULONG *n0p, int num)
     903             : {
     904             :     BN_ULONG c0, c1, ml, *tp, n0;
     905             : #   ifdef mul64
     906             :     BN_ULONG mh;
     907             : #   endif
     908             :     volatile BN_ULONG *vp;
     909             :     int i = 0, j;
     910             : 
     911             : #   if 0                        /* template for platform-specific
     912             :                                  * implementation */
     913             :     if (ap == bp)
     914             :         return bn_sqr_mont(rp, ap, np, n0p, num);
     915             : #   endif
     916             :     vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
     917             : 
     918             :     n0 = *n0p;
     919             : 
     920             :     c0 = 0;
     921             :     ml = bp[0];
     922             : #   ifdef mul64
     923             :     mh = HBITS(ml);
     924             :     ml = LBITS(ml);
     925             :     for (j = 0; j < num; ++j)
     926             :         mul(tp[j], ap[j], ml, mh, c0);
     927             : #   else
     928             :     for (j = 0; j < num; ++j)
     929             :         mul(tp[j], ap[j], ml, c0);
     930             : #   endif
     931             : 
     932             :     tp[num] = c0;
     933             :     tp[num + 1] = 0;
     934             :     goto enter;
     935             : 
     936             :     for (i = 0; i < num; i++) {
     937             :         c0 = 0;
     938             :         ml = bp[i];
     939             : #   ifdef mul64
     940             :         mh = HBITS(ml);
     941             :         ml = LBITS(ml);
     942             :         for (j = 0; j < num; ++j)
     943             :             mul_add(tp[j], ap[j], ml, mh, c0);
     944             : #   else
     945             :         for (j = 0; j < num; ++j)
     946             :             mul_add(tp[j], ap[j], ml, c0);
     947             : #   endif
     948             :         c1 = (tp[num] + c0) & BN_MASK2;
     949             :         tp[num] = c1;
     950             :         tp[num + 1] = (c1 < c0 ? 1 : 0);
     951             :  enter:
     952             :         c1 = tp[0];
     953             :         ml = (c1 * n0) & BN_MASK2;
     954             :         c0 = 0;
     955             : #   ifdef mul64
     956             :         mh = HBITS(ml);
     957             :         ml = LBITS(ml);
     958             :         mul_add(c1, np[0], ml, mh, c0);
     959             : #   else
     960             :         mul_add(c1, ml, np[0], c0);
     961             : #   endif
     962             :         for (j = 1; j < num; j++) {
     963             :             c1 = tp[j];
     964             : #   ifdef mul64
     965             :             mul_add(c1, np[j], ml, mh, c0);
     966             : #   else
     967             :             mul_add(c1, ml, np[j], c0);
     968             : #   endif
     969             :             tp[j - 1] = c1 & BN_MASK2;
     970             :         }
     971             :         c1 = (tp[num] + c0) & BN_MASK2;
     972             :         tp[num - 1] = c1;
     973             :         tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0);
     974             :     }
     975             : 
     976             :     if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
     977             :         c0 = bn_sub_words(rp, tp, np, num);
     978             :         if (tp[num] != 0 || c0 == 0) {
     979             :             for (i = 0; i < num + 2; i++)
     980             :                 vp[i] = 0;
     981             :             return 1;
     982             :         }
     983             :     }
     984             :     for (i = 0; i < num; i++)
     985             :         rp[i] = tp[i], vp[i] = 0;
     986             :     vp[num] = 0;
     987             :     vp[num + 1] = 0;
     988             :     return 1;
     989             : }
     990             : #  else
     991             : /*
     992             :  * Return value of 0 indicates that multiplication/convolution was not
     993             :  * performed to signal the caller to fall down to alternative/original
     994             :  * code-path.
     995             :  */
     996           0 : int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
     997             :                 const BN_ULONG *np, const BN_ULONG *n0, int num)
     998             : {
     999           0 :     return 0;
    1000             : }
    1001             : #  endif                        /* OPENSSL_BN_ASM_MONT */
    1002             : # endif
    1003             : 
    1004             : #else                           /* !BN_MUL_COMBA */
    1005             : 
    1006             : /* hmm... is it faster just to do a multiply? */
    1007             : # undef bn_sqr_comba4
    1008             : void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
    1009             : {
    1010             :     BN_ULONG t[8];
    1011             :     bn_sqr_normal(r, a, 4, t);
    1012             : }
    1013             : 
    1014             : # undef bn_sqr_comba8
    1015             : void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
    1016             : {
    1017             :     BN_ULONG t[16];
    1018             :     bn_sqr_normal(r, a, 8, t);
    1019             : }
    1020             : 
    1021             : void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
    1022             : {
    1023             :     r[4] = bn_mul_words(&(r[0]), a, 4, b[0]);
    1024             :     r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]);
    1025             :     r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]);
    1026             :     r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]);
    1027             : }
    1028             : 
    1029             : void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
    1030             : {
    1031             :     r[8] = bn_mul_words(&(r[0]), a, 8, b[0]);
    1032             :     r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]);
    1033             :     r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]);
    1034             :     r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]);
    1035             :     r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]);
    1036             :     r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]);
    1037             :     r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]);
    1038             :     r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]);
    1039             : }
    1040             : 
    1041             : # ifdef OPENSSL_NO_ASM
    1042             : #  ifdef OPENSSL_BN_ASM_MONT
    1043             : #   include <alloca.h>
    1044             : int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
    1045             :                 const BN_ULONG *np, const BN_ULONG *n0p, int num)
    1046             : {
    1047             :     BN_ULONG c0, c1, *tp, n0 = *n0p;
    1048             :     volatile BN_ULONG *vp;
    1049             :     int i = 0, j;
    1050             : 
    1051             :     vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
    1052             : 
    1053             :     for (i = 0; i <= num; i++)
    1054             :         tp[i] = 0;
    1055             : 
    1056             :     for (i = 0; i < num; i++) {
    1057             :         c0 = bn_mul_add_words(tp, ap, num, bp[i]);
    1058             :         c1 = (tp[num] + c0) & BN_MASK2;
    1059             :         tp[num] = c1;
    1060             :         tp[num + 1] = (c1 < c0 ? 1 : 0);
    1061             : 
    1062             :         c0 = bn_mul_add_words(tp, np, num, tp[0] * n0);
    1063             :         c1 = (tp[num] + c0) & BN_MASK2;
    1064             :         tp[num] = c1;
    1065             :         tp[num + 1] += (c1 < c0 ? 1 : 0);
    1066             :         for (j = 0; j <= num; j++)
    1067             :             tp[j] = tp[j + 1];
    1068             :     }
    1069             : 
    1070             :     if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
    1071             :         c0 = bn_sub_words(rp, tp, np, num);
    1072             :         if (tp[num] != 0 || c0 == 0) {
    1073             :             for (i = 0; i < num + 2; i++)
    1074             :                 vp[i] = 0;
    1075             :             return 1;
    1076             :         }
    1077             :     }
    1078             :     for (i = 0; i < num; i++)
    1079             :         rp[i] = tp[i], vp[i] = 0;
    1080             :     vp[num] = 0;
    1081             :     vp[num + 1] = 0;
    1082             :     return 1;
    1083             : }
    1084             : #  else
    1085             : int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
    1086             :                 const BN_ULONG *np, const BN_ULONG *n0, int num)
    1087             : {
    1088             :     return 0;
    1089             : }
    1090             : #  endif                        /* OPENSSL_BN_ASM_MONT */
    1091             : # endif
    1092             : 
    1093             : #endif                          /* !BN_MUL_COMBA */

Generated by: LCOV version 1.10