LCOV - code coverage report
Current view: top level - third_party/openssl/crypto/bn - bn_exp.c (source / functions) Hit Total Coverage
Test: tmp.zDYK9MVh93 Lines: 126 319 39.5 %
Date: 2015-10-10 Functions: 4 8 50.0 %

          Line data    Source code
       1             : /* crypto/bn/bn_exp.c */
       2             : /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
       3             :  * All rights reserved.
       4             :  *
       5             :  * This package is an SSL implementation written
       6             :  * by Eric Young (eay@cryptsoft.com).
       7             :  * The implementation was written so as to conform with Netscapes SSL.
       8             :  *
       9             :  * This library is free for commercial and non-commercial use as long as
      10             :  * the following conditions are aheared to.  The following conditions
      11             :  * apply to all code found in this distribution, be it the RC4, RSA,
      12             :  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
      13             :  * included with this distribution is covered by the same copyright terms
      14             :  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
      15             :  *
      16             :  * Copyright remains Eric Young's, and as such any Copyright notices in
      17             :  * the code are not to be removed.
      18             :  * If this package is used in a product, Eric Young should be given attribution
      19             :  * as the author of the parts of the library used.
      20             :  * This can be in the form of a textual message at program startup or
      21             :  * in documentation (online or textual) provided with the package.
      22             :  *
      23             :  * Redistribution and use in source and binary forms, with or without
      24             :  * modification, are permitted provided that the following conditions
      25             :  * are met:
      26             :  * 1. Redistributions of source code must retain the copyright
      27             :  *    notice, this list of conditions and the following disclaimer.
      28             :  * 2. Redistributions in binary form must reproduce the above copyright
      29             :  *    notice, this list of conditions and the following disclaimer in the
      30             :  *    documentation and/or other materials provided with the distribution.
      31             :  * 3. All advertising materials mentioning features or use of this software
      32             :  *    must display the following acknowledgement:
      33             :  *    "This product includes cryptographic software written by
      34             :  *     Eric Young (eay@cryptsoft.com)"
      35             :  *    The word 'cryptographic' can be left out if the rouines from the library
      36             :  *    being used are not cryptographic related :-).
      37             :  * 4. If you include any Windows specific code (or a derivative thereof) from
      38             :  *    the apps directory (application code) you must include an acknowledgement:
      39             :  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
      40             :  *
      41             :  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
      42             :  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      43             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      44             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
      45             :  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
      46             :  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
      47             :  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
      48             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
      49             :  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
      50             :  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
      51             :  * SUCH DAMAGE.
      52             :  *
      53             :  * The licence and distribution terms for any publically available version or
      54             :  * derivative of this code cannot be changed.  i.e. this code cannot simply be
      55             :  * copied and put under another distribution licence
      56             :  * [including the GNU Public Licence.]
      57             :  */
      58             : /* ====================================================================
      59             :  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
      60             :  *
      61             :  * Redistribution and use in source and binary forms, with or without
      62             :  * modification, are permitted provided that the following conditions
      63             :  * are met:
      64             :  *
      65             :  * 1. Redistributions of source code must retain the above copyright
      66             :  *    notice, this list of conditions and the following disclaimer.
      67             :  *
      68             :  * 2. Redistributions in binary form must reproduce the above copyright
      69             :  *    notice, this list of conditions and the following disclaimer in
      70             :  *    the documentation and/or other materials provided with the
      71             :  *    distribution.
      72             :  *
      73             :  * 3. All advertising materials mentioning features or use of this
      74             :  *    software must display the following acknowledgment:
      75             :  *    "This product includes software developed by the OpenSSL Project
      76             :  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
      77             :  *
      78             :  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
      79             :  *    endorse or promote products derived from this software without
      80             :  *    prior written permission. For written permission, please contact
      81             :  *    openssl-core@openssl.org.
      82             :  *
      83             :  * 5. Products derived from this software may not be called "OpenSSL"
      84             :  *    nor may "OpenSSL" appear in their names without prior written
      85             :  *    permission of the OpenSSL Project.
      86             :  *
      87             :  * 6. Redistributions of any form whatsoever must retain the following
      88             :  *    acknowledgment:
      89             :  *    "This product includes software developed by the OpenSSL Project
      90             :  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
      91             :  *
      92             :  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
      93             :  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
      94             :  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
      95             :  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
      96             :  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      97             :  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
      98             :  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
      99             :  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     100             :  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     101             :  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     102             :  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
     103             :  * OF THE POSSIBILITY OF SUCH DAMAGE.
     104             :  * ====================================================================
     105             :  *
     106             :  * This product includes cryptographic software written by Eric Young
     107             :  * (eay@cryptsoft.com).  This product includes software written by Tim
     108             :  * Hudson (tjh@cryptsoft.com).
     109             :  *
     110             :  */
     111             : 
     112             : #include "cryptlib.h"
     113             : #include "bn_lcl.h"
     114             : 
     115             : #include <stdlib.h>
     116             : #ifdef _WIN32
     117             : # include <malloc.h>
     118             : # ifndef alloca
     119             : #  define alloca _alloca
     120             : # endif
     121             : #elif defined(__GNUC__)
     122             : # ifndef alloca
     123             : #  define alloca(s) __builtin_alloca((s))
     124             : # endif
     125             : #elif defined(__sun)
     126             : # include <alloca.h>
     127             : #endif
     128             : 
     129             : #include "rsaz_exp.h"
     130             : 
     131             : #undef SPARC_T4_MONT
     132             : #if defined(OPENSSL_BN_ASM_MONT) && (defined(__sparc__) || defined(__sparc))
     133             : # include "sparc_arch.h"
     134             : extern unsigned int OPENSSL_sparcv9cap_P[];
     135             : # define SPARC_T4_MONT
     136             : #endif
     137             : 
     138             : /* maximum precomputation table size for *variable* sliding windows */
     139             : #define TABLE_SIZE      32
     140             : 
     141             : /* this one works - simple but works */
     142           0 : int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
     143             : {
     144             :     int i, bits, ret = 0;
     145             :     BIGNUM *v, *rr;
     146             : 
     147           0 :     if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
     148             :         /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
     149           0 :         BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
     150           0 :         return -1;
     151             :     }
     152             : 
     153           0 :     BN_CTX_start(ctx);
     154           0 :     if ((r == a) || (r == p))
     155           0 :         rr = BN_CTX_get(ctx);
     156             :     else
     157             :         rr = r;
     158           0 :     v = BN_CTX_get(ctx);
     159           0 :     if (rr == NULL || v == NULL)
     160             :         goto err;
     161             : 
     162           0 :     if (BN_copy(v, a) == NULL)
     163             :         goto err;
     164           0 :     bits = BN_num_bits(p);
     165             : 
     166           0 :     if (BN_is_odd(p)) {
     167           0 :         if (BN_copy(rr, a) == NULL)
     168             :             goto err;
     169             :     } else {
     170           0 :         if (!BN_one(rr))
     171             :             goto err;
     172             :     }
     173             : 
     174           0 :     for (i = 1; i < bits; i++) {
     175           0 :         if (!BN_sqr(v, v, ctx))
     176             :             goto err;
     177           0 :         if (BN_is_bit_set(p, i)) {
     178           0 :             if (!BN_mul(rr, rr, v, ctx))
     179             :                 goto err;
     180             :         }
     181             :     }
     182           0 :     if (r != rr)
     183           0 :         BN_copy(r, rr);
     184             :     ret = 1;
     185             :  err:
     186           0 :     BN_CTX_end(ctx);
     187             :     bn_check_top(r);
     188           0 :     return (ret);
     189             : }
     190             : 
     191         345 : int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
     192             :                BN_CTX *ctx)
     193             : {
     194             :     int ret;
     195             : 
     196             :     bn_check_top(a);
     197             :     bn_check_top(p);
     198             :     bn_check_top(m);
     199             : 
     200             :     /*-
     201             :      * For even modulus  m = 2^k*m_odd,  it might make sense to compute
     202             :      * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
     203             :      * exponentiation for the odd part), using appropriate exponent
     204             :      * reductions, and combine the results using the CRT.
     205             :      *
     206             :      * For now, we use Montgomery only if the modulus is odd; otherwise,
     207             :      * exponentiation using the reciprocal-based quick remaindering
     208             :      * algorithm is used.
     209             :      *
     210             :      * (Timing obtained with expspeed.c [computations  a^p mod m
     211             :      * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
     212             :      * 4096, 8192 bits], compared to the running time of the
     213             :      * standard algorithm:
     214             :      *
     215             :      *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
     216             :      *                     55 .. 77 %  [UltraSparc processor, but
     217             :      *                                  debug-solaris-sparcv8-gcc conf.]
     218             :      *
     219             :      *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
     220             :      *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
     221             :      *
     222             :      * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
     223             :      * at 2048 and more bits, but at 512 and 1024 bits, it was
     224             :      * slower even than the standard algorithm!
     225             :      *
     226             :      * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
     227             :      * should be obtained when the new Montgomery reduction code
     228             :      * has been integrated into OpenSSL.)
     229             :      */
     230             : 
     231             : #define MONT_MUL_MOD
     232             : #define MONT_EXP_WORD
     233             : #define RECP_MUL_MOD
     234             : 
     235             : #ifdef MONT_MUL_MOD
     236             :     /*
     237             :      * I have finally been able to take out this pre-condition of the top bit
     238             :      * being set.  It was caused by an error in BN_div with negatives.  There
     239             :      * was also another problem when for a^b%m a >= m.  eay 07-May-97
     240             :      */
     241             :     /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
     242             : 
     243         345 :     if (BN_is_odd(m)) {
     244             : # ifdef MONT_EXP_WORD
     245         345 :         if (a->top == 1 && !a->neg
     246           0 :             && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
     247           0 :             BN_ULONG A = a->d[0];
     248           0 :             ret = BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
     249             :         } else
     250             : # endif
     251         345 :             ret = BN_mod_exp_mont(r, a, p, m, ctx, NULL);
     252             :     } else
     253             : #endif
     254             : #ifdef RECP_MUL_MOD
     255             :     {
     256           0 :         ret = BN_mod_exp_recp(r, a, p, m, ctx);
     257             :     }
     258             : #else
     259             :     {
     260             :         ret = BN_mod_exp_simple(r, a, p, m, ctx);
     261             :     }
     262             : #endif
     263             : 
     264             :     bn_check_top(r);
     265         345 :     return (ret);
     266             : }
     267             : 
     268           0 : int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
     269             :                     const BIGNUM *m, BN_CTX *ctx)
     270             : {
     271             :     int i, j, bits, ret = 0, wstart, wend, window, wvalue;
     272             :     int start = 1;
     273             :     BIGNUM *aa;
     274             :     /* Table of variables obtained from 'ctx' */
     275             :     BIGNUM *val[TABLE_SIZE];
     276             :     BN_RECP_CTX recp;
     277             : 
     278           0 :     if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
     279             :         /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
     280           0 :         BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
     281           0 :         return -1;
     282             :     }
     283             : 
     284           0 :     bits = BN_num_bits(p);
     285             : 
     286           0 :     if (bits == 0) {
     287           0 :         ret = BN_one(r);
     288           0 :         return ret;
     289             :     }
     290             : 
     291           0 :     BN_CTX_start(ctx);
     292           0 :     aa = BN_CTX_get(ctx);
     293           0 :     val[0] = BN_CTX_get(ctx);
     294           0 :     if (!aa || !val[0])
     295             :         goto err;
     296             : 
     297           0 :     BN_RECP_CTX_init(&recp);
     298           0 :     if (m->neg) {
     299             :         /* ignore sign of 'm' */
     300           0 :         if (!BN_copy(aa, m))
     301             :             goto err;
     302           0 :         aa->neg = 0;
     303           0 :         if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
     304             :             goto err;
     305             :     } else {
     306           0 :         if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
     307             :             goto err;
     308             :     }
     309             : 
     310           0 :     if (!BN_nnmod(val[0], a, m, ctx))
     311             :         goto err;               /* 1 */
     312           0 :     if (BN_is_zero(val[0])) {
     313           0 :         BN_zero(r);
     314             :         ret = 1;
     315           0 :         goto err;
     316             :     }
     317             : 
     318           0 :     window = BN_window_bits_for_exponent_size(bits);
     319           0 :     if (window > 1) {
     320           0 :         if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
     321             :             goto err;           /* 2 */
     322           0 :         j = 1 << (window - 1);
     323           0 :         for (i = 1; i < j; i++) {
     324           0 :             if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
     325           0 :                 !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx))
     326             :                 goto err;
     327             :         }
     328             :     }
     329             : 
     330             :     start = 1;                  /* This is used to avoid multiplication etc
     331             :                                  * when there is only the value '1' in the
     332             :                                  * buffer. */
     333             :     wvalue = 0;                 /* The 'value' of the window */
     334           0 :     wstart = bits - 1;          /* The top bit of the window */
     335             :     wend = 0;                   /* The bottom bit of the window */
     336             : 
     337           0 :     if (!BN_one(r))
     338             :         goto err;
     339             : 
     340             :     for (;;) {
     341           0 :         if (BN_is_bit_set(p, wstart) == 0) {
     342           0 :             if (!start)
     343           0 :                 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
     344             :                     goto err;
     345           0 :             if (wstart == 0)
     346             :                 break;
     347           0 :             wstart--;
     348           0 :             continue;
     349             :         }
     350             :         /*
     351             :          * We now have wstart on a 'set' bit, we now need to work out how bit
     352             :          * a window to do.  To do this we need to scan forward until the last
     353             :          * set bit before the end of the window
     354             :          */
     355             :         j = wstart;
     356             :         wvalue = 1;
     357             :         wend = 0;
     358           0 :         for (i = 1; i < window; i++) {
     359           0 :             if (wstart - i < 0)
     360             :                 break;
     361           0 :             if (BN_is_bit_set(p, wstart - i)) {
     362           0 :                 wvalue <<= (i - wend);
     363           0 :                 wvalue |= 1;
     364             :                 wend = i;
     365             :             }
     366             :         }
     367             : 
     368             :         /* wend is the size of the current window */
     369             :         j = wend + 1;
     370             :         /* add the 'bytes above' */
     371           0 :         if (!start)
     372           0 :             for (i = 0; i < j; i++) {
     373           0 :                 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx))
     374             :                     goto err;
     375             :             }
     376             : 
     377             :         /* wvalue will be an odd number < 2^window */
     378           0 :         if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx))
     379             :             goto err;
     380             : 
     381             :         /* move the 'window' down further */
     382           0 :         wstart -= wend + 1;
     383             :         wvalue = 0;
     384             :         start = 0;
     385           0 :         if (wstart < 0)
     386             :             break;
     387             :     }
     388             :     ret = 1;
     389             :  err:
     390           0 :     BN_CTX_end(ctx);
     391           0 :     BN_RECP_CTX_free(&recp);
     392             :     bn_check_top(r);
     393           0 :     return (ret);
     394             : }
     395             : 
     396        2270 : int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     397             :                     const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
     398             : {
     399             :     int i, j, bits, ret = 0, wstart, wend, window, wvalue;
     400             :     int start = 1;
     401             :     BIGNUM *d, *r;
     402             :     const BIGNUM *aa;
     403             :     /* Table of variables obtained from 'ctx' */
     404             :     BIGNUM *val[TABLE_SIZE];
     405             :     BN_MONT_CTX *mont = NULL;
     406             : 
     407        2270 :     if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
     408         762 :         return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
     409             :     }
     410             : 
     411             :     bn_check_top(a);
     412             :     bn_check_top(p);
     413             :     bn_check_top(m);
     414             : 
     415        1508 :     if (!BN_is_odd(m)) {
     416           0 :         BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
     417           0 :         return (0);
     418             :     }
     419        1508 :     bits = BN_num_bits(p);
     420        1508 :     if (bits == 0) {
     421           0 :         ret = BN_one(rr);
     422           0 :         return ret;
     423             :     }
     424             : 
     425        1508 :     BN_CTX_start(ctx);
     426        1508 :     d = BN_CTX_get(ctx);
     427        1508 :     r = BN_CTX_get(ctx);
     428        1508 :     val[0] = BN_CTX_get(ctx);
     429        1508 :     if (!d || !r || !val[0])
     430             :         goto err;
     431             : 
     432             :     /*
     433             :      * If this is not done, things will break in the montgomery part
     434             :      */
     435             : 
     436        1508 :     if (in_mont != NULL)
     437             :         mont = in_mont;
     438             :     else {
     439         345 :         if ((mont = BN_MONT_CTX_new()) == NULL)
     440             :             goto err;
     441         345 :         if (!BN_MONT_CTX_set(mont, m, ctx))
     442             :             goto err;
     443             :     }
     444             : 
     445        1508 :     if (a->neg || BN_ucmp(a, m) >= 0) {
     446           0 :         if (!BN_nnmod(val[0], a, m, ctx))
     447             :             goto err;
     448             :         aa = val[0];
     449             :     } else
     450             :         aa = a;
     451        1508 :     if (BN_is_zero(aa)) {
     452           0 :         BN_zero(rr);
     453             :         ret = 1;
     454           0 :         goto err;
     455             :     }
     456        1508 :     if (!BN_to_montgomery(val[0], aa, mont, ctx))
     457             :         goto err;               /* 1 */
     458             : 
     459        1508 :     window = BN_window_bits_for_exponent_size(bits);
     460        1508 :     if (window > 1) {
     461          36 :         if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
     462             :             goto err;           /* 2 */
     463          36 :         j = 1 << (window - 1);
     464         576 :         for (i = 1; i < j; i++) {
     465        1080 :             if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
     466         540 :                 !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx))
     467             :                 goto err;
     468             :         }
     469             :     }
     470             : 
     471             :     start = 1;                  /* This is used to avoid multiplication etc
     472             :                                  * when there is only the value '1' in the
     473             :                                  * buffer. */
     474             :     wvalue = 0;                 /* The 'value' of the window */
     475        1508 :     wstart = bits - 1;          /* The top bit of the window */
     476             :     wend = 0;                   /* The bottom bit of the window */
     477             : 
     478             : #if 1                           /* by Shay Gueron's suggestion */
     479        1508 :     j = m->top;                 /* borrow j */
     480        1508 :     if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
     481        1508 :         if (bn_wexpand(r, j) == NULL)
     482             :             goto err;
     483             :         /* 2^(top*BN_BITS2) - m */
     484        1508 :         r->d[0] = (0 - m->d[0]) & BN_MASK2;
     485       23840 :         for (i = 1; i < j; i++)
     486       22332 :             r->d[i] = (~m->d[i]) & BN_MASK2;
     487        1508 :         r->top = j;
     488             :         /*
     489             :          * Upper words will be zero if the corresponding words of 'm' were
     490             :          * 0xfff[...], so decrement r->top accordingly.
     491             :          */
     492        1508 :         bn_correct_top(r);
     493             :     } else
     494             : #endif
     495           0 :     if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
     496             :         goto err;
     497             :     for (;;) {
     498       33646 :         if (BN_is_bit_set(p, wstart) == 0) {
     499       27660 :             if (!start) {
     500       27660 :                 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
     501             :                     goto err;
     502             :             }
     503       27660 :             if (wstart == 0)
     504             :                 break;
     505       27660 :             wstart--;
     506       27660 :             continue;
     507             :         }
     508             :         /*
     509             :          * We now have wstart on a 'set' bit, we now need to work out how bit
     510             :          * a window to do.  To do this we need to scan forward until the last
     511             :          * set bit before the end of the window
     512             :          */
     513             :         j = wstart;
     514             :         wvalue = 1;
     515             :         wend = 0;
     516       12024 :         for (i = 1; i < window; i++) {
     517       12060 :             if (wstart - i < 0)
     518             :                 break;
     519       12024 :             if (BN_is_bit_set(p, wstart - i)) {
     520        6282 :                 wvalue <<= (i - wend);
     521        6282 :                 wvalue |= 1;
     522             :                 wend = i;
     523             :             }
     524             :         }
     525             : 
     526             :         /* wend is the size of the current window */
     527             :         j = wend + 1;
     528             :         /* add the 'bytes above' */
     529        5986 :         if (!start)
     530       14090 :             for (i = 0; i < j; i++) {
     531       14090 :                 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
     532             :                     goto err;
     533             :             }
     534             : 
     535             :         /* wvalue will be an odd number < 2^window */
     536        5986 :         if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
     537             :             goto err;
     538             : 
     539             :         /* move the 'window' down further */
     540        5986 :         wstart -= wend + 1;
     541             :         wvalue = 0;
     542             :         start = 0;
     543        5986 :         if (wstart < 0)
     544             :             break;
     545             :     }
     546             : #if defined(SPARC_T4_MONT)
     547             :     if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
     548             :         j = mont->N.top;        /* borrow j */
     549             :         val[0]->d[0] = 1;       /* borrow val[0] */
     550             :         for (i = 1; i < j; i++)
     551             :             val[0]->d[i] = 0;
     552             :         val[0]->top = j;
     553             :         if (!BN_mod_mul_montgomery(rr, r, val[0], mont, ctx))
     554             :             goto err;
     555             :     } else
     556             : #endif
     557        1508 :     if (!BN_from_montgomery(rr, r, mont, ctx))
     558             :         goto err;
     559             :     ret = 1;
     560             :  err:
     561        1508 :     if ((in_mont == NULL) && (mont != NULL))
     562         345 :         BN_MONT_CTX_free(mont);
     563        1508 :     BN_CTX_end(ctx);
     564             :     bn_check_top(rr);
     565        1508 :     return (ret);
     566             : }
     567             : 
     568             : #if defined(SPARC_T4_MONT)
     569             : static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
     570             : {
     571             :     BN_ULONG ret = 0;
     572             :     int wordpos;
     573             : 
     574             :     wordpos = bitpos / BN_BITS2;
     575             :     bitpos %= BN_BITS2;
     576             :     if (wordpos >= 0 && wordpos < a->top) {
     577             :         ret = a->d[wordpos] & BN_MASK2;
     578             :         if (bitpos) {
     579             :             ret >>= bitpos;
     580             :             if (++wordpos < a->top)
     581             :                 ret |= a->d[wordpos] << (BN_BITS2 - bitpos);
     582             :         }
     583             :     }
     584             : 
     585             :     return ret & BN_MASK2;
     586             : }
     587             : #endif
     588             : 
     589             : /*
     590             :  * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
     591             :  * layout so that accessing any of these table values shows the same access
     592             :  * pattern as far as cache lines are concerned.  The following functions are
     593             :  * used to transfer a BIGNUM from/to that table.
     594             :  */
     595             : 
     596             : static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
     597             :                                         unsigned char *buf, int idx,
     598             :                                         int width)
     599             : {
     600             :     size_t i, j;
     601             : 
     602       24384 :     if (top > b->top)
     603             :         top = b->top;           /* this works because 'buf' is explicitly
     604             :                                  * zeroed */
     605     1582674 :     for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
     606     1560576 :         buf[j] = ((unsigned char *)b->d)[i];
     607             :     }
     608             : 
     609             :     return 1;
     610             : }
     611             : 
     612       78113 : static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
     613             :                                           unsigned char *buf, int idx,
     614             :                                           int width)
     615             : {
     616             :     size_t i, j;
     617             : 
     618       78113 :     if (bn_wexpand(b, top) == NULL)
     619             :         return 0;
     620             : 
     621     5077345 :     for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
     622     4999232 :         ((unsigned char *)b->d)[i] = buf[j];
     623             :     }
     624             : 
     625       78113 :     b->top = top;
     626       78113 :     bn_correct_top(b);
     627             :     return 1;
     628             : }
     629             : 
     630             : /*
     631             :  * Given a pointer value, compute the next address that is a cache line
     632             :  * multiple.
     633             :  */
     634             : #define MOD_EXP_CTIME_ALIGN(x_) \
     635             :         ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
     636             : 
     637             : /*
     638             :  * This variant of BN_mod_exp_mont() uses fixed windows and the special
     639             :  * precomputation memory layout to limit data-dependency to a minimum to
     640             :  * protect secret exponents (cf. the hyper-threading timing attacks pointed
     641             :  * out by Colin Percival,
     642             :  * http://www.daemong-consideredperthreading-considered-harmful/)
     643             :  */
     644         762 : int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
     645             :                               const BIGNUM *m, BN_CTX *ctx,
     646             :                               BN_MONT_CTX *in_mont)
     647             : {
     648             :     int i, bits, ret = 0, window, wvalue;
     649             :     int top;
     650             :     BN_MONT_CTX *mont = NULL;
     651             : 
     652             :     int numPowers;
     653             :     unsigned char *powerbufFree = NULL;
     654             :     int powerbufLen = 0;
     655             :     unsigned char *powerbuf = NULL;
     656             :     BIGNUM tmp, am;
     657             : #if defined(SPARC_T4_MONT)
     658             :     unsigned int t4 = 0;
     659             : #endif
     660             : 
     661             :     bn_check_top(a);
     662             :     bn_check_top(p);
     663             :     bn_check_top(m);
     664             : 
     665         762 :     top = m->top;
     666             : 
     667         762 :     if (!(m->d[0] & 1)) {
     668           0 :         BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
     669           0 :         return (0);
     670             :     }
     671         762 :     bits = BN_num_bits(p);
     672         762 :     if (bits == 0) {
     673           0 :         ret = BN_one(rr);
     674           0 :         return ret;
     675             :     }
     676             : 
     677         762 :     BN_CTX_start(ctx);
     678             : 
     679             :     /*
     680             :      * Allocate a montgomery context if it was not supplied by the caller. If
     681             :      * this is not done, things will break in the montgomery part.
     682             :      */
     683         762 :     if (in_mont != NULL)
     684             :         mont = in_mont;
     685             :     else {
     686           0 :         if ((mont = BN_MONT_CTX_new()) == NULL)
     687             :             goto err;
     688           0 :         if (!BN_MONT_CTX_set(mont, m, ctx))
     689             :             goto err;
     690             :     }
     691             : 
     692             : #ifdef RSAZ_ENABLED
     693             :     /*
     694             :      * If the size of the operands allow it, perform the optimized
     695             :      * RSAZ exponentiation. For further information see
     696             :      * crypto/bn/rsaz_exp.c and accompanying assembly modules.
     697             :      */
     698             :     if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024)
     699             :         && rsaz_avx2_eligible()) {
     700             :         if (NULL == bn_wexpand(rr, 16))
     701             :             goto err;
     702             :         RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d,
     703             :                                mont->n0[0]);
     704             :         rr->top = 16;
     705             :         rr->neg = 0;
     706             :         bn_correct_top(rr);
     707             :         ret = 1;
     708             :         goto err;
     709             :     } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
     710             :         if (NULL == bn_wexpand(rr, 8))
     711             :             goto err;
     712             :         RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
     713             :         rr->top = 8;
     714             :         rr->neg = 0;
     715             :         bn_correct_top(rr);
     716             :         ret = 1;
     717             :         goto err;
     718             :     }
     719             : #endif
     720             : 
     721             :     /* Get the window size to use with size of p. */
     722         762 :     window = BN_window_bits_for_ctime_exponent_size(bits);
     723             : #if defined(SPARC_T4_MONT)
     724             :     if (window >= 5 && (top & 15) == 0 && top <= 64 &&
     725             :         (OPENSSL_sparcv9cap_P[1] & (CFR_MONTMUL | CFR_MONTSQR)) ==
     726             :         (CFR_MONTMUL | CFR_MONTSQR) && (t4 = OPENSSL_sparcv9cap_P[0]))
     727             :         window = 5;
     728             :     else
     729             : #endif
     730             : #if defined(OPENSSL_BN_ASM_MONT5)
     731             :     if (window >= 5) {
     732             :         window = 5;             /* ~5% improvement for RSA2048 sign, and even
     733             :                                  * for RSA4096 */
     734             :         if ((top & 7) == 0)
     735             :             powerbufLen += 2 * top * sizeof(m->d[0]);
     736             :     }
     737             : #endif
     738             :     (void)0;
     739             : 
     740             :     /*
     741             :      * Allocate a buffer large enough to hold all of the pre-computed powers
     742             :      * of am, am itself and tmp.
     743             :      */
     744         762 :     numPowers = 1 << window;
     745        1524 :     powerbufLen += sizeof(m->d[0]) * (top * numPowers +
     746             :                                       ((2 * top) >
     747         762 :                                        numPowers ? (2 * top) : numPowers));
     748             : #ifdef alloca
     749         762 :     if (powerbufLen < 3072)
     750         762 :         powerbufFree =
     751         762 :             alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
     752             :     else
     753             : #endif
     754           0 :         if ((powerbufFree =
     755           0 :              (unsigned char *)OPENSSL_malloc(powerbufLen +
     756             :                                              MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH))
     757             :             == NULL)
     758             :         goto err;
     759             : 
     760         762 :     powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
     761         762 :     memset(powerbuf, 0, powerbufLen);
     762             : 
     763             : #ifdef alloca
     764         762 :     if (powerbufLen < 3072)
     765             :         powerbufFree = NULL;
     766             : #endif
     767             : 
     768             :     /* lay down tmp and am right after powers table */
     769         762 :     tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
     770         762 :     am.d = tmp.d + top;
     771         762 :     tmp.top = am.top = 0;
     772         762 :     tmp.dmax = am.dmax = top;
     773         762 :     tmp.neg = am.neg = 0;
     774         762 :     tmp.flags = am.flags = BN_FLG_STATIC_DATA;
     775             : 
     776             :     /* prepare a^0 in Montgomery domain */
     777             : #if 1                           /* by Shay Gueron's suggestion */
     778         762 :     if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
     779             :         /* 2^(top*BN_BITS2) - m */
     780         762 :         tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
     781        6096 :         for (i = 1; i < top; i++)
     782        5334 :             tmp.d[i] = (~m->d[i]) & BN_MASK2;
     783         762 :         tmp.top = top;
     784             :     } else
     785             : #endif
     786           0 :     if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
     787             :         goto err;
     788             : 
     789             :     /* prepare a^1 in Montgomery domain */
     790         762 :     if (a->neg || BN_ucmp(a, m) >= 0) {
     791           0 :         if (!BN_mod(&am, a, m, ctx))
     792             :             goto err;
     793           0 :         if (!BN_to_montgomery(&am, &am, mont, ctx))
     794             :             goto err;
     795         762 :     } else if (!BN_to_montgomery(&am, a, mont, ctx))
     796             :         goto err;
     797             : 
     798             : #if defined(SPARC_T4_MONT)
     799             :     if (t4) {
     800             :         typedef int (*bn_pwr5_mont_f) (BN_ULONG *tp, const BN_ULONG *np,
     801             :                                        const BN_ULONG *n0, const void *table,
     802             :                                        int power, int bits);
     803             :         int bn_pwr5_mont_t4_8(BN_ULONG *tp, const BN_ULONG *np,
     804             :                               const BN_ULONG *n0, const void *table,
     805             :                               int power, int bits);
     806             :         int bn_pwr5_mont_t4_16(BN_ULONG *tp, const BN_ULONG *np,
     807             :                                const BN_ULONG *n0, const void *table,
     808             :                                int power, int bits);
     809             :         int bn_pwr5_mont_t4_24(BN_ULONG *tp, const BN_ULONG *np,
     810             :                                const BN_ULONG *n0, const void *table,
     811             :                                int power, int bits);
     812             :         int bn_pwr5_mont_t4_32(BN_ULONG *tp, const BN_ULONG *np,
     813             :                                const BN_ULONG *n0, const void *table,
     814             :                                int power, int bits);
     815             :         static const bn_pwr5_mont_f pwr5_funcs[4] = {
     816             :             bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16,
     817             :             bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32
     818             :         };
     819             :         bn_pwr5_mont_f pwr5_worker = pwr5_funcs[top / 16 - 1];
     820             : 
     821             :         typedef int (*bn_mul_mont_f) (BN_ULONG *rp, const BN_ULONG *ap,
     822             :                                       const void *bp, const BN_ULONG *np,
     823             :                                       const BN_ULONG *n0);
     824             :         int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const void *bp,
     825             :                              const BN_ULONG *np, const BN_ULONG *n0);
     826             :         int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap,
     827             :                               const void *bp, const BN_ULONG *np,
     828             :                               const BN_ULONG *n0);
     829             :         int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap,
     830             :                               const void *bp, const BN_ULONG *np,
     831             :                               const BN_ULONG *n0);
     832             :         int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap,
     833             :                               const void *bp, const BN_ULONG *np,
     834             :                               const BN_ULONG *n0);
     835             :         static const bn_mul_mont_f mul_funcs[4] = {
     836             :             bn_mul_mont_t4_8, bn_mul_mont_t4_16,
     837             :             bn_mul_mont_t4_24, bn_mul_mont_t4_32
     838             :         };
     839             :         bn_mul_mont_f mul_worker = mul_funcs[top / 16 - 1];
     840             : 
     841             :         void bn_mul_mont_vis3(BN_ULONG *rp, const BN_ULONG *ap,
     842             :                               const void *bp, const BN_ULONG *np,
     843             :                               const BN_ULONG *n0, int num);
     844             :         void bn_mul_mont_t4(BN_ULONG *rp, const BN_ULONG *ap,
     845             :                             const void *bp, const BN_ULONG *np,
     846             :                             const BN_ULONG *n0, int num);
     847             :         void bn_mul_mont_gather5_t4(BN_ULONG *rp, const BN_ULONG *ap,
     848             :                                     const void *table, const BN_ULONG *np,
     849             :                                     const BN_ULONG *n0, int num, int power);
     850             :         void bn_flip_n_scatter5_t4(const BN_ULONG *inp, size_t num,
     851             :                                    void *table, size_t power);
     852             :         void bn_gather5_t4(BN_ULONG *out, size_t num,
     853             :                            void *table, size_t power);
     854             :         void bn_flip_t4(BN_ULONG *dst, BN_ULONG *src, size_t num);
     855             : 
     856             :         BN_ULONG *np = mont->N.d, *n0 = mont->n0;
     857             :         int stride = 5 * (6 - (top / 16 - 1)); /* multiple of 5, but less
     858             :                                                 * than 32 */
     859             : 
     860             :         /*
     861             :          * BN_to_montgomery can contaminate words above .top [in
     862             :          * BN_DEBUG[_DEBUG] build]...
     863             :          */
     864             :         for (i = am.top; i < top; i++)
     865             :             am.d[i] = 0;
     866             :         for (i = tmp.top; i < top; i++)
     867             :             tmp.d[i] = 0;
     868             : 
     869             :         bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 0);
     870             :         bn_flip_n_scatter5_t4(am.d, top, powerbuf, 1);
     871             :         if (!(*mul_worker) (tmp.d, am.d, am.d, np, n0) &&
     872             :             !(*mul_worker) (tmp.d, am.d, am.d, np, n0))
     873             :             bn_mul_mont_vis3(tmp.d, am.d, am.d, np, n0, top);
     874             :         bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, 2);
     875             : 
     876             :         for (i = 3; i < 32; i++) {
     877             :             /* Calculate a^i = a^(i-1) * a */
     878             :             if (!(*mul_worker) (tmp.d, tmp.d, am.d, np, n0) &&
     879             :                 !(*mul_worker) (tmp.d, tmp.d, am.d, np, n0))
     880             :                 bn_mul_mont_vis3(tmp.d, tmp.d, am.d, np, n0, top);
     881             :             bn_flip_n_scatter5_t4(tmp.d, top, powerbuf, i);
     882             :         }
     883             : 
     884             :         /* switch to 64-bit domain */
     885             :         np = alloca(top * sizeof(BN_ULONG));
     886             :         top /= 2;
     887             :         bn_flip_t4(np, mont->N.d, top);
     888             : 
     889             :         bits--;
     890             :         for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
     891             :             wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
     892             :         bn_gather5_t4(tmp.d, top, powerbuf, wvalue);
     893             : 
     894             :         /*
     895             :          * Scan the exponent one window at a time starting from the most
     896             :          * significant bits.
     897             :          */
     898             :         while (bits >= 0) {
     899             :             if (bits < stride)
     900             :                 stride = bits + 1;
     901             :             bits -= stride;
     902             :             wvalue = bn_get_bits(p, bits + 1);
     903             : 
     904             :             if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
     905             :                 continue;
     906             :             /* retry once and fall back */
     907             :             if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride))
     908             :                 continue;
     909             : 
     910             :             bits += stride - 5;
     911             :             wvalue >>= stride - 5;
     912             :             wvalue &= 31;
     913             :             bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
     914             :             bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
     915             :             bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
     916             :             bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
     917             :             bn_mul_mont_t4(tmp.d, tmp.d, tmp.d, np, n0, top);
     918             :             bn_mul_mont_gather5_t4(tmp.d, tmp.d, powerbuf, np, n0, top,
     919             :                                    wvalue);
     920             :         }
     921             : 
     922             :         bn_flip_t4(tmp.d, tmp.d, top);
     923             :         top *= 2;
     924             :         /* back to 32-bit domain */
     925             :         tmp.top = top;
     926             :         bn_correct_top(&tmp);
     927             :         OPENSSL_cleanse(np, top * sizeof(BN_ULONG));
     928             :     } else
     929             : #endif
     930             : #if defined(OPENSSL_BN_ASM_MONT5)
     931             :     if (window == 5 && top > 1) {
     932             :         /*
     933             :          * This optimization uses ideas from http://eprint.iacr.org/2011/239,
     934             :          * specifically optimization of cache-timing attack countermeasures
     935             :          * and pre-computation optimization.
     936             :          */
     937             : 
     938             :         /*
     939             :          * Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
     940             :          * 512-bit RSA is hardly relevant, we omit it to spare size...
     941             :          */
     942             :         void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
     943             :                                  const void *table, const BN_ULONG *np,
     944             :                                  const BN_ULONG *n0, int num, int power);
     945             :         void bn_scatter5(const BN_ULONG *inp, size_t num,
     946             :                          void *table, size_t power);
     947             :         void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
     948             :         void bn_power5(BN_ULONG *rp, const BN_ULONG *ap,
     949             :                        const void *table, const BN_ULONG *np,
     950             :                        const BN_ULONG *n0, int num, int power);
     951             :         int bn_get_bits5(const BN_ULONG *ap, int off);
     952             :         int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
     953             :                                const BN_ULONG *not_used, const BN_ULONG *np,
     954             :                                const BN_ULONG *n0, int num);
     955             : 
     956             :         BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
     957             : 
     958             :         /*
     959             :          * BN_to_montgomery can contaminate words above .top [in
     960             :          * BN_DEBUG[_DEBUG] build]...
     961             :          */
     962             :         for (i = am.top; i < top; i++)
     963             :             am.d[i] = 0;
     964             :         for (i = tmp.top; i < top; i++)
     965             :             tmp.d[i] = 0;
     966             : 
     967             :         if (top & 7)
     968             :             np2 = np;
     969             :         else
     970             :             for (np2 = am.d + top, i = 0; i < top; i++)
     971             :                 np2[2 * i] = np[i];
     972             : 
     973             :         bn_scatter5(tmp.d, top, powerbuf, 0);
     974             :         bn_scatter5(am.d, am.top, powerbuf, 1);
     975             :         bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
     976             :         bn_scatter5(tmp.d, top, powerbuf, 2);
     977             : 
     978             : # if 0
     979             :         for (i = 3; i < 32; i++) {
     980             :             /* Calculate a^i = a^(i-1) * a */
     981             :             bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
     982             :             bn_scatter5(tmp.d, top, powerbuf, i);
     983             :         }
     984             : # else
     985             :         /* same as above, but uses squaring for 1/2 of operations */
     986             :         for (i = 4; i < 32; i *= 2) {
     987             :             bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
     988             :             bn_scatter5(tmp.d, top, powerbuf, i);
     989             :         }
     990             :         for (i = 3; i < 8; i += 2) {
     991             :             int j;
     992             :             bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
     993             :             bn_scatter5(tmp.d, top, powerbuf, i);
     994             :             for (j = 2 * i; j < 32; j *= 2) {
     995             :                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
     996             :                 bn_scatter5(tmp.d, top, powerbuf, j);
     997             :             }
     998             :         }
     999             :         for (; i < 16; i += 2) {
    1000             :             bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
    1001             :             bn_scatter5(tmp.d, top, powerbuf, i);
    1002             :             bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
    1003             :             bn_scatter5(tmp.d, top, powerbuf, 2 * i);
    1004             :         }
    1005             :         for (; i < 32; i += 2) {
    1006             :             bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
    1007             :             bn_scatter5(tmp.d, top, powerbuf, i);
    1008             :         }
    1009             : # endif
    1010             :         bits--;
    1011             :         for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
    1012             :             wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
    1013             :         bn_gather5(tmp.d, top, powerbuf, wvalue);
    1014             : 
    1015             :         /*
    1016             :          * Scan the exponent one window at a time starting from the most
    1017             :          * significant bits.
    1018             :          */
    1019             :         if (top & 7)
    1020             :             while (bits >= 0) {
    1021             :                 for (wvalue = 0, i = 0; i < 5; i++, bits--)
    1022             :                     wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
    1023             : 
    1024             :                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
    1025             :                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
    1026             :                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
    1027             :                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
    1028             :                 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
    1029             :                 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top,
    1030             :                                     wvalue);
    1031             :         } else {
    1032             :             while (bits >= 0) {
    1033             :                 wvalue = bn_get_bits5(p->d, bits - 4);
    1034             :                 bits -= 5;
    1035             :                 bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
    1036             :             }
    1037             :         }
    1038             : 
    1039             :         ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
    1040             :         tmp.top = top;
    1041             :         bn_correct_top(&tmp);
    1042             :         if (ret) {
    1043             :             if (!BN_copy(rr, &tmp))
    1044             :                 ret = 0;
    1045             :             goto err;           /* non-zero ret means it's not error */
    1046             :         }
    1047             :     } else
    1048             : #endif
    1049             :     {
    1050         762 :         if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers))
    1051             :             goto err;
    1052         762 :         if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers))
    1053             :             goto err;
    1054             : 
    1055             :         /*
    1056             :          * If the window size is greater than 1, then calculate
    1057             :          * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) (even
    1058             :          * powers could instead be computed as (a^(i/2))^2 to use the slight
    1059             :          * performance advantage of sqr over mul).
    1060             :          */
    1061         762 :         if (window > 1) {
    1062         762 :             if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
    1063             :                 goto err;
    1064             :             if (!MOD_EXP_CTIME_COPY_TO_PREBUF
    1065         762 :                 (&tmp, top, powerbuf, 2, numPowers))
    1066             :                 goto err;
    1067       22098 :             for (i = 3; i < numPowers; i++) {
    1068             :                 /* Calculate a^i = a^(i-1) * a */
    1069       22098 :                 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
    1070             :                     goto err;
    1071             :                 if (!MOD_EXP_CTIME_COPY_TO_PREBUF
    1072       22098 :                     (&tmp, top, powerbuf, i, numPowers))
    1073             :                     goto err;
    1074             :             }
    1075             :         }
    1076             : 
    1077         762 :         bits--;
    1078        3024 :         for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
    1079        2262 :             wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
    1080         762 :         if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
    1081         762 :             (&tmp, top, powerbuf, wvalue, numPowers))
    1082             :             goto err;
    1083             : 
    1084             :         /*
    1085             :          * Scan the exponent one window at a time starting from the most
    1086             :          * significant bits.
    1087             :          */
    1088       78113 :         while (bits >= 0) {
    1089             :             wvalue = 0;         /* The 'value' of the window */
    1090             : 
    1091             :             /* Scan the window, squaring the result as we go */
    1092      386755 :             for (i = 0; i < window; i++, bits--) {
    1093      386755 :                 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
    1094             :                     goto err;
    1095      386755 :                 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
    1096             :             }
    1097             : 
    1098             :             /*
    1099             :              * Fetch the appropriate pre-computed value from the pre-buf
    1100             :              */
    1101       77351 :             if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
    1102       77351 :                 (&am, top, powerbuf, wvalue, numPowers))
    1103             :                 goto err;
    1104             : 
    1105             :             /* Multiply the result into the intermediate result */
    1106       77351 :             if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
    1107             :                 goto err;
    1108             :         }
    1109             :     }
    1110             : 
    1111             :     /* Convert the final result from montgomery to standard format */
    1112             : #if defined(SPARC_T4_MONT)
    1113             :     if (OPENSSL_sparcv9cap_P[0] & (SPARCV9_VIS3 | SPARCV9_PREFER_FPU)) {
    1114             :         am.d[0] = 1;            /* borrow am */
    1115             :         for (i = 1; i < top; i++)
    1116             :             am.d[i] = 0;
    1117             :         if (!BN_mod_mul_montgomery(rr, &tmp, &am, mont, ctx))
    1118             :             goto err;
    1119             :     } else
    1120             : #endif
    1121         762 :     if (!BN_from_montgomery(rr, &tmp, mont, ctx))
    1122             :         goto err;
    1123             :     ret = 1;
    1124             :  err:
    1125         762 :     if ((in_mont == NULL) && (mont != NULL))
    1126           0 :         BN_MONT_CTX_free(mont);
    1127         762 :     if (powerbuf != NULL) {
    1128         762 :         OPENSSL_cleanse(powerbuf, powerbufLen);
    1129         762 :         if (powerbufFree)
    1130           0 :             OPENSSL_free(powerbufFree);
    1131             :     }
    1132         762 :     BN_CTX_end(ctx);
    1133         762 :     return (ret);
    1134             : }
    1135             : 
    1136           0 : int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
    1137             :                          const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
    1138             : {
    1139             :     BN_MONT_CTX *mont = NULL;
    1140             :     int b, bits, ret = 0;
    1141             :     int r_is_one;
    1142             :     BN_ULONG w, next_w;
    1143             :     BIGNUM *d, *r, *t;
    1144             :     BIGNUM *swap_tmp;
    1145             : #define BN_MOD_MUL_WORD(r, w, m) \
    1146             :                 (BN_mul_word(r, (w)) && \
    1147             :                 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
    1148             :                         (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
    1149             :     /*
    1150             :      * BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
    1151             :      * probably more overhead than always using BN_mod (which uses BN_copy if
    1152             :      * a similar test returns true).
    1153             :      */
    1154             :     /*
    1155             :      * We can use BN_mod and do not need BN_nnmod because our accumulator is
    1156             :      * never negative (the result of BN_mod does not depend on the sign of
    1157             :      * the modulus).
    1158             :      */
    1159             : #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
    1160             :                 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
    1161             : 
    1162           0 :     if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
    1163             :         /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
    1164           0 :         BNerr(BN_F_BN_MOD_EXP_MONT_WORD, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    1165           0 :         return -1;
    1166             :     }
    1167             : 
    1168             :     bn_check_top(p);
    1169             :     bn_check_top(m);
    1170             : 
    1171           0 :     if (!BN_is_odd(m)) {
    1172           0 :         BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
    1173           0 :         return (0);
    1174             :     }
    1175           0 :     if (m->top == 1)
    1176           0 :         a %= m->d[0];           /* make sure that 'a' is reduced */
    1177             : 
    1178           0 :     bits = BN_num_bits(p);
    1179           0 :     if (bits == 0) {
    1180             :         /* x**0 mod 1 is still zero. */
    1181           0 :         if (BN_is_one(m)) {
    1182             :             ret = 1;
    1183           0 :             BN_zero(rr);
    1184             :         } else
    1185           0 :             ret = BN_one(rr);
    1186           0 :         return ret;
    1187             :     }
    1188           0 :     if (a == 0) {
    1189           0 :         BN_zero(rr);
    1190             :         ret = 1;
    1191           0 :         return ret;
    1192             :     }
    1193             : 
    1194           0 :     BN_CTX_start(ctx);
    1195           0 :     d = BN_CTX_get(ctx);
    1196           0 :     r = BN_CTX_get(ctx);
    1197           0 :     t = BN_CTX_get(ctx);
    1198           0 :     if (d == NULL || r == NULL || t == NULL)
    1199             :         goto err;
    1200             : 
    1201           0 :     if (in_mont != NULL)
    1202             :         mont = in_mont;
    1203             :     else {
    1204           0 :         if ((mont = BN_MONT_CTX_new()) == NULL)
    1205             :             goto err;
    1206           0 :         if (!BN_MONT_CTX_set(mont, m, ctx))
    1207             :             goto err;
    1208             :     }
    1209             : 
    1210             :     r_is_one = 1;               /* except for Montgomery factor */
    1211             : 
    1212             :     /* bits-1 >= 0 */
    1213             : 
    1214             :     /* The result is accumulated in the product r*w. */
    1215             :     w = a;                      /* bit 'bits-1' of 'p' is always set */
    1216           0 :     for (b = bits - 2; b >= 0; b--) {
    1217             :         /* First, square r*w. */
    1218           0 :         next_w = w * w;
    1219           0 :         if ((next_w / w) != w) { /* overflow */
    1220           0 :             if (r_is_one) {
    1221           0 :                 if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
    1222             :                     goto err;
    1223             :                 r_is_one = 0;
    1224             :             } else {
    1225           0 :                 if (!BN_MOD_MUL_WORD(r, w, m))
    1226             :                     goto err;
    1227             :             }
    1228             :             next_w = 1;
    1229             :         }
    1230             :         w = next_w;
    1231           0 :         if (!r_is_one) {
    1232           0 :             if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
    1233             :                 goto err;
    1234             :         }
    1235             : 
    1236             :         /* Second, multiply r*w by 'a' if exponent bit is set. */
    1237           0 :         if (BN_is_bit_set(p, b)) {
    1238           0 :             next_w = w * a;
    1239           0 :             if ((next_w / a) != w) { /* overflow */
    1240           0 :                 if (r_is_one) {
    1241           0 :                     if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
    1242             :                         goto err;
    1243             :                     r_is_one = 0;
    1244             :                 } else {
    1245           0 :                     if (!BN_MOD_MUL_WORD(r, w, m))
    1246             :                         goto err;
    1247             :                 }
    1248             :                 next_w = a;
    1249             :             }
    1250             :             w = next_w;
    1251             :         }
    1252             :     }
    1253             : 
    1254             :     /* Finally, set r:=r*w. */
    1255           0 :     if (w != 1) {
    1256           0 :         if (r_is_one) {
    1257           0 :             if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
    1258             :                 goto err;
    1259             :             r_is_one = 0;
    1260             :         } else {
    1261           0 :             if (!BN_MOD_MUL_WORD(r, w, m))
    1262             :                 goto err;
    1263             :         }
    1264             :     }
    1265             : 
    1266           0 :     if (r_is_one) {             /* can happen only if a == 1 */
    1267           0 :         if (!BN_one(rr))
    1268             :             goto err;
    1269             :     } else {
    1270           0 :         if (!BN_from_montgomery(rr, r, mont, ctx))
    1271             :             goto err;
    1272             :     }
    1273             :     ret = 1;
    1274             :  err:
    1275           0 :     if ((in_mont == NULL) && (mont != NULL))
    1276           0 :         BN_MONT_CTX_free(mont);
    1277           0 :     BN_CTX_end(ctx);
    1278             :     bn_check_top(rr);
    1279           0 :     return (ret);
    1280             : }
    1281             : 
    1282             : /* The old fallback, simple version :-) */
    1283           0 : int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
    1284             :                       const BIGNUM *m, BN_CTX *ctx)
    1285             : {
    1286             :     int i, j, bits, ret = 0, wstart, wend, window, wvalue;
    1287             :     int start = 1;
    1288             :     BIGNUM *d;
    1289             :     /* Table of variables obtained from 'ctx' */
    1290             :     BIGNUM *val[TABLE_SIZE];
    1291             : 
    1292           0 :     if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
    1293             :         /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
    1294           0 :         BNerr(BN_F_BN_MOD_EXP_SIMPLE, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    1295           0 :         return -1;
    1296             :     }
    1297             : 
    1298           0 :     bits = BN_num_bits(p);
    1299             : 
    1300           0 :     if (bits == 0) {
    1301           0 :         ret = BN_one(r);
    1302           0 :         return ret;
    1303             :     }
    1304             : 
    1305           0 :     BN_CTX_start(ctx);
    1306           0 :     d = BN_CTX_get(ctx);
    1307           0 :     val[0] = BN_CTX_get(ctx);
    1308           0 :     if (!d || !val[0])
    1309             :         goto err;
    1310             : 
    1311           0 :     if (!BN_nnmod(val[0], a, m, ctx))
    1312             :         goto err;               /* 1 */
    1313           0 :     if (BN_is_zero(val[0])) {
    1314           0 :         BN_zero(r);
    1315             :         ret = 1;
    1316           0 :         goto err;
    1317             :     }
    1318             : 
    1319           0 :     window = BN_window_bits_for_exponent_size(bits);
    1320           0 :     if (window > 1) {
    1321           0 :         if (!BN_mod_mul(d, val[0], val[0], m, ctx))
    1322             :             goto err;           /* 2 */
    1323           0 :         j = 1 << (window - 1);
    1324           0 :         for (i = 1; i < j; i++) {
    1325           0 :             if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
    1326           0 :                 !BN_mod_mul(val[i], val[i - 1], d, m, ctx))
    1327             :                 goto err;
    1328             :         }
    1329             :     }
    1330             : 
    1331             :     start = 1;                  /* This is used to avoid multiplication etc
    1332             :                                  * when there is only the value '1' in the
    1333             :                                  * buffer. */
    1334             :     wvalue = 0;                 /* The 'value' of the window */
    1335           0 :     wstart = bits - 1;          /* The top bit of the window */
    1336             :     wend = 0;                   /* The bottom bit of the window */
    1337             : 
    1338           0 :     if (!BN_one(r))
    1339             :         goto err;
    1340             : 
    1341             :     for (;;) {
    1342           0 :         if (BN_is_bit_set(p, wstart) == 0) {
    1343           0 :             if (!start)
    1344           0 :                 if (!BN_mod_mul(r, r, r, m, ctx))
    1345             :                     goto err;
    1346           0 :             if (wstart == 0)
    1347             :                 break;
    1348           0 :             wstart--;
    1349           0 :             continue;
    1350             :         }
    1351             :         /*
    1352             :          * We now have wstart on a 'set' bit, we now need to work out how bit
    1353             :          * a window to do.  To do this we need to scan forward until the last
    1354             :          * set bit before the end of the window
    1355             :          */
    1356             :         j = wstart;
    1357             :         wvalue = 1;
    1358             :         wend = 0;
    1359           0 :         for (i = 1; i < window; i++) {
    1360           0 :             if (wstart - i < 0)
    1361             :                 break;
    1362           0 :             if (BN_is_bit_set(p, wstart - i)) {
    1363           0 :                 wvalue <<= (i - wend);
    1364           0 :                 wvalue |= 1;
    1365             :                 wend = i;
    1366             :             }
    1367             :         }
    1368             : 
    1369             :         /* wend is the size of the current window */
    1370             :         j = wend + 1;
    1371             :         /* add the 'bytes above' */
    1372           0 :         if (!start)
    1373           0 :             for (i = 0; i < j; i++) {
    1374           0 :                 if (!BN_mod_mul(r, r, r, m, ctx))
    1375             :                     goto err;
    1376             :             }
    1377             : 
    1378             :         /* wvalue will be an odd number < 2^window */
    1379           0 :         if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
    1380             :             goto err;
    1381             : 
    1382             :         /* move the 'window' down further */
    1383           0 :         wstart -= wend + 1;
    1384             :         wvalue = 0;
    1385             :         start = 0;
    1386           0 :         if (wstart < 0)
    1387             :             break;
    1388             :     }
    1389             :     ret = 1;
    1390             :  err:
    1391           0 :     BN_CTX_end(ctx);
    1392             :     bn_check_top(r);
    1393           0 :     return (ret);
    1394             : }

Generated by: LCOV version 1.10