Blame - src/tls/libtommath.c - android_external_wpa_supplicant_8

blob: 4f7a14823d725d6fc80877bad78ac07b385ac134 [file] [log] [blame]

Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1	/*
				2	* Minimal code for RSA support from LibTomMath 0.41
				3	* http://libtom.org/
				4	* http://libtom.org/files/ltm-0.41.tar.bz2
				5	* This library was released in public domain by Tom St Denis.
				6	*
				7	* The combination in this file may not use all of the optimized algorithms
				8	* from LibTomMath and may be considerable slower than the LibTomMath with its
				9	* default settings. The main purpose of having this version here is to make it
				10	* easier to build bignum.c wrapper without having to install and build an
				11	* external library.
				12	*
				13	* If CONFIG_INTERNAL_LIBTOMMATH is defined, bignum.c includes this
				14	* libtommath.c file instead of using the external LibTomMath library.
				15	*/
				16
				17	#ifndef CHAR_BIT
				18	#define CHAR_BIT 8
				19	#endif
				20
				21	#define BN_MP_INVMOD_C
				22	#define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would
				23	* require BN_MP_EXPTMOD_FAST_C instead */
				24	#define BN_S_MP_MUL_DIGS_C
				25	#define BN_MP_INVMOD_SLOW_C
				26	#define BN_S_MP_SQR_C
				27	#define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this
				28	* would require other than mp_reduce */
				29
				30	#ifdef LTM_FAST
				31
				32	/* Use faster div at the cost of about 1 kB */
				33	#define BN_MP_MUL_D_C
				34
				35	/* Include faster exptmod (Montgomery) at the cost of about 2.5 kB in code */
				36	#define BN_MP_EXPTMOD_FAST_C
				37	#define BN_MP_MONTGOMERY_SETUP_C
				38	#define BN_FAST_MP_MONTGOMERY_REDUCE_C
				39	#define BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
				40	#define BN_MP_MUL_2_C
				41
				42	/* Include faster sqr at the cost of about 0.5 kB in code */
				43	#define BN_FAST_S_MP_SQR_C
				44
Dmitry Shmidt	700a137	2013-03-15 14:14:44 -0700	[diff] [blame]	45	/* About 0.25 kB of code, but ~1.7kB of stack space! */
				46	#define BN_FAST_S_MP_MUL_DIGS_C
				47
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	48	#else /* LTM_FAST */
				49
				50	#define BN_MP_DIV_SMALL
				51	#define BN_MP_INIT_MULTI_C
				52	#define BN_MP_CLEAR_MULTI_C
				53	#define BN_MP_ABS_C
				54	#endif /* LTM_FAST */
				55
				56	/* Current uses do not require support for negative exponent in exptmod, so we
				57	* can save about 1.5 kB in leaving out invmod. */
				58	#define LTM_NO_NEG_EXP
				59
				60	/* from tommath.h */
				61
				62	#ifndef MIN
				63	#define MIN(x,y) ((x)<(y)?(x):(y))
				64	#endif
				65
				66	#ifndef MAX
				67	#define MAX(x,y) ((x)>(y)?(x):(y))
				68	#endif
				69
				70	#define OPT_CAST(x)
				71
Dmitry Shmidt	0494959	2012-07-19 12:16:46 -0700	[diff] [blame]	72	#ifdef __x86_64__
				73	typedef unsigned long mp_digit;
				74	typedef unsigned long mp_word __attribute__((mode(TI)));
				75
				76	#define DIGIT_BIT 60
				77	#define MP_64BIT
				78	#else
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	79	typedef unsigned long mp_digit;
				80	typedef u64 mp_word;
				81
				82	#define DIGIT_BIT 28
				83	#define MP_28BIT
Dmitry Shmidt	0494959	2012-07-19 12:16:46 -0700	[diff] [blame]	84	#endif
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	85
				86
				87	#define XMALLOC os_malloc
				88	#define XFREE os_free
				89	#define XREALLOC os_realloc
				90
				91
				92	#define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
				93
				94	#define MP_LT -1 /* less than */
				95	#define MP_EQ 0 /* equal to */
				96	#define MP_GT 1 /* greater than */
				97
				98	#define MP_ZPOS 0 /* positive integer */
				99	#define MP_NEG 1 /* negative */
				100
				101	#define MP_OKAY 0 /* ok result */
				102	#define MP_MEM -2 /* out of mem */
				103	#define MP_VAL -3 /* invalid input */
				104
				105	#define MP_YES 1 /* yes response */
				106	#define MP_NO 0 /* no response */
				107
				108	typedef int mp_err;
				109
				110	/* define this to use lower memory usage routines (exptmods mostly) */
				111	#define MP_LOW_MEM
				112
				113	/* default precision */
				114	#ifndef MP_PREC
				115	#ifndef MP_LOW_MEM
				116	#define MP_PREC 32 /* default digits of precision */
				117	#else
				118	#define MP_PREC 8 /* default digits of precision */
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	119	#endif
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	120	#endif
				121
				122	/* size of comba arrays, should be at least 2 * 2*(BITS_PER_WORD - BITS_PER_DIGIT2) */
				123	#define MP_WARRAY (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
				124
				125	/* the infamous mp_int structure */
				126	typedef struct {
				127	int used, alloc, sign;
				128	mp_digit *dp;
				129	} mp_int;
				130
				131
				132	/* ---> Basic Manipulations <--- */
				133	#define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
				134	#define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
				135	#define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
				136
				137
				138	/* prototypes for copied functions */
				139	#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
				140	static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
				141	static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
				142	static int s_mp_sqr(mp_int * a, mp_int * b);
				143	static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs);
				144
Dmitry Shmidt	700a137	2013-03-15 14:14:44 -0700	[diff] [blame]	145	#ifdef BN_FAST_S_MP_MUL_DIGS_C
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	146	static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
Dmitry Shmidt	700a137	2013-03-15 14:14:44 -0700	[diff] [blame]	147	#endif
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	148
				149	#ifdef BN_MP_INIT_MULTI_C
				150	static int mp_init_multi(mp_int *mp, ...);
				151	#endif
				152	#ifdef BN_MP_CLEAR_MULTI_C
				153	static void mp_clear_multi(mp_int *mp, ...);
				154	#endif
				155	static int mp_lshd(mp_int * a, int b);
				156	static void mp_set(mp_int * a, mp_digit b);
				157	static void mp_clamp(mp_int * a);
				158	static void mp_exch(mp_int * a, mp_int * b);
				159	static void mp_rshd(mp_int * a, int b);
				160	static void mp_zero(mp_int * a);
				161	static int mp_mod_2d(mp_int * a, int b, mp_int * c);
				162	static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d);
				163	static int mp_init_copy(mp_int * a, mp_int * b);
				164	static int mp_mul_2d(mp_int * a, int b, mp_int * c);
				165	#ifndef LTM_NO_NEG_EXP
				166	static int mp_div_2(mp_int * a, mp_int * b);
				167	static int mp_invmod(mp_int * a, mp_int * b, mp_int * c);
				168	static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c);
				169	#endif /* LTM_NO_NEG_EXP */
				170	static int mp_copy(mp_int * a, mp_int * b);
				171	static int mp_count_bits(mp_int * a);
				172	static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d);
				173	static int mp_mod(mp_int * a, mp_int * b, mp_int * c);
				174	static int mp_grow(mp_int * a, int size);
				175	static int mp_cmp_mag(mp_int * a, mp_int * b);
				176	#ifdef BN_MP_ABS_C
				177	static int mp_abs(mp_int * a, mp_int * b);
				178	#endif
				179	static int mp_sqr(mp_int * a, mp_int * b);
				180	static int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d);
				181	static int mp_reduce_2k_setup_l(mp_int a, mp_int d);
				182	static int mp_2expt(mp_int * a, int b);
				183	static int mp_reduce_setup(mp_int * a, mp_int * b);
				184	static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu);
				185	static int mp_init_size(mp_int * a, int size);
				186	#ifdef BN_MP_EXPTMOD_FAST_C
				187	static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
				188	#endif /* BN_MP_EXPTMOD_FAST_C */
				189	#ifdef BN_FAST_S_MP_SQR_C
				190	static int fast_s_mp_sqr (mp_int * a, mp_int * b);
				191	#endif /* BN_FAST_S_MP_SQR_C */
				192	#ifdef BN_MP_MUL_D_C
				193	static int mp_mul_d (mp_int * a, mp_digit b, mp_int * c);
				194	#endif /* BN_MP_MUL_D_C */
				195
				196
				197
				198	/* functions from bn_<func name>.c */
				199
				200
				201	/* reverse an array, used for radix code */
				202	static void bn_reverse (unsigned char *s, int len)
				203	{
				204	int ix, iy;
				205	unsigned char t;
				206
				207	ix = 0;
				208	iy = len - 1;
				209	while (ix < iy) {
				210	t = s[ix];
				211	s[ix] = s[iy];
				212	s[iy] = t;
				213	++ix;
				214	--iy;
				215	}
				216	}
				217
				218
				219	/* low level addition, based on HAC pp.594, Algorithm 14.7 */
				220	static int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
				221	{
				222	mp_int *x;
				223	int olduse, res, min, max;
				224
				225	/* find sizes, we let \|a\| <= \|b\| which means we have to sort
				226	* them. "x" will point to the input with the most digits
				227	*/
				228	if (a->used > b->used) {
				229	min = b->used;
				230	max = a->used;
				231	x = a;
				232	} else {
				233	min = a->used;
				234	max = b->used;
				235	x = b;
				236	}
				237
				238	/* init result */
				239	if (c->alloc < max + 1) {
				240	if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
				241	return res;
				242	}
				243	}
				244
				245	/* get old used digit count and set new one */
				246	olduse = c->used;
				247	c->used = max + 1;
				248
				249	{
				250	register mp_digit u, tmpa, tmpb, *tmpc;
				251	register int i;
				252
				253	/* alias for digit pointers */
				254
				255	/* first input */
				256	tmpa = a->dp;
				257
				258	/* second input */
				259	tmpb = b->dp;
				260
				261	/* destination */
				262	tmpc = c->dp;
				263
				264	/* zero the carry */
				265	u = 0;
				266	for (i = 0; i < min; i++) {
				267	/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
				268	tmpc = tmpa++ + *tmpb++ + u;
				269
				270	/* U = carry bit of T[i] */
				271	u = *tmpc >> ((mp_digit)DIGIT_BIT);
				272
				273	/* take away carry bit from T[i] */
				274	*tmpc++ &= MP_MASK;
				275	}
				276
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	277	/* now copy higher words if any, that is in A+B
				278	* if A or B has more digits add those in
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	279	*/
				280	if (min != max) {
				281	for (; i < max; i++) {
				282	/* T[i] = X[i] + U */
				283	*tmpc = x->dp[i] + u;
				284
				285	/* U = carry bit of T[i] */
				286	u = *tmpc >> ((mp_digit)DIGIT_BIT);
				287
				288	/* take away carry bit from T[i] */
				289	*tmpc++ &= MP_MASK;
				290	}
				291	}
				292
				293	/* add carry */
				294	*tmpc++ = u;
				295
				296	/* clear digits above oldused */
				297	for (i = c->used; i < olduse; i++) {
				298	*tmpc++ = 0;
				299	}
				300	}
				301
				302	mp_clamp (c);
				303	return MP_OKAY;
				304	}
				305
				306
				307	/* low level subtraction (assumes \|a\| > \|b\|), HAC pp.595 Algorithm 14.9 */
				308	static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
				309	{
				310	int olduse, res, min, max;
				311
				312	/* find sizes */
				313	min = b->used;
				314	max = a->used;
				315
				316	/* init result */
				317	if (c->alloc < max) {
				318	if ((res = mp_grow (c, max)) != MP_OKAY) {
				319	return res;
				320	}
				321	}
				322	olduse = c->used;
				323	c->used = max;
				324
				325	{
				326	register mp_digit u, tmpa, tmpb, *tmpc;
				327	register int i;
				328
				329	/* alias for digit pointers */
				330	tmpa = a->dp;
				331	tmpb = b->dp;
				332	tmpc = c->dp;
				333
				334	/* set carry to zero */
				335	u = 0;
				336	for (i = 0; i < min; i++) {
				337	/* T[i] = A[i] - B[i] - U */
				338	tmpc = tmpa++ - *tmpb++ - u;
				339
				340	/* U = carry bit of T[i]
				341	* Note this saves performing an AND operation since
				342	* if a carry does occur it will propagate all the way to the
				343	* MSB. As a result a single shift is enough to get the carry
				344	*/
				345	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
				346
				347	/* Clear carry from T[i] */
				348	*tmpc++ &= MP_MASK;
				349	}
				350
				351	/* now copy higher words if any, e.g. if A has more digits than B */
				352	for (; i < max; i++) {
				353	/* T[i] = A[i] - U */
				354	tmpc = tmpa++ - u;
				355
				356	/* U = carry bit of T[i] */
				357	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
				358
				359	/* Clear carry from T[i] */
				360	*tmpc++ &= MP_MASK;
				361	}
				362
				363	/* clear digits above used (since we may not have grown result above) */
				364	for (i = c->used; i < olduse; i++) {
				365	*tmpc++ = 0;
				366	}
				367	}
				368
				369	mp_clamp (c);
				370	return MP_OKAY;
				371	}
				372
				373
				374	/* init a new mp_int */
				375	static int mp_init (mp_int * a)
				376	{
				377	int i;
				378
				379	/* allocate memory required and clear it */
				380	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
				381	if (a->dp == NULL) {
				382	return MP_MEM;
				383	}
				384
				385	/* set the digits to zero */
				386	for (i = 0; i < MP_PREC; i++) {
				387	a->dp[i] = 0;
				388	}
				389
				390	/* set the used to zero, allocated digits to the default precision
				391	* and sign to positive */
				392	a->used = 0;
				393	a->alloc = MP_PREC;
				394	a->sign = MP_ZPOS;
				395
				396	return MP_OKAY;
				397	}
				398
				399
				400	/* clear one (frees) */
				401	static void mp_clear (mp_int * a)
				402	{
				403	int i;
				404
				405	/* only do anything if a hasn't been freed previously */
				406	if (a->dp != NULL) {
				407	/* first zero the digits */
				408	for (i = 0; i < a->used; i++) {
				409	a->dp[i] = 0;
				410	}
				411
				412	/* free ram */
				413	XFREE(a->dp);
				414
				415	/* reset members to make debugging easier */
				416	a->dp = NULL;
				417	a->alloc = a->used = 0;
				418	a->sign = MP_ZPOS;
				419	}
				420	}
				421
				422
				423	/* high level addition (handles signs) */
				424	static int mp_add (mp_int * a, mp_int * b, mp_int * c)
				425	{
				426	int sa, sb, res;
				427
				428	/* get sign of both inputs */
				429	sa = a->sign;
				430	sb = b->sign;
				431
				432	/* handle two cases, not four */
				433	if (sa == sb) {
				434	/* both positive or both negative */
				435	/* add their magnitudes, copy the sign */
				436	c->sign = sa;
				437	res = s_mp_add (a, b, c);
				438	} else {
				439	/* one positive, the other negative */
				440	/* subtract the one with the greater magnitude from */
				441	/* the one of the lesser magnitude. The result gets */
				442	/* the sign of the one with the greater magnitude. */
				443	if (mp_cmp_mag (a, b) == MP_LT) {
				444	c->sign = sb;
				445	res = s_mp_sub (b, a, c);
				446	} else {
				447	c->sign = sa;
				448	res = s_mp_sub (a, b, c);
				449	}
				450	}
				451	return res;
				452	}
				453
				454
				455	/* high level subtraction (handles signs) */
				456	static int mp_sub (mp_int * a, mp_int * b, mp_int * c)
				457	{
				458	int sa, sb, res;
				459
				460	sa = a->sign;
				461	sb = b->sign;
				462
				463	if (sa != sb) {
				464	/* subtract a negative from a positive, OR */
				465	/* subtract a positive from a negative. */
				466	/* In either case, ADD their magnitudes, */
				467	/* and use the sign of the first number. */
				468	c->sign = sa;
				469	res = s_mp_add (a, b, c);
				470	} else {
				471	/* subtract a positive from a positive, OR */
				472	/* subtract a negative from a negative. */
				473	/* First, take the difference between their */
				474	/* magnitudes, then... */
				475	if (mp_cmp_mag (a, b) != MP_LT) {
				476	/* Copy the sign from the first */
				477	c->sign = sa;
				478	/* The first has a larger or equal magnitude */
				479	res = s_mp_sub (a, b, c);
				480	} else {
				481	/* The result has the opposite sign from */
				482	/* the first number. */
				483	c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
				484	/* The second has a larger magnitude */
				485	res = s_mp_sub (b, a, c);
				486	}
				487	}
				488	return res;
				489	}
				490
				491
				492	/* high level multiplication (handles sign) */
				493	static int mp_mul (mp_int * a, mp_int * b, mp_int * c)
				494	{
				495	int res, neg;
				496	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
				497
				498	/* use Toom-Cook? */
				499	#ifdef BN_MP_TOOM_MUL_C
				500	if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
				501	res = mp_toom_mul(a, b, c);
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	502	} else
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	503	#endif
				504	#ifdef BN_MP_KARATSUBA_MUL_C
				505	/* use Karatsuba? */
				506	if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
				507	res = mp_karatsuba_mul (a, b, c);
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	508	} else
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	509	#endif
				510	{
				511	/* can we use the fast multiplier?
				512	*
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	513	* The fast multiplier can be used if the output will
				514	* have less than MP_WARRAY digits and the number of
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	515	* digits won't affect carry propagation
				516	*/
				517	#ifdef BN_FAST_S_MP_MUL_DIGS_C
				518	int digs = a->used + b->used + 1;
				519
				520	if ((digs < MP_WARRAY) &&
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	521	MIN(a->used, b->used) <=
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	522	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				523	res = fast_s_mp_mul_digs (a, b, c, digs);
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	524	} else
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	525	#endif
				526	#ifdef BN_S_MP_MUL_DIGS_C
				527	res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
				528	#else
				529	#error mp_mul could fail
				530	res = MP_VAL;
				531	#endif
				532
				533	}
				534	c->sign = (c->used > 0) ? neg : MP_ZPOS;
				535	return res;
				536	}
				537
				538
				539	/* d = a * b (mod c) */
				540	static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
				541	{
				542	int res;
				543	mp_int t;
				544
				545	if ((res = mp_init (&t)) != MP_OKAY) {
				546	return res;
				547	}
				548
				549	if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
				550	mp_clear (&t);
				551	return res;
				552	}
				553	res = mp_mod (&t, c, d);
				554	mp_clear (&t);
				555	return res;
				556	}
				557
				558
				559	/* c = a mod b, 0 <= c < b */
				560	static int mp_mod (mp_int * a, mp_int * b, mp_int * c)
				561	{
				562	mp_int t;
				563	int res;
				564
				565	if ((res = mp_init (&t)) != MP_OKAY) {
				566	return res;
				567	}
				568
				569	if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
				570	mp_clear (&t);
				571	return res;
				572	}
				573
				574	if (t.sign != b->sign) {
				575	res = mp_add (b, &t, c);
				576	} else {
				577	res = MP_OKAY;
				578	mp_exch (&t, c);
				579	}
				580
				581	mp_clear (&t);
				582	return res;
				583	}
				584
				585
				586	/* this is a shell function that calls either the normal or Montgomery
				587	* exptmod functions. Originally the call to the montgomery code was
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	588	* embedded in the normal function but that wasted a lot of stack space
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	589	* for nothing (since 99% of the time the Montgomery code would be called)
				590	*/
				591	static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
				592	{
				593	int dr;
				594
				595	/* modulus P must be positive */
				596	if (P->sign == MP_NEG) {
				597	return MP_VAL;
				598	}
				599
				600	/* if exponent X is negative we have to recurse */
				601	if (X->sign == MP_NEG) {
				602	#ifdef LTM_NO_NEG_EXP
				603	return MP_VAL;
				604	#else /* LTM_NO_NEG_EXP */
				605	#ifdef BN_MP_INVMOD_C
				606	mp_int tmpG, tmpX;
				607	int err;
				608
				609	/* first compute 1/G mod P */
				610	if ((err = mp_init(&tmpG)) != MP_OKAY) {
				611	return err;
				612	}
				613	if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
				614	mp_clear(&tmpG);
				615	return err;
				616	}
				617
				618	/* now get \|X\| */
				619	if ((err = mp_init(&tmpX)) != MP_OKAY) {
				620	mp_clear(&tmpG);
				621	return err;
				622	}
				623	if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
				624	mp_clear_multi(&tmpG, &tmpX, NULL);
				625	return err;
				626	}
				627
				628	/* and now compute (1/G)\|X\| instead of GX [X < 0] */
				629	err = mp_exptmod(&tmpG, &tmpX, P, Y);
				630	mp_clear_multi(&tmpG, &tmpX, NULL);
				631	return err;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	632	#else
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	633	#error mp_exptmod would always fail
				634	/* no invmod */
				635	return MP_VAL;
				636	#endif
				637	#endif /* LTM_NO_NEG_EXP */
				638	}
				639
				640	/* modified diminished radix reduction */
				641	#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C)
				642	if (mp_reduce_is_2k_l(P) == MP_YES) {
				643	return s_mp_exptmod(G, X, P, Y, 1);
				644	}
				645	#endif
				646
				647	#ifdef BN_MP_DR_IS_MODULUS_C
				648	/* is it a DR modulus? */
				649	dr = mp_dr_is_modulus(P);
				650	#else
				651	/* default to no */
				652	dr = 0;
				653	#endif
				654
				655	#ifdef BN_MP_REDUCE_IS_2K_C
				656	/* if not, is it a unrestricted DR modulus? */
				657	if (dr == 0) {
				658	dr = mp_reduce_is_2k(P) << 1;
				659	}
				660	#endif
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	661
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	662	/* if the modulus is odd or dr != 0 use the montgomery method */
				663	#ifdef BN_MP_EXPTMOD_FAST_C
				664	if (mp_isodd (P) == 1 \|\| dr != 0) {
				665	return mp_exptmod_fast (G, X, P, Y, dr);
				666	} else {
				667	#endif
				668	#ifdef BN_S_MP_EXPTMOD_C
				669	/* otherwise use the generic Barrett reduction technique */
				670	return s_mp_exptmod (G, X, P, Y, 0);
				671	#else
				672	#error mp_exptmod could fail
				673	/* no exptmod for evens */
				674	return MP_VAL;
				675	#endif
				676	#ifdef BN_MP_EXPTMOD_FAST_C
				677	}
				678	#endif
Dmitry Shmidt	700a137	2013-03-15 14:14:44 -0700	[diff] [blame]	679	if (dr == 0) {
				680	/* avoid compiler warnings about possibly unused variable */
				681	}
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	682	}
				683
				684
				685	/* compare two ints (signed)*/
				686	static int mp_cmp (mp_int * a, mp_int * b)
				687	{
				688	/* compare based on sign */
				689	if (a->sign != b->sign) {
				690	if (a->sign == MP_NEG) {
				691	return MP_LT;
				692	} else {
				693	return MP_GT;
				694	}
				695	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	696
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	697	/* compare digits */
				698	if (a->sign == MP_NEG) {
				699	/* if negative compare opposite direction */
				700	return mp_cmp_mag(b, a);
				701	} else {
				702	return mp_cmp_mag(a, b);
				703	}
				704	}
				705
				706
				707	/* compare a digit */
				708	static int mp_cmp_d(mp_int * a, mp_digit b)
				709	{
				710	/* compare based on sign */
				711	if (a->sign == MP_NEG) {
				712	return MP_LT;
				713	}
				714
				715	/* compare based on magnitude */
				716	if (a->used > 1) {
				717	return MP_GT;
				718	}
				719
				720	/* compare the only digit of a to b */
				721	if (a->dp[0] > b) {
				722	return MP_GT;
				723	} else if (a->dp[0] < b) {
				724	return MP_LT;
				725	} else {
				726	return MP_EQ;
				727	}
				728	}
				729
				730
				731	#ifndef LTM_NO_NEG_EXP
				732	/* hac 14.61, pp608 */
				733	static int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
				734	{
				735	/* b cannot be negative */
				736	if (b->sign == MP_NEG \|\| mp_iszero(b) == 1) {
				737	return MP_VAL;
				738	}
				739
				740	#ifdef BN_FAST_MP_INVMOD_C
				741	/* if the modulus is odd we can use a faster routine instead */
				742	if (mp_isodd (b) == 1) {
				743	return fast_mp_invmod (a, b, c);
				744	}
				745	#endif
				746
				747	#ifdef BN_MP_INVMOD_SLOW_C
				748	return mp_invmod_slow(a, b, c);
				749	#endif
				750
				751	#ifndef BN_FAST_MP_INVMOD_C
				752	#ifndef BN_MP_INVMOD_SLOW_C
				753	#error mp_invmod would always fail
				754	#endif
				755	#endif
				756	return MP_VAL;
				757	}
				758	#endif /* LTM_NO_NEG_EXP */
				759
				760
				761	/* get the size for an unsigned equivalent */
				762	static int mp_unsigned_bin_size (mp_int * a)
				763	{
				764	int size = mp_count_bits (a);
				765	return (size / 8 + ((size & 7) != 0 ? 1 : 0));
				766	}
				767
				768
				769	#ifndef LTM_NO_NEG_EXP
				770	/* hac 14.61, pp608 */
				771	static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
				772	{
				773	mp_int x, y, u, v, A, B, C, D;
				774	int res;
				775
				776	/* b cannot be negative */
				777	if (b->sign == MP_NEG \|\| mp_iszero(b) == 1) {
				778	return MP_VAL;
				779	}
				780
				781	/* init temps */
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	782	if ((res = mp_init_multi(&x, &y, &u, &v,
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	783	&A, &B, &C, &D, NULL)) != MP_OKAY) {
				784	return res;
				785	}
				786
				787	/* x = a, y = b */
				788	if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
				789	goto LBL_ERR;
				790	}
				791	if ((res = mp_copy (b, &y)) != MP_OKAY) {
				792	goto LBL_ERR;
				793	}
				794
				795	/* 2. [modified] if x,y are both even then return an error! */
				796	if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
				797	res = MP_VAL;
				798	goto LBL_ERR;
				799	}
				800
				801	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
				802	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
				803	goto LBL_ERR;
				804	}
				805	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
				806	goto LBL_ERR;
				807	}
				808	mp_set (&A, 1);
				809	mp_set (&D, 1);
				810
				811	top:
				812	/* 4. while u is even do */
				813	while (mp_iseven (&u) == 1) {
				814	/* 4.1 u = u/2 */
				815	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
				816	goto LBL_ERR;
				817	}
				818	/* 4.2 if A or B is odd then */
				819	if (mp_isodd (&A) == 1 \|\| mp_isodd (&B) == 1) {
				820	/* A = (A+y)/2, B = (B-x)/2 */
				821	if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
				822	goto LBL_ERR;
				823	}
				824	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
				825	goto LBL_ERR;
				826	}
				827	}
				828	/* A = A/2, B = B/2 */
				829	if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
				830	goto LBL_ERR;
				831	}
				832	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
				833	goto LBL_ERR;
				834	}
				835	}
				836
				837	/* 5. while v is even do */
				838	while (mp_iseven (&v) == 1) {
				839	/* 5.1 v = v/2 */
				840	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
				841	goto LBL_ERR;
				842	}
				843	/* 5.2 if C or D is odd then */
				844	if (mp_isodd (&C) == 1 \|\| mp_isodd (&D) == 1) {
				845	/* C = (C+y)/2, D = (D-x)/2 */
				846	if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
				847	goto LBL_ERR;
				848	}
				849	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
				850	goto LBL_ERR;
				851	}
				852	}
				853	/* C = C/2, D = D/2 */
				854	if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
				855	goto LBL_ERR;
				856	}
				857	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
				858	goto LBL_ERR;
				859	}
				860	}
				861
				862	/* 6. if u >= v then */
				863	if (mp_cmp (&u, &v) != MP_LT) {
				864	/* u = u - v, A = A - C, B = B - D */
				865	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
				866	goto LBL_ERR;
				867	}
				868
				869	if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
				870	goto LBL_ERR;
				871	}
				872
				873	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
				874	goto LBL_ERR;
				875	}
				876	} else {
				877	/* v - v - u, C = C - A, D = D - B */
				878	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
				879	goto LBL_ERR;
				880	}
				881
				882	if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
				883	goto LBL_ERR;
				884	}
				885
				886	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
				887	goto LBL_ERR;
				888	}
				889	}
				890
				891	/* if not zero goto step 4 */
				892	if (mp_iszero (&u) == 0)
				893	goto top;
				894
				895	/* now a = C, b = D, gcd == gv /
				896
				897	/* if v != 1 then there is no inverse */
				898	if (mp_cmp_d (&v, 1) != MP_EQ) {
				899	res = MP_VAL;
				900	goto LBL_ERR;
				901	}
				902
				903	/* if its too low */
				904	while (mp_cmp_d(&C, 0) == MP_LT) {
				905	if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
				906	goto LBL_ERR;
				907	}
				908	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	909
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	910	/* too big */
				911	while (mp_cmp_mag(&C, b) != MP_LT) {
				912	if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
				913	goto LBL_ERR;
				914	}
				915	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	916
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	917	/* C is now the inverse */
				918	mp_exch (&C, c);
				919	res = MP_OKAY;
				920	LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
				921	return res;
				922	}
				923	#endif /* LTM_NO_NEG_EXP */
				924
				925
				926	/* compare maginitude of two ints (unsigned) */
				927	static int mp_cmp_mag (mp_int * a, mp_int * b)
				928	{
				929	int n;
				930	mp_digit tmpa, tmpb;
				931
				932	/* compare based on # of non-zero digits */
				933	if (a->used > b->used) {
				934	return MP_GT;
				935	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	936
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	937	if (a->used < b->used) {
				938	return MP_LT;
				939	}
				940
				941	/* alias for a */
				942	tmpa = a->dp + (a->used - 1);
				943
				944	/* alias for b */
				945	tmpb = b->dp + (a->used - 1);
				946
				947	/* compare based on digits */
				948	for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
				949	if (tmpa > tmpb) {
				950	return MP_GT;
				951	}
				952
				953	if (tmpa < tmpb) {
				954	return MP_LT;
				955	}
				956	}
				957	return MP_EQ;
				958	}
				959
				960
				961	/* reads a unsigned char array, assumes the msb is stored first [big endian] */
				962	static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
				963	{
				964	int res;
				965
				966	/* make sure there are at least two digits */
				967	if (a->alloc < 2) {
				968	if ((res = mp_grow(a, 2)) != MP_OKAY) {
				969	return res;
				970	}
				971	}
				972
				973	/* zero the int */
				974	mp_zero (a);
				975
				976	/* read the bytes in */
				977	while (c-- > 0) {
				978	if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
				979	return res;
				980	}
				981
				982	#ifndef MP_8BIT
				983	a->dp[0] \|= *b++;
				984	a->used += 1;
				985	#else
				986	a->dp[0] = (*b & MP_MASK);
				987	a->dp[1] \|= ((*b++ >> 7U) & 1);
				988	a->used += 2;
				989	#endif
				990	}
				991	mp_clamp (a);
				992	return MP_OKAY;
				993	}
				994
				995
				996	/* store in unsigned [big endian] format */
				997	static int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
				998	{
				999	int x, res;
				1000	mp_int t;
				1001
				1002	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
				1003	return res;
				1004	}
				1005
				1006	x = 0;
				1007	while (mp_iszero (&t) == 0) {
				1008	#ifndef MP_8BIT
				1009	b[x++] = (unsigned char) (t.dp[0] & 255);
				1010	#else
				1011	b[x++] = (unsigned char) (t.dp[0] \| ((t.dp[1] & 0x01) << 7));
				1012	#endif
				1013	if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
				1014	mp_clear (&t);
				1015	return res;
				1016	}
				1017	}
				1018	bn_reverse (b, x);
				1019	mp_clear (&t);
				1020	return MP_OKAY;
				1021	}
				1022
				1023
				1024	/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
				1025	static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
				1026	{
				1027	mp_digit D, r, rr;
				1028	int x, res;
				1029	mp_int t;
				1030
				1031
				1032	/* if the shift count is <= 0 then we do no work */
				1033	if (b <= 0) {
				1034	res = mp_copy (a, c);
				1035	if (d != NULL) {
				1036	mp_zero (d);
				1037	}
				1038	return res;
				1039	}
				1040
				1041	if ((res = mp_init (&t)) != MP_OKAY) {
				1042	return res;
				1043	}
				1044
				1045	/* get the remainder */
				1046	if (d != NULL) {
				1047	if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
				1048	mp_clear (&t);
				1049	return res;
				1050	}
				1051	}
				1052
				1053	/* copy */
				1054	if ((res = mp_copy (a, c)) != MP_OKAY) {
				1055	mp_clear (&t);
				1056	return res;
				1057	}
				1058
				1059	/* shift by as many digits in the bit count */
				1060	if (b >= (int)DIGIT_BIT) {
				1061	mp_rshd (c, b / DIGIT_BIT);
				1062	}
				1063
				1064	/* shift any bit count < DIGIT_BIT */
				1065	D = (mp_digit) (b % DIGIT_BIT);
				1066	if (D != 0) {
				1067	register mp_digit *tmpc, mask, shift;
				1068
				1069	/* mask */
				1070	mask = (((mp_digit)1) << D) - 1;
				1071
				1072	/* shift for lsb */
				1073	shift = DIGIT_BIT - D;
				1074
				1075	/* alias */
				1076	tmpc = c->dp + (c->used - 1);
				1077
				1078	/* carry */
				1079	r = 0;
				1080	for (x = c->used - 1; x >= 0; x--) {
				1081	/* get the lower bits of this word in a temp */
				1082	rr = *tmpc & mask;
				1083
				1084	/* shift the current word and mix in the carry bits from the previous word */
				1085	tmpc = (tmpc >> D) \| (r << shift);
				1086	--tmpc;
				1087
				1088	/* set the carry to the carry bits of the current word found above */
				1089	r = rr;
				1090	}
				1091	}
				1092	mp_clamp (c);
				1093	if (d != NULL) {
				1094	mp_exch (&t, d);
				1095	}
				1096	mp_clear (&t);
				1097	return MP_OKAY;
				1098	}
				1099
				1100
				1101	static int mp_init_copy (mp_int * a, mp_int * b)
				1102	{
				1103	int res;
				1104
				1105	if ((res = mp_init (a)) != MP_OKAY) {
				1106	return res;
				1107	}
				1108	return mp_copy (b, a);
				1109	}
				1110
				1111
				1112	/* set to zero */
				1113	static void mp_zero (mp_int * a)
				1114	{
				1115	int n;
				1116	mp_digit *tmp;
				1117
				1118	a->sign = MP_ZPOS;
				1119	a->used = 0;
				1120
				1121	tmp = a->dp;
				1122	for (n = 0; n < a->alloc; n++) {
				1123	*tmp++ = 0;
				1124	}
				1125	}
				1126
				1127
				1128	/* copy, b = a */
				1129	static int mp_copy (mp_int * a, mp_int * b)
				1130	{
				1131	int res, n;
				1132
				1133	/* if dst == src do nothing */
				1134	if (a == b) {
				1135	return MP_OKAY;
				1136	}
				1137
				1138	/* grow dest */
				1139	if (b->alloc < a->used) {
				1140	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
				1141	return res;
				1142	}
				1143	}
				1144
				1145	/* zero b and copy the parameters over */
				1146	{
				1147	register mp_digit tmpa, tmpb;
				1148
				1149	/* pointer aliases */
				1150
				1151	/* source */
				1152	tmpa = a->dp;
				1153
				1154	/* destination */
				1155	tmpb = b->dp;
				1156
				1157	/* copy all the digits */
				1158	for (n = 0; n < a->used; n++) {
				1159	tmpb++ = tmpa++;
				1160	}
				1161
				1162	/* clear high digits */
				1163	for (; n < b->used; n++) {
				1164	*tmpb++ = 0;
				1165	}
				1166	}
				1167
				1168	/* copy used count and sign */
				1169	b->used = a->used;
				1170	b->sign = a->sign;
				1171	return MP_OKAY;
				1172	}
				1173
				1174
				1175	/* shift right a certain amount of digits */
				1176	static void mp_rshd (mp_int * a, int b)
				1177	{
				1178	int x;
				1179
				1180	/* if b <= 0 then ignore it */
				1181	if (b <= 0) {
				1182	return;
				1183	}
				1184
				1185	/* if b > used then simply zero it and return */
				1186	if (a->used <= b) {
				1187	mp_zero (a);
				1188	return;
				1189	}
				1190
				1191	{
				1192	register mp_digit bottom, top;
				1193
				1194	/* shift the digits down */
				1195
				1196	/* bottom */
				1197	bottom = a->dp;
				1198
				1199	/* top [offset into digits] */
				1200	top = a->dp + b;
				1201
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1202	/* this is implemented as a sliding window where
				1203	* the window is b-digits long and digits from
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1204	* the top of the window are copied to the bottom
				1205	*
				1206	* e.g.
				1207
				1208	b-2 \| b-1 \| b0 \| b1 \| b2 \| ... \| bb \| ---->
				1209	/\ \| ---->
				1210	\-------------------/ ---->
				1211	*/
				1212	for (x = 0; x < (a->used - b); x++) {
				1213	bottom++ = top++;
				1214	}
				1215
				1216	/* zero the top digits */
				1217	for (; x < a->used; x++) {
				1218	*bottom++ = 0;
				1219	}
				1220	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1221
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1222	/* remove excess digits */
				1223	a->used -= b;
				1224	}
				1225
				1226
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1227	/* swap the elements of two integers, for cases where you can't simply swap the
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1228	* mp_int pointers around
				1229	*/
				1230	static void mp_exch (mp_int * a, mp_int * b)
				1231	{
				1232	mp_int t;
				1233
				1234	t = *a;
				1235	a = b;
				1236	*b = t;
				1237	}
				1238
				1239
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1240	/* trim unused digits
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1241	*
				1242	* This is used to ensure that leading zero digits are
				1243	* trimed and the leading "used" digit will be non-zero
				1244	* Typically very fast. Also fixes the sign if there
				1245	* are no more leading digits
				1246	*/
				1247	static void mp_clamp (mp_int * a)
				1248	{
				1249	/* decrease used while the most significant digit is
				1250	* zero.
				1251	*/
				1252	while (a->used > 0 && a->dp[a->used - 1] == 0) {
				1253	--(a->used);
				1254	}
				1255
				1256	/* reset the sign flag if used == 0 */
				1257	if (a->used == 0) {
				1258	a->sign = MP_ZPOS;
				1259	}
				1260	}
				1261
				1262
				1263	/* grow as required */
				1264	static int mp_grow (mp_int * a, int size)
				1265	{
				1266	int i;
				1267	mp_digit *tmp;
				1268
				1269	/* if the alloc size is smaller alloc more ram */
				1270	if (a->alloc < size) {
				1271	/* ensure there are always at least MP_PREC digits extra on top */
				1272	size += (MP_PREC * 2) - (size % MP_PREC);
				1273
				1274	/* reallocate the array a->dp
				1275	*
				1276	* We store the return in a temporary variable
				1277	* in case the operation failed we don't want
				1278	* to overwrite the dp member of a.
				1279	*/
				1280	tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size);
				1281	if (tmp == NULL) {
				1282	/* reallocation failed but "a" is still valid [can be freed] */
				1283	return MP_MEM;
				1284	}
				1285
				1286	/* reallocation succeeded so set a->dp */
				1287	a->dp = tmp;
				1288
				1289	/* zero excess digits */
				1290	i = a->alloc;
				1291	a->alloc = size;
				1292	for (; i < a->alloc; i++) {
				1293	a->dp[i] = 0;
				1294	}
				1295	}
				1296	return MP_OKAY;
				1297	}
				1298
				1299
				1300	#ifdef BN_MP_ABS_C
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1301	/* b = \|a\|
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1302	*
				1303	* Simple function copies the input and fixes the sign to positive
				1304	*/
				1305	static int mp_abs (mp_int * a, mp_int * b)
				1306	{
				1307	int res;
				1308
				1309	/* copy a to b */
				1310	if (a != b) {
				1311	if ((res = mp_copy (a, b)) != MP_OKAY) {
				1312	return res;
				1313	}
				1314	}
				1315
				1316	/* force the sign of b to positive */
				1317	b->sign = MP_ZPOS;
				1318
				1319	return MP_OKAY;
				1320	}
				1321	#endif
				1322
				1323
				1324	/* set to a digit */
				1325	static void mp_set (mp_int * a, mp_digit b)
				1326	{
				1327	mp_zero (a);
				1328	a->dp[0] = b & MP_MASK;
				1329	a->used = (a->dp[0] != 0) ? 1 : 0;
				1330	}
				1331
				1332
				1333	#ifndef LTM_NO_NEG_EXP
				1334	/* b = a/2 */
				1335	static int mp_div_2(mp_int * a, mp_int * b)
				1336	{
				1337	int x, res, oldused;
				1338
				1339	/* copy */
				1340	if (b->alloc < a->used) {
				1341	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
				1342	return res;
				1343	}
				1344	}
				1345
				1346	oldused = b->used;
				1347	b->used = a->used;
				1348	{
				1349	register mp_digit r, rr, tmpa, tmpb;
				1350
				1351	/* source alias */
				1352	tmpa = a->dp + b->used - 1;
				1353
				1354	/* dest alias */
				1355	tmpb = b->dp + b->used - 1;
				1356
				1357	/* carry */
				1358	r = 0;
				1359	for (x = b->used - 1; x >= 0; x--) {
				1360	/* get the carry for the next iteration */
				1361	rr = *tmpa & 1;
				1362
				1363	/* shift the current digit, add in carry and store */
				1364	tmpb-- = (tmpa-- >> 1) \| (r << (DIGIT_BIT - 1));
				1365
				1366	/* forward carry to next iteration */
				1367	r = rr;
				1368	}
				1369
				1370	/* zero excess digits */
				1371	tmpb = b->dp + b->used;
				1372	for (x = b->used; x < oldused; x++) {
				1373	*tmpb++ = 0;
				1374	}
				1375	}
				1376	b->sign = a->sign;
				1377	mp_clamp (b);
				1378	return MP_OKAY;
				1379	}
				1380	#endif /* LTM_NO_NEG_EXP */
				1381
				1382
				1383	/* shift left by a certain bit count */
				1384	static int mp_mul_2d (mp_int * a, int b, mp_int * c)
				1385	{
				1386	mp_digit d;
				1387	int res;
				1388
				1389	/* copy */
				1390	if (a != c) {
				1391	if ((res = mp_copy (a, c)) != MP_OKAY) {
				1392	return res;
				1393	}
				1394	}
				1395
				1396	if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
				1397	if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
				1398	return res;
				1399	}
				1400	}
				1401
				1402	/* shift by as many digits in the bit count */
				1403	if (b >= (int)DIGIT_BIT) {
				1404	if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
				1405	return res;
				1406	}
				1407	}
				1408
				1409	/* shift any bit count < DIGIT_BIT */
				1410	d = (mp_digit) (b % DIGIT_BIT);
				1411	if (d != 0) {
				1412	register mp_digit *tmpc, shift, mask, r, rr;
				1413	register int x;
				1414
				1415	/* bitmask for carries */
				1416	mask = (((mp_digit)1) << d) - 1;
				1417
				1418	/* shift for msbs */
				1419	shift = DIGIT_BIT - d;
				1420
				1421	/* alias */
				1422	tmpc = c->dp;
				1423
				1424	/* carry */
				1425	r = 0;
				1426	for (x = 0; x < c->used; x++) {
				1427	/* get the higher bits of the current word */
				1428	rr = (*tmpc >> shift) & mask;
				1429
				1430	/* shift the current word and OR in the carry */
				1431	tmpc = ((tmpc << d) \| r) & MP_MASK;
				1432	++tmpc;
				1433
				1434	/* set the carry to the carry bits of the current word */
				1435	r = rr;
				1436	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1437
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1438	/* set final carry */
				1439	if (r != 0) {
				1440	c->dp[(c->used)++] = r;
				1441	}
				1442	}
				1443	mp_clamp (c);
				1444	return MP_OKAY;
				1445	}
				1446
				1447
				1448	#ifdef BN_MP_INIT_MULTI_C
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1449	static int mp_init_multi(mp_int *mp, ...)
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1450	{
				1451	mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
				1452	int n = 0; /* Number of ok inits */
				1453	mp_int* cur_arg = mp;
				1454	va_list args;
				1455
				1456	va_start(args, mp); /* init args to next argument from caller */
				1457	while (cur_arg != NULL) {
				1458	if (mp_init(cur_arg) != MP_OKAY) {
				1459	/* Oops - error! Back-track and mp_clear what we already
				1460	succeeded in init-ing, then return error.
				1461	*/
				1462	va_list clean_args;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1463
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1464	/* end the current list */
				1465	va_end(args);
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1466
				1467	/* now start cleaning up */
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1468	cur_arg = mp;
				1469	va_start(clean_args, mp);
				1470	while (n--) {
				1471	mp_clear(cur_arg);
				1472	cur_arg = va_arg(clean_args, mp_int*);
				1473	}
				1474	va_end(clean_args);
Dmitry Shmidt	4171258	2015-06-29 11:02:15 -0700	[diff] [blame]	1475	return MP_MEM;
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1476	}
				1477	n++;
				1478	cur_arg = va_arg(args, mp_int*);
				1479	}
				1480	va_end(args);
				1481	return res; /* Assumed ok, if error flagged above. */
				1482	}
				1483	#endif
				1484
				1485
				1486	#ifdef BN_MP_CLEAR_MULTI_C
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1487	static void mp_clear_multi(mp_int *mp, ...)
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1488	{
				1489	mp_int* next_mp = mp;
				1490	va_list args;
				1491	va_start(args, mp);
				1492	while (next_mp != NULL) {
				1493	mp_clear(next_mp);
				1494	next_mp = va_arg(args, mp_int*);
				1495	}
				1496	va_end(args);
				1497	}
				1498	#endif
				1499
				1500
				1501	/* shift left a certain amount of digits */
				1502	static int mp_lshd (mp_int * a, int b)
				1503	{
				1504	int x, res;
				1505
				1506	/* if its less than zero return */
				1507	if (b <= 0) {
				1508	return MP_OKAY;
				1509	}
				1510
				1511	/* grow to fit the new digits */
				1512	if (a->alloc < a->used + b) {
				1513	if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
				1514	return res;
				1515	}
				1516	}
				1517
				1518	{
				1519	register mp_digit top, bottom;
				1520
				1521	/* increment the used by the shift amount then copy upwards */
				1522	a->used += b;
				1523
				1524	/* top */
				1525	top = a->dp + a->used - 1;
				1526
				1527	/* base */
				1528	bottom = a->dp + a->used - 1 - b;
				1529
				1530	/* much like mp_rshd this is implemented using a sliding window
				1531	* except the window goes the otherway around. Copying from
				1532	* the bottom to the top. see bn_mp_rshd.c for more info.
				1533	*/
				1534	for (x = a->used - 1; x >= b; x--) {
				1535	top-- = bottom--;
				1536	}
				1537
				1538	/* zero the lower digits */
				1539	top = a->dp;
				1540	for (x = 0; x < b; x++) {
				1541	*top++ = 0;
				1542	}
				1543	}
				1544	return MP_OKAY;
				1545	}
				1546
				1547
				1548	/* returns the number of bits in an int */
				1549	static int mp_count_bits (mp_int * a)
				1550	{
				1551	int r;
				1552	mp_digit q;
				1553
				1554	/* shortcut */
				1555	if (a->used == 0) {
				1556	return 0;
				1557	}
				1558
				1559	/* get number of digits and add that */
				1560	r = (a->used - 1) * DIGIT_BIT;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1561
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1562	/* take the last digit and count the bits in it */
				1563	q = a->dp[a->used - 1];
				1564	while (q > ((mp_digit) 0)) {
				1565	++r;
				1566	q >>= ((mp_digit) 1);
				1567	}
				1568	return r;
				1569	}
				1570
				1571
				1572	/* calc a value mod 2*b /
				1573	static int mp_mod_2d (mp_int * a, int b, mp_int * c)
				1574	{
				1575	int x, res;
				1576
				1577	/* if b is <= 0 then zero the int */
				1578	if (b <= 0) {
				1579	mp_zero (c);
				1580	return MP_OKAY;
				1581	}
				1582
				1583	/* if the modulus is larger than the value than return */
				1584	if (b >= (int) (a->used * DIGIT_BIT)) {
				1585	res = mp_copy (a, c);
				1586	return res;
				1587	}
				1588
				1589	/* copy */
				1590	if ((res = mp_copy (a, c)) != MP_OKAY) {
				1591	return res;
				1592	}
				1593
				1594	/* zero digits above the last digit of the modulus */
				1595	for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
				1596	c->dp[x] = 0;
				1597	}
				1598	/* clear the digit that is not completely outside/inside the modulus */
				1599	c->dp[b / DIGIT_BIT] &=
				1600	(mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
				1601	mp_clamp (c);
				1602	return MP_OKAY;
				1603	}
				1604
				1605
				1606	#ifdef BN_MP_DIV_SMALL
				1607
				1608	/* slower bit-bang division... also smaller */
				1609	static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
				1610	{
				1611	mp_int ta, tb, tq, q;
				1612	int res, n, n2;
				1613
				1614	/* is divisor zero ? */
				1615	if (mp_iszero (b) == 1) {
				1616	return MP_VAL;
				1617	}
				1618
				1619	/* if a < b then q=0, r = a */
				1620	if (mp_cmp_mag (a, b) == MP_LT) {
				1621	if (d != NULL) {
				1622	res = mp_copy (a, d);
				1623	} else {
				1624	res = MP_OKAY;
				1625	}
				1626	if (c != NULL) {
				1627	mp_zero (c);
				1628	}
				1629	return res;
				1630	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1631
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1632	/* init our temps */
Dmitry Shmidt	cc00d5d	2015-05-04 10:34:12 -0700	[diff] [blame]	1633	if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL)) != MP_OKAY) {
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1634	return res;
				1635	}
				1636
				1637
				1638	mp_set(&tq, 1);
				1639	n = mp_count_bits(a) - mp_count_bits(b);
				1640	if (((res = mp_abs(a, &ta)) != MP_OKAY) \|\|
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1641	((res = mp_abs(b, &tb)) != MP_OKAY) \|\|
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1642	((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) \|\|
				1643	((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
				1644	goto LBL_ERR;
				1645	}
				1646
				1647	while (n-- >= 0) {
				1648	if (mp_cmp(&tb, &ta) != MP_GT) {
				1649	if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) \|\|
				1650	((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
				1651	goto LBL_ERR;
				1652	}
				1653	}
				1654	if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) \|\|
				1655	((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
				1656	goto LBL_ERR;
				1657	}
				1658	}
				1659
				1660	/* now q == quotient and ta == remainder */
				1661	n = a->sign;
				1662	n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
				1663	if (c != NULL) {
				1664	mp_exch(c, &q);
				1665	c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
				1666	}
				1667	if (d != NULL) {
				1668	mp_exch(d, &ta);
				1669	d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
				1670	}
				1671	LBL_ERR:
				1672	mp_clear_multi(&ta, &tb, &tq, &q, NULL);
				1673	return res;
				1674	}
				1675
				1676	#else
				1677
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1678	/* integer signed division.
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1679	* c*b + d == a [e.g. a/b, c=quotient, d=remainder]
				1680	* HAC pp.598 Algorithm 14.20
				1681	*
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1682	* Note that the description in HAC is horribly
				1683	* incomplete. For example, it doesn't consider
				1684	* the case where digits are removed from 'x' in
				1685	* the inner loop. It also doesn't consider the
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1686	* case that y has fewer than three digits, etc..
				1687	*
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1688	* The overall algorithm is as described as
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1689	* 14.20 from HAC but fixed to treat these cases.
				1690	*/
				1691	static int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
				1692	{
				1693	mp_int q, x, y, t1, t2;
				1694	int res, n, t, i, norm, neg;
				1695
				1696	/* is divisor zero ? */
				1697	if (mp_iszero (b) == 1) {
				1698	return MP_VAL;
				1699	}
				1700
				1701	/* if a < b then q=0, r = a */
				1702	if (mp_cmp_mag (a, b) == MP_LT) {
				1703	if (d != NULL) {
				1704	res = mp_copy (a, d);
				1705	} else {
				1706	res = MP_OKAY;
				1707	}
				1708	if (c != NULL) {
				1709	mp_zero (c);
				1710	}
				1711	return res;
				1712	}
				1713
				1714	if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
				1715	return res;
				1716	}
				1717	q.used = a->used + 2;
				1718
				1719	if ((res = mp_init (&t1)) != MP_OKAY) {
				1720	goto LBL_Q;
				1721	}
				1722
				1723	if ((res = mp_init (&t2)) != MP_OKAY) {
				1724	goto LBL_T1;
				1725	}
				1726
				1727	if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
				1728	goto LBL_T2;
				1729	}
				1730
				1731	if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
				1732	goto LBL_X;
				1733	}
				1734
				1735	/* fix the sign */
				1736	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
				1737	x.sign = y.sign = MP_ZPOS;
				1738
				1739	/* normalize both x and y, ensure that y >= b/2, [b == 2*DIGIT_BIT] /
				1740	norm = mp_count_bits(&y) % DIGIT_BIT;
				1741	if (norm < (int)(DIGIT_BIT-1)) {
				1742	norm = (DIGIT_BIT-1) - norm;
				1743	if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
				1744	goto LBL_Y;
				1745	}
				1746	if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
				1747	goto LBL_Y;
				1748	}
				1749	} else {
				1750	norm = 0;
				1751	}
				1752
				1753	/* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
				1754	n = x.used - 1;
				1755	t = y.used - 1;
				1756
				1757	/* while (x >= ybn-t) do { q[n-t] += 1; x -= yb*{n-t} } /
				1758	if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = yb{n-t} /
				1759	goto LBL_Y;
				1760	}
				1761
				1762	while (mp_cmp (&x, &y) != MP_LT) {
				1763	++(q.dp[n - t]);
				1764	if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
				1765	goto LBL_Y;
				1766	}
				1767	}
				1768
				1769	/* reset y by shifting it back down */
				1770	mp_rshd (&y, n - t);
				1771
				1772	/* step 3. for i from n down to (t + 1) */
				1773	for (i = n; i >= (t + 1); i--) {
				1774	if (i > x.used) {
				1775	continue;
				1776	}
				1777
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1778	/* step 3.1 if xi == yt then set q{i-t-1} to b-1,
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1779	* otherwise set q{i-t-1} to (xib + x{i-1})/yt /
				1780	if (x.dp[i] == y.dp[t]) {
				1781	q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
				1782	} else {
				1783	mp_word tmp;
				1784	tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
				1785	tmp \|= ((mp_word) x.dp[i - 1]);
				1786	tmp /= ((mp_word) y.dp[t]);
				1787	if (tmp > (mp_word) MP_MASK)
				1788	tmp = MP_MASK;
				1789	q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
				1790	}
				1791
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1792	/* while (q{i-t-1} * (yt * b + y{t-1})) >
				1793	xi * b*2 + xi-1 b + xi-2
				1794
				1795	do q{i-t-1} -= 1;
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1796	*/
				1797	q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
				1798	do {
				1799	q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
				1800
				1801	/* find left hand */
				1802	mp_zero (&t1);
				1803	t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
				1804	t1.dp[1] = y.dp[t];
				1805	t1.used = 2;
				1806	if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
				1807	goto LBL_Y;
				1808	}
				1809
				1810	/* find right hand */
				1811	t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
				1812	t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
				1813	t2.dp[2] = x.dp[i];
				1814	t2.used = 3;
				1815	} while (mp_cmp_mag(&t1, &t2) == MP_GT);
				1816
				1817	/* step 3.3 x = x - q{i-t-1} * y * b*{i-t-1} /
				1818	if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
				1819	goto LBL_Y;
				1820	}
				1821
				1822	if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
				1823	goto LBL_Y;
				1824	}
				1825
				1826	if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
				1827	goto LBL_Y;
				1828	}
				1829
				1830	/* if x < 0 then { x = x + yb{i-t-1}; q{i-t-1} -= 1; } /
				1831	if (x.sign == MP_NEG) {
				1832	if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
				1833	goto LBL_Y;
				1834	}
				1835	if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
				1836	goto LBL_Y;
				1837	}
				1838	if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
				1839	goto LBL_Y;
				1840	}
				1841
				1842	q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
				1843	}
				1844	}
				1845
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1846	/* now q is the quotient and x is the remainder
				1847	* [which we have to normalize]
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1848	*/
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1849
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1850	/* get sign before writing to c */
				1851	x.sign = x.used == 0 ? MP_ZPOS : a->sign;
				1852
				1853	if (c != NULL) {
				1854	mp_clamp (&q);
				1855	mp_exch (&q, c);
				1856	c->sign = neg;
				1857	}
				1858
				1859	if (d != NULL) {
				1860	mp_div_2d (&x, norm, &x, NULL);
				1861	mp_exch (&x, d);
				1862	}
				1863
				1864	res = MP_OKAY;
				1865
				1866	LBL_Y:mp_clear (&y);
				1867	LBL_X:mp_clear (&x);
				1868	LBL_T2:mp_clear (&t2);
				1869	LBL_T1:mp_clear (&t1);
				1870	LBL_Q:mp_clear (&q);
				1871	return res;
				1872	}
				1873
				1874	#endif
				1875
				1876
				1877	#ifdef MP_LOW_MEM
				1878	#define TAB_SIZE 32
				1879	#else
				1880	#define TAB_SIZE 256
				1881	#endif
				1882
				1883	static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
				1884	{
				1885	mp_int M[TAB_SIZE], res, mu;
				1886	mp_digit buf;
				1887	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
				1888	int (redux)(mp_int,mp_int,mp_int);
				1889
				1890	/* find window size */
				1891	x = mp_count_bits (X);
				1892	if (x <= 7) {
				1893	winsize = 2;
				1894	} else if (x <= 36) {
				1895	winsize = 3;
				1896	} else if (x <= 140) {
				1897	winsize = 4;
				1898	} else if (x <= 450) {
				1899	winsize = 5;
				1900	} else if (x <= 1303) {
				1901	winsize = 6;
				1902	} else if (x <= 3529) {
				1903	winsize = 7;
				1904	} else {
				1905	winsize = 8;
				1906	}
				1907
				1908	#ifdef MP_LOW_MEM
				1909	if (winsize > 5) {
				1910	winsize = 5;
				1911	}
				1912	#endif
				1913
				1914	/* init M array */
				1915	/* init first cell */
				1916	if ((err = mp_init(&M[1])) != MP_OKAY) {
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1917	return err;
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1918	}
				1919
				1920	/* now init the second half of the array */
				1921	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				1922	if ((err = mp_init(&M[x])) != MP_OKAY) {
				1923	for (y = 1<<(winsize-1); y < x; y++) {
				1924	mp_clear (&M[y]);
				1925	}
				1926	mp_clear(&M[1]);
				1927	return err;
				1928	}
				1929	}
				1930
				1931	/* create mu, used for Barrett reduction */
				1932	if ((err = mp_init (&mu)) != MP_OKAY) {
				1933	goto LBL_M;
				1934	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1935
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1936	if (redmode == 0) {
				1937	if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
				1938	goto LBL_MU;
				1939	}
				1940	redux = mp_reduce;
				1941	} else {
				1942	if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
				1943	goto LBL_MU;
				1944	}
				1945	redux = mp_reduce_2k_l;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1946	}
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1947
				1948	/* create M table
				1949	*
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1950	* The M table contains powers of the base,
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1951	* e.g. M[x] = G**x mod P
				1952	*
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1953	* The first half of the table is not
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1954	* computed though accept for M[0] and M[1]
				1955	*/
				1956	if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
				1957	goto LBL_MU;
				1958	}
				1959
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1960	/* compute the value at M[1<<(winsize-1)] by squaring
				1961	* M[1] (winsize-1) times
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1962	*/
				1963	if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
				1964	goto LBL_MU;
				1965	}
				1966
				1967	for (x = 0; x < (winsize - 1); x++) {
				1968	/* square it */
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	1969	if ((err = mp_sqr (&M[1 << (winsize - 1)],
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1970	&M[1 << (winsize - 1)])) != MP_OKAY) {
				1971	goto LBL_MU;
				1972	}
				1973
				1974	/* reduce modulo P */
				1975	if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
				1976	goto LBL_MU;
				1977	}
				1978	}
				1979
				1980	/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
				1981	* for x = (2(winsize - 1) + 1) to (2winsize - 1)
				1982	*/
				1983	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
				1984	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
				1985	goto LBL_MU;
				1986	}
				1987	if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
				1988	goto LBL_MU;
				1989	}
				1990	}
				1991
				1992	/* setup result */
				1993	if ((err = mp_init (&res)) != MP_OKAY) {
				1994	goto LBL_MU;
				1995	}
				1996	mp_set (&res, 1);
				1997
				1998	/* set initial mode and bit cnt */
				1999	mode = 0;
				2000	bitcnt = 1;
				2001	buf = 0;
				2002	digidx = X->used - 1;
				2003	bitcpy = 0;
				2004	bitbuf = 0;
				2005
				2006	for (;;) {
				2007	/* grab next digit as required */
				2008	if (--bitcnt == 0) {
				2009	/* if digidx == -1 we are out of digits */
				2010	if (digidx == -1) {
				2011	break;
				2012	}
				2013	/* read next digit and reset the bitcnt */
				2014	buf = X->dp[digidx--];
				2015	bitcnt = (int) DIGIT_BIT;
				2016	}
				2017
				2018	/* grab the next msb from the exponent */
				2019	y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
				2020	buf <<= (mp_digit)1;
				2021
				2022	/* if the bit is zero and mode == 0 then we ignore it
				2023	* These represent the leading zero bits before the first 1 bit
				2024	* in the exponent. Technically this opt is not required but it
				2025	* does lower the # of trivial squaring/reductions used
				2026	*/
				2027	if (mode == 0 && y == 0) {
				2028	continue;
				2029	}
				2030
				2031	/* if the bit is zero and mode == 1 then we square */
				2032	if (mode == 1 && y == 0) {
				2033	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				2034	goto LBL_RES;
				2035	}
				2036	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2037	goto LBL_RES;
				2038	}
				2039	continue;
				2040	}
				2041
				2042	/* else we add it to the window */
				2043	bitbuf \|= (y << (winsize - ++bitcpy));
				2044	mode = 2;
				2045
				2046	if (bitcpy == winsize) {
				2047	/* ok window is filled so square as required and multiply */
				2048	/* square first */
				2049	for (x = 0; x < winsize; x++) {
				2050	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				2051	goto LBL_RES;
				2052	}
				2053	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2054	goto LBL_RES;
				2055	}
				2056	}
				2057
				2058	/* then multiply */
				2059	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
				2060	goto LBL_RES;
				2061	}
				2062	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2063	goto LBL_RES;
				2064	}
				2065
				2066	/* empty window and reset */
				2067	bitcpy = 0;
				2068	bitbuf = 0;
				2069	mode = 1;
				2070	}
				2071	}
				2072
				2073	/* if bits remain then square/multiply */
				2074	if (mode == 2 && bitcpy > 0) {
				2075	/* square then multiply if the bit is set */
				2076	for (x = 0; x < bitcpy; x++) {
				2077	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				2078	goto LBL_RES;
				2079	}
				2080	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2081	goto LBL_RES;
				2082	}
				2083
				2084	bitbuf <<= 1;
				2085	if ((bitbuf & (1 << winsize)) != 0) {
				2086	/* then multiply */
				2087	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
				2088	goto LBL_RES;
				2089	}
				2090	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2091	goto LBL_RES;
				2092	}
				2093	}
				2094	}
				2095	}
				2096
				2097	mp_exch (&res, Y);
				2098	err = MP_OKAY;
				2099	LBL_RES:mp_clear (&res);
				2100	LBL_MU:mp_clear (&mu);
				2101	LBL_M:
				2102	mp_clear(&M[1]);
				2103	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				2104	mp_clear (&M[x]);
				2105	}
				2106	return err;
				2107	}
				2108
				2109
				2110	/* computes b = aa /
				2111	static int mp_sqr (mp_int * a, mp_int * b)
				2112	{
				2113	int res;
				2114
				2115	#ifdef BN_MP_TOOM_SQR_C
				2116	/* use Toom-Cook? */
				2117	if (a->used >= TOOM_SQR_CUTOFF) {
				2118	res = mp_toom_sqr(a, b);
				2119	/* Karatsuba? */
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2120	} else
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2121	#endif
				2122	#ifdef BN_MP_KARATSUBA_SQR_C
				2123	if (a->used >= KARATSUBA_SQR_CUTOFF) {
				2124	res = mp_karatsuba_sqr (a, b);
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2125	} else
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2126	#endif
				2127	{
				2128	#ifdef BN_FAST_S_MP_SQR_C
				2129	/* can we use the fast comba multiplier? */
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2130	if ((a->used * 2 + 1) < MP_WARRAY &&
				2131	a->used <
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2132	(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
				2133	res = fast_s_mp_sqr (a, b);
				2134	} else
				2135	#endif
				2136	#ifdef BN_S_MP_SQR_C
				2137	res = s_mp_sqr (a, b);
				2138	#else
				2139	#error mp_sqr could fail
				2140	res = MP_VAL;
				2141	#endif
				2142	}
				2143	b->sign = MP_ZPOS;
				2144	return res;
				2145	}
				2146
				2147
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2148	/* reduces a modulo n where n is of the form 2**p - d
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2149	This differs from reduce_2k since "d" can be larger
				2150	than a single digit.
				2151	*/
				2152	static int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d)
				2153	{
				2154	mp_int q;
				2155	int p, res;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2156
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2157	if ((res = mp_init(&q)) != MP_OKAY) {
				2158	return res;
				2159	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2160
				2161	p = mp_count_bits(n);
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2162	top:
				2163	/* q = a/2p, a = a mod 2p */
				2164	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
				2165	goto ERR;
				2166	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2167
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2168	/* q = q * d */
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2169	if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2170	goto ERR;
				2171	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2172
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2173	/* a = a + q */
				2174	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
				2175	goto ERR;
				2176	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2177
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2178	if (mp_cmp_mag(a, n) != MP_LT) {
				2179	s_mp_sub(a, n, a);
				2180	goto top;
				2181	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2182
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2183	ERR:
				2184	mp_clear(&q);
				2185	return res;
				2186	}
				2187
				2188
				2189	/* determines the setup value */
				2190	static int mp_reduce_2k_setup_l(mp_int a, mp_int d)
				2191	{
				2192	int res;
				2193	mp_int tmp;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2194
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2195	if ((res = mp_init(&tmp)) != MP_OKAY) {
				2196	return res;
				2197	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2198
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2199	if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
				2200	goto ERR;
				2201	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2202
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2203	if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
				2204	goto ERR;
				2205	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2206
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2207	ERR:
				2208	mp_clear(&tmp);
				2209	return res;
				2210	}
				2211
				2212
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2213	/* computes a = 2**b
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2214	*
				2215	* Simple algorithm which zeroes the int, grows it then just sets one bit
				2216	* as required.
				2217	*/
				2218	static int mp_2expt (mp_int * a, int b)
				2219	{
				2220	int res;
				2221
				2222	/* zero a as per default */
				2223	mp_zero (a);
				2224
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	2225	/* grow a to accommodate the single bit */
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2226	if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
				2227	return res;
				2228	}
				2229
				2230	/* set the used count of where the bit will go */
				2231	a->used = b / DIGIT_BIT + 1;
				2232
				2233	/* put the single bit in its place */
				2234	a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
				2235
				2236	return MP_OKAY;
				2237	}
				2238
				2239
				2240	/* pre-calculate the value required for Barrett reduction
				2241	* For a given modulus "b" it calulates the value required in "a"
				2242	*/
				2243	static int mp_reduce_setup (mp_int * a, mp_int * b)
				2244	{
				2245	int res;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2246
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2247	if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
				2248	return res;
				2249	}
				2250	return mp_div (a, b, a, NULL);
				2251	}
				2252
				2253
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2254	/* reduces x mod m, assumes 0 < x < m**2, mu is
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2255	* precomputed via mp_reduce_setup.
				2256	* From HAC pp.604 Algorithm 14.42
				2257	*/
				2258	static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
				2259	{
				2260	mp_int q;
				2261	int res, um = m->used;
				2262
				2263	/* q = x */
				2264	if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
				2265	return res;
				2266	}
				2267
				2268	/* q1 = x / b*(k-1) /
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2269	mp_rshd (&q, um - 1);
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2270
				2271	/* according to HAC this optimization is ok */
				2272	if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
				2273	if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
				2274	goto CLEANUP;
				2275	}
				2276	} else {
				2277	#ifdef BN_S_MP_MUL_HIGH_DIGS_C
				2278	if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
				2279	goto CLEANUP;
				2280	}
				2281	#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
				2282	if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
				2283	goto CLEANUP;
				2284	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2285	#else
				2286	{
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2287	#error mp_reduce would always fail
				2288	res = MP_VAL;
				2289	goto CLEANUP;
				2290	}
				2291	#endif
				2292	}
				2293
				2294	/* q3 = q2 / b*(k+1) /
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2295	mp_rshd (&q, um + 1);
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2296
				2297	/* x = x mod b*(k+1), quick (no division) /
				2298	if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
				2299	goto CLEANUP;
				2300	}
				2301
				2302	/* q = q * m mod b*(k+1), quick (no division) /
				2303	if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
				2304	goto CLEANUP;
				2305	}
				2306
				2307	/* x = x - q */
				2308	if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
				2309	goto CLEANUP;
				2310	}
				2311
				2312	/* If x < 0, add b*(k+1) to it /
				2313	if (mp_cmp_d (x, 0) == MP_LT) {
				2314	mp_set (&q, 1);
				2315	if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) {
				2316	goto CLEANUP;
				2317	}
				2318	if ((res = mp_add (x, &q, x)) != MP_OKAY) {
				2319	goto CLEANUP;
				2320	}
				2321	}
				2322
				2323	/* Back off if it's too big */
				2324	while (mp_cmp (x, m) != MP_LT) {
				2325	if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
				2326	goto CLEANUP;
				2327	}
				2328	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2329
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2330	CLEANUP:
				2331	mp_clear (&q);
				2332
				2333	return res;
				2334	}
				2335
				2336
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	2337	/* multiplies \|a\| * \|b\| and only computes up to digs digits of result
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2338	* HAC pp. 595, Algorithm 14.12 Modified so you can control how
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2339	* many digits of output are created.
				2340	*/
				2341	static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
				2342	{
				2343	mp_int t;
				2344	int res, pa, pb, ix, iy;
				2345	mp_digit u;
				2346	mp_word r;
				2347	mp_digit tmpx, tmpt, tmpy;
				2348
Dmitry Shmidt	700a137	2013-03-15 14:14:44 -0700	[diff] [blame]	2349	#ifdef BN_FAST_S_MP_MUL_DIGS_C
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2350	/* can we use the fast multiplier? */
				2351	if (((digs) < MP_WARRAY) &&
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2352	MIN (a->used, b->used) <
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2353	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				2354	return fast_s_mp_mul_digs (a, b, c, digs);
				2355	}
Dmitry Shmidt	700a137	2013-03-15 14:14:44 -0700	[diff] [blame]	2356	#endif
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2357
				2358	if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
				2359	return res;
				2360	}
				2361	t.used = digs;
				2362
				2363	/* compute the digits of the product directly */
				2364	pa = a->used;
				2365	for (ix = 0; ix < pa; ix++) {
				2366	/* set the carry to zero */
				2367	u = 0;
				2368
				2369	/* limit ourselves to making digs digits of output */
				2370	pb = MIN (b->used, digs - ix);
				2371
				2372	/* setup some aliases */
				2373	/* copy of the digit from a used within the nested loop */
				2374	tmpx = a->dp[ix];
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2375
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2376	/* an alias for the destination shifted ix places */
				2377	tmpt = t.dp + ix;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2378
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2379	/* an alias for the digits of b */
				2380	tmpy = b->dp;
				2381
				2382	/* compute the columns of the output and propagate the carry */
				2383	for (iy = 0; iy < pb; iy++) {
				2384	/* compute the column as a mp_word */
				2385	r = ((mp_word)*tmpt) +
				2386	((mp_word)tmpx) * ((mp_word)*tmpy++) +
				2387	((mp_word) u);
				2388
				2389	/* the new column is the lower part of the result */
				2390	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2391
				2392	/* get the carry word from the result */
				2393	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
				2394	}
				2395	/* set carry if it is placed below digs */
				2396	if (ix + iy < digs) {
				2397	*tmpt = u;
				2398	}
				2399	}
				2400
				2401	mp_clamp (&t);
				2402	mp_exch (&t, c);
				2403
				2404	mp_clear (&t);
				2405	return MP_OKAY;
				2406	}
				2407
				2408
Dmitry Shmidt	700a137	2013-03-15 14:14:44 -0700	[diff] [blame]	2409	#ifdef BN_FAST_S_MP_MUL_DIGS_C
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2410	/* Fast (comba) multiplier
				2411	*
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2412	* This is the fast column-array [comba] multiplier. It is
				2413	* designed to compute the columns of the product first
				2414	* then handle the carries afterwards. This has the effect
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2415	* of making the nested loops that compute the columns very
				2416	* simple and schedulable on super-scalar processors.
				2417	*
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2418	* This has been modified to produce a variable number of
				2419	* digits of output so if say only a half-product is required
				2420	* you don't have to compute the upper half (a feature
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2421	* required for fast Barrett reduction).
				2422	*
				2423	* Based on Algorithm 14.12 on pp.595 of HAC.
				2424	*
				2425	*/
				2426	static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
				2427	{
				2428	int olduse, res, pa, ix, iz;
				2429	mp_digit W[MP_WARRAY];
				2430	register mp_word _W;
				2431
				2432	/* grow the destination as required */
				2433	if (c->alloc < digs) {
				2434	if ((res = mp_grow (c, digs)) != MP_OKAY) {
				2435	return res;
				2436	}
				2437	}
				2438
				2439	/* number of output digits to produce */
				2440	pa = MIN(digs, a->used + b->used);
				2441
				2442	/* clear the carry */
				2443	_W = 0;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2444	for (ix = 0; ix < pa; ix++) {
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2445	int tx, ty;
				2446	int iy;
				2447	mp_digit tmpx, tmpy;
				2448
				2449	/* get offsets into the two bignums */
				2450	ty = MIN(b->used-1, ix);
				2451	tx = ix - ty;
				2452
				2453	/* setup temp aliases */
				2454	tmpx = a->dp + tx;
				2455	tmpy = b->dp + ty;
				2456
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2457	/* this is the number of times the loop will iterrate, essentially
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2458	while (tx++ < a->used && ty-- >= 0) { ... }
				2459	*/
				2460	iy = MIN(a->used-tx, ty+1);
				2461
				2462	/* execute loop */
				2463	for (iz = 0; iz < iy; ++iz) {
				2464	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
				2465
				2466	}
				2467
				2468	/* store term */
				2469	W[ix] = ((mp_digit)_W) & MP_MASK;
				2470
				2471	/* make next carry */
				2472	_W = _W >> ((mp_word)DIGIT_BIT);
				2473	}
				2474
				2475	/* setup dest */
				2476	olduse = c->used;
				2477	c->used = pa;
				2478
				2479	{
				2480	register mp_digit *tmpc;
				2481	tmpc = c->dp;
				2482	for (ix = 0; ix < pa+1; ix++) {
				2483	/* now extract the previous digit [below the carry] */
				2484	*tmpc++ = W[ix];
				2485	}
				2486
				2487	/* clear unused digits [that existed in the old copy of c] */
				2488	for (; ix < olduse; ix++) {
				2489	*tmpc++ = 0;
				2490	}
				2491	}
				2492	mp_clamp (c);
				2493	return MP_OKAY;
				2494	}
Dmitry Shmidt	700a137	2013-03-15 14:14:44 -0700	[diff] [blame]	2495	#endif /* BN_FAST_S_MP_MUL_DIGS_C */
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2496
				2497
				2498	/* init an mp_init for a given size */
				2499	static int mp_init_size (mp_int * a, int size)
				2500	{
				2501	int x;
				2502
				2503	/* pad size so there are always extra digits */
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2504	size += (MP_PREC * 2) - (size % MP_PREC);
				2505
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2506	/* alloc mem */
				2507	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
				2508	if (a->dp == NULL) {
				2509	return MP_MEM;
				2510	}
				2511
				2512	/* set the members */
				2513	a->used = 0;
				2514	a->alloc = size;
				2515	a->sign = MP_ZPOS;
				2516
				2517	/* zero the digits */
				2518	for (x = 0; x < size; x++) {
				2519	a->dp[x] = 0;
				2520	}
				2521
				2522	return MP_OKAY;
				2523	}
				2524
				2525
				2526	/* low level squaring, b = aa, HAC pp.596-597, Algorithm 14.16 /
				2527	static int s_mp_sqr (mp_int * a, mp_int * b)
				2528	{
				2529	mp_int t;
				2530	int res, ix, iy, pa;
				2531	mp_word r;
				2532	mp_digit u, tmpx, *tmpt;
				2533
				2534	pa = a->used;
				2535	if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
				2536	return res;
				2537	}
				2538
				2539	/* default used is maximum possible size */
				2540	t.used = 2*pa + 1;
				2541
				2542	for (ix = 0; ix < pa; ix++) {
				2543	/* first calculate the digit at 2ix /
				2544	/* calculate double precision result */
				2545	r = ((mp_word) t.dp[2*ix]) +
				2546	((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
				2547
				2548	/* store lower part in result */
				2549	t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
				2550
				2551	/* get the carry */
				2552	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
				2553
				2554	/* left hand side of A[ix] * A[iy] */
				2555	tmpx = a->dp[ix];
				2556
				2557	/* alias for where to store the results */
				2558	tmpt = t.dp + (2*ix + 1);
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2559
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2560	for (iy = ix + 1; iy < pa; iy++) {
				2561	/* first calculate the product */
				2562	r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
				2563
				2564	/* now calculate the double precision result, note we use
				2565	* addition instead of *2 since it's easier to optimize
				2566	*/
				2567	r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
				2568
				2569	/* store lower part */
				2570	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2571
				2572	/* get carry */
				2573	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
				2574	}
				2575	/* propagate upwards */
				2576	while (u != ((mp_digit) 0)) {
				2577	r = ((mp_word) *tmpt) + ((mp_word) u);
				2578	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2579	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
				2580	}
				2581	}
				2582
				2583	mp_clamp (&t);
				2584	mp_exch (&t, b);
				2585	mp_clear (&t);
				2586	return MP_OKAY;
				2587	}
				2588
				2589
				2590	/* multiplies \|a\| * \|b\| and does not compute the lower digs digits
				2591	* [meant to get the higher part of the product]
				2592	*/
				2593	static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
				2594	{
				2595	mp_int t;
				2596	int res, pa, pb, ix, iy;
				2597	mp_digit u;
				2598	mp_word r;
				2599	mp_digit tmpx, tmpt, tmpy;
				2600
				2601	/* can we use the fast multiplier? */
				2602	#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
				2603	if (((a->used + b->used + 1) < MP_WARRAY)
				2604	&& MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				2605	return fast_s_mp_mul_high_digs (a, b, c, digs);
				2606	}
				2607	#endif
				2608
				2609	if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
				2610	return res;
				2611	}
				2612	t.used = a->used + b->used + 1;
				2613
				2614	pa = a->used;
				2615	pb = b->used;
				2616	for (ix = 0; ix < pa; ix++) {
				2617	/* clear the carry */
				2618	u = 0;
				2619
				2620	/* left hand side of A[ix] * B[iy] */
				2621	tmpx = a->dp[ix];
				2622
				2623	/* alias to the address of where the digits will be stored */
				2624	tmpt = &(t.dp[digs]);
				2625
				2626	/* alias for where to read the right hand side from */
				2627	tmpy = b->dp + (digs - ix);
				2628
				2629	for (iy = digs - ix; iy < pb; iy++) {
				2630	/* calculate the double precision result */
				2631	r = ((mp_word)*tmpt) +
				2632	((mp_word)tmpx) * ((mp_word)*tmpy++) +
				2633	((mp_word) u);
				2634
				2635	/* get the lower part */
				2636	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2637
				2638	/* carry the carry */
				2639	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
				2640	}
				2641	*tmpt = u;
				2642	}
				2643	mp_clamp (&t);
				2644	mp_exch (&t, c);
				2645	mp_clear (&t);
				2646	return MP_OKAY;
				2647	}
				2648
				2649
				2650	#ifdef BN_MP_MONTGOMERY_SETUP_C
				2651	/* setups the montgomery reduction stuff */
				2652	static int
				2653	mp_montgomery_setup (mp_int * n, mp_digit * rho)
				2654	{
				2655	mp_digit x, b;
				2656
				2657	/* fast inversion mod 2**k
				2658	*
				2659	* Based on the fact that
				2660	*
				2661	* XA = 1 (mod 2n) => (X(2-XA)) A = 1 (mod 22n)
				2662	* => 2XA - XXA*A = 1
				2663	* => 2*(1) - (1) = 1
				2664	*/
				2665	b = n->dp[0];
				2666
				2667	if ((b & 1) == 0) {
				2668	return MP_VAL;
				2669	}
				2670
				2671	x = (((b + 2) & 4) << 1) + b; /* here xa==1 mod 24 /
				2672	x = 2 - b x; /* here xa==1 mod 28 /
				2673	#if !defined(MP_8BIT)
				2674	x = 2 - b x; /* here xa==1 mod 216 /
				2675	#endif
				2676	#if defined(MP_64BIT) \|\| !(defined(MP_8BIT) \|\| defined(MP_16BIT))
				2677	x = 2 - b x; /* here xa==1 mod 232 /
				2678	#endif
				2679	#ifdef MP_64BIT
				2680	x = 2 - b x; /* here xa==1 mod 264 /
				2681	#endif
				2682
				2683	/* rho = -1/m mod b */
				2684	*rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
				2685
				2686	return MP_OKAY;
				2687	}
				2688	#endif
				2689
				2690
				2691	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
				2692	/* computes xR**-1 == x (mod N) via Montgomery Reduction
				2693	*
				2694	* This is an optimized implementation of montgomery_reduce
				2695	* which uses the comba method to quickly calculate the columns of the
				2696	* reduction.
				2697	*
				2698	* Based on Algorithm 14.32 on pp.601 of HAC.
				2699	*/
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	2700	static int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2701	{
				2702	int ix, res, olduse;
				2703	mp_word W[MP_WARRAY];
				2704
				2705	/* get old used count */
				2706	olduse = x->used;
				2707
				2708	/* grow a as required */
				2709	if (x->alloc < n->used + 1) {
				2710	if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
				2711	return res;
				2712	}
				2713	}
				2714
				2715	/* first we have to get the digits of the input into
				2716	* an array of double precision words W[...]
				2717	*/
				2718	{
				2719	register mp_word *_W;
				2720	register mp_digit *tmpx;
				2721
				2722	/* alias for the W[] array */
				2723	_W = W;
				2724
				2725	/* alias for the digits of x*/
				2726	tmpx = x->dp;
				2727
				2728	/* copy the digits of a into W[0..a->used-1] */
				2729	for (ix = 0; ix < x->used; ix++) {
				2730	_W++ = tmpx++;
				2731	}
				2732
				2733	/* zero the high words of W[a->used..m->used2] /
				2734	for (; ix < n->used * 2 + 1; ix++) {
				2735	*_W++ = 0;
				2736	}
				2737	}
				2738
				2739	/* now we proceed to zero successive digits
				2740	* from the least significant upwards
				2741	*/
				2742	for (ix = 0; ix < n->used; ix++) {
				2743	/* mu = ai * m' mod b
				2744	*
				2745	* We avoid a double precision multiplication (which isn't required)
				2746	* by casting the value down to a mp_digit. Note this requires
				2747	* that W[ix-1] have the carry cleared (see after the inner loop)
				2748	*/
				2749	register mp_digit mu;
				2750	mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
				2751
				2752	/* a = a + mu * m * b**i
				2753	*
				2754	* This is computed in place and on the fly. The multiplication
				2755	* by b**i is handled by offseting which columns the results
				2756	* are added to.
				2757	*
				2758	* Note the comba method normally doesn't handle carries in the
				2759	* inner loop In this case we fix the carry from the previous
				2760	* column since the Montgomery reduction requires digits of the
				2761	* result (so far) [see above] to work. This is
				2762	* handled by fixing up one carry after the inner loop. The
				2763	* carry fixups are done in order so after these loops the
				2764	* first m->used words of W[] have the carries fixed
				2765	*/
				2766	{
				2767	register int iy;
				2768	register mp_digit *tmpn;
				2769	register mp_word *_W;
				2770
				2771	/* alias for the digits of the modulus */
				2772	tmpn = n->dp;
				2773
				2774	/* Alias for the columns set by an offset of ix */
				2775	_W = W + ix;
				2776
				2777	/* inner loop */
				2778	for (iy = 0; iy < n->used; iy++) {
				2779	_W++ += ((mp_word)mu) ((mp_word)*tmpn++);
				2780	}
				2781	}
				2782
				2783	/* now fix carry for next digit, W[ix+1] */
				2784	W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
				2785	}
				2786
				2787	/* now we have to propagate the carries and
				2788	* shift the words downward [all those least
				2789	* significant digits we zeroed].
				2790	*/
				2791	{
				2792	register mp_digit *tmpx;
				2793	register mp_word _W, _W1;
				2794
				2795	/* nox fix rest of carries */
				2796
				2797	/* alias for current word */
				2798	_W1 = W + ix;
				2799
				2800	/* alias for next word, where the carry goes */
				2801	_W = W + ++ix;
				2802
				2803	for (; ix <= n->used * 2 + 1; ix++) {
				2804	_W++ += _W1++ >> ((mp_word) DIGIT_BIT);
				2805	}
				2806
				2807	/* copy out, A = A/b**n
				2808	*
				2809	* The result is A/b**n but instead of converting from an
				2810	* array of mp_word to mp_digit than calling mp_rshd
				2811	* we just copy them in the right order
				2812	*/
				2813
				2814	/* alias for destination word */
				2815	tmpx = x->dp;
				2816
				2817	/* alias for shifted double precision result */
				2818	_W = W + n->used;
				2819
				2820	for (ix = 0; ix < n->used + 1; ix++) {
				2821	tmpx++ = (mp_digit)(_W++ & ((mp_word) MP_MASK));
				2822	}
				2823
				2824	/* zero oldused digits, if the input a was larger than
				2825	* m->used+1 we'll have to clear the digits
				2826	*/
				2827	for (; ix < olduse; ix++) {
				2828	*tmpx++ = 0;
				2829	}
				2830	}
				2831
				2832	/* set the max used and clamp */
				2833	x->used = n->used + 1;
				2834	mp_clamp (x);
				2835
				2836	/* if A >= m then A = A - m */
				2837	if (mp_cmp_mag (x, n) != MP_LT) {
				2838	return s_mp_sub (x, n, x);
				2839	}
				2840	return MP_OKAY;
				2841	}
				2842	#endif
				2843
				2844
				2845	#ifdef BN_MP_MUL_2_C
				2846	/* b = a2 /
				2847	static int mp_mul_2(mp_int * a, mp_int * b)
				2848	{
				2849	int x, res, oldused;
				2850
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	2851	/* grow to accommodate result */
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2852	if (b->alloc < a->used + 1) {
				2853	if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
				2854	return res;
				2855	}
				2856	}
				2857
				2858	oldused = b->used;
				2859	b->used = a->used;
				2860
				2861	{
				2862	register mp_digit r, rr, tmpa, tmpb;
				2863
				2864	/* alias for source */
				2865	tmpa = a->dp;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2866
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2867	/* alias for dest */
				2868	tmpb = b->dp;
				2869
				2870	/* carry */
				2871	r = 0;
				2872	for (x = 0; x < a->used; x++) {
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2873
				2874	/* get what will be the next carry bit from the
				2875	* MSB of the current digit
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2876	*/
				2877	rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2878
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2879	/* now shift up this digit, add in the carry [from the previous] */
				2880	tmpb++ = ((tmpa++ << ((mp_digit)1)) \| r) & MP_MASK;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2881
				2882	/* copy the carry that would be from the source
				2883	* digit into the next iteration
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2884	*/
				2885	r = rr;
				2886	}
				2887
				2888	/* new leading digit? */
				2889	if (r != 0) {
				2890	/* add a MSB which is always 1 at this point */
				2891	*tmpb = 1;
				2892	++(b->used);
				2893	}
				2894
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	2895	/* now zero any excess digits on the destination
				2896	* that we didn't write to
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2897	*/
				2898	tmpb = b->dp + b->used;
				2899	for (x = b->used; x < oldused; x++) {
				2900	*tmpb++ = 0;
				2901	}
				2902	}
				2903	b->sign = a->sign;
				2904	return MP_OKAY;
				2905	}
				2906	#endif
				2907
				2908
				2909	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
				2910	/*
				2911	* shifts with subtractions when the result is greater than b.
				2912	*
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	2913	* The method is slightly modified to shift B unconditionally up to just under
				2914	* the leading bit of b. This saves a lot of multiple precision shifting.
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2915	*/
				2916	static int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
				2917	{
				2918	int x, bits, res;
				2919
				2920	/* how many bits of last digit does b use */
				2921	bits = mp_count_bits (b) % DIGIT_BIT;
				2922
				2923	if (b->used > 1) {
				2924	if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
				2925	return res;
				2926	}
				2927	} else {
				2928	mp_set(a, 1);
				2929	bits = 1;
				2930	}
				2931
				2932
				2933	/* now compute C = A * B mod b */
				2934	for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
				2935	if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
				2936	return res;
				2937	}
				2938	if (mp_cmp_mag (a, b) != MP_LT) {
				2939	if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
				2940	return res;
				2941	}
				2942	}
				2943	}
				2944
				2945	return MP_OKAY;
				2946	}
				2947	#endif
				2948
				2949
				2950	#ifdef BN_MP_EXPTMOD_FAST_C
				2951	/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
				2952	*
				2953	* Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
				2954	* The value of k changes based on the size of the exponent.
				2955	*
				2956	* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
				2957	*/
				2958
				2959	static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
				2960	{
				2961	mp_int M[TAB_SIZE], res;
				2962	mp_digit buf, mp;
				2963	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
				2964
				2965	/* use a pointer to the reduction algorithm. This allows us to use
				2966	* one of many reduction algorithms without modding the guts of
				2967	* the code with if statements everywhere.
				2968	*/
				2969	int (redux)(mp_int,mp_int*,mp_digit);
				2970
				2971	/* find window size */
				2972	x = mp_count_bits (X);
				2973	if (x <= 7) {
				2974	winsize = 2;
				2975	} else if (x <= 36) {
				2976	winsize = 3;
				2977	} else if (x <= 140) {
				2978	winsize = 4;
				2979	} else if (x <= 450) {
				2980	winsize = 5;
				2981	} else if (x <= 1303) {
				2982	winsize = 6;
				2983	} else if (x <= 3529) {
				2984	winsize = 7;
				2985	} else {
				2986	winsize = 8;
				2987	}
				2988
				2989	#ifdef MP_LOW_MEM
				2990	if (winsize > 5) {
				2991	winsize = 5;
				2992	}
				2993	#endif
				2994
				2995	/* init M array */
				2996	/* init first cell */
				2997	if ((err = mp_init(&M[1])) != MP_OKAY) {
				2998	return err;
				2999	}
				3000
				3001	/* now init the second half of the array */
				3002	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				3003	if ((err = mp_init(&M[x])) != MP_OKAY) {
				3004	for (y = 1<<(winsize-1); y < x; y++) {
				3005	mp_clear (&M[y]);
				3006	}
				3007	mp_clear(&M[1]);
				3008	return err;
				3009	}
				3010	}
				3011
				3012	/* determine and setup reduction code */
				3013	if (redmode == 0) {
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	3014	#ifdef BN_MP_MONTGOMERY_SETUP_C
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	3015	/* now setup montgomery */
				3016	if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
				3017	goto LBL_M;
				3018	}
				3019	#else
				3020	err = MP_VAL;
				3021	goto LBL_M;
				3022	#endif
				3023
				3024	/* automatically pick the comba one if available (saves quite a few calls/ifs) */
				3025	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
				3026	if (((P->used * 2 + 1) < MP_WARRAY) &&
				3027	P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				3028	redux = fast_mp_montgomery_reduce;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	3029	} else
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	3030	#endif
				3031	{
				3032	#ifdef BN_MP_MONTGOMERY_REDUCE_C
				3033	/* use slower baseline Montgomery method */
				3034	redux = mp_montgomery_reduce;
				3035	#else
				3036	err = MP_VAL;
				3037	goto LBL_M;
				3038	#endif
				3039	}
				3040	} else if (redmode == 1) {
				3041	#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
				3042	/* setup DR reduction for moduli of the form B*k - b /
				3043	mp_dr_setup(P, &mp);
				3044	redux = mp_dr_reduce;
				3045	#else
				3046	err = MP_VAL;
				3047	goto LBL_M;
				3048	#endif
				3049	} else {
				3050	#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
				3051	/* setup DR reduction for moduli of the form 2*k - b /
				3052	if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
				3053	goto LBL_M;
				3054	}
				3055	redux = mp_reduce_2k;
				3056	#else
				3057	err = MP_VAL;
				3058	goto LBL_M;
				3059	#endif
				3060	}
				3061
				3062	/* setup result */
				3063	if ((err = mp_init (&res)) != MP_OKAY) {
				3064	goto LBL_M;
				3065	}
				3066
				3067	/* create M table
				3068	*
				3069
				3070	*
				3071	* The first half of the table is not computed though accept for M[0] and M[1]
				3072	*/
				3073
				3074	if (redmode == 0) {
				3075	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
				3076	/* now we need R mod m */
				3077	if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
				3078	goto LBL_RES;
				3079	}
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	3080	#else
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	3081	err = MP_VAL;
				3082	goto LBL_RES;
				3083	#endif
				3084
				3085	/* now set M[1] to G * R mod m */
				3086	if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
				3087	goto LBL_RES;
				3088	}
				3089	} else {
				3090	mp_set(&res, 1);
				3091	if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
				3092	goto LBL_RES;
				3093	}
				3094	}
				3095
				3096	/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
				3097	if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
				3098	goto LBL_RES;
				3099	}
				3100
				3101	for (x = 0; x < (winsize - 1); x++) {
				3102	if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
				3103	goto LBL_RES;
				3104	}
				3105	if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
				3106	goto LBL_RES;
				3107	}
				3108	}
				3109
				3110	/* create upper table */
				3111	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
				3112	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
				3113	goto LBL_RES;
				3114	}
				3115	if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
				3116	goto LBL_RES;
				3117	}
				3118	}
				3119
				3120	/* set initial mode and bit cnt */
				3121	mode = 0;
				3122	bitcnt = 1;
				3123	buf = 0;
				3124	digidx = X->used - 1;
				3125	bitcpy = 0;
				3126	bitbuf = 0;
				3127
				3128	for (;;) {
				3129	/* grab next digit as required */
				3130	if (--bitcnt == 0) {
				3131	/* if digidx == -1 we are out of digits so break */
				3132	if (digidx == -1) {
				3133	break;
				3134	}
				3135	/* read next digit and reset bitcnt */
				3136	buf = X->dp[digidx--];
				3137	bitcnt = (int)DIGIT_BIT;
				3138	}
				3139
				3140	/* grab the next msb from the exponent */
				3141	y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
				3142	buf <<= (mp_digit)1;
				3143
				3144	/* if the bit is zero and mode == 0 then we ignore it
				3145	* These represent the leading zero bits before the first 1 bit
				3146	* in the exponent. Technically this opt is not required but it
				3147	* does lower the # of trivial squaring/reductions used
				3148	*/
				3149	if (mode == 0 && y == 0) {
				3150	continue;
				3151	}
				3152
				3153	/* if the bit is zero and mode == 1 then we square */
				3154	if (mode == 1 && y == 0) {
				3155	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				3156	goto LBL_RES;
				3157	}
				3158	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3159	goto LBL_RES;
				3160	}
				3161	continue;
				3162	}
				3163
				3164	/* else we add it to the window */
				3165	bitbuf \|= (y << (winsize - ++bitcpy));
				3166	mode = 2;
				3167
				3168	if (bitcpy == winsize) {
				3169	/* ok window is filled so square as required and multiply */
				3170	/* square first */
				3171	for (x = 0; x < winsize; x++) {
				3172	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				3173	goto LBL_RES;
				3174	}
				3175	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3176	goto LBL_RES;
				3177	}
				3178	}
				3179
				3180	/* then multiply */
				3181	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
				3182	goto LBL_RES;
				3183	}
				3184	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3185	goto LBL_RES;
				3186	}
				3187
				3188	/* empty window and reset */
				3189	bitcpy = 0;
				3190	bitbuf = 0;
				3191	mode = 1;
				3192	}
				3193	}
				3194
				3195	/* if bits remain then square/multiply */
				3196	if (mode == 2 && bitcpy > 0) {
				3197	/* square then multiply if the bit is set */
				3198	for (x = 0; x < bitcpy; x++) {
				3199	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				3200	goto LBL_RES;
				3201	}
				3202	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3203	goto LBL_RES;
				3204	}
				3205
				3206	/* get next bit of the window */
				3207	bitbuf <<= 1;
				3208	if ((bitbuf & (1 << winsize)) != 0) {
				3209	/* then multiply */
				3210	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
				3211	goto LBL_RES;
				3212	}
				3213	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3214	goto LBL_RES;
				3215	}
				3216	}
				3217	}
				3218	}
				3219
				3220	if (redmode == 0) {
				3221	/* fixup result if Montgomery reduction is used
				3222	* recall that any value in a Montgomery system is
				3223	* actually multiplied by R mod n. So we have
				3224	* to reduce one more time to cancel out the factor
				3225	* of R.
				3226	*/
				3227	if ((err = redux(&res, P, mp)) != MP_OKAY) {
				3228	goto LBL_RES;
				3229	}
				3230	}
				3231
				3232	/* swap res with Y */
				3233	mp_exch (&res, Y);
				3234	err = MP_OKAY;
				3235	LBL_RES:mp_clear (&res);
				3236	LBL_M:
				3237	mp_clear(&M[1]);
				3238	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				3239	mp_clear (&M[x]);
				3240	}
				3241	return err;
				3242	}
				3243	#endif
				3244
				3245
				3246	#ifdef BN_FAST_S_MP_SQR_C
				3247	/* the jist of squaring...
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	3248	* you do like mult except the offset of the tmpx [one that
				3249	* starts closer to zero] can't equal the offset of tmpy.
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	3250	* So basically you set up iy like before then you min it with
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	3251	* (ty-tx) so that it never happens. You double all those
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	3252	* you add in the inner loop
				3253
				3254	After that loop you do the squares and add them in.
				3255	*/
				3256
				3257	static int fast_s_mp_sqr (mp_int * a, mp_int * b)
				3258	{
				3259	int olduse, res, pa, ix, iz;
				3260	mp_digit W[MP_WARRAY], *tmpx;
				3261	mp_word W1;
				3262
				3263	/* grow the destination as required */
				3264	pa = a->used + a->used;
				3265	if (b->alloc < pa) {
				3266	if ((res = mp_grow (b, pa)) != MP_OKAY) {
				3267	return res;
				3268	}
				3269	}
				3270
				3271	/* number of output digits to produce */
				3272	W1 = 0;
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	3273	for (ix = 0; ix < pa; ix++) {
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	3274	int tx, ty, iy;
				3275	mp_word _W;
				3276	mp_digit *tmpy;
				3277
				3278	/* clear counter */
				3279	_W = 0;
				3280
				3281	/* get offsets into the two bignums */
				3282	ty = MIN(a->used-1, ix);
				3283	tx = ix - ty;
				3284
				3285	/* setup temp aliases */
				3286	tmpx = a->dp + tx;
				3287	tmpy = a->dp + ty;
				3288
				3289	/* this is the number of times the loop will iterrate, essentially
				3290	while (tx++ < a->used && ty-- >= 0) { ... }
				3291	*/
				3292	iy = MIN(a->used-tx, ty+1);
				3293
Dmitry Shmidt	2933359	2017-01-09 12:27:11 -0800	[diff] [blame^]	3294	/* now for squaring tx can never equal ty
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	3295	* we halve the distance since they approach at a rate of 2x
				3296	* and we have to round because odd cases need to be executed
				3297	*/
				3298	iy = MIN(iy, (ty-tx+1)>>1);
				3299
				3300	/* execute loop */
				3301	for (iz = 0; iz < iy; iz++) {
				3302	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
				3303	}
				3304
				3305	/* double the inner product and add carry */
				3306	_W = _W + _W + W1;
				3307
				3308	/* even columns have the square term in them */
				3309	if ((ix&1) == 0) {
				3310	_W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
				3311	}
				3312
				3313	/* store it */
				3314	W[ix] = (mp_digit)(_W & MP_MASK);
				3315
				3316	/* make next carry */
				3317	W1 = _W >> ((mp_word)DIGIT_BIT);
				3318	}
				3319
				3320	/* setup dest */
				3321	olduse = b->used;
				3322	b->used = a->used+a->used;
				3323
				3324	{
				3325	mp_digit *tmpb;
				3326	tmpb = b->dp;
				3327	for (ix = 0; ix < pa; ix++) {
				3328	*tmpb++ = W[ix] & MP_MASK;
				3329	}
				3330
				3331	/* clear unused digits [that existed in the old copy of c] */
				3332	for (; ix < olduse; ix++) {
				3333	*tmpb++ = 0;
				3334	}
				3335	}
				3336	mp_clamp (b);
				3337	return MP_OKAY;
				3338	}
				3339	#endif
				3340
				3341
				3342	#ifdef BN_MP_MUL_D_C
				3343	/* multiply by a digit */
				3344	static int
				3345	mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
				3346	{
				3347	mp_digit u, tmpa, tmpc;
				3348	mp_word r;
				3349	int ix, res, olduse;
				3350
				3351	/* make sure c is big enough to hold ab /
				3352	if (c->alloc < a->used + 1) {
				3353	if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
				3354	return res;
				3355	}
				3356	}
				3357
				3358	/* get the original destinations used count */
				3359	olduse = c->used;
				3360
				3361	/* set the sign */
				3362	c->sign = a->sign;
				3363
				3364	/* alias for a->dp [source] */
				3365	tmpa = a->dp;
				3366
				3367	/* alias for c->dp [dest] */
				3368	tmpc = c->dp;
				3369
				3370	/* zero carry */
				3371	u = 0;
				3372
				3373	/* compute columns */
				3374	for (ix = 0; ix < a->used; ix++) {
				3375	/* compute product and carry sum for this term */
				3376	r = ((mp_word) u) + ((mp_word)tmpa++) ((mp_word)b);
				3377
				3378	/* mask off higher bits to get a single digit */
				3379	*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
				3380
				3381	/* send carry into next iteration */
				3382	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
				3383	}
				3384
				3385	/* store final carry [if any] and increment ix offset */
				3386	*tmpc++ = u;
				3387	++ix;
				3388
				3389	/* now zero digits above the top */
				3390	while (ix++ < olduse) {
				3391	*tmpc++ = 0;
				3392	}
				3393
				3394	/* set used count */
				3395	c->used = a->used + 1;
				3396	mp_clamp(c);
				3397
				3398	return MP_OKAY;
				3399	}
				3400	#endif