Blame - src/tls/libtommath.c - android_external_wpa_supplicant_8

blob: 7c9857f5c197f1fa1248bdbfdac014f49a4d6dd6 [file] [log] [blame]

Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1	/*
				2	* Minimal code for RSA support from LibTomMath 0.41
				3	* http://libtom.org/
				4	* http://libtom.org/files/ltm-0.41.tar.bz2
				5	* This library was released in public domain by Tom St Denis.
				6	*
				7	* The combination in this file may not use all of the optimized algorithms
				8	* from LibTomMath and may be considerable slower than the LibTomMath with its
				9	* default settings. The main purpose of having this version here is to make it
				10	* easier to build bignum.c wrapper without having to install and build an
				11	* external library.
				12	*
				13	* If CONFIG_INTERNAL_LIBTOMMATH is defined, bignum.c includes this
				14	* libtommath.c file instead of using the external LibTomMath library.
				15	*/
				16
				17	#ifndef CHAR_BIT
				18	#define CHAR_BIT 8
				19	#endif
				20
				21	#define BN_MP_INVMOD_C
				22	#define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would
				23	* require BN_MP_EXPTMOD_FAST_C instead */
				24	#define BN_S_MP_MUL_DIGS_C
				25	#define BN_MP_INVMOD_SLOW_C
				26	#define BN_S_MP_SQR_C
				27	#define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this
				28	* would require other than mp_reduce */
				29
				30	#ifdef LTM_FAST
				31
				32	/* Use faster div at the cost of about 1 kB */
				33	#define BN_MP_MUL_D_C
				34
				35	/* Include faster exptmod (Montgomery) at the cost of about 2.5 kB in code */
				36	#define BN_MP_EXPTMOD_FAST_C
				37	#define BN_MP_MONTGOMERY_SETUP_C
				38	#define BN_FAST_MP_MONTGOMERY_REDUCE_C
				39	#define BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
				40	#define BN_MP_MUL_2_C
				41
				42	/* Include faster sqr at the cost of about 0.5 kB in code */
				43	#define BN_FAST_S_MP_SQR_C
				44
				45	#else /* LTM_FAST */
				46
				47	#define BN_MP_DIV_SMALL
				48	#define BN_MP_INIT_MULTI_C
				49	#define BN_MP_CLEAR_MULTI_C
				50	#define BN_MP_ABS_C
				51	#endif /* LTM_FAST */
				52
				53	/* Current uses do not require support for negative exponent in exptmod, so we
				54	* can save about 1.5 kB in leaving out invmod. */
				55	#define LTM_NO_NEG_EXP
				56
				57	/* from tommath.h */
				58
				59	#ifndef MIN
				60	#define MIN(x,y) ((x)<(y)?(x):(y))
				61	#endif
				62
				63	#ifndef MAX
				64	#define MAX(x,y) ((x)>(y)?(x):(y))
				65	#endif
				66
				67	#define OPT_CAST(x)
				68
				69	typedef unsigned long mp_digit;
				70	typedef u64 mp_word;
				71
				72	#define DIGIT_BIT 28
				73	#define MP_28BIT
				74
				75
				76	#define XMALLOC os_malloc
				77	#define XFREE os_free
				78	#define XREALLOC os_realloc
				79
				80
				81	#define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
				82
				83	#define MP_LT -1 /* less than */
				84	#define MP_EQ 0 /* equal to */
				85	#define MP_GT 1 /* greater than */
				86
				87	#define MP_ZPOS 0 /* positive integer */
				88	#define MP_NEG 1 /* negative */
				89
				90	#define MP_OKAY 0 /* ok result */
				91	#define MP_MEM -2 /* out of mem */
				92	#define MP_VAL -3 /* invalid input */
				93
				94	#define MP_YES 1 /* yes response */
				95	#define MP_NO 0 /* no response */
				96
				97	typedef int mp_err;
				98
				99	/* define this to use lower memory usage routines (exptmods mostly) */
				100	#define MP_LOW_MEM
				101
				102	/* default precision */
				103	#ifndef MP_PREC
				104	#ifndef MP_LOW_MEM
				105	#define MP_PREC 32 /* default digits of precision */
				106	#else
				107	#define MP_PREC 8 /* default digits of precision */
				108	#endif
				109	#endif
				110
				111	/* size of comba arrays, should be at least 2 * 2*(BITS_PER_WORD - BITS_PER_DIGIT2) */
				112	#define MP_WARRAY (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
				113
				114	/* the infamous mp_int structure */
				115	typedef struct {
				116	int used, alloc, sign;
				117	mp_digit *dp;
				118	} mp_int;
				119
				120
				121	/* ---> Basic Manipulations <--- */
				122	#define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
				123	#define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
				124	#define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
				125
				126
				127	/* prototypes for copied functions */
				128	#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
				129	static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
				130	static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
				131	static int s_mp_sqr(mp_int * a, mp_int * b);
				132	static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs);
				133
				134	static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
				135
				136	#ifdef BN_MP_INIT_MULTI_C
				137	static int mp_init_multi(mp_int *mp, ...);
				138	#endif
				139	#ifdef BN_MP_CLEAR_MULTI_C
				140	static void mp_clear_multi(mp_int *mp, ...);
				141	#endif
				142	static int mp_lshd(mp_int * a, int b);
				143	static void mp_set(mp_int * a, mp_digit b);
				144	static void mp_clamp(mp_int * a);
				145	static void mp_exch(mp_int * a, mp_int * b);
				146	static void mp_rshd(mp_int * a, int b);
				147	static void mp_zero(mp_int * a);
				148	static int mp_mod_2d(mp_int * a, int b, mp_int * c);
				149	static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d);
				150	static int mp_init_copy(mp_int * a, mp_int * b);
				151	static int mp_mul_2d(mp_int * a, int b, mp_int * c);
				152	#ifndef LTM_NO_NEG_EXP
				153	static int mp_div_2(mp_int * a, mp_int * b);
				154	static int mp_invmod(mp_int * a, mp_int * b, mp_int * c);
				155	static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c);
				156	#endif /* LTM_NO_NEG_EXP */
				157	static int mp_copy(mp_int * a, mp_int * b);
				158	static int mp_count_bits(mp_int * a);
				159	static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d);
				160	static int mp_mod(mp_int * a, mp_int * b, mp_int * c);
				161	static int mp_grow(mp_int * a, int size);
				162	static int mp_cmp_mag(mp_int * a, mp_int * b);
				163	#ifdef BN_MP_ABS_C
				164	static int mp_abs(mp_int * a, mp_int * b);
				165	#endif
				166	static int mp_sqr(mp_int * a, mp_int * b);
				167	static int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d);
				168	static int mp_reduce_2k_setup_l(mp_int a, mp_int d);
				169	static int mp_2expt(mp_int * a, int b);
				170	static int mp_reduce_setup(mp_int * a, mp_int * b);
				171	static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu);
				172	static int mp_init_size(mp_int * a, int size);
				173	#ifdef BN_MP_EXPTMOD_FAST_C
				174	static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
				175	#endif /* BN_MP_EXPTMOD_FAST_C */
				176	#ifdef BN_FAST_S_MP_SQR_C
				177	static int fast_s_mp_sqr (mp_int * a, mp_int * b);
				178	#endif /* BN_FAST_S_MP_SQR_C */
				179	#ifdef BN_MP_MUL_D_C
				180	static int mp_mul_d (mp_int * a, mp_digit b, mp_int * c);
				181	#endif /* BN_MP_MUL_D_C */
				182
				183
				184
				185	/* functions from bn_<func name>.c */
				186
				187
				188	/* reverse an array, used for radix code */
				189	static void bn_reverse (unsigned char *s, int len)
				190	{
				191	int ix, iy;
				192	unsigned char t;
				193
				194	ix = 0;
				195	iy = len - 1;
				196	while (ix < iy) {
				197	t = s[ix];
				198	s[ix] = s[iy];
				199	s[iy] = t;
				200	++ix;
				201	--iy;
				202	}
				203	}
				204
				205
				206	/* low level addition, based on HAC pp.594, Algorithm 14.7 */
				207	static int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
				208	{
				209	mp_int *x;
				210	int olduse, res, min, max;
				211
				212	/* find sizes, we let \|a\| <= \|b\| which means we have to sort
				213	* them. "x" will point to the input with the most digits
				214	*/
				215	if (a->used > b->used) {
				216	min = b->used;
				217	max = a->used;
				218	x = a;
				219	} else {
				220	min = a->used;
				221	max = b->used;
				222	x = b;
				223	}
				224
				225	/* init result */
				226	if (c->alloc < max + 1) {
				227	if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
				228	return res;
				229	}
				230	}
				231
				232	/* get old used digit count and set new one */
				233	olduse = c->used;
				234	c->used = max + 1;
				235
				236	{
				237	register mp_digit u, tmpa, tmpb, *tmpc;
				238	register int i;
				239
				240	/* alias for digit pointers */
				241
				242	/* first input */
				243	tmpa = a->dp;
				244
				245	/* second input */
				246	tmpb = b->dp;
				247
				248	/* destination */
				249	tmpc = c->dp;
				250
				251	/* zero the carry */
				252	u = 0;
				253	for (i = 0; i < min; i++) {
				254	/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
				255	tmpc = tmpa++ + *tmpb++ + u;
				256
				257	/* U = carry bit of T[i] */
				258	u = *tmpc >> ((mp_digit)DIGIT_BIT);
				259
				260	/* take away carry bit from T[i] */
				261	*tmpc++ &= MP_MASK;
				262	}
				263
				264	/* now copy higher words if any, that is in A+B
				265	* if A or B has more digits add those in
				266	*/
				267	if (min != max) {
				268	for (; i < max; i++) {
				269	/* T[i] = X[i] + U */
				270	*tmpc = x->dp[i] + u;
				271
				272	/* U = carry bit of T[i] */
				273	u = *tmpc >> ((mp_digit)DIGIT_BIT);
				274
				275	/* take away carry bit from T[i] */
				276	*tmpc++ &= MP_MASK;
				277	}
				278	}
				279
				280	/* add carry */
				281	*tmpc++ = u;
				282
				283	/* clear digits above oldused */
				284	for (i = c->used; i < olduse; i++) {
				285	*tmpc++ = 0;
				286	}
				287	}
				288
				289	mp_clamp (c);
				290	return MP_OKAY;
				291	}
				292
				293
				294	/* low level subtraction (assumes \|a\| > \|b\|), HAC pp.595 Algorithm 14.9 */
				295	static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
				296	{
				297	int olduse, res, min, max;
				298
				299	/* find sizes */
				300	min = b->used;
				301	max = a->used;
				302
				303	/* init result */
				304	if (c->alloc < max) {
				305	if ((res = mp_grow (c, max)) != MP_OKAY) {
				306	return res;
				307	}
				308	}
				309	olduse = c->used;
				310	c->used = max;
				311
				312	{
				313	register mp_digit u, tmpa, tmpb, *tmpc;
				314	register int i;
				315
				316	/* alias for digit pointers */
				317	tmpa = a->dp;
				318	tmpb = b->dp;
				319	tmpc = c->dp;
				320
				321	/* set carry to zero */
				322	u = 0;
				323	for (i = 0; i < min; i++) {
				324	/* T[i] = A[i] - B[i] - U */
				325	tmpc = tmpa++ - *tmpb++ - u;
				326
				327	/* U = carry bit of T[i]
				328	* Note this saves performing an AND operation since
				329	* if a carry does occur it will propagate all the way to the
				330	* MSB. As a result a single shift is enough to get the carry
				331	*/
				332	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
				333
				334	/* Clear carry from T[i] */
				335	*tmpc++ &= MP_MASK;
				336	}
				337
				338	/* now copy higher words if any, e.g. if A has more digits than B */
				339	for (; i < max; i++) {
				340	/* T[i] = A[i] - U */
				341	tmpc = tmpa++ - u;
				342
				343	/* U = carry bit of T[i] */
				344	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
				345
				346	/* Clear carry from T[i] */
				347	*tmpc++ &= MP_MASK;
				348	}
				349
				350	/* clear digits above used (since we may not have grown result above) */
				351	for (i = c->used; i < olduse; i++) {
				352	*tmpc++ = 0;
				353	}
				354	}
				355
				356	mp_clamp (c);
				357	return MP_OKAY;
				358	}
				359
				360
				361	/* init a new mp_int */
				362	static int mp_init (mp_int * a)
				363	{
				364	int i;
				365
				366	/* allocate memory required and clear it */
				367	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
				368	if (a->dp == NULL) {
				369	return MP_MEM;
				370	}
				371
				372	/* set the digits to zero */
				373	for (i = 0; i < MP_PREC; i++) {
				374	a->dp[i] = 0;
				375	}
				376
				377	/* set the used to zero, allocated digits to the default precision
				378	* and sign to positive */
				379	a->used = 0;
				380	a->alloc = MP_PREC;
				381	a->sign = MP_ZPOS;
				382
				383	return MP_OKAY;
				384	}
				385
				386
				387	/* clear one (frees) */
				388	static void mp_clear (mp_int * a)
				389	{
				390	int i;
				391
				392	/* only do anything if a hasn't been freed previously */
				393	if (a->dp != NULL) {
				394	/* first zero the digits */
				395	for (i = 0; i < a->used; i++) {
				396	a->dp[i] = 0;
				397	}
				398
				399	/* free ram */
				400	XFREE(a->dp);
				401
				402	/* reset members to make debugging easier */
				403	a->dp = NULL;
				404	a->alloc = a->used = 0;
				405	a->sign = MP_ZPOS;
				406	}
				407	}
				408
				409
				410	/* high level addition (handles signs) */
				411	static int mp_add (mp_int * a, mp_int * b, mp_int * c)
				412	{
				413	int sa, sb, res;
				414
				415	/* get sign of both inputs */
				416	sa = a->sign;
				417	sb = b->sign;
				418
				419	/* handle two cases, not four */
				420	if (sa == sb) {
				421	/* both positive or both negative */
				422	/* add their magnitudes, copy the sign */
				423	c->sign = sa;
				424	res = s_mp_add (a, b, c);
				425	} else {
				426	/* one positive, the other negative */
				427	/* subtract the one with the greater magnitude from */
				428	/* the one of the lesser magnitude. The result gets */
				429	/* the sign of the one with the greater magnitude. */
				430	if (mp_cmp_mag (a, b) == MP_LT) {
				431	c->sign = sb;
				432	res = s_mp_sub (b, a, c);
				433	} else {
				434	c->sign = sa;
				435	res = s_mp_sub (a, b, c);
				436	}
				437	}
				438	return res;
				439	}
				440
				441
				442	/* high level subtraction (handles signs) */
				443	static int mp_sub (mp_int * a, mp_int * b, mp_int * c)
				444	{
				445	int sa, sb, res;
				446
				447	sa = a->sign;
				448	sb = b->sign;
				449
				450	if (sa != sb) {
				451	/* subtract a negative from a positive, OR */
				452	/* subtract a positive from a negative. */
				453	/* In either case, ADD their magnitudes, */
				454	/* and use the sign of the first number. */
				455	c->sign = sa;
				456	res = s_mp_add (a, b, c);
				457	} else {
				458	/* subtract a positive from a positive, OR */
				459	/* subtract a negative from a negative. */
				460	/* First, take the difference between their */
				461	/* magnitudes, then... */
				462	if (mp_cmp_mag (a, b) != MP_LT) {
				463	/* Copy the sign from the first */
				464	c->sign = sa;
				465	/* The first has a larger or equal magnitude */
				466	res = s_mp_sub (a, b, c);
				467	} else {
				468	/* The result has the opposite sign from */
				469	/* the first number. */
				470	c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
				471	/* The second has a larger magnitude */
				472	res = s_mp_sub (b, a, c);
				473	}
				474	}
				475	return res;
				476	}
				477
				478
				479	/* high level multiplication (handles sign) */
				480	static int mp_mul (mp_int * a, mp_int * b, mp_int * c)
				481	{
				482	int res, neg;
				483	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
				484
				485	/* use Toom-Cook? */
				486	#ifdef BN_MP_TOOM_MUL_C
				487	if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
				488	res = mp_toom_mul(a, b, c);
				489	} else
				490	#endif
				491	#ifdef BN_MP_KARATSUBA_MUL_C
				492	/* use Karatsuba? */
				493	if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
				494	res = mp_karatsuba_mul (a, b, c);
				495	} else
				496	#endif
				497	{
				498	/* can we use the fast multiplier?
				499	*
				500	* The fast multiplier can be used if the output will
				501	* have less than MP_WARRAY digits and the number of
				502	* digits won't affect carry propagation
				503	*/
				504	#ifdef BN_FAST_S_MP_MUL_DIGS_C
				505	int digs = a->used + b->used + 1;
				506
				507	if ((digs < MP_WARRAY) &&
				508	MIN(a->used, b->used) <=
				509	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				510	res = fast_s_mp_mul_digs (a, b, c, digs);
				511	} else
				512	#endif
				513	#ifdef BN_S_MP_MUL_DIGS_C
				514	res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
				515	#else
				516	#error mp_mul could fail
				517	res = MP_VAL;
				518	#endif
				519
				520	}
				521	c->sign = (c->used > 0) ? neg : MP_ZPOS;
				522	return res;
				523	}
				524
				525
				526	/* d = a * b (mod c) */
				527	static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
				528	{
				529	int res;
				530	mp_int t;
				531
				532	if ((res = mp_init (&t)) != MP_OKAY) {
				533	return res;
				534	}
				535
				536	if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
				537	mp_clear (&t);
				538	return res;
				539	}
				540	res = mp_mod (&t, c, d);
				541	mp_clear (&t);
				542	return res;
				543	}
				544
				545
				546	/* c = a mod b, 0 <= c < b */
				547	static int mp_mod (mp_int * a, mp_int * b, mp_int * c)
				548	{
				549	mp_int t;
				550	int res;
				551
				552	if ((res = mp_init (&t)) != MP_OKAY) {
				553	return res;
				554	}
				555
				556	if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
				557	mp_clear (&t);
				558	return res;
				559	}
				560
				561	if (t.sign != b->sign) {
				562	res = mp_add (b, &t, c);
				563	} else {
				564	res = MP_OKAY;
				565	mp_exch (&t, c);
				566	}
				567
				568	mp_clear (&t);
				569	return res;
				570	}
				571
				572
				573	/* this is a shell function that calls either the normal or Montgomery
				574	* exptmod functions. Originally the call to the montgomery code was
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame^]	575	* embedded in the normal function but that wasted a lot of stack space
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	576	* for nothing (since 99% of the time the Montgomery code would be called)
				577	*/
				578	static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
				579	{
				580	int dr;
				581
				582	/* modulus P must be positive */
				583	if (P->sign == MP_NEG) {
				584	return MP_VAL;
				585	}
				586
				587	/* if exponent X is negative we have to recurse */
				588	if (X->sign == MP_NEG) {
				589	#ifdef LTM_NO_NEG_EXP
				590	return MP_VAL;
				591	#else /* LTM_NO_NEG_EXP */
				592	#ifdef BN_MP_INVMOD_C
				593	mp_int tmpG, tmpX;
				594	int err;
				595
				596	/* first compute 1/G mod P */
				597	if ((err = mp_init(&tmpG)) != MP_OKAY) {
				598	return err;
				599	}
				600	if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
				601	mp_clear(&tmpG);
				602	return err;
				603	}
				604
				605	/* now get \|X\| */
				606	if ((err = mp_init(&tmpX)) != MP_OKAY) {
				607	mp_clear(&tmpG);
				608	return err;
				609	}
				610	if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
				611	mp_clear_multi(&tmpG, &tmpX, NULL);
				612	return err;
				613	}
				614
				615	/* and now compute (1/G)\|X\| instead of GX [X < 0] */
				616	err = mp_exptmod(&tmpG, &tmpX, P, Y);
				617	mp_clear_multi(&tmpG, &tmpX, NULL);
				618	return err;
				619	#else
				620	#error mp_exptmod would always fail
				621	/* no invmod */
				622	return MP_VAL;
				623	#endif
				624	#endif /* LTM_NO_NEG_EXP */
				625	}
				626
				627	/* modified diminished radix reduction */
				628	#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C)
				629	if (mp_reduce_is_2k_l(P) == MP_YES) {
				630	return s_mp_exptmod(G, X, P, Y, 1);
				631	}
				632	#endif
				633
				634	#ifdef BN_MP_DR_IS_MODULUS_C
				635	/* is it a DR modulus? */
				636	dr = mp_dr_is_modulus(P);
				637	#else
				638	/* default to no */
				639	dr = 0;
				640	#endif
				641
				642	#ifdef BN_MP_REDUCE_IS_2K_C
				643	/* if not, is it a unrestricted DR modulus? */
				644	if (dr == 0) {
				645	dr = mp_reduce_is_2k(P) << 1;
				646	}
				647	#endif
				648
				649	/* if the modulus is odd or dr != 0 use the montgomery method */
				650	#ifdef BN_MP_EXPTMOD_FAST_C
				651	if (mp_isodd (P) == 1 \|\| dr != 0) {
				652	return mp_exptmod_fast (G, X, P, Y, dr);
				653	} else {
				654	#endif
				655	#ifdef BN_S_MP_EXPTMOD_C
				656	/* otherwise use the generic Barrett reduction technique */
				657	return s_mp_exptmod (G, X, P, Y, 0);
				658	#else
				659	#error mp_exptmod could fail
				660	/* no exptmod for evens */
				661	return MP_VAL;
				662	#endif
				663	#ifdef BN_MP_EXPTMOD_FAST_C
				664	}
				665	#endif
				666	}
				667
				668
				669	/* compare two ints (signed)*/
				670	static int mp_cmp (mp_int * a, mp_int * b)
				671	{
				672	/* compare based on sign */
				673	if (a->sign != b->sign) {
				674	if (a->sign == MP_NEG) {
				675	return MP_LT;
				676	} else {
				677	return MP_GT;
				678	}
				679	}
				680
				681	/* compare digits */
				682	if (a->sign == MP_NEG) {
				683	/* if negative compare opposite direction */
				684	return mp_cmp_mag(b, a);
				685	} else {
				686	return mp_cmp_mag(a, b);
				687	}
				688	}
				689
				690
				691	/* compare a digit */
				692	static int mp_cmp_d(mp_int * a, mp_digit b)
				693	{
				694	/* compare based on sign */
				695	if (a->sign == MP_NEG) {
				696	return MP_LT;
				697	}
				698
				699	/* compare based on magnitude */
				700	if (a->used > 1) {
				701	return MP_GT;
				702	}
				703
				704	/* compare the only digit of a to b */
				705	if (a->dp[0] > b) {
				706	return MP_GT;
				707	} else if (a->dp[0] < b) {
				708	return MP_LT;
				709	} else {
				710	return MP_EQ;
				711	}
				712	}
				713
				714
				715	#ifndef LTM_NO_NEG_EXP
				716	/* hac 14.61, pp608 */
				717	static int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
				718	{
				719	/* b cannot be negative */
				720	if (b->sign == MP_NEG \|\| mp_iszero(b) == 1) {
				721	return MP_VAL;
				722	}
				723
				724	#ifdef BN_FAST_MP_INVMOD_C
				725	/* if the modulus is odd we can use a faster routine instead */
				726	if (mp_isodd (b) == 1) {
				727	return fast_mp_invmod (a, b, c);
				728	}
				729	#endif
				730
				731	#ifdef BN_MP_INVMOD_SLOW_C
				732	return mp_invmod_slow(a, b, c);
				733	#endif
				734
				735	#ifndef BN_FAST_MP_INVMOD_C
				736	#ifndef BN_MP_INVMOD_SLOW_C
				737	#error mp_invmod would always fail
				738	#endif
				739	#endif
				740	return MP_VAL;
				741	}
				742	#endif /* LTM_NO_NEG_EXP */
				743
				744
				745	/* get the size for an unsigned equivalent */
				746	static int mp_unsigned_bin_size (mp_int * a)
				747	{
				748	int size = mp_count_bits (a);
				749	return (size / 8 + ((size & 7) != 0 ? 1 : 0));
				750	}
				751
				752
				753	#ifndef LTM_NO_NEG_EXP
				754	/* hac 14.61, pp608 */
				755	static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
				756	{
				757	mp_int x, y, u, v, A, B, C, D;
				758	int res;
				759
				760	/* b cannot be negative */
				761	if (b->sign == MP_NEG \|\| mp_iszero(b) == 1) {
				762	return MP_VAL;
				763	}
				764
				765	/* init temps */
				766	if ((res = mp_init_multi(&x, &y, &u, &v,
				767	&A, &B, &C, &D, NULL)) != MP_OKAY) {
				768	return res;
				769	}
				770
				771	/* x = a, y = b */
				772	if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
				773	goto LBL_ERR;
				774	}
				775	if ((res = mp_copy (b, &y)) != MP_OKAY) {
				776	goto LBL_ERR;
				777	}
				778
				779	/* 2. [modified] if x,y are both even then return an error! */
				780	if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
				781	res = MP_VAL;
				782	goto LBL_ERR;
				783	}
				784
				785	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
				786	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
				787	goto LBL_ERR;
				788	}
				789	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
				790	goto LBL_ERR;
				791	}
				792	mp_set (&A, 1);
				793	mp_set (&D, 1);
				794
				795	top:
				796	/* 4. while u is even do */
				797	while (mp_iseven (&u) == 1) {
				798	/* 4.1 u = u/2 */
				799	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
				800	goto LBL_ERR;
				801	}
				802	/* 4.2 if A or B is odd then */
				803	if (mp_isodd (&A) == 1 \|\| mp_isodd (&B) == 1) {
				804	/* A = (A+y)/2, B = (B-x)/2 */
				805	if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
				806	goto LBL_ERR;
				807	}
				808	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
				809	goto LBL_ERR;
				810	}
				811	}
				812	/* A = A/2, B = B/2 */
				813	if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
				814	goto LBL_ERR;
				815	}
				816	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
				817	goto LBL_ERR;
				818	}
				819	}
				820
				821	/* 5. while v is even do */
				822	while (mp_iseven (&v) == 1) {
				823	/* 5.1 v = v/2 */
				824	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
				825	goto LBL_ERR;
				826	}
				827	/* 5.2 if C or D is odd then */
				828	if (mp_isodd (&C) == 1 \|\| mp_isodd (&D) == 1) {
				829	/* C = (C+y)/2, D = (D-x)/2 */
				830	if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
				831	goto LBL_ERR;
				832	}
				833	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
				834	goto LBL_ERR;
				835	}
				836	}
				837	/* C = C/2, D = D/2 */
				838	if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
				839	goto LBL_ERR;
				840	}
				841	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
				842	goto LBL_ERR;
				843	}
				844	}
				845
				846	/* 6. if u >= v then */
				847	if (mp_cmp (&u, &v) != MP_LT) {
				848	/* u = u - v, A = A - C, B = B - D */
				849	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
				850	goto LBL_ERR;
				851	}
				852
				853	if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
				854	goto LBL_ERR;
				855	}
				856
				857	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
				858	goto LBL_ERR;
				859	}
				860	} else {
				861	/* v - v - u, C = C - A, D = D - B */
				862	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
				863	goto LBL_ERR;
				864	}
				865
				866	if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
				867	goto LBL_ERR;
				868	}
				869
				870	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
				871	goto LBL_ERR;
				872	}
				873	}
				874
				875	/* if not zero goto step 4 */
				876	if (mp_iszero (&u) == 0)
				877	goto top;
				878
				879	/* now a = C, b = D, gcd == gv /
				880
				881	/* if v != 1 then there is no inverse */
				882	if (mp_cmp_d (&v, 1) != MP_EQ) {
				883	res = MP_VAL;
				884	goto LBL_ERR;
				885	}
				886
				887	/* if its too low */
				888	while (mp_cmp_d(&C, 0) == MP_LT) {
				889	if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
				890	goto LBL_ERR;
				891	}
				892	}
				893
				894	/* too big */
				895	while (mp_cmp_mag(&C, b) != MP_LT) {
				896	if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
				897	goto LBL_ERR;
				898	}
				899	}
				900
				901	/* C is now the inverse */
				902	mp_exch (&C, c);
				903	res = MP_OKAY;
				904	LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
				905	return res;
				906	}
				907	#endif /* LTM_NO_NEG_EXP */
				908
				909
				910	/* compare maginitude of two ints (unsigned) */
				911	static int mp_cmp_mag (mp_int * a, mp_int * b)
				912	{
				913	int n;
				914	mp_digit tmpa, tmpb;
				915
				916	/* compare based on # of non-zero digits */
				917	if (a->used > b->used) {
				918	return MP_GT;
				919	}
				920
				921	if (a->used < b->used) {
				922	return MP_LT;
				923	}
				924
				925	/* alias for a */
				926	tmpa = a->dp + (a->used - 1);
				927
				928	/* alias for b */
				929	tmpb = b->dp + (a->used - 1);
				930
				931	/* compare based on digits */
				932	for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
				933	if (tmpa > tmpb) {
				934	return MP_GT;
				935	}
				936
				937	if (tmpa < tmpb) {
				938	return MP_LT;
				939	}
				940	}
				941	return MP_EQ;
				942	}
				943
				944
				945	/* reads a unsigned char array, assumes the msb is stored first [big endian] */
				946	static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
				947	{
				948	int res;
				949
				950	/* make sure there are at least two digits */
				951	if (a->alloc < 2) {
				952	if ((res = mp_grow(a, 2)) != MP_OKAY) {
				953	return res;
				954	}
				955	}
				956
				957	/* zero the int */
				958	mp_zero (a);
				959
				960	/* read the bytes in */
				961	while (c-- > 0) {
				962	if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
				963	return res;
				964	}
				965
				966	#ifndef MP_8BIT
				967	a->dp[0] \|= *b++;
				968	a->used += 1;
				969	#else
				970	a->dp[0] = (*b & MP_MASK);
				971	a->dp[1] \|= ((*b++ >> 7U) & 1);
				972	a->used += 2;
				973	#endif
				974	}
				975	mp_clamp (a);
				976	return MP_OKAY;
				977	}
				978
				979
				980	/* store in unsigned [big endian] format */
				981	static int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
				982	{
				983	int x, res;
				984	mp_int t;
				985
				986	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
				987	return res;
				988	}
				989
				990	x = 0;
				991	while (mp_iszero (&t) == 0) {
				992	#ifndef MP_8BIT
				993	b[x++] = (unsigned char) (t.dp[0] & 255);
				994	#else
				995	b[x++] = (unsigned char) (t.dp[0] \| ((t.dp[1] & 0x01) << 7));
				996	#endif
				997	if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
				998	mp_clear (&t);
				999	return res;
				1000	}
				1001	}
				1002	bn_reverse (b, x);
				1003	mp_clear (&t);
				1004	return MP_OKAY;
				1005	}
				1006
				1007
				1008	/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
				1009	static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
				1010	{
				1011	mp_digit D, r, rr;
				1012	int x, res;
				1013	mp_int t;
				1014
				1015
				1016	/* if the shift count is <= 0 then we do no work */
				1017	if (b <= 0) {
				1018	res = mp_copy (a, c);
				1019	if (d != NULL) {
				1020	mp_zero (d);
				1021	}
				1022	return res;
				1023	}
				1024
				1025	if ((res = mp_init (&t)) != MP_OKAY) {
				1026	return res;
				1027	}
				1028
				1029	/* get the remainder */
				1030	if (d != NULL) {
				1031	if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
				1032	mp_clear (&t);
				1033	return res;
				1034	}
				1035	}
				1036
				1037	/* copy */
				1038	if ((res = mp_copy (a, c)) != MP_OKAY) {
				1039	mp_clear (&t);
				1040	return res;
				1041	}
				1042
				1043	/* shift by as many digits in the bit count */
				1044	if (b >= (int)DIGIT_BIT) {
				1045	mp_rshd (c, b / DIGIT_BIT);
				1046	}
				1047
				1048	/* shift any bit count < DIGIT_BIT */
				1049	D = (mp_digit) (b % DIGIT_BIT);
				1050	if (D != 0) {
				1051	register mp_digit *tmpc, mask, shift;
				1052
				1053	/* mask */
				1054	mask = (((mp_digit)1) << D) - 1;
				1055
				1056	/* shift for lsb */
				1057	shift = DIGIT_BIT - D;
				1058
				1059	/* alias */
				1060	tmpc = c->dp + (c->used - 1);
				1061
				1062	/* carry */
				1063	r = 0;
				1064	for (x = c->used - 1; x >= 0; x--) {
				1065	/* get the lower bits of this word in a temp */
				1066	rr = *tmpc & mask;
				1067
				1068	/* shift the current word and mix in the carry bits from the previous word */
				1069	tmpc = (tmpc >> D) \| (r << shift);
				1070	--tmpc;
				1071
				1072	/* set the carry to the carry bits of the current word found above */
				1073	r = rr;
				1074	}
				1075	}
				1076	mp_clamp (c);
				1077	if (d != NULL) {
				1078	mp_exch (&t, d);
				1079	}
				1080	mp_clear (&t);
				1081	return MP_OKAY;
				1082	}
				1083
				1084
				1085	static int mp_init_copy (mp_int * a, mp_int * b)
				1086	{
				1087	int res;
				1088
				1089	if ((res = mp_init (a)) != MP_OKAY) {
				1090	return res;
				1091	}
				1092	return mp_copy (b, a);
				1093	}
				1094
				1095
				1096	/* set to zero */
				1097	static void mp_zero (mp_int * a)
				1098	{
				1099	int n;
				1100	mp_digit *tmp;
				1101
				1102	a->sign = MP_ZPOS;
				1103	a->used = 0;
				1104
				1105	tmp = a->dp;
				1106	for (n = 0; n < a->alloc; n++) {
				1107	*tmp++ = 0;
				1108	}
				1109	}
				1110
				1111
				1112	/* copy, b = a */
				1113	static int mp_copy (mp_int * a, mp_int * b)
				1114	{
				1115	int res, n;
				1116
				1117	/* if dst == src do nothing */
				1118	if (a == b) {
				1119	return MP_OKAY;
				1120	}
				1121
				1122	/* grow dest */
				1123	if (b->alloc < a->used) {
				1124	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
				1125	return res;
				1126	}
				1127	}
				1128
				1129	/* zero b and copy the parameters over */
				1130	{
				1131	register mp_digit tmpa, tmpb;
				1132
				1133	/* pointer aliases */
				1134
				1135	/* source */
				1136	tmpa = a->dp;
				1137
				1138	/* destination */
				1139	tmpb = b->dp;
				1140
				1141	/* copy all the digits */
				1142	for (n = 0; n < a->used; n++) {
				1143	tmpb++ = tmpa++;
				1144	}
				1145
				1146	/* clear high digits */
				1147	for (; n < b->used; n++) {
				1148	*tmpb++ = 0;
				1149	}
				1150	}
				1151
				1152	/* copy used count and sign */
				1153	b->used = a->used;
				1154	b->sign = a->sign;
				1155	return MP_OKAY;
				1156	}
				1157
				1158
				1159	/* shift right a certain amount of digits */
				1160	static void mp_rshd (mp_int * a, int b)
				1161	{
				1162	int x;
				1163
				1164	/* if b <= 0 then ignore it */
				1165	if (b <= 0) {
				1166	return;
				1167	}
				1168
				1169	/* if b > used then simply zero it and return */
				1170	if (a->used <= b) {
				1171	mp_zero (a);
				1172	return;
				1173	}
				1174
				1175	{
				1176	register mp_digit bottom, top;
				1177
				1178	/* shift the digits down */
				1179
				1180	/* bottom */
				1181	bottom = a->dp;
				1182
				1183	/* top [offset into digits] */
				1184	top = a->dp + b;
				1185
				1186	/* this is implemented as a sliding window where
				1187	* the window is b-digits long and digits from
				1188	* the top of the window are copied to the bottom
				1189	*
				1190	* e.g.
				1191
				1192	b-2 \| b-1 \| b0 \| b1 \| b2 \| ... \| bb \| ---->
				1193	/\ \| ---->
				1194	\-------------------/ ---->
				1195	*/
				1196	for (x = 0; x < (a->used - b); x++) {
				1197	bottom++ = top++;
				1198	}
				1199
				1200	/* zero the top digits */
				1201	for (; x < a->used; x++) {
				1202	*bottom++ = 0;
				1203	}
				1204	}
				1205
				1206	/* remove excess digits */
				1207	a->used -= b;
				1208	}
				1209
				1210
				1211	/* swap the elements of two integers, for cases where you can't simply swap the
				1212	* mp_int pointers around
				1213	*/
				1214	static void mp_exch (mp_int * a, mp_int * b)
				1215	{
				1216	mp_int t;
				1217
				1218	t = *a;
				1219	a = b;
				1220	*b = t;
				1221	}
				1222
				1223
				1224	/* trim unused digits
				1225	*
				1226	* This is used to ensure that leading zero digits are
				1227	* trimed and the leading "used" digit will be non-zero
				1228	* Typically very fast. Also fixes the sign if there
				1229	* are no more leading digits
				1230	*/
				1231	static void mp_clamp (mp_int * a)
				1232	{
				1233	/* decrease used while the most significant digit is
				1234	* zero.
				1235	*/
				1236	while (a->used > 0 && a->dp[a->used - 1] == 0) {
				1237	--(a->used);
				1238	}
				1239
				1240	/* reset the sign flag if used == 0 */
				1241	if (a->used == 0) {
				1242	a->sign = MP_ZPOS;
				1243	}
				1244	}
				1245
				1246
				1247	/* grow as required */
				1248	static int mp_grow (mp_int * a, int size)
				1249	{
				1250	int i;
				1251	mp_digit *tmp;
				1252
				1253	/* if the alloc size is smaller alloc more ram */
				1254	if (a->alloc < size) {
				1255	/* ensure there are always at least MP_PREC digits extra on top */
				1256	size += (MP_PREC * 2) - (size % MP_PREC);
				1257
				1258	/* reallocate the array a->dp
				1259	*
				1260	* We store the return in a temporary variable
				1261	* in case the operation failed we don't want
				1262	* to overwrite the dp member of a.
				1263	*/
				1264	tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size);
				1265	if (tmp == NULL) {
				1266	/* reallocation failed but "a" is still valid [can be freed] */
				1267	return MP_MEM;
				1268	}
				1269
				1270	/* reallocation succeeded so set a->dp */
				1271	a->dp = tmp;
				1272
				1273	/* zero excess digits */
				1274	i = a->alloc;
				1275	a->alloc = size;
				1276	for (; i < a->alloc; i++) {
				1277	a->dp[i] = 0;
				1278	}
				1279	}
				1280	return MP_OKAY;
				1281	}
				1282
				1283
				1284	#ifdef BN_MP_ABS_C
				1285	/* b = \|a\|
				1286	*
				1287	* Simple function copies the input and fixes the sign to positive
				1288	*/
				1289	static int mp_abs (mp_int * a, mp_int * b)
				1290	{
				1291	int res;
				1292
				1293	/* copy a to b */
				1294	if (a != b) {
				1295	if ((res = mp_copy (a, b)) != MP_OKAY) {
				1296	return res;
				1297	}
				1298	}
				1299
				1300	/* force the sign of b to positive */
				1301	b->sign = MP_ZPOS;
				1302
				1303	return MP_OKAY;
				1304	}
				1305	#endif
				1306
				1307
				1308	/* set to a digit */
				1309	static void mp_set (mp_int * a, mp_digit b)
				1310	{
				1311	mp_zero (a);
				1312	a->dp[0] = b & MP_MASK;
				1313	a->used = (a->dp[0] != 0) ? 1 : 0;
				1314	}
				1315
				1316
				1317	#ifndef LTM_NO_NEG_EXP
				1318	/* b = a/2 */
				1319	static int mp_div_2(mp_int * a, mp_int * b)
				1320	{
				1321	int x, res, oldused;
				1322
				1323	/* copy */
				1324	if (b->alloc < a->used) {
				1325	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
				1326	return res;
				1327	}
				1328	}
				1329
				1330	oldused = b->used;
				1331	b->used = a->used;
				1332	{
				1333	register mp_digit r, rr, tmpa, tmpb;
				1334
				1335	/* source alias */
				1336	tmpa = a->dp + b->used - 1;
				1337
				1338	/* dest alias */
				1339	tmpb = b->dp + b->used - 1;
				1340
				1341	/* carry */
				1342	r = 0;
				1343	for (x = b->used - 1; x >= 0; x--) {
				1344	/* get the carry for the next iteration */
				1345	rr = *tmpa & 1;
				1346
				1347	/* shift the current digit, add in carry and store */
				1348	tmpb-- = (tmpa-- >> 1) \| (r << (DIGIT_BIT - 1));
				1349
				1350	/* forward carry to next iteration */
				1351	r = rr;
				1352	}
				1353
				1354	/* zero excess digits */
				1355	tmpb = b->dp + b->used;
				1356	for (x = b->used; x < oldused; x++) {
				1357	*tmpb++ = 0;
				1358	}
				1359	}
				1360	b->sign = a->sign;
				1361	mp_clamp (b);
				1362	return MP_OKAY;
				1363	}
				1364	#endif /* LTM_NO_NEG_EXP */
				1365
				1366
				1367	/* shift left by a certain bit count */
				1368	static int mp_mul_2d (mp_int * a, int b, mp_int * c)
				1369	{
				1370	mp_digit d;
				1371	int res;
				1372
				1373	/* copy */
				1374	if (a != c) {
				1375	if ((res = mp_copy (a, c)) != MP_OKAY) {
				1376	return res;
				1377	}
				1378	}
				1379
				1380	if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
				1381	if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
				1382	return res;
				1383	}
				1384	}
				1385
				1386	/* shift by as many digits in the bit count */
				1387	if (b >= (int)DIGIT_BIT) {
				1388	if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
				1389	return res;
				1390	}
				1391	}
				1392
				1393	/* shift any bit count < DIGIT_BIT */
				1394	d = (mp_digit) (b % DIGIT_BIT);
				1395	if (d != 0) {
				1396	register mp_digit *tmpc, shift, mask, r, rr;
				1397	register int x;
				1398
				1399	/* bitmask for carries */
				1400	mask = (((mp_digit)1) << d) - 1;
				1401
				1402	/* shift for msbs */
				1403	shift = DIGIT_BIT - d;
				1404
				1405	/* alias */
				1406	tmpc = c->dp;
				1407
				1408	/* carry */
				1409	r = 0;
				1410	for (x = 0; x < c->used; x++) {
				1411	/* get the higher bits of the current word */
				1412	rr = (*tmpc >> shift) & mask;
				1413
				1414	/* shift the current word and OR in the carry */
				1415	tmpc = ((tmpc << d) \| r) & MP_MASK;
				1416	++tmpc;
				1417
				1418	/* set the carry to the carry bits of the current word */
				1419	r = rr;
				1420	}
				1421
				1422	/* set final carry */
				1423	if (r != 0) {
				1424	c->dp[(c->used)++] = r;
				1425	}
				1426	}
				1427	mp_clamp (c);
				1428	return MP_OKAY;
				1429	}
				1430
				1431
				1432	#ifdef BN_MP_INIT_MULTI_C
				1433	static int mp_init_multi(mp_int *mp, ...)
				1434	{
				1435	mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
				1436	int n = 0; /* Number of ok inits */
				1437	mp_int* cur_arg = mp;
				1438	va_list args;
				1439
				1440	va_start(args, mp); /* init args to next argument from caller */
				1441	while (cur_arg != NULL) {
				1442	if (mp_init(cur_arg) != MP_OKAY) {
				1443	/* Oops - error! Back-track and mp_clear what we already
				1444	succeeded in init-ing, then return error.
				1445	*/
				1446	va_list clean_args;
				1447
				1448	/* end the current list */
				1449	va_end(args);
				1450
				1451	/* now start cleaning up */
				1452	cur_arg = mp;
				1453	va_start(clean_args, mp);
				1454	while (n--) {
				1455	mp_clear(cur_arg);
				1456	cur_arg = va_arg(clean_args, mp_int*);
				1457	}
				1458	va_end(clean_args);
				1459	res = MP_MEM;
				1460	break;
				1461	}
				1462	n++;
				1463	cur_arg = va_arg(args, mp_int*);
				1464	}
				1465	va_end(args);
				1466	return res; /* Assumed ok, if error flagged above. */
				1467	}
				1468	#endif
				1469
				1470
				1471	#ifdef BN_MP_CLEAR_MULTI_C
				1472	static void mp_clear_multi(mp_int *mp, ...)
				1473	{
				1474	mp_int* next_mp = mp;
				1475	va_list args;
				1476	va_start(args, mp);
				1477	while (next_mp != NULL) {
				1478	mp_clear(next_mp);
				1479	next_mp = va_arg(args, mp_int*);
				1480	}
				1481	va_end(args);
				1482	}
				1483	#endif
				1484
				1485
				1486	/* shift left a certain amount of digits */
				1487	static int mp_lshd (mp_int * a, int b)
				1488	{
				1489	int x, res;
				1490
				1491	/* if its less than zero return */
				1492	if (b <= 0) {
				1493	return MP_OKAY;
				1494	}
				1495
				1496	/* grow to fit the new digits */
				1497	if (a->alloc < a->used + b) {
				1498	if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
				1499	return res;
				1500	}
				1501	}
				1502
				1503	{
				1504	register mp_digit top, bottom;
				1505
				1506	/* increment the used by the shift amount then copy upwards */
				1507	a->used += b;
				1508
				1509	/* top */
				1510	top = a->dp + a->used - 1;
				1511
				1512	/* base */
				1513	bottom = a->dp + a->used - 1 - b;
				1514
				1515	/* much like mp_rshd this is implemented using a sliding window
				1516	* except the window goes the otherway around. Copying from
				1517	* the bottom to the top. see bn_mp_rshd.c for more info.
				1518	*/
				1519	for (x = a->used - 1; x >= b; x--) {
				1520	top-- = bottom--;
				1521	}
				1522
				1523	/* zero the lower digits */
				1524	top = a->dp;
				1525	for (x = 0; x < b; x++) {
				1526	*top++ = 0;
				1527	}
				1528	}
				1529	return MP_OKAY;
				1530	}
				1531
				1532
				1533	/* returns the number of bits in an int */
				1534	static int mp_count_bits (mp_int * a)
				1535	{
				1536	int r;
				1537	mp_digit q;
				1538
				1539	/* shortcut */
				1540	if (a->used == 0) {
				1541	return 0;
				1542	}
				1543
				1544	/* get number of digits and add that */
				1545	r = (a->used - 1) * DIGIT_BIT;
				1546
				1547	/* take the last digit and count the bits in it */
				1548	q = a->dp[a->used - 1];
				1549	while (q > ((mp_digit) 0)) {
				1550	++r;
				1551	q >>= ((mp_digit) 1);
				1552	}
				1553	return r;
				1554	}
				1555
				1556
				1557	/* calc a value mod 2*b /
				1558	static int mp_mod_2d (mp_int * a, int b, mp_int * c)
				1559	{
				1560	int x, res;
				1561
				1562	/* if b is <= 0 then zero the int */
				1563	if (b <= 0) {
				1564	mp_zero (c);
				1565	return MP_OKAY;
				1566	}
				1567
				1568	/* if the modulus is larger than the value than return */
				1569	if (b >= (int) (a->used * DIGIT_BIT)) {
				1570	res = mp_copy (a, c);
				1571	return res;
				1572	}
				1573
				1574	/* copy */
				1575	if ((res = mp_copy (a, c)) != MP_OKAY) {
				1576	return res;
				1577	}
				1578
				1579	/* zero digits above the last digit of the modulus */
				1580	for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
				1581	c->dp[x] = 0;
				1582	}
				1583	/* clear the digit that is not completely outside/inside the modulus */
				1584	c->dp[b / DIGIT_BIT] &=
				1585	(mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
				1586	mp_clamp (c);
				1587	return MP_OKAY;
				1588	}
				1589
				1590
				1591	#ifdef BN_MP_DIV_SMALL
				1592
				1593	/* slower bit-bang division... also smaller */
				1594	static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
				1595	{
				1596	mp_int ta, tb, tq, q;
				1597	int res, n, n2;
				1598
				1599	/* is divisor zero ? */
				1600	if (mp_iszero (b) == 1) {
				1601	return MP_VAL;
				1602	}
				1603
				1604	/* if a < b then q=0, r = a */
				1605	if (mp_cmp_mag (a, b) == MP_LT) {
				1606	if (d != NULL) {
				1607	res = mp_copy (a, d);
				1608	} else {
				1609	res = MP_OKAY;
				1610	}
				1611	if (c != NULL) {
				1612	mp_zero (c);
				1613	}
				1614	return res;
				1615	}
				1616
				1617	/* init our temps */
				1618	if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) {
				1619	return res;
				1620	}
				1621
				1622
				1623	mp_set(&tq, 1);
				1624	n = mp_count_bits(a) - mp_count_bits(b);
				1625	if (((res = mp_abs(a, &ta)) != MP_OKAY) \|\|
				1626	((res = mp_abs(b, &tb)) != MP_OKAY) \|\|
				1627	((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) \|\|
				1628	((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
				1629	goto LBL_ERR;
				1630	}
				1631
				1632	while (n-- >= 0) {
				1633	if (mp_cmp(&tb, &ta) != MP_GT) {
				1634	if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) \|\|
				1635	((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
				1636	goto LBL_ERR;
				1637	}
				1638	}
				1639	if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) \|\|
				1640	((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
				1641	goto LBL_ERR;
				1642	}
				1643	}
				1644
				1645	/* now q == quotient and ta == remainder */
				1646	n = a->sign;
				1647	n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
				1648	if (c != NULL) {
				1649	mp_exch(c, &q);
				1650	c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
				1651	}
				1652	if (d != NULL) {
				1653	mp_exch(d, &ta);
				1654	d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
				1655	}
				1656	LBL_ERR:
				1657	mp_clear_multi(&ta, &tb, &tq, &q, NULL);
				1658	return res;
				1659	}
				1660
				1661	#else
				1662
				1663	/* integer signed division.
				1664	* c*b + d == a [e.g. a/b, c=quotient, d=remainder]
				1665	* HAC pp.598 Algorithm 14.20
				1666	*
				1667	* Note that the description in HAC is horribly
				1668	* incomplete. For example, it doesn't consider
				1669	* the case where digits are removed from 'x' in
				1670	* the inner loop. It also doesn't consider the
				1671	* case that y has fewer than three digits, etc..
				1672	*
				1673	* The overall algorithm is as described as
				1674	* 14.20 from HAC but fixed to treat these cases.
				1675	*/
				1676	static int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
				1677	{
				1678	mp_int q, x, y, t1, t2;
				1679	int res, n, t, i, norm, neg;
				1680
				1681	/* is divisor zero ? */
				1682	if (mp_iszero (b) == 1) {
				1683	return MP_VAL;
				1684	}
				1685
				1686	/* if a < b then q=0, r = a */
				1687	if (mp_cmp_mag (a, b) == MP_LT) {
				1688	if (d != NULL) {
				1689	res = mp_copy (a, d);
				1690	} else {
				1691	res = MP_OKAY;
				1692	}
				1693	if (c != NULL) {
				1694	mp_zero (c);
				1695	}
				1696	return res;
				1697	}
				1698
				1699	if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
				1700	return res;
				1701	}
				1702	q.used = a->used + 2;
				1703
				1704	if ((res = mp_init (&t1)) != MP_OKAY) {
				1705	goto LBL_Q;
				1706	}
				1707
				1708	if ((res = mp_init (&t2)) != MP_OKAY) {
				1709	goto LBL_T1;
				1710	}
				1711
				1712	if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
				1713	goto LBL_T2;
				1714	}
				1715
				1716	if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
				1717	goto LBL_X;
				1718	}
				1719
				1720	/* fix the sign */
				1721	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
				1722	x.sign = y.sign = MP_ZPOS;
				1723
				1724	/* normalize both x and y, ensure that y >= b/2, [b == 2*DIGIT_BIT] /
				1725	norm = mp_count_bits(&y) % DIGIT_BIT;
				1726	if (norm < (int)(DIGIT_BIT-1)) {
				1727	norm = (DIGIT_BIT-1) - norm;
				1728	if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
				1729	goto LBL_Y;
				1730	}
				1731	if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
				1732	goto LBL_Y;
				1733	}
				1734	} else {
				1735	norm = 0;
				1736	}
				1737
				1738	/* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
				1739	n = x.used - 1;
				1740	t = y.used - 1;
				1741
				1742	/* while (x >= ybn-t) do { q[n-t] += 1; x -= yb*{n-t} } /
				1743	if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = yb{n-t} /
				1744	goto LBL_Y;
				1745	}
				1746
				1747	while (mp_cmp (&x, &y) != MP_LT) {
				1748	++(q.dp[n - t]);
				1749	if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
				1750	goto LBL_Y;
				1751	}
				1752	}
				1753
				1754	/* reset y by shifting it back down */
				1755	mp_rshd (&y, n - t);
				1756
				1757	/* step 3. for i from n down to (t + 1) */
				1758	for (i = n; i >= (t + 1); i--) {
				1759	if (i > x.used) {
				1760	continue;
				1761	}
				1762
				1763	/* step 3.1 if xi == yt then set q{i-t-1} to b-1,
				1764	* otherwise set q{i-t-1} to (xib + x{i-1})/yt /
				1765	if (x.dp[i] == y.dp[t]) {
				1766	q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
				1767	} else {
				1768	mp_word tmp;
				1769	tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
				1770	tmp \|= ((mp_word) x.dp[i - 1]);
				1771	tmp /= ((mp_word) y.dp[t]);
				1772	if (tmp > (mp_word) MP_MASK)
				1773	tmp = MP_MASK;
				1774	q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
				1775	}
				1776
				1777	/* while (q{i-t-1} * (yt * b + y{t-1})) >
				1778	xi * b*2 + xi-1 b + xi-2
				1779
				1780	do q{i-t-1} -= 1;
				1781	*/
				1782	q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
				1783	do {
				1784	q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
				1785
				1786	/* find left hand */
				1787	mp_zero (&t1);
				1788	t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
				1789	t1.dp[1] = y.dp[t];
				1790	t1.used = 2;
				1791	if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
				1792	goto LBL_Y;
				1793	}
				1794
				1795	/* find right hand */
				1796	t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
				1797	t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
				1798	t2.dp[2] = x.dp[i];
				1799	t2.used = 3;
				1800	} while (mp_cmp_mag(&t1, &t2) == MP_GT);
				1801
				1802	/* step 3.3 x = x - q{i-t-1} * y * b*{i-t-1} /
				1803	if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
				1804	goto LBL_Y;
				1805	}
				1806
				1807	if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
				1808	goto LBL_Y;
				1809	}
				1810
				1811	if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
				1812	goto LBL_Y;
				1813	}
				1814
				1815	/* if x < 0 then { x = x + yb{i-t-1}; q{i-t-1} -= 1; } /
				1816	if (x.sign == MP_NEG) {
				1817	if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
				1818	goto LBL_Y;
				1819	}
				1820	if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
				1821	goto LBL_Y;
				1822	}
				1823	if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
				1824	goto LBL_Y;
				1825	}
				1826
				1827	q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
				1828	}
				1829	}
				1830
				1831	/* now q is the quotient and x is the remainder
				1832	* [which we have to normalize]
				1833	*/
				1834
				1835	/* get sign before writing to c */
				1836	x.sign = x.used == 0 ? MP_ZPOS : a->sign;
				1837
				1838	if (c != NULL) {
				1839	mp_clamp (&q);
				1840	mp_exch (&q, c);
				1841	c->sign = neg;
				1842	}
				1843
				1844	if (d != NULL) {
				1845	mp_div_2d (&x, norm, &x, NULL);
				1846	mp_exch (&x, d);
				1847	}
				1848
				1849	res = MP_OKAY;
				1850
				1851	LBL_Y:mp_clear (&y);
				1852	LBL_X:mp_clear (&x);
				1853	LBL_T2:mp_clear (&t2);
				1854	LBL_T1:mp_clear (&t1);
				1855	LBL_Q:mp_clear (&q);
				1856	return res;
				1857	}
				1858
				1859	#endif
				1860
				1861
				1862	#ifdef MP_LOW_MEM
				1863	#define TAB_SIZE 32
				1864	#else
				1865	#define TAB_SIZE 256
				1866	#endif
				1867
				1868	static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
				1869	{
				1870	mp_int M[TAB_SIZE], res, mu;
				1871	mp_digit buf;
				1872	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
				1873	int (redux)(mp_int,mp_int,mp_int);
				1874
				1875	/* find window size */
				1876	x = mp_count_bits (X);
				1877	if (x <= 7) {
				1878	winsize = 2;
				1879	} else if (x <= 36) {
				1880	winsize = 3;
				1881	} else if (x <= 140) {
				1882	winsize = 4;
				1883	} else if (x <= 450) {
				1884	winsize = 5;
				1885	} else if (x <= 1303) {
				1886	winsize = 6;
				1887	} else if (x <= 3529) {
				1888	winsize = 7;
				1889	} else {
				1890	winsize = 8;
				1891	}
				1892
				1893	#ifdef MP_LOW_MEM
				1894	if (winsize > 5) {
				1895	winsize = 5;
				1896	}
				1897	#endif
				1898
				1899	/* init M array */
				1900	/* init first cell */
				1901	if ((err = mp_init(&M[1])) != MP_OKAY) {
				1902	return err;
				1903	}
				1904
				1905	/* now init the second half of the array */
				1906	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				1907	if ((err = mp_init(&M[x])) != MP_OKAY) {
				1908	for (y = 1<<(winsize-1); y < x; y++) {
				1909	mp_clear (&M[y]);
				1910	}
				1911	mp_clear(&M[1]);
				1912	return err;
				1913	}
				1914	}
				1915
				1916	/* create mu, used for Barrett reduction */
				1917	if ((err = mp_init (&mu)) != MP_OKAY) {
				1918	goto LBL_M;
				1919	}
				1920
				1921	if (redmode == 0) {
				1922	if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
				1923	goto LBL_MU;
				1924	}
				1925	redux = mp_reduce;
				1926	} else {
				1927	if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
				1928	goto LBL_MU;
				1929	}
				1930	redux = mp_reduce_2k_l;
				1931	}
				1932
				1933	/* create M table
				1934	*
				1935	* The M table contains powers of the base,
				1936	* e.g. M[x] = G**x mod P
				1937	*
				1938	* The first half of the table is not
				1939	* computed though accept for M[0] and M[1]
				1940	*/
				1941	if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
				1942	goto LBL_MU;
				1943	}
				1944
				1945	/* compute the value at M[1<<(winsize-1)] by squaring
				1946	* M[1] (winsize-1) times
				1947	*/
				1948	if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
				1949	goto LBL_MU;
				1950	}
				1951
				1952	for (x = 0; x < (winsize - 1); x++) {
				1953	/* square it */
				1954	if ((err = mp_sqr (&M[1 << (winsize - 1)],
				1955	&M[1 << (winsize - 1)])) != MP_OKAY) {
				1956	goto LBL_MU;
				1957	}
				1958
				1959	/* reduce modulo P */
				1960	if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
				1961	goto LBL_MU;
				1962	}
				1963	}
				1964
				1965	/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
				1966	* for x = (2(winsize - 1) + 1) to (2winsize - 1)
				1967	*/
				1968	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
				1969	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
				1970	goto LBL_MU;
				1971	}
				1972	if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
				1973	goto LBL_MU;
				1974	}
				1975	}
				1976
				1977	/* setup result */
				1978	if ((err = mp_init (&res)) != MP_OKAY) {
				1979	goto LBL_MU;
				1980	}
				1981	mp_set (&res, 1);
				1982
				1983	/* set initial mode and bit cnt */
				1984	mode = 0;
				1985	bitcnt = 1;
				1986	buf = 0;
				1987	digidx = X->used - 1;
				1988	bitcpy = 0;
				1989	bitbuf = 0;
				1990
				1991	for (;;) {
				1992	/* grab next digit as required */
				1993	if (--bitcnt == 0) {
				1994	/* if digidx == -1 we are out of digits */
				1995	if (digidx == -1) {
				1996	break;
				1997	}
				1998	/* read next digit and reset the bitcnt */
				1999	buf = X->dp[digidx--];
				2000	bitcnt = (int) DIGIT_BIT;
				2001	}
				2002
				2003	/* grab the next msb from the exponent */
				2004	y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
				2005	buf <<= (mp_digit)1;
				2006
				2007	/* if the bit is zero and mode == 0 then we ignore it
				2008	* These represent the leading zero bits before the first 1 bit
				2009	* in the exponent. Technically this opt is not required but it
				2010	* does lower the # of trivial squaring/reductions used
				2011	*/
				2012	if (mode == 0 && y == 0) {
				2013	continue;
				2014	}
				2015
				2016	/* if the bit is zero and mode == 1 then we square */
				2017	if (mode == 1 && y == 0) {
				2018	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				2019	goto LBL_RES;
				2020	}
				2021	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2022	goto LBL_RES;
				2023	}
				2024	continue;
				2025	}
				2026
				2027	/* else we add it to the window */
				2028	bitbuf \|= (y << (winsize - ++bitcpy));
				2029	mode = 2;
				2030
				2031	if (bitcpy == winsize) {
				2032	/* ok window is filled so square as required and multiply */
				2033	/* square first */
				2034	for (x = 0; x < winsize; x++) {
				2035	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				2036	goto LBL_RES;
				2037	}
				2038	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2039	goto LBL_RES;
				2040	}
				2041	}
				2042
				2043	/* then multiply */
				2044	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
				2045	goto LBL_RES;
				2046	}
				2047	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2048	goto LBL_RES;
				2049	}
				2050
				2051	/* empty window and reset */
				2052	bitcpy = 0;
				2053	bitbuf = 0;
				2054	mode = 1;
				2055	}
				2056	}
				2057
				2058	/* if bits remain then square/multiply */
				2059	if (mode == 2 && bitcpy > 0) {
				2060	/* square then multiply if the bit is set */
				2061	for (x = 0; x < bitcpy; x++) {
				2062	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				2063	goto LBL_RES;
				2064	}
				2065	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2066	goto LBL_RES;
				2067	}
				2068
				2069	bitbuf <<= 1;
				2070	if ((bitbuf & (1 << winsize)) != 0) {
				2071	/* then multiply */
				2072	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
				2073	goto LBL_RES;
				2074	}
				2075	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2076	goto LBL_RES;
				2077	}
				2078	}
				2079	}
				2080	}
				2081
				2082	mp_exch (&res, Y);
				2083	err = MP_OKAY;
				2084	LBL_RES:mp_clear (&res);
				2085	LBL_MU:mp_clear (&mu);
				2086	LBL_M:
				2087	mp_clear(&M[1]);
				2088	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				2089	mp_clear (&M[x]);
				2090	}
				2091	return err;
				2092	}
				2093
				2094
				2095	/* computes b = aa /
				2096	static int mp_sqr (mp_int * a, mp_int * b)
				2097	{
				2098	int res;
				2099
				2100	#ifdef BN_MP_TOOM_SQR_C
				2101	/* use Toom-Cook? */
				2102	if (a->used >= TOOM_SQR_CUTOFF) {
				2103	res = mp_toom_sqr(a, b);
				2104	/* Karatsuba? */
				2105	} else
				2106	#endif
				2107	#ifdef BN_MP_KARATSUBA_SQR_C
				2108	if (a->used >= KARATSUBA_SQR_CUTOFF) {
				2109	res = mp_karatsuba_sqr (a, b);
				2110	} else
				2111	#endif
				2112	{
				2113	#ifdef BN_FAST_S_MP_SQR_C
				2114	/* can we use the fast comba multiplier? */
				2115	if ((a->used * 2 + 1) < MP_WARRAY &&
				2116	a->used <
				2117	(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
				2118	res = fast_s_mp_sqr (a, b);
				2119	} else
				2120	#endif
				2121	#ifdef BN_S_MP_SQR_C
				2122	res = s_mp_sqr (a, b);
				2123	#else
				2124	#error mp_sqr could fail
				2125	res = MP_VAL;
				2126	#endif
				2127	}
				2128	b->sign = MP_ZPOS;
				2129	return res;
				2130	}
				2131
				2132
				2133	/* reduces a modulo n where n is of the form 2**p - d
				2134	This differs from reduce_2k since "d" can be larger
				2135	than a single digit.
				2136	*/
				2137	static int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d)
				2138	{
				2139	mp_int q;
				2140	int p, res;
				2141
				2142	if ((res = mp_init(&q)) != MP_OKAY) {
				2143	return res;
				2144	}
				2145
				2146	p = mp_count_bits(n);
				2147	top:
				2148	/* q = a/2p, a = a mod 2p */
				2149	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
				2150	goto ERR;
				2151	}
				2152
				2153	/* q = q * d */
				2154	if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
				2155	goto ERR;
				2156	}
				2157
				2158	/* a = a + q */
				2159	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
				2160	goto ERR;
				2161	}
				2162
				2163	if (mp_cmp_mag(a, n) != MP_LT) {
				2164	s_mp_sub(a, n, a);
				2165	goto top;
				2166	}
				2167
				2168	ERR:
				2169	mp_clear(&q);
				2170	return res;
				2171	}
				2172
				2173
				2174	/* determines the setup value */
				2175	static int mp_reduce_2k_setup_l(mp_int a, mp_int d)
				2176	{
				2177	int res;
				2178	mp_int tmp;
				2179
				2180	if ((res = mp_init(&tmp)) != MP_OKAY) {
				2181	return res;
				2182	}
				2183
				2184	if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
				2185	goto ERR;
				2186	}
				2187
				2188	if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
				2189	goto ERR;
				2190	}
				2191
				2192	ERR:
				2193	mp_clear(&tmp);
				2194	return res;
				2195	}
				2196
				2197
				2198	/* computes a = 2**b
				2199	*
				2200	* Simple algorithm which zeroes the int, grows it then just sets one bit
				2201	* as required.
				2202	*/
				2203	static int mp_2expt (mp_int * a, int b)
				2204	{
				2205	int res;
				2206
				2207	/* zero a as per default */
				2208	mp_zero (a);
				2209
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame^]	2210	/* grow a to accommodate the single bit */
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2211	if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
				2212	return res;
				2213	}
				2214
				2215	/* set the used count of where the bit will go */
				2216	a->used = b / DIGIT_BIT + 1;
				2217
				2218	/* put the single bit in its place */
				2219	a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
				2220
				2221	return MP_OKAY;
				2222	}
				2223
				2224
				2225	/* pre-calculate the value required for Barrett reduction
				2226	* For a given modulus "b" it calulates the value required in "a"
				2227	*/
				2228	static int mp_reduce_setup (mp_int * a, mp_int * b)
				2229	{
				2230	int res;
				2231
				2232	if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
				2233	return res;
				2234	}
				2235	return mp_div (a, b, a, NULL);
				2236	}
				2237
				2238
				2239	/* reduces x mod m, assumes 0 < x < m**2, mu is
				2240	* precomputed via mp_reduce_setup.
				2241	* From HAC pp.604 Algorithm 14.42
				2242	*/
				2243	static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
				2244	{
				2245	mp_int q;
				2246	int res, um = m->used;
				2247
				2248	/* q = x */
				2249	if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
				2250	return res;
				2251	}
				2252
				2253	/* q1 = x / b*(k-1) /
				2254	mp_rshd (&q, um - 1);
				2255
				2256	/* according to HAC this optimization is ok */
				2257	if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
				2258	if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
				2259	goto CLEANUP;
				2260	}
				2261	} else {
				2262	#ifdef BN_S_MP_MUL_HIGH_DIGS_C
				2263	if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
				2264	goto CLEANUP;
				2265	}
				2266	#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
				2267	if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
				2268	goto CLEANUP;
				2269	}
				2270	#else
				2271	{
				2272	#error mp_reduce would always fail
				2273	res = MP_VAL;
				2274	goto CLEANUP;
				2275	}
				2276	#endif
				2277	}
				2278
				2279	/* q3 = q2 / b*(k+1) /
				2280	mp_rshd (&q, um + 1);
				2281
				2282	/* x = x mod b*(k+1), quick (no division) /
				2283	if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
				2284	goto CLEANUP;
				2285	}
				2286
				2287	/* q = q * m mod b*(k+1), quick (no division) /
				2288	if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
				2289	goto CLEANUP;
				2290	}
				2291
				2292	/* x = x - q */
				2293	if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
				2294	goto CLEANUP;
				2295	}
				2296
				2297	/* If x < 0, add b*(k+1) to it /
				2298	if (mp_cmp_d (x, 0) == MP_LT) {
				2299	mp_set (&q, 1);
				2300	if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) {
				2301	goto CLEANUP;
				2302	}
				2303	if ((res = mp_add (x, &q, x)) != MP_OKAY) {
				2304	goto CLEANUP;
				2305	}
				2306	}
				2307
				2308	/* Back off if it's too big */
				2309	while (mp_cmp (x, m) != MP_LT) {
				2310	if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
				2311	goto CLEANUP;
				2312	}
				2313	}
				2314
				2315	CLEANUP:
				2316	mp_clear (&q);
				2317
				2318	return res;
				2319	}
				2320
				2321
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame^]	2322	/* multiplies \|a\| * \|b\| and only computes up to digs digits of result
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2323	* HAC pp. 595, Algorithm 14.12 Modified so you can control how
				2324	* many digits of output are created.
				2325	*/
				2326	static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
				2327	{
				2328	mp_int t;
				2329	int res, pa, pb, ix, iy;
				2330	mp_digit u;
				2331	mp_word r;
				2332	mp_digit tmpx, tmpt, tmpy;
				2333
				2334	/* can we use the fast multiplier? */
				2335	if (((digs) < MP_WARRAY) &&
				2336	MIN (a->used, b->used) <
				2337	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				2338	return fast_s_mp_mul_digs (a, b, c, digs);
				2339	}
				2340
				2341	if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
				2342	return res;
				2343	}
				2344	t.used = digs;
				2345
				2346	/* compute the digits of the product directly */
				2347	pa = a->used;
				2348	for (ix = 0; ix < pa; ix++) {
				2349	/* set the carry to zero */
				2350	u = 0;
				2351
				2352	/* limit ourselves to making digs digits of output */
				2353	pb = MIN (b->used, digs - ix);
				2354
				2355	/* setup some aliases */
				2356	/* copy of the digit from a used within the nested loop */
				2357	tmpx = a->dp[ix];
				2358
				2359	/* an alias for the destination shifted ix places */
				2360	tmpt = t.dp + ix;
				2361
				2362	/* an alias for the digits of b */
				2363	tmpy = b->dp;
				2364
				2365	/* compute the columns of the output and propagate the carry */
				2366	for (iy = 0; iy < pb; iy++) {
				2367	/* compute the column as a mp_word */
				2368	r = ((mp_word)*tmpt) +
				2369	((mp_word)tmpx) * ((mp_word)*tmpy++) +
				2370	((mp_word) u);
				2371
				2372	/* the new column is the lower part of the result */
				2373	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2374
				2375	/* get the carry word from the result */
				2376	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
				2377	}
				2378	/* set carry if it is placed below digs */
				2379	if (ix + iy < digs) {
				2380	*tmpt = u;
				2381	}
				2382	}
				2383
				2384	mp_clamp (&t);
				2385	mp_exch (&t, c);
				2386
				2387	mp_clear (&t);
				2388	return MP_OKAY;
				2389	}
				2390
				2391
				2392	/* Fast (comba) multiplier
				2393	*
				2394	* This is the fast column-array [comba] multiplier. It is
				2395	* designed to compute the columns of the product first
				2396	* then handle the carries afterwards. This has the effect
				2397	* of making the nested loops that compute the columns very
				2398	* simple and schedulable on super-scalar processors.
				2399	*
				2400	* This has been modified to produce a variable number of
				2401	* digits of output so if say only a half-product is required
				2402	* you don't have to compute the upper half (a feature
				2403	* required for fast Barrett reduction).
				2404	*
				2405	* Based on Algorithm 14.12 on pp.595 of HAC.
				2406	*
				2407	*/
				2408	static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
				2409	{
				2410	int olduse, res, pa, ix, iz;
				2411	mp_digit W[MP_WARRAY];
				2412	register mp_word _W;
				2413
				2414	/* grow the destination as required */
				2415	if (c->alloc < digs) {
				2416	if ((res = mp_grow (c, digs)) != MP_OKAY) {
				2417	return res;
				2418	}
				2419	}
				2420
				2421	/* number of output digits to produce */
				2422	pa = MIN(digs, a->used + b->used);
				2423
				2424	/* clear the carry */
				2425	_W = 0;
				2426	for (ix = 0; ix < pa; ix++) {
				2427	int tx, ty;
				2428	int iy;
				2429	mp_digit tmpx, tmpy;
				2430
				2431	/* get offsets into the two bignums */
				2432	ty = MIN(b->used-1, ix);
				2433	tx = ix - ty;
				2434
				2435	/* setup temp aliases */
				2436	tmpx = a->dp + tx;
				2437	tmpy = b->dp + ty;
				2438
				2439	/* this is the number of times the loop will iterrate, essentially
				2440	while (tx++ < a->used && ty-- >= 0) { ... }
				2441	*/
				2442	iy = MIN(a->used-tx, ty+1);
				2443
				2444	/* execute loop */
				2445	for (iz = 0; iz < iy; ++iz) {
				2446	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
				2447
				2448	}
				2449
				2450	/* store term */
				2451	W[ix] = ((mp_digit)_W) & MP_MASK;
				2452
				2453	/* make next carry */
				2454	_W = _W >> ((mp_word)DIGIT_BIT);
				2455	}
				2456
				2457	/* setup dest */
				2458	olduse = c->used;
				2459	c->used = pa;
				2460
				2461	{
				2462	register mp_digit *tmpc;
				2463	tmpc = c->dp;
				2464	for (ix = 0; ix < pa+1; ix++) {
				2465	/* now extract the previous digit [below the carry] */
				2466	*tmpc++ = W[ix];
				2467	}
				2468
				2469	/* clear unused digits [that existed in the old copy of c] */
				2470	for (; ix < olduse; ix++) {
				2471	*tmpc++ = 0;
				2472	}
				2473	}
				2474	mp_clamp (c);
				2475	return MP_OKAY;
				2476	}
				2477
				2478
				2479	/* init an mp_init for a given size */
				2480	static int mp_init_size (mp_int * a, int size)
				2481	{
				2482	int x;
				2483
				2484	/* pad size so there are always extra digits */
				2485	size += (MP_PREC * 2) - (size % MP_PREC);
				2486
				2487	/* alloc mem */
				2488	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
				2489	if (a->dp == NULL) {
				2490	return MP_MEM;
				2491	}
				2492
				2493	/* set the members */
				2494	a->used = 0;
				2495	a->alloc = size;
				2496	a->sign = MP_ZPOS;
				2497
				2498	/* zero the digits */
				2499	for (x = 0; x < size; x++) {
				2500	a->dp[x] = 0;
				2501	}
				2502
				2503	return MP_OKAY;
				2504	}
				2505
				2506
				2507	/* low level squaring, b = aa, HAC pp.596-597, Algorithm 14.16 /
				2508	static int s_mp_sqr (mp_int * a, mp_int * b)
				2509	{
				2510	mp_int t;
				2511	int res, ix, iy, pa;
				2512	mp_word r;
				2513	mp_digit u, tmpx, *tmpt;
				2514
				2515	pa = a->used;
				2516	if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
				2517	return res;
				2518	}
				2519
				2520	/* default used is maximum possible size */
				2521	t.used = 2*pa + 1;
				2522
				2523	for (ix = 0; ix < pa; ix++) {
				2524	/* first calculate the digit at 2ix /
				2525	/* calculate double precision result */
				2526	r = ((mp_word) t.dp[2*ix]) +
				2527	((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
				2528
				2529	/* store lower part in result */
				2530	t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
				2531
				2532	/* get the carry */
				2533	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
				2534
				2535	/* left hand side of A[ix] * A[iy] */
				2536	tmpx = a->dp[ix];
				2537
				2538	/* alias for where to store the results */
				2539	tmpt = t.dp + (2*ix + 1);
				2540
				2541	for (iy = ix + 1; iy < pa; iy++) {
				2542	/* first calculate the product */
				2543	r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
				2544
				2545	/* now calculate the double precision result, note we use
				2546	* addition instead of *2 since it's easier to optimize
				2547	*/
				2548	r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
				2549
				2550	/* store lower part */
				2551	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2552
				2553	/* get carry */
				2554	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
				2555	}
				2556	/* propagate upwards */
				2557	while (u != ((mp_digit) 0)) {
				2558	r = ((mp_word) *tmpt) + ((mp_word) u);
				2559	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2560	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
				2561	}
				2562	}
				2563
				2564	mp_clamp (&t);
				2565	mp_exch (&t, b);
				2566	mp_clear (&t);
				2567	return MP_OKAY;
				2568	}
				2569
				2570
				2571	/* multiplies \|a\| * \|b\| and does not compute the lower digs digits
				2572	* [meant to get the higher part of the product]
				2573	*/
				2574	static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
				2575	{
				2576	mp_int t;
				2577	int res, pa, pb, ix, iy;
				2578	mp_digit u;
				2579	mp_word r;
				2580	mp_digit tmpx, tmpt, tmpy;
				2581
				2582	/* can we use the fast multiplier? */
				2583	#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
				2584	if (((a->used + b->used + 1) < MP_WARRAY)
				2585	&& MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				2586	return fast_s_mp_mul_high_digs (a, b, c, digs);
				2587	}
				2588	#endif
				2589
				2590	if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
				2591	return res;
				2592	}
				2593	t.used = a->used + b->used + 1;
				2594
				2595	pa = a->used;
				2596	pb = b->used;
				2597	for (ix = 0; ix < pa; ix++) {
				2598	/* clear the carry */
				2599	u = 0;
				2600
				2601	/* left hand side of A[ix] * B[iy] */
				2602	tmpx = a->dp[ix];
				2603
				2604	/* alias to the address of where the digits will be stored */
				2605	tmpt = &(t.dp[digs]);
				2606
				2607	/* alias for where to read the right hand side from */
				2608	tmpy = b->dp + (digs - ix);
				2609
				2610	for (iy = digs - ix; iy < pb; iy++) {
				2611	/* calculate the double precision result */
				2612	r = ((mp_word)*tmpt) +
				2613	((mp_word)tmpx) * ((mp_word)*tmpy++) +
				2614	((mp_word) u);
				2615
				2616	/* get the lower part */
				2617	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2618
				2619	/* carry the carry */
				2620	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
				2621	}
				2622	*tmpt = u;
				2623	}
				2624	mp_clamp (&t);
				2625	mp_exch (&t, c);
				2626	mp_clear (&t);
				2627	return MP_OKAY;
				2628	}
				2629
				2630
				2631	#ifdef BN_MP_MONTGOMERY_SETUP_C
				2632	/* setups the montgomery reduction stuff */
				2633	static int
				2634	mp_montgomery_setup (mp_int * n, mp_digit * rho)
				2635	{
				2636	mp_digit x, b;
				2637
				2638	/* fast inversion mod 2**k
				2639	*
				2640	* Based on the fact that
				2641	*
				2642	* XA = 1 (mod 2n) => (X(2-XA)) A = 1 (mod 22n)
				2643	* => 2XA - XXA*A = 1
				2644	* => 2*(1) - (1) = 1
				2645	*/
				2646	b = n->dp[0];
				2647
				2648	if ((b & 1) == 0) {
				2649	return MP_VAL;
				2650	}
				2651
				2652	x = (((b + 2) & 4) << 1) + b; /* here xa==1 mod 24 /
				2653	x = 2 - b x; /* here xa==1 mod 28 /
				2654	#if !defined(MP_8BIT)
				2655	x = 2 - b x; /* here xa==1 mod 216 /
				2656	#endif
				2657	#if defined(MP_64BIT) \|\| !(defined(MP_8BIT) \|\| defined(MP_16BIT))
				2658	x = 2 - b x; /* here xa==1 mod 232 /
				2659	#endif
				2660	#ifdef MP_64BIT
				2661	x = 2 - b x; /* here xa==1 mod 264 /
				2662	#endif
				2663
				2664	/* rho = -1/m mod b */
				2665	*rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
				2666
				2667	return MP_OKAY;
				2668	}
				2669	#endif
				2670
				2671
				2672	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
				2673	/* computes xR**-1 == x (mod N) via Montgomery Reduction
				2674	*
				2675	* This is an optimized implementation of montgomery_reduce
				2676	* which uses the comba method to quickly calculate the columns of the
				2677	* reduction.
				2678	*
				2679	* Based on Algorithm 14.32 on pp.601 of HAC.
				2680	*/
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame^]	2681	static int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2682	{
				2683	int ix, res, olduse;
				2684	mp_word W[MP_WARRAY];
				2685
				2686	/* get old used count */
				2687	olduse = x->used;
				2688
				2689	/* grow a as required */
				2690	if (x->alloc < n->used + 1) {
				2691	if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
				2692	return res;
				2693	}
				2694	}
				2695
				2696	/* first we have to get the digits of the input into
				2697	* an array of double precision words W[...]
				2698	*/
				2699	{
				2700	register mp_word *_W;
				2701	register mp_digit *tmpx;
				2702
				2703	/* alias for the W[] array */
				2704	_W = W;
				2705
				2706	/* alias for the digits of x*/
				2707	tmpx = x->dp;
				2708
				2709	/* copy the digits of a into W[0..a->used-1] */
				2710	for (ix = 0; ix < x->used; ix++) {
				2711	_W++ = tmpx++;
				2712	}
				2713
				2714	/* zero the high words of W[a->used..m->used2] /
				2715	for (; ix < n->used * 2 + 1; ix++) {
				2716	*_W++ = 0;
				2717	}
				2718	}
				2719
				2720	/* now we proceed to zero successive digits
				2721	* from the least significant upwards
				2722	*/
				2723	for (ix = 0; ix < n->used; ix++) {
				2724	/* mu = ai * m' mod b
				2725	*
				2726	* We avoid a double precision multiplication (which isn't required)
				2727	* by casting the value down to a mp_digit. Note this requires
				2728	* that W[ix-1] have the carry cleared (see after the inner loop)
				2729	*/
				2730	register mp_digit mu;
				2731	mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
				2732
				2733	/* a = a + mu * m * b**i
				2734	*
				2735	* This is computed in place and on the fly. The multiplication
				2736	* by b**i is handled by offseting which columns the results
				2737	* are added to.
				2738	*
				2739	* Note the comba method normally doesn't handle carries in the
				2740	* inner loop In this case we fix the carry from the previous
				2741	* column since the Montgomery reduction requires digits of the
				2742	* result (so far) [see above] to work. This is
				2743	* handled by fixing up one carry after the inner loop. The
				2744	* carry fixups are done in order so after these loops the
				2745	* first m->used words of W[] have the carries fixed
				2746	*/
				2747	{
				2748	register int iy;
				2749	register mp_digit *tmpn;
				2750	register mp_word *_W;
				2751
				2752	/* alias for the digits of the modulus */
				2753	tmpn = n->dp;
				2754
				2755	/* Alias for the columns set by an offset of ix */
				2756	_W = W + ix;
				2757
				2758	/* inner loop */
				2759	for (iy = 0; iy < n->used; iy++) {
				2760	_W++ += ((mp_word)mu) ((mp_word)*tmpn++);
				2761	}
				2762	}
				2763
				2764	/* now fix carry for next digit, W[ix+1] */
				2765	W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
				2766	}
				2767
				2768	/* now we have to propagate the carries and
				2769	* shift the words downward [all those least
				2770	* significant digits we zeroed].
				2771	*/
				2772	{
				2773	register mp_digit *tmpx;
				2774	register mp_word _W, _W1;
				2775
				2776	/* nox fix rest of carries */
				2777
				2778	/* alias for current word */
				2779	_W1 = W + ix;
				2780
				2781	/* alias for next word, where the carry goes */
				2782	_W = W + ++ix;
				2783
				2784	for (; ix <= n->used * 2 + 1; ix++) {
				2785	_W++ += _W1++ >> ((mp_word) DIGIT_BIT);
				2786	}
				2787
				2788	/* copy out, A = A/b**n
				2789	*
				2790	* The result is A/b**n but instead of converting from an
				2791	* array of mp_word to mp_digit than calling mp_rshd
				2792	* we just copy them in the right order
				2793	*/
				2794
				2795	/* alias for destination word */
				2796	tmpx = x->dp;
				2797
				2798	/* alias for shifted double precision result */
				2799	_W = W + n->used;
				2800
				2801	for (ix = 0; ix < n->used + 1; ix++) {
				2802	tmpx++ = (mp_digit)(_W++ & ((mp_word) MP_MASK));
				2803	}
				2804
				2805	/* zero oldused digits, if the input a was larger than
				2806	* m->used+1 we'll have to clear the digits
				2807	*/
				2808	for (; ix < olduse; ix++) {
				2809	*tmpx++ = 0;
				2810	}
				2811	}
				2812
				2813	/* set the max used and clamp */
				2814	x->used = n->used + 1;
				2815	mp_clamp (x);
				2816
				2817	/* if A >= m then A = A - m */
				2818	if (mp_cmp_mag (x, n) != MP_LT) {
				2819	return s_mp_sub (x, n, x);
				2820	}
				2821	return MP_OKAY;
				2822	}
				2823	#endif
				2824
				2825
				2826	#ifdef BN_MP_MUL_2_C
				2827	/* b = a2 /
				2828	static int mp_mul_2(mp_int * a, mp_int * b)
				2829	{
				2830	int x, res, oldused;
				2831
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame^]	2832	/* grow to accommodate result */
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2833	if (b->alloc < a->used + 1) {
				2834	if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
				2835	return res;
				2836	}
				2837	}
				2838
				2839	oldused = b->used;
				2840	b->used = a->used;
				2841
				2842	{
				2843	register mp_digit r, rr, tmpa, tmpb;
				2844
				2845	/* alias for source */
				2846	tmpa = a->dp;
				2847
				2848	/* alias for dest */
				2849	tmpb = b->dp;
				2850
				2851	/* carry */
				2852	r = 0;
				2853	for (x = 0; x < a->used; x++) {
				2854
				2855	/* get what will be the next carry bit from the
				2856	* MSB of the current digit
				2857	*/
				2858	rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
				2859
				2860	/* now shift up this digit, add in the carry [from the previous] */
				2861	tmpb++ = ((tmpa++ << ((mp_digit)1)) \| r) & MP_MASK;
				2862
				2863	/* copy the carry that would be from the source
				2864	* digit into the next iteration
				2865	*/
				2866	r = rr;
				2867	}
				2868
				2869	/* new leading digit? */
				2870	if (r != 0) {
				2871	/* add a MSB which is always 1 at this point */
				2872	*tmpb = 1;
				2873	++(b->used);
				2874	}
				2875
				2876	/* now zero any excess digits on the destination
				2877	* that we didn't write to
				2878	*/
				2879	tmpb = b->dp + b->used;
				2880	for (x = b->used; x < oldused; x++) {
				2881	*tmpb++ = 0;
				2882	}
				2883	}
				2884	b->sign = a->sign;
				2885	return MP_OKAY;
				2886	}
				2887	#endif
				2888
				2889
				2890	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
				2891	/*
				2892	* shifts with subtractions when the result is greater than b.
				2893	*
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame^]	2894	* The method is slightly modified to shift B unconditionally up to just under
				2895	* the leading bit of b. This saves a lot of multiple precision shifting.
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2896	*/
				2897	static int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
				2898	{
				2899	int x, bits, res;
				2900
				2901	/* how many bits of last digit does b use */
				2902	bits = mp_count_bits (b) % DIGIT_BIT;
				2903
				2904	if (b->used > 1) {
				2905	if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
				2906	return res;
				2907	}
				2908	} else {
				2909	mp_set(a, 1);
				2910	bits = 1;
				2911	}
				2912
				2913
				2914	/* now compute C = A * B mod b */
				2915	for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
				2916	if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
				2917	return res;
				2918	}
				2919	if (mp_cmp_mag (a, b) != MP_LT) {
				2920	if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
				2921	return res;
				2922	}
				2923	}
				2924	}
				2925
				2926	return MP_OKAY;
				2927	}
				2928	#endif
				2929
				2930
				2931	#ifdef BN_MP_EXPTMOD_FAST_C
				2932	/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
				2933	*
				2934	* Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
				2935	* The value of k changes based on the size of the exponent.
				2936	*
				2937	* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
				2938	*/
				2939
				2940	static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
				2941	{
				2942	mp_int M[TAB_SIZE], res;
				2943	mp_digit buf, mp;
				2944	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
				2945
				2946	/* use a pointer to the reduction algorithm. This allows us to use
				2947	* one of many reduction algorithms without modding the guts of
				2948	* the code with if statements everywhere.
				2949	*/
				2950	int (redux)(mp_int,mp_int*,mp_digit);
				2951
				2952	/* find window size */
				2953	x = mp_count_bits (X);
				2954	if (x <= 7) {
				2955	winsize = 2;
				2956	} else if (x <= 36) {
				2957	winsize = 3;
				2958	} else if (x <= 140) {
				2959	winsize = 4;
				2960	} else if (x <= 450) {
				2961	winsize = 5;
				2962	} else if (x <= 1303) {
				2963	winsize = 6;
				2964	} else if (x <= 3529) {
				2965	winsize = 7;
				2966	} else {
				2967	winsize = 8;
				2968	}
				2969
				2970	#ifdef MP_LOW_MEM
				2971	if (winsize > 5) {
				2972	winsize = 5;
				2973	}
				2974	#endif
				2975
				2976	/* init M array */
				2977	/* init first cell */
				2978	if ((err = mp_init(&M[1])) != MP_OKAY) {
				2979	return err;
				2980	}
				2981
				2982	/* now init the second half of the array */
				2983	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				2984	if ((err = mp_init(&M[x])) != MP_OKAY) {
				2985	for (y = 1<<(winsize-1); y < x; y++) {
				2986	mp_clear (&M[y]);
				2987	}
				2988	mp_clear(&M[1]);
				2989	return err;
				2990	}
				2991	}
				2992
				2993	/* determine and setup reduction code */
				2994	if (redmode == 0) {
				2995	#ifdef BN_MP_MONTGOMERY_SETUP_C
				2996	/* now setup montgomery */
				2997	if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
				2998	goto LBL_M;
				2999	}
				3000	#else
				3001	err = MP_VAL;
				3002	goto LBL_M;
				3003	#endif
				3004
				3005	/* automatically pick the comba one if available (saves quite a few calls/ifs) */
				3006	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
				3007	if (((P->used * 2 + 1) < MP_WARRAY) &&
				3008	P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				3009	redux = fast_mp_montgomery_reduce;
				3010	} else
				3011	#endif
				3012	{
				3013	#ifdef BN_MP_MONTGOMERY_REDUCE_C
				3014	/* use slower baseline Montgomery method */
				3015	redux = mp_montgomery_reduce;
				3016	#else
				3017	err = MP_VAL;
				3018	goto LBL_M;
				3019	#endif
				3020	}
				3021	} else if (redmode == 1) {
				3022	#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
				3023	/* setup DR reduction for moduli of the form B*k - b /
				3024	mp_dr_setup(P, &mp);
				3025	redux = mp_dr_reduce;
				3026	#else
				3027	err = MP_VAL;
				3028	goto LBL_M;
				3029	#endif
				3030	} else {
				3031	#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
				3032	/* setup DR reduction for moduli of the form 2*k - b /
				3033	if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
				3034	goto LBL_M;
				3035	}
				3036	redux = mp_reduce_2k;
				3037	#else
				3038	err = MP_VAL;
				3039	goto LBL_M;
				3040	#endif
				3041	}
				3042
				3043	/* setup result */
				3044	if ((err = mp_init (&res)) != MP_OKAY) {
				3045	goto LBL_M;
				3046	}
				3047
				3048	/* create M table
				3049	*
				3050
				3051	*
				3052	* The first half of the table is not computed though accept for M[0] and M[1]
				3053	*/
				3054
				3055	if (redmode == 0) {
				3056	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
				3057	/* now we need R mod m */
				3058	if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
				3059	goto LBL_RES;
				3060	}
				3061	#else
				3062	err = MP_VAL;
				3063	goto LBL_RES;
				3064	#endif
				3065
				3066	/* now set M[1] to G * R mod m */
				3067	if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
				3068	goto LBL_RES;
				3069	}
				3070	} else {
				3071	mp_set(&res, 1);
				3072	if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
				3073	goto LBL_RES;
				3074	}
				3075	}
				3076
				3077	/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
				3078	if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
				3079	goto LBL_RES;
				3080	}
				3081
				3082	for (x = 0; x < (winsize - 1); x++) {
				3083	if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
				3084	goto LBL_RES;
				3085	}
				3086	if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
				3087	goto LBL_RES;
				3088	}
				3089	}
				3090
				3091	/* create upper table */
				3092	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
				3093	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
				3094	goto LBL_RES;
				3095	}
				3096	if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
				3097	goto LBL_RES;
				3098	}
				3099	}
				3100
				3101	/* set initial mode and bit cnt */
				3102	mode = 0;
				3103	bitcnt = 1;
				3104	buf = 0;
				3105	digidx = X->used - 1;
				3106	bitcpy = 0;
				3107	bitbuf = 0;
				3108
				3109	for (;;) {
				3110	/* grab next digit as required */
				3111	if (--bitcnt == 0) {
				3112	/* if digidx == -1 we are out of digits so break */
				3113	if (digidx == -1) {
				3114	break;
				3115	}
				3116	/* read next digit and reset bitcnt */
				3117	buf = X->dp[digidx--];
				3118	bitcnt = (int)DIGIT_BIT;
				3119	}
				3120
				3121	/* grab the next msb from the exponent */
				3122	y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
				3123	buf <<= (mp_digit)1;
				3124
				3125	/* if the bit is zero and mode == 0 then we ignore it
				3126	* These represent the leading zero bits before the first 1 bit
				3127	* in the exponent. Technically this opt is not required but it
				3128	* does lower the # of trivial squaring/reductions used
				3129	*/
				3130	if (mode == 0 && y == 0) {
				3131	continue;
				3132	}
				3133
				3134	/* if the bit is zero and mode == 1 then we square */
				3135	if (mode == 1 && y == 0) {
				3136	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				3137	goto LBL_RES;
				3138	}
				3139	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3140	goto LBL_RES;
				3141	}
				3142	continue;
				3143	}
				3144
				3145	/* else we add it to the window */
				3146	bitbuf \|= (y << (winsize - ++bitcpy));
				3147	mode = 2;
				3148
				3149	if (bitcpy == winsize) {
				3150	/* ok window is filled so square as required and multiply */
				3151	/* square first */
				3152	for (x = 0; x < winsize; x++) {
				3153	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				3154	goto LBL_RES;
				3155	}
				3156	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3157	goto LBL_RES;
				3158	}
				3159	}
				3160
				3161	/* then multiply */
				3162	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
				3163	goto LBL_RES;
				3164	}
				3165	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3166	goto LBL_RES;
				3167	}
				3168
				3169	/* empty window and reset */
				3170	bitcpy = 0;
				3171	bitbuf = 0;
				3172	mode = 1;
				3173	}
				3174	}
				3175
				3176	/* if bits remain then square/multiply */
				3177	if (mode == 2 && bitcpy > 0) {
				3178	/* square then multiply if the bit is set */
				3179	for (x = 0; x < bitcpy; x++) {
				3180	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				3181	goto LBL_RES;
				3182	}
				3183	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3184	goto LBL_RES;
				3185	}
				3186
				3187	/* get next bit of the window */
				3188	bitbuf <<= 1;
				3189	if ((bitbuf & (1 << winsize)) != 0) {
				3190	/* then multiply */
				3191	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
				3192	goto LBL_RES;
				3193	}
				3194	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3195	goto LBL_RES;
				3196	}
				3197	}
				3198	}
				3199	}
				3200
				3201	if (redmode == 0) {
				3202	/* fixup result if Montgomery reduction is used
				3203	* recall that any value in a Montgomery system is
				3204	* actually multiplied by R mod n. So we have
				3205	* to reduce one more time to cancel out the factor
				3206	* of R.
				3207	*/
				3208	if ((err = redux(&res, P, mp)) != MP_OKAY) {
				3209	goto LBL_RES;
				3210	}
				3211	}
				3212
				3213	/* swap res with Y */
				3214	mp_exch (&res, Y);
				3215	err = MP_OKAY;
				3216	LBL_RES:mp_clear (&res);
				3217	LBL_M:
				3218	mp_clear(&M[1]);
				3219	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				3220	mp_clear (&M[x]);
				3221	}
				3222	return err;
				3223	}
				3224	#endif
				3225
				3226
				3227	#ifdef BN_FAST_S_MP_SQR_C
				3228	/* the jist of squaring...
				3229	* you do like mult except the offset of the tmpx [one that
				3230	* starts closer to zero] can't equal the offset of tmpy.
				3231	* So basically you set up iy like before then you min it with
				3232	* (ty-tx) so that it never happens. You double all those
				3233	* you add in the inner loop
				3234
				3235	After that loop you do the squares and add them in.
				3236	*/
				3237
				3238	static int fast_s_mp_sqr (mp_int * a, mp_int * b)
				3239	{
				3240	int olduse, res, pa, ix, iz;
				3241	mp_digit W[MP_WARRAY], *tmpx;
				3242	mp_word W1;
				3243
				3244	/* grow the destination as required */
				3245	pa = a->used + a->used;
				3246	if (b->alloc < pa) {
				3247	if ((res = mp_grow (b, pa)) != MP_OKAY) {
				3248	return res;
				3249	}
				3250	}
				3251
				3252	/* number of output digits to produce */
				3253	W1 = 0;
				3254	for (ix = 0; ix < pa; ix++) {
				3255	int tx, ty, iy;
				3256	mp_word _W;
				3257	mp_digit *tmpy;
				3258
				3259	/* clear counter */
				3260	_W = 0;
				3261
				3262	/* get offsets into the two bignums */
				3263	ty = MIN(a->used-1, ix);
				3264	tx = ix - ty;
				3265
				3266	/* setup temp aliases */
				3267	tmpx = a->dp + tx;
				3268	tmpy = a->dp + ty;
				3269
				3270	/* this is the number of times the loop will iterrate, essentially
				3271	while (tx++ < a->used && ty-- >= 0) { ... }
				3272	*/
				3273	iy = MIN(a->used-tx, ty+1);
				3274
				3275	/* now for squaring tx can never equal ty
				3276	* we halve the distance since they approach at a rate of 2x
				3277	* and we have to round because odd cases need to be executed
				3278	*/
				3279	iy = MIN(iy, (ty-tx+1)>>1);
				3280
				3281	/* execute loop */
				3282	for (iz = 0; iz < iy; iz++) {
				3283	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
				3284	}
				3285
				3286	/* double the inner product and add carry */
				3287	_W = _W + _W + W1;
				3288
				3289	/* even columns have the square term in them */
				3290	if ((ix&1) == 0) {
				3291	_W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
				3292	}
				3293
				3294	/* store it */
				3295	W[ix] = (mp_digit)(_W & MP_MASK);
				3296
				3297	/* make next carry */
				3298	W1 = _W >> ((mp_word)DIGIT_BIT);
				3299	}
				3300
				3301	/* setup dest */
				3302	olduse = b->used;
				3303	b->used = a->used+a->used;
				3304
				3305	{
				3306	mp_digit *tmpb;
				3307	tmpb = b->dp;
				3308	for (ix = 0; ix < pa; ix++) {
				3309	*tmpb++ = W[ix] & MP_MASK;
				3310	}
				3311
				3312	/* clear unused digits [that existed in the old copy of c] */
				3313	for (; ix < olduse; ix++) {
				3314	*tmpb++ = 0;
				3315	}
				3316	}
				3317	mp_clamp (b);
				3318	return MP_OKAY;
				3319	}
				3320	#endif
				3321
				3322
				3323	#ifdef BN_MP_MUL_D_C
				3324	/* multiply by a digit */
				3325	static int
				3326	mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
				3327	{
				3328	mp_digit u, tmpa, tmpc;
				3329	mp_word r;
				3330	int ix, res, olduse;
				3331
				3332	/* make sure c is big enough to hold ab /
				3333	if (c->alloc < a->used + 1) {
				3334	if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
				3335	return res;
				3336	}
				3337	}
				3338
				3339	/* get the original destinations used count */
				3340	olduse = c->used;
				3341
				3342	/* set the sign */
				3343	c->sign = a->sign;
				3344
				3345	/* alias for a->dp [source] */
				3346	tmpa = a->dp;
				3347
				3348	/* alias for c->dp [dest] */
				3349	tmpc = c->dp;
				3350
				3351	/* zero carry */
				3352	u = 0;
				3353
				3354	/* compute columns */
				3355	for (ix = 0; ix < a->used; ix++) {
				3356	/* compute product and carry sum for this term */
				3357	r = ((mp_word) u) + ((mp_word)tmpa++) ((mp_word)b);
				3358
				3359	/* mask off higher bits to get a single digit */
				3360	*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
				3361
				3362	/* send carry into next iteration */
				3363	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
				3364	}
				3365
				3366	/* store final carry [if any] and increment ix offset */
				3367	*tmpc++ = u;
				3368	++ix;
				3369
				3370	/* now zero digits above the top */
				3371	while (ix++ < olduse) {
				3372	*tmpc++ = 0;
				3373	}
				3374
				3375	/* set used count */
				3376	c->used = a->used + 1;
				3377	mp_clamp(c);
				3378
				3379	return MP_OKAY;
				3380	}
				3381	#endif