Blame - src/tls/libtommath.c - android_external_wpa_supplicant_8

blob: 741b442ca912de71c50a6e9fa5fa30f70f9bd91b [file] [log] [blame]

Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	1	/*
				2	* Minimal code for RSA support from LibTomMath 0.41
				3	* http://libtom.org/
				4	* http://libtom.org/files/ltm-0.41.tar.bz2
				5	* This library was released in public domain by Tom St Denis.
				6	*
				7	* The combination in this file may not use all of the optimized algorithms
				8	* from LibTomMath and may be considerable slower than the LibTomMath with its
				9	* default settings. The main purpose of having this version here is to make it
				10	* easier to build bignum.c wrapper without having to install and build an
				11	* external library.
				12	*
				13	* If CONFIG_INTERNAL_LIBTOMMATH is defined, bignum.c includes this
				14	* libtommath.c file instead of using the external LibTomMath library.
				15	*/
				16
				17	#ifndef CHAR_BIT
				18	#define CHAR_BIT 8
				19	#endif
				20
				21	#define BN_MP_INVMOD_C
				22	#define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would
				23	* require BN_MP_EXPTMOD_FAST_C instead */
				24	#define BN_S_MP_MUL_DIGS_C
				25	#define BN_MP_INVMOD_SLOW_C
				26	#define BN_S_MP_SQR_C
				27	#define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this
				28	* would require other than mp_reduce */
				29
				30	#ifdef LTM_FAST
				31
				32	/* Use faster div at the cost of about 1 kB */
				33	#define BN_MP_MUL_D_C
				34
				35	/* Include faster exptmod (Montgomery) at the cost of about 2.5 kB in code */
				36	#define BN_MP_EXPTMOD_FAST_C
				37	#define BN_MP_MONTGOMERY_SETUP_C
				38	#define BN_FAST_MP_MONTGOMERY_REDUCE_C
				39	#define BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
				40	#define BN_MP_MUL_2_C
				41
				42	/* Include faster sqr at the cost of about 0.5 kB in code */
				43	#define BN_FAST_S_MP_SQR_C
				44
				45	#else /* LTM_FAST */
				46
				47	#define BN_MP_DIV_SMALL
				48	#define BN_MP_INIT_MULTI_C
				49	#define BN_MP_CLEAR_MULTI_C
				50	#define BN_MP_ABS_C
				51	#endif /* LTM_FAST */
				52
				53	/* Current uses do not require support for negative exponent in exptmod, so we
				54	* can save about 1.5 kB in leaving out invmod. */
				55	#define LTM_NO_NEG_EXP
				56
				57	/* from tommath.h */
				58
				59	#ifndef MIN
				60	#define MIN(x,y) ((x)<(y)?(x):(y))
				61	#endif
				62
				63	#ifndef MAX
				64	#define MAX(x,y) ((x)>(y)?(x):(y))
				65	#endif
				66
				67	#define OPT_CAST(x)
				68
Dmitry Shmidt	0494959	2012-07-19 12:16:46 -0700	[diff] [blame^]	69	#ifdef __x86_64__
				70	typedef unsigned long mp_digit;
				71	typedef unsigned long mp_word __attribute__((mode(TI)));
				72
				73	#define DIGIT_BIT 60
				74	#define MP_64BIT
				75	#else
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	76	typedef unsigned long mp_digit;
				77	typedef u64 mp_word;
				78
				79	#define DIGIT_BIT 28
				80	#define MP_28BIT
Dmitry Shmidt	0494959	2012-07-19 12:16:46 -0700	[diff] [blame^]	81	#endif
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	82
				83
				84	#define XMALLOC os_malloc
				85	#define XFREE os_free
				86	#define XREALLOC os_realloc
				87
				88
				89	#define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
				90
				91	#define MP_LT -1 /* less than */
				92	#define MP_EQ 0 /* equal to */
				93	#define MP_GT 1 /* greater than */
				94
				95	#define MP_ZPOS 0 /* positive integer */
				96	#define MP_NEG 1 /* negative */
				97
				98	#define MP_OKAY 0 /* ok result */
				99	#define MP_MEM -2 /* out of mem */
				100	#define MP_VAL -3 /* invalid input */
				101
				102	#define MP_YES 1 /* yes response */
				103	#define MP_NO 0 /* no response */
				104
				105	typedef int mp_err;
				106
				107	/* define this to use lower memory usage routines (exptmods mostly) */
				108	#define MP_LOW_MEM
				109
				110	/* default precision */
				111	#ifndef MP_PREC
				112	#ifndef MP_LOW_MEM
				113	#define MP_PREC 32 /* default digits of precision */
				114	#else
				115	#define MP_PREC 8 /* default digits of precision */
				116	#endif
				117	#endif
				118
				119	/* size of comba arrays, should be at least 2 * 2*(BITS_PER_WORD - BITS_PER_DIGIT2) */
				120	#define MP_WARRAY (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
				121
				122	/* the infamous mp_int structure */
				123	typedef struct {
				124	int used, alloc, sign;
				125	mp_digit *dp;
				126	} mp_int;
				127
				128
				129	/* ---> Basic Manipulations <--- */
				130	#define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
				131	#define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
				132	#define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
				133
				134
				135	/* prototypes for copied functions */
				136	#define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
				137	static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
				138	static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
				139	static int s_mp_sqr(mp_int * a, mp_int * b);
				140	static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs);
				141
				142	static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
				143
				144	#ifdef BN_MP_INIT_MULTI_C
				145	static int mp_init_multi(mp_int *mp, ...);
				146	#endif
				147	#ifdef BN_MP_CLEAR_MULTI_C
				148	static void mp_clear_multi(mp_int *mp, ...);
				149	#endif
				150	static int mp_lshd(mp_int * a, int b);
				151	static void mp_set(mp_int * a, mp_digit b);
				152	static void mp_clamp(mp_int * a);
				153	static void mp_exch(mp_int * a, mp_int * b);
				154	static void mp_rshd(mp_int * a, int b);
				155	static void mp_zero(mp_int * a);
				156	static int mp_mod_2d(mp_int * a, int b, mp_int * c);
				157	static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d);
				158	static int mp_init_copy(mp_int * a, mp_int * b);
				159	static int mp_mul_2d(mp_int * a, int b, mp_int * c);
				160	#ifndef LTM_NO_NEG_EXP
				161	static int mp_div_2(mp_int * a, mp_int * b);
				162	static int mp_invmod(mp_int * a, mp_int * b, mp_int * c);
				163	static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c);
				164	#endif /* LTM_NO_NEG_EXP */
				165	static int mp_copy(mp_int * a, mp_int * b);
				166	static int mp_count_bits(mp_int * a);
				167	static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d);
				168	static int mp_mod(mp_int * a, mp_int * b, mp_int * c);
				169	static int mp_grow(mp_int * a, int size);
				170	static int mp_cmp_mag(mp_int * a, mp_int * b);
				171	#ifdef BN_MP_ABS_C
				172	static int mp_abs(mp_int * a, mp_int * b);
				173	#endif
				174	static int mp_sqr(mp_int * a, mp_int * b);
				175	static int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d);
				176	static int mp_reduce_2k_setup_l(mp_int a, mp_int d);
				177	static int mp_2expt(mp_int * a, int b);
				178	static int mp_reduce_setup(mp_int * a, mp_int * b);
				179	static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu);
				180	static int mp_init_size(mp_int * a, int size);
				181	#ifdef BN_MP_EXPTMOD_FAST_C
				182	static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
				183	#endif /* BN_MP_EXPTMOD_FAST_C */
				184	#ifdef BN_FAST_S_MP_SQR_C
				185	static int fast_s_mp_sqr (mp_int * a, mp_int * b);
				186	#endif /* BN_FAST_S_MP_SQR_C */
				187	#ifdef BN_MP_MUL_D_C
				188	static int mp_mul_d (mp_int * a, mp_digit b, mp_int * c);
				189	#endif /* BN_MP_MUL_D_C */
				190
				191
				192
				193	/* functions from bn_<func name>.c */
				194
				195
				196	/* reverse an array, used for radix code */
				197	static void bn_reverse (unsigned char *s, int len)
				198	{
				199	int ix, iy;
				200	unsigned char t;
				201
				202	ix = 0;
				203	iy = len - 1;
				204	while (ix < iy) {
				205	t = s[ix];
				206	s[ix] = s[iy];
				207	s[iy] = t;
				208	++ix;
				209	--iy;
				210	}
				211	}
				212
				213
				214	/* low level addition, based on HAC pp.594, Algorithm 14.7 */
				215	static int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
				216	{
				217	mp_int *x;
				218	int olduse, res, min, max;
				219
				220	/* find sizes, we let \|a\| <= \|b\| which means we have to sort
				221	* them. "x" will point to the input with the most digits
				222	*/
				223	if (a->used > b->used) {
				224	min = b->used;
				225	max = a->used;
				226	x = a;
				227	} else {
				228	min = a->used;
				229	max = b->used;
				230	x = b;
				231	}
				232
				233	/* init result */
				234	if (c->alloc < max + 1) {
				235	if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
				236	return res;
				237	}
				238	}
				239
				240	/* get old used digit count and set new one */
				241	olduse = c->used;
				242	c->used = max + 1;
				243
				244	{
				245	register mp_digit u, tmpa, tmpb, *tmpc;
				246	register int i;
				247
				248	/* alias for digit pointers */
				249
				250	/* first input */
				251	tmpa = a->dp;
				252
				253	/* second input */
				254	tmpb = b->dp;
				255
				256	/* destination */
				257	tmpc = c->dp;
				258
				259	/* zero the carry */
				260	u = 0;
				261	for (i = 0; i < min; i++) {
				262	/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
				263	tmpc = tmpa++ + *tmpb++ + u;
				264
				265	/* U = carry bit of T[i] */
				266	u = *tmpc >> ((mp_digit)DIGIT_BIT);
				267
				268	/* take away carry bit from T[i] */
				269	*tmpc++ &= MP_MASK;
				270	}
				271
				272	/* now copy higher words if any, that is in A+B
				273	* if A or B has more digits add those in
				274	*/
				275	if (min != max) {
				276	for (; i < max; i++) {
				277	/* T[i] = X[i] + U */
				278	*tmpc = x->dp[i] + u;
				279
				280	/* U = carry bit of T[i] */
				281	u = *tmpc >> ((mp_digit)DIGIT_BIT);
				282
				283	/* take away carry bit from T[i] */
				284	*tmpc++ &= MP_MASK;
				285	}
				286	}
				287
				288	/* add carry */
				289	*tmpc++ = u;
				290
				291	/* clear digits above oldused */
				292	for (i = c->used; i < olduse; i++) {
				293	*tmpc++ = 0;
				294	}
				295	}
				296
				297	mp_clamp (c);
				298	return MP_OKAY;
				299	}
				300
				301
				302	/* low level subtraction (assumes \|a\| > \|b\|), HAC pp.595 Algorithm 14.9 */
				303	static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
				304	{
				305	int olduse, res, min, max;
				306
				307	/* find sizes */
				308	min = b->used;
				309	max = a->used;
				310
				311	/* init result */
				312	if (c->alloc < max) {
				313	if ((res = mp_grow (c, max)) != MP_OKAY) {
				314	return res;
				315	}
				316	}
				317	olduse = c->used;
				318	c->used = max;
				319
				320	{
				321	register mp_digit u, tmpa, tmpb, *tmpc;
				322	register int i;
				323
				324	/* alias for digit pointers */
				325	tmpa = a->dp;
				326	tmpb = b->dp;
				327	tmpc = c->dp;
				328
				329	/* set carry to zero */
				330	u = 0;
				331	for (i = 0; i < min; i++) {
				332	/* T[i] = A[i] - B[i] - U */
				333	tmpc = tmpa++ - *tmpb++ - u;
				334
				335	/* U = carry bit of T[i]
				336	* Note this saves performing an AND operation since
				337	* if a carry does occur it will propagate all the way to the
				338	* MSB. As a result a single shift is enough to get the carry
				339	*/
				340	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
				341
				342	/* Clear carry from T[i] */
				343	*tmpc++ &= MP_MASK;
				344	}
				345
				346	/* now copy higher words if any, e.g. if A has more digits than B */
				347	for (; i < max; i++) {
				348	/* T[i] = A[i] - U */
				349	tmpc = tmpa++ - u;
				350
				351	/* U = carry bit of T[i] */
				352	u = tmpc >> ((mp_digit)(CHAR_BIT sizeof (mp_digit) - 1));
				353
				354	/* Clear carry from T[i] */
				355	*tmpc++ &= MP_MASK;
				356	}
				357
				358	/* clear digits above used (since we may not have grown result above) */
				359	for (i = c->used; i < olduse; i++) {
				360	*tmpc++ = 0;
				361	}
				362	}
				363
				364	mp_clamp (c);
				365	return MP_OKAY;
				366	}
				367
				368
				369	/* init a new mp_int */
				370	static int mp_init (mp_int * a)
				371	{
				372	int i;
				373
				374	/* allocate memory required and clear it */
				375	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
				376	if (a->dp == NULL) {
				377	return MP_MEM;
				378	}
				379
				380	/* set the digits to zero */
				381	for (i = 0; i < MP_PREC; i++) {
				382	a->dp[i] = 0;
				383	}
				384
				385	/* set the used to zero, allocated digits to the default precision
				386	* and sign to positive */
				387	a->used = 0;
				388	a->alloc = MP_PREC;
				389	a->sign = MP_ZPOS;
				390
				391	return MP_OKAY;
				392	}
				393
				394
				395	/* clear one (frees) */
				396	static void mp_clear (mp_int * a)
				397	{
				398	int i;
				399
				400	/* only do anything if a hasn't been freed previously */
				401	if (a->dp != NULL) {
				402	/* first zero the digits */
				403	for (i = 0; i < a->used; i++) {
				404	a->dp[i] = 0;
				405	}
				406
				407	/* free ram */
				408	XFREE(a->dp);
				409
				410	/* reset members to make debugging easier */
				411	a->dp = NULL;
				412	a->alloc = a->used = 0;
				413	a->sign = MP_ZPOS;
				414	}
				415	}
				416
				417
				418	/* high level addition (handles signs) */
				419	static int mp_add (mp_int * a, mp_int * b, mp_int * c)
				420	{
				421	int sa, sb, res;
				422
				423	/* get sign of both inputs */
				424	sa = a->sign;
				425	sb = b->sign;
				426
				427	/* handle two cases, not four */
				428	if (sa == sb) {
				429	/* both positive or both negative */
				430	/* add their magnitudes, copy the sign */
				431	c->sign = sa;
				432	res = s_mp_add (a, b, c);
				433	} else {
				434	/* one positive, the other negative */
				435	/* subtract the one with the greater magnitude from */
				436	/* the one of the lesser magnitude. The result gets */
				437	/* the sign of the one with the greater magnitude. */
				438	if (mp_cmp_mag (a, b) == MP_LT) {
				439	c->sign = sb;
				440	res = s_mp_sub (b, a, c);
				441	} else {
				442	c->sign = sa;
				443	res = s_mp_sub (a, b, c);
				444	}
				445	}
				446	return res;
				447	}
				448
				449
				450	/* high level subtraction (handles signs) */
				451	static int mp_sub (mp_int * a, mp_int * b, mp_int * c)
				452	{
				453	int sa, sb, res;
				454
				455	sa = a->sign;
				456	sb = b->sign;
				457
				458	if (sa != sb) {
				459	/* subtract a negative from a positive, OR */
				460	/* subtract a positive from a negative. */
				461	/* In either case, ADD their magnitudes, */
				462	/* and use the sign of the first number. */
				463	c->sign = sa;
				464	res = s_mp_add (a, b, c);
				465	} else {
				466	/* subtract a positive from a positive, OR */
				467	/* subtract a negative from a negative. */
				468	/* First, take the difference between their */
				469	/* magnitudes, then... */
				470	if (mp_cmp_mag (a, b) != MP_LT) {
				471	/* Copy the sign from the first */
				472	c->sign = sa;
				473	/* The first has a larger or equal magnitude */
				474	res = s_mp_sub (a, b, c);
				475	} else {
				476	/* The result has the opposite sign from */
				477	/* the first number. */
				478	c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
				479	/* The second has a larger magnitude */
				480	res = s_mp_sub (b, a, c);
				481	}
				482	}
				483	return res;
				484	}
				485
				486
				487	/* high level multiplication (handles sign) */
				488	static int mp_mul (mp_int * a, mp_int * b, mp_int * c)
				489	{
				490	int res, neg;
				491	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
				492
				493	/* use Toom-Cook? */
				494	#ifdef BN_MP_TOOM_MUL_C
				495	if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
				496	res = mp_toom_mul(a, b, c);
				497	} else
				498	#endif
				499	#ifdef BN_MP_KARATSUBA_MUL_C
				500	/* use Karatsuba? */
				501	if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
				502	res = mp_karatsuba_mul (a, b, c);
				503	} else
				504	#endif
				505	{
				506	/* can we use the fast multiplier?
				507	*
				508	* The fast multiplier can be used if the output will
				509	* have less than MP_WARRAY digits and the number of
				510	* digits won't affect carry propagation
				511	*/
				512	#ifdef BN_FAST_S_MP_MUL_DIGS_C
				513	int digs = a->used + b->used + 1;
				514
				515	if ((digs < MP_WARRAY) &&
				516	MIN(a->used, b->used) <=
				517	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				518	res = fast_s_mp_mul_digs (a, b, c, digs);
				519	} else
				520	#endif
				521	#ifdef BN_S_MP_MUL_DIGS_C
				522	res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
				523	#else
				524	#error mp_mul could fail
				525	res = MP_VAL;
				526	#endif
				527
				528	}
				529	c->sign = (c->used > 0) ? neg : MP_ZPOS;
				530	return res;
				531	}
				532
				533
				534	/* d = a * b (mod c) */
				535	static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
				536	{
				537	int res;
				538	mp_int t;
				539
				540	if ((res = mp_init (&t)) != MP_OKAY) {
				541	return res;
				542	}
				543
				544	if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
				545	mp_clear (&t);
				546	return res;
				547	}
				548	res = mp_mod (&t, c, d);
				549	mp_clear (&t);
				550	return res;
				551	}
				552
				553
				554	/* c = a mod b, 0 <= c < b */
				555	static int mp_mod (mp_int * a, mp_int * b, mp_int * c)
				556	{
				557	mp_int t;
				558	int res;
				559
				560	if ((res = mp_init (&t)) != MP_OKAY) {
				561	return res;
				562	}
				563
				564	if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
				565	mp_clear (&t);
				566	return res;
				567	}
				568
				569	if (t.sign != b->sign) {
				570	res = mp_add (b, &t, c);
				571	} else {
				572	res = MP_OKAY;
				573	mp_exch (&t, c);
				574	}
				575
				576	mp_clear (&t);
				577	return res;
				578	}
				579
				580
				581	/* this is a shell function that calls either the normal or Montgomery
				582	* exptmod functions. Originally the call to the montgomery code was
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	583	* embedded in the normal function but that wasted a lot of stack space
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	584	* for nothing (since 99% of the time the Montgomery code would be called)
				585	*/
				586	static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
				587	{
				588	int dr;
				589
				590	/* modulus P must be positive */
				591	if (P->sign == MP_NEG) {
				592	return MP_VAL;
				593	}
				594
				595	/* if exponent X is negative we have to recurse */
				596	if (X->sign == MP_NEG) {
				597	#ifdef LTM_NO_NEG_EXP
				598	return MP_VAL;
				599	#else /* LTM_NO_NEG_EXP */
				600	#ifdef BN_MP_INVMOD_C
				601	mp_int tmpG, tmpX;
				602	int err;
				603
				604	/* first compute 1/G mod P */
				605	if ((err = mp_init(&tmpG)) != MP_OKAY) {
				606	return err;
				607	}
				608	if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
				609	mp_clear(&tmpG);
				610	return err;
				611	}
				612
				613	/* now get \|X\| */
				614	if ((err = mp_init(&tmpX)) != MP_OKAY) {
				615	mp_clear(&tmpG);
				616	return err;
				617	}
				618	if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
				619	mp_clear_multi(&tmpG, &tmpX, NULL);
				620	return err;
				621	}
				622
				623	/* and now compute (1/G)\|X\| instead of GX [X < 0] */
				624	err = mp_exptmod(&tmpG, &tmpX, P, Y);
				625	mp_clear_multi(&tmpG, &tmpX, NULL);
				626	return err;
				627	#else
				628	#error mp_exptmod would always fail
				629	/* no invmod */
				630	return MP_VAL;
				631	#endif
				632	#endif /* LTM_NO_NEG_EXP */
				633	}
				634
				635	/* modified diminished radix reduction */
				636	#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C)
				637	if (mp_reduce_is_2k_l(P) == MP_YES) {
				638	return s_mp_exptmod(G, X, P, Y, 1);
				639	}
				640	#endif
				641
				642	#ifdef BN_MP_DR_IS_MODULUS_C
				643	/* is it a DR modulus? */
				644	dr = mp_dr_is_modulus(P);
				645	#else
				646	/* default to no */
				647	dr = 0;
				648	#endif
				649
				650	#ifdef BN_MP_REDUCE_IS_2K_C
				651	/* if not, is it a unrestricted DR modulus? */
				652	if (dr == 0) {
				653	dr = mp_reduce_is_2k(P) << 1;
				654	}
				655	#endif
				656
				657	/* if the modulus is odd or dr != 0 use the montgomery method */
				658	#ifdef BN_MP_EXPTMOD_FAST_C
				659	if (mp_isodd (P) == 1 \|\| dr != 0) {
				660	return mp_exptmod_fast (G, X, P, Y, dr);
				661	} else {
				662	#endif
				663	#ifdef BN_S_MP_EXPTMOD_C
				664	/* otherwise use the generic Barrett reduction technique */
				665	return s_mp_exptmod (G, X, P, Y, 0);
				666	#else
				667	#error mp_exptmod could fail
				668	/* no exptmod for evens */
				669	return MP_VAL;
				670	#endif
				671	#ifdef BN_MP_EXPTMOD_FAST_C
				672	}
				673	#endif
				674	}
				675
				676
				677	/* compare two ints (signed)*/
				678	static int mp_cmp (mp_int * a, mp_int * b)
				679	{
				680	/* compare based on sign */
				681	if (a->sign != b->sign) {
				682	if (a->sign == MP_NEG) {
				683	return MP_LT;
				684	} else {
				685	return MP_GT;
				686	}
				687	}
				688
				689	/* compare digits */
				690	if (a->sign == MP_NEG) {
				691	/* if negative compare opposite direction */
				692	return mp_cmp_mag(b, a);
				693	} else {
				694	return mp_cmp_mag(a, b);
				695	}
				696	}
				697
				698
				699	/* compare a digit */
				700	static int mp_cmp_d(mp_int * a, mp_digit b)
				701	{
				702	/* compare based on sign */
				703	if (a->sign == MP_NEG) {
				704	return MP_LT;
				705	}
				706
				707	/* compare based on magnitude */
				708	if (a->used > 1) {
				709	return MP_GT;
				710	}
				711
				712	/* compare the only digit of a to b */
				713	if (a->dp[0] > b) {
				714	return MP_GT;
				715	} else if (a->dp[0] < b) {
				716	return MP_LT;
				717	} else {
				718	return MP_EQ;
				719	}
				720	}
				721
				722
				723	#ifndef LTM_NO_NEG_EXP
				724	/* hac 14.61, pp608 */
				725	static int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
				726	{
				727	/* b cannot be negative */
				728	if (b->sign == MP_NEG \|\| mp_iszero(b) == 1) {
				729	return MP_VAL;
				730	}
				731
				732	#ifdef BN_FAST_MP_INVMOD_C
				733	/* if the modulus is odd we can use a faster routine instead */
				734	if (mp_isodd (b) == 1) {
				735	return fast_mp_invmod (a, b, c);
				736	}
				737	#endif
				738
				739	#ifdef BN_MP_INVMOD_SLOW_C
				740	return mp_invmod_slow(a, b, c);
				741	#endif
				742
				743	#ifndef BN_FAST_MP_INVMOD_C
				744	#ifndef BN_MP_INVMOD_SLOW_C
				745	#error mp_invmod would always fail
				746	#endif
				747	#endif
				748	return MP_VAL;
				749	}
				750	#endif /* LTM_NO_NEG_EXP */
				751
				752
				753	/* get the size for an unsigned equivalent */
				754	static int mp_unsigned_bin_size (mp_int * a)
				755	{
				756	int size = mp_count_bits (a);
				757	return (size / 8 + ((size & 7) != 0 ? 1 : 0));
				758	}
				759
				760
				761	#ifndef LTM_NO_NEG_EXP
				762	/* hac 14.61, pp608 */
				763	static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
				764	{
				765	mp_int x, y, u, v, A, B, C, D;
				766	int res;
				767
				768	/* b cannot be negative */
				769	if (b->sign == MP_NEG \|\| mp_iszero(b) == 1) {
				770	return MP_VAL;
				771	}
				772
				773	/* init temps */
				774	if ((res = mp_init_multi(&x, &y, &u, &v,
				775	&A, &B, &C, &D, NULL)) != MP_OKAY) {
				776	return res;
				777	}
				778
				779	/* x = a, y = b */
				780	if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
				781	goto LBL_ERR;
				782	}
				783	if ((res = mp_copy (b, &y)) != MP_OKAY) {
				784	goto LBL_ERR;
				785	}
				786
				787	/* 2. [modified] if x,y are both even then return an error! */
				788	if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
				789	res = MP_VAL;
				790	goto LBL_ERR;
				791	}
				792
				793	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
				794	if ((res = mp_copy (&x, &u)) != MP_OKAY) {
				795	goto LBL_ERR;
				796	}
				797	if ((res = mp_copy (&y, &v)) != MP_OKAY) {
				798	goto LBL_ERR;
				799	}
				800	mp_set (&A, 1);
				801	mp_set (&D, 1);
				802
				803	top:
				804	/* 4. while u is even do */
				805	while (mp_iseven (&u) == 1) {
				806	/* 4.1 u = u/2 */
				807	if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
				808	goto LBL_ERR;
				809	}
				810	/* 4.2 if A or B is odd then */
				811	if (mp_isodd (&A) == 1 \|\| mp_isodd (&B) == 1) {
				812	/* A = (A+y)/2, B = (B-x)/2 */
				813	if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
				814	goto LBL_ERR;
				815	}
				816	if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
				817	goto LBL_ERR;
				818	}
				819	}
				820	/* A = A/2, B = B/2 */
				821	if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
				822	goto LBL_ERR;
				823	}
				824	if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
				825	goto LBL_ERR;
				826	}
				827	}
				828
				829	/* 5. while v is even do */
				830	while (mp_iseven (&v) == 1) {
				831	/* 5.1 v = v/2 */
				832	if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
				833	goto LBL_ERR;
				834	}
				835	/* 5.2 if C or D is odd then */
				836	if (mp_isodd (&C) == 1 \|\| mp_isodd (&D) == 1) {
				837	/* C = (C+y)/2, D = (D-x)/2 */
				838	if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
				839	goto LBL_ERR;
				840	}
				841	if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
				842	goto LBL_ERR;
				843	}
				844	}
				845	/* C = C/2, D = D/2 */
				846	if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
				847	goto LBL_ERR;
				848	}
				849	if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
				850	goto LBL_ERR;
				851	}
				852	}
				853
				854	/* 6. if u >= v then */
				855	if (mp_cmp (&u, &v) != MP_LT) {
				856	/* u = u - v, A = A - C, B = B - D */
				857	if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
				858	goto LBL_ERR;
				859	}
				860
				861	if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
				862	goto LBL_ERR;
				863	}
				864
				865	if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
				866	goto LBL_ERR;
				867	}
				868	} else {
				869	/* v - v - u, C = C - A, D = D - B */
				870	if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
				871	goto LBL_ERR;
				872	}
				873
				874	if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
				875	goto LBL_ERR;
				876	}
				877
				878	if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
				879	goto LBL_ERR;
				880	}
				881	}
				882
				883	/* if not zero goto step 4 */
				884	if (mp_iszero (&u) == 0)
				885	goto top;
				886
				887	/* now a = C, b = D, gcd == gv /
				888
				889	/* if v != 1 then there is no inverse */
				890	if (mp_cmp_d (&v, 1) != MP_EQ) {
				891	res = MP_VAL;
				892	goto LBL_ERR;
				893	}
				894
				895	/* if its too low */
				896	while (mp_cmp_d(&C, 0) == MP_LT) {
				897	if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
				898	goto LBL_ERR;
				899	}
				900	}
				901
				902	/* too big */
				903	while (mp_cmp_mag(&C, b) != MP_LT) {
				904	if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
				905	goto LBL_ERR;
				906	}
				907	}
				908
				909	/* C is now the inverse */
				910	mp_exch (&C, c);
				911	res = MP_OKAY;
				912	LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
				913	return res;
				914	}
				915	#endif /* LTM_NO_NEG_EXP */
				916
				917
				918	/* compare maginitude of two ints (unsigned) */
				919	static int mp_cmp_mag (mp_int * a, mp_int * b)
				920	{
				921	int n;
				922	mp_digit tmpa, tmpb;
				923
				924	/* compare based on # of non-zero digits */
				925	if (a->used > b->used) {
				926	return MP_GT;
				927	}
				928
				929	if (a->used < b->used) {
				930	return MP_LT;
				931	}
				932
				933	/* alias for a */
				934	tmpa = a->dp + (a->used - 1);
				935
				936	/* alias for b */
				937	tmpb = b->dp + (a->used - 1);
				938
				939	/* compare based on digits */
				940	for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
				941	if (tmpa > tmpb) {
				942	return MP_GT;
				943	}
				944
				945	if (tmpa < tmpb) {
				946	return MP_LT;
				947	}
				948	}
				949	return MP_EQ;
				950	}
				951
				952
				953	/* reads a unsigned char array, assumes the msb is stored first [big endian] */
				954	static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
				955	{
				956	int res;
				957
				958	/* make sure there are at least two digits */
				959	if (a->alloc < 2) {
				960	if ((res = mp_grow(a, 2)) != MP_OKAY) {
				961	return res;
				962	}
				963	}
				964
				965	/* zero the int */
				966	mp_zero (a);
				967
				968	/* read the bytes in */
				969	while (c-- > 0) {
				970	if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
				971	return res;
				972	}
				973
				974	#ifndef MP_8BIT
				975	a->dp[0] \|= *b++;
				976	a->used += 1;
				977	#else
				978	a->dp[0] = (*b & MP_MASK);
				979	a->dp[1] \|= ((*b++ >> 7U) & 1);
				980	a->used += 2;
				981	#endif
				982	}
				983	mp_clamp (a);
				984	return MP_OKAY;
				985	}
				986
				987
				988	/* store in unsigned [big endian] format */
				989	static int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
				990	{
				991	int x, res;
				992	mp_int t;
				993
				994	if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
				995	return res;
				996	}
				997
				998	x = 0;
				999	while (mp_iszero (&t) == 0) {
				1000	#ifndef MP_8BIT
				1001	b[x++] = (unsigned char) (t.dp[0] & 255);
				1002	#else
				1003	b[x++] = (unsigned char) (t.dp[0] \| ((t.dp[1] & 0x01) << 7));
				1004	#endif
				1005	if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
				1006	mp_clear (&t);
				1007	return res;
				1008	}
				1009	}
				1010	bn_reverse (b, x);
				1011	mp_clear (&t);
				1012	return MP_OKAY;
				1013	}
				1014
				1015
				1016	/* shift right by a certain bit count (store quotient in c, optional remainder in d) */
				1017	static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
				1018	{
				1019	mp_digit D, r, rr;
				1020	int x, res;
				1021	mp_int t;
				1022
				1023
				1024	/* if the shift count is <= 0 then we do no work */
				1025	if (b <= 0) {
				1026	res = mp_copy (a, c);
				1027	if (d != NULL) {
				1028	mp_zero (d);
				1029	}
				1030	return res;
				1031	}
				1032
				1033	if ((res = mp_init (&t)) != MP_OKAY) {
				1034	return res;
				1035	}
				1036
				1037	/* get the remainder */
				1038	if (d != NULL) {
				1039	if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
				1040	mp_clear (&t);
				1041	return res;
				1042	}
				1043	}
				1044
				1045	/* copy */
				1046	if ((res = mp_copy (a, c)) != MP_OKAY) {
				1047	mp_clear (&t);
				1048	return res;
				1049	}
				1050
				1051	/* shift by as many digits in the bit count */
				1052	if (b >= (int)DIGIT_BIT) {
				1053	mp_rshd (c, b / DIGIT_BIT);
				1054	}
				1055
				1056	/* shift any bit count < DIGIT_BIT */
				1057	D = (mp_digit) (b % DIGIT_BIT);
				1058	if (D != 0) {
				1059	register mp_digit *tmpc, mask, shift;
				1060
				1061	/* mask */
				1062	mask = (((mp_digit)1) << D) - 1;
				1063
				1064	/* shift for lsb */
				1065	shift = DIGIT_BIT - D;
				1066
				1067	/* alias */
				1068	tmpc = c->dp + (c->used - 1);
				1069
				1070	/* carry */
				1071	r = 0;
				1072	for (x = c->used - 1; x >= 0; x--) {
				1073	/* get the lower bits of this word in a temp */
				1074	rr = *tmpc & mask;
				1075
				1076	/* shift the current word and mix in the carry bits from the previous word */
				1077	tmpc = (tmpc >> D) \| (r << shift);
				1078	--tmpc;
				1079
				1080	/* set the carry to the carry bits of the current word found above */
				1081	r = rr;
				1082	}
				1083	}
				1084	mp_clamp (c);
				1085	if (d != NULL) {
				1086	mp_exch (&t, d);
				1087	}
				1088	mp_clear (&t);
				1089	return MP_OKAY;
				1090	}
				1091
				1092
				1093	static int mp_init_copy (mp_int * a, mp_int * b)
				1094	{
				1095	int res;
				1096
				1097	if ((res = mp_init (a)) != MP_OKAY) {
				1098	return res;
				1099	}
				1100	return mp_copy (b, a);
				1101	}
				1102
				1103
				1104	/* set to zero */
				1105	static void mp_zero (mp_int * a)
				1106	{
				1107	int n;
				1108	mp_digit *tmp;
				1109
				1110	a->sign = MP_ZPOS;
				1111	a->used = 0;
				1112
				1113	tmp = a->dp;
				1114	for (n = 0; n < a->alloc; n++) {
				1115	*tmp++ = 0;
				1116	}
				1117	}
				1118
				1119
				1120	/* copy, b = a */
				1121	static int mp_copy (mp_int * a, mp_int * b)
				1122	{
				1123	int res, n;
				1124
				1125	/* if dst == src do nothing */
				1126	if (a == b) {
				1127	return MP_OKAY;
				1128	}
				1129
				1130	/* grow dest */
				1131	if (b->alloc < a->used) {
				1132	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
				1133	return res;
				1134	}
				1135	}
				1136
				1137	/* zero b and copy the parameters over */
				1138	{
				1139	register mp_digit tmpa, tmpb;
				1140
				1141	/* pointer aliases */
				1142
				1143	/* source */
				1144	tmpa = a->dp;
				1145
				1146	/* destination */
				1147	tmpb = b->dp;
				1148
				1149	/* copy all the digits */
				1150	for (n = 0; n < a->used; n++) {
				1151	tmpb++ = tmpa++;
				1152	}
				1153
				1154	/* clear high digits */
				1155	for (; n < b->used; n++) {
				1156	*tmpb++ = 0;
				1157	}
				1158	}
				1159
				1160	/* copy used count and sign */
				1161	b->used = a->used;
				1162	b->sign = a->sign;
				1163	return MP_OKAY;
				1164	}
				1165
				1166
				1167	/* shift right a certain amount of digits */
				1168	static void mp_rshd (mp_int * a, int b)
				1169	{
				1170	int x;
				1171
				1172	/* if b <= 0 then ignore it */
				1173	if (b <= 0) {
				1174	return;
				1175	}
				1176
				1177	/* if b > used then simply zero it and return */
				1178	if (a->used <= b) {
				1179	mp_zero (a);
				1180	return;
				1181	}
				1182
				1183	{
				1184	register mp_digit bottom, top;
				1185
				1186	/* shift the digits down */
				1187
				1188	/* bottom */
				1189	bottom = a->dp;
				1190
				1191	/* top [offset into digits] */
				1192	top = a->dp + b;
				1193
				1194	/* this is implemented as a sliding window where
				1195	* the window is b-digits long and digits from
				1196	* the top of the window are copied to the bottom
				1197	*
				1198	* e.g.
				1199
				1200	b-2 \| b-1 \| b0 \| b1 \| b2 \| ... \| bb \| ---->
				1201	/\ \| ---->
				1202	\-------------------/ ---->
				1203	*/
				1204	for (x = 0; x < (a->used - b); x++) {
				1205	bottom++ = top++;
				1206	}
				1207
				1208	/* zero the top digits */
				1209	for (; x < a->used; x++) {
				1210	*bottom++ = 0;
				1211	}
				1212	}
				1213
				1214	/* remove excess digits */
				1215	a->used -= b;
				1216	}
				1217
				1218
				1219	/* swap the elements of two integers, for cases where you can't simply swap the
				1220	* mp_int pointers around
				1221	*/
				1222	static void mp_exch (mp_int * a, mp_int * b)
				1223	{
				1224	mp_int t;
				1225
				1226	t = *a;
				1227	a = b;
				1228	*b = t;
				1229	}
				1230
				1231
				1232	/* trim unused digits
				1233	*
				1234	* This is used to ensure that leading zero digits are
				1235	* trimed and the leading "used" digit will be non-zero
				1236	* Typically very fast. Also fixes the sign if there
				1237	* are no more leading digits
				1238	*/
				1239	static void mp_clamp (mp_int * a)
				1240	{
				1241	/* decrease used while the most significant digit is
				1242	* zero.
				1243	*/
				1244	while (a->used > 0 && a->dp[a->used - 1] == 0) {
				1245	--(a->used);
				1246	}
				1247
				1248	/* reset the sign flag if used == 0 */
				1249	if (a->used == 0) {
				1250	a->sign = MP_ZPOS;
				1251	}
				1252	}
				1253
				1254
				1255	/* grow as required */
				1256	static int mp_grow (mp_int * a, int size)
				1257	{
				1258	int i;
				1259	mp_digit *tmp;
				1260
				1261	/* if the alloc size is smaller alloc more ram */
				1262	if (a->alloc < size) {
				1263	/* ensure there are always at least MP_PREC digits extra on top */
				1264	size += (MP_PREC * 2) - (size % MP_PREC);
				1265
				1266	/* reallocate the array a->dp
				1267	*
				1268	* We store the return in a temporary variable
				1269	* in case the operation failed we don't want
				1270	* to overwrite the dp member of a.
				1271	*/
				1272	tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size);
				1273	if (tmp == NULL) {
				1274	/* reallocation failed but "a" is still valid [can be freed] */
				1275	return MP_MEM;
				1276	}
				1277
				1278	/* reallocation succeeded so set a->dp */
				1279	a->dp = tmp;
				1280
				1281	/* zero excess digits */
				1282	i = a->alloc;
				1283	a->alloc = size;
				1284	for (; i < a->alloc; i++) {
				1285	a->dp[i] = 0;
				1286	}
				1287	}
				1288	return MP_OKAY;
				1289	}
				1290
				1291
				1292	#ifdef BN_MP_ABS_C
				1293	/* b = \|a\|
				1294	*
				1295	* Simple function copies the input and fixes the sign to positive
				1296	*/
				1297	static int mp_abs (mp_int * a, mp_int * b)
				1298	{
				1299	int res;
				1300
				1301	/* copy a to b */
				1302	if (a != b) {
				1303	if ((res = mp_copy (a, b)) != MP_OKAY) {
				1304	return res;
				1305	}
				1306	}
				1307
				1308	/* force the sign of b to positive */
				1309	b->sign = MP_ZPOS;
				1310
				1311	return MP_OKAY;
				1312	}
				1313	#endif
				1314
				1315
				1316	/* set to a digit */
				1317	static void mp_set (mp_int * a, mp_digit b)
				1318	{
				1319	mp_zero (a);
				1320	a->dp[0] = b & MP_MASK;
				1321	a->used = (a->dp[0] != 0) ? 1 : 0;
				1322	}
				1323
				1324
				1325	#ifndef LTM_NO_NEG_EXP
				1326	/* b = a/2 */
				1327	static int mp_div_2(mp_int * a, mp_int * b)
				1328	{
				1329	int x, res, oldused;
				1330
				1331	/* copy */
				1332	if (b->alloc < a->used) {
				1333	if ((res = mp_grow (b, a->used)) != MP_OKAY) {
				1334	return res;
				1335	}
				1336	}
				1337
				1338	oldused = b->used;
				1339	b->used = a->used;
				1340	{
				1341	register mp_digit r, rr, tmpa, tmpb;
				1342
				1343	/* source alias */
				1344	tmpa = a->dp + b->used - 1;
				1345
				1346	/* dest alias */
				1347	tmpb = b->dp + b->used - 1;
				1348
				1349	/* carry */
				1350	r = 0;
				1351	for (x = b->used - 1; x >= 0; x--) {
				1352	/* get the carry for the next iteration */
				1353	rr = *tmpa & 1;
				1354
				1355	/* shift the current digit, add in carry and store */
				1356	tmpb-- = (tmpa-- >> 1) \| (r << (DIGIT_BIT - 1));
				1357
				1358	/* forward carry to next iteration */
				1359	r = rr;
				1360	}
				1361
				1362	/* zero excess digits */
				1363	tmpb = b->dp + b->used;
				1364	for (x = b->used; x < oldused; x++) {
				1365	*tmpb++ = 0;
				1366	}
				1367	}
				1368	b->sign = a->sign;
				1369	mp_clamp (b);
				1370	return MP_OKAY;
				1371	}
				1372	#endif /* LTM_NO_NEG_EXP */
				1373
				1374
				1375	/* shift left by a certain bit count */
				1376	static int mp_mul_2d (mp_int * a, int b, mp_int * c)
				1377	{
				1378	mp_digit d;
				1379	int res;
				1380
				1381	/* copy */
				1382	if (a != c) {
				1383	if ((res = mp_copy (a, c)) != MP_OKAY) {
				1384	return res;
				1385	}
				1386	}
				1387
				1388	if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
				1389	if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
				1390	return res;
				1391	}
				1392	}
				1393
				1394	/* shift by as many digits in the bit count */
				1395	if (b >= (int)DIGIT_BIT) {
				1396	if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
				1397	return res;
				1398	}
				1399	}
				1400
				1401	/* shift any bit count < DIGIT_BIT */
				1402	d = (mp_digit) (b % DIGIT_BIT);
				1403	if (d != 0) {
				1404	register mp_digit *tmpc, shift, mask, r, rr;
				1405	register int x;
				1406
				1407	/* bitmask for carries */
				1408	mask = (((mp_digit)1) << d) - 1;
				1409
				1410	/* shift for msbs */
				1411	shift = DIGIT_BIT - d;
				1412
				1413	/* alias */
				1414	tmpc = c->dp;
				1415
				1416	/* carry */
				1417	r = 0;
				1418	for (x = 0; x < c->used; x++) {
				1419	/* get the higher bits of the current word */
				1420	rr = (*tmpc >> shift) & mask;
				1421
				1422	/* shift the current word and OR in the carry */
				1423	tmpc = ((tmpc << d) \| r) & MP_MASK;
				1424	++tmpc;
				1425
				1426	/* set the carry to the carry bits of the current word */
				1427	r = rr;
				1428	}
				1429
				1430	/* set final carry */
				1431	if (r != 0) {
				1432	c->dp[(c->used)++] = r;
				1433	}
				1434	}
				1435	mp_clamp (c);
				1436	return MP_OKAY;
				1437	}
				1438
				1439
				1440	#ifdef BN_MP_INIT_MULTI_C
				1441	static int mp_init_multi(mp_int *mp, ...)
				1442	{
				1443	mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
				1444	int n = 0; /* Number of ok inits */
				1445	mp_int* cur_arg = mp;
				1446	va_list args;
				1447
				1448	va_start(args, mp); /* init args to next argument from caller */
				1449	while (cur_arg != NULL) {
				1450	if (mp_init(cur_arg) != MP_OKAY) {
				1451	/* Oops - error! Back-track and mp_clear what we already
				1452	succeeded in init-ing, then return error.
				1453	*/
				1454	va_list clean_args;
				1455
				1456	/* end the current list */
				1457	va_end(args);
				1458
				1459	/* now start cleaning up */
				1460	cur_arg = mp;
				1461	va_start(clean_args, mp);
				1462	while (n--) {
				1463	mp_clear(cur_arg);
				1464	cur_arg = va_arg(clean_args, mp_int*);
				1465	}
				1466	va_end(clean_args);
				1467	res = MP_MEM;
				1468	break;
				1469	}
				1470	n++;
				1471	cur_arg = va_arg(args, mp_int*);
				1472	}
				1473	va_end(args);
				1474	return res; /* Assumed ok, if error flagged above. */
				1475	}
				1476	#endif
				1477
				1478
				1479	#ifdef BN_MP_CLEAR_MULTI_C
				1480	static void mp_clear_multi(mp_int *mp, ...)
				1481	{
				1482	mp_int* next_mp = mp;
				1483	va_list args;
				1484	va_start(args, mp);
				1485	while (next_mp != NULL) {
				1486	mp_clear(next_mp);
				1487	next_mp = va_arg(args, mp_int*);
				1488	}
				1489	va_end(args);
				1490	}
				1491	#endif
				1492
				1493
				1494	/* shift left a certain amount of digits */
				1495	static int mp_lshd (mp_int * a, int b)
				1496	{
				1497	int x, res;
				1498
				1499	/* if its less than zero return */
				1500	if (b <= 0) {
				1501	return MP_OKAY;
				1502	}
				1503
				1504	/* grow to fit the new digits */
				1505	if (a->alloc < a->used + b) {
				1506	if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
				1507	return res;
				1508	}
				1509	}
				1510
				1511	{
				1512	register mp_digit top, bottom;
				1513
				1514	/* increment the used by the shift amount then copy upwards */
				1515	a->used += b;
				1516
				1517	/* top */
				1518	top = a->dp + a->used - 1;
				1519
				1520	/* base */
				1521	bottom = a->dp + a->used - 1 - b;
				1522
				1523	/* much like mp_rshd this is implemented using a sliding window
				1524	* except the window goes the otherway around. Copying from
				1525	* the bottom to the top. see bn_mp_rshd.c for more info.
				1526	*/
				1527	for (x = a->used - 1; x >= b; x--) {
				1528	top-- = bottom--;
				1529	}
				1530
				1531	/* zero the lower digits */
				1532	top = a->dp;
				1533	for (x = 0; x < b; x++) {
				1534	*top++ = 0;
				1535	}
				1536	}
				1537	return MP_OKAY;
				1538	}
				1539
				1540
				1541	/* returns the number of bits in an int */
				1542	static int mp_count_bits (mp_int * a)
				1543	{
				1544	int r;
				1545	mp_digit q;
				1546
				1547	/* shortcut */
				1548	if (a->used == 0) {
				1549	return 0;
				1550	}
				1551
				1552	/* get number of digits and add that */
				1553	r = (a->used - 1) * DIGIT_BIT;
				1554
				1555	/* take the last digit and count the bits in it */
				1556	q = a->dp[a->used - 1];
				1557	while (q > ((mp_digit) 0)) {
				1558	++r;
				1559	q >>= ((mp_digit) 1);
				1560	}
				1561	return r;
				1562	}
				1563
				1564
				1565	/* calc a value mod 2*b /
				1566	static int mp_mod_2d (mp_int * a, int b, mp_int * c)
				1567	{
				1568	int x, res;
				1569
				1570	/* if b is <= 0 then zero the int */
				1571	if (b <= 0) {
				1572	mp_zero (c);
				1573	return MP_OKAY;
				1574	}
				1575
				1576	/* if the modulus is larger than the value than return */
				1577	if (b >= (int) (a->used * DIGIT_BIT)) {
				1578	res = mp_copy (a, c);
				1579	return res;
				1580	}
				1581
				1582	/* copy */
				1583	if ((res = mp_copy (a, c)) != MP_OKAY) {
				1584	return res;
				1585	}
				1586
				1587	/* zero digits above the last digit of the modulus */
				1588	for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
				1589	c->dp[x] = 0;
				1590	}
				1591	/* clear the digit that is not completely outside/inside the modulus */
				1592	c->dp[b / DIGIT_BIT] &=
				1593	(mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
				1594	mp_clamp (c);
				1595	return MP_OKAY;
				1596	}
				1597
				1598
				1599	#ifdef BN_MP_DIV_SMALL
				1600
				1601	/* slower bit-bang division... also smaller */
				1602	static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
				1603	{
				1604	mp_int ta, tb, tq, q;
				1605	int res, n, n2;
				1606
				1607	/* is divisor zero ? */
				1608	if (mp_iszero (b) == 1) {
				1609	return MP_VAL;
				1610	}
				1611
				1612	/* if a < b then q=0, r = a */
				1613	if (mp_cmp_mag (a, b) == MP_LT) {
				1614	if (d != NULL) {
				1615	res = mp_copy (a, d);
				1616	} else {
				1617	res = MP_OKAY;
				1618	}
				1619	if (c != NULL) {
				1620	mp_zero (c);
				1621	}
				1622	return res;
				1623	}
				1624
				1625	/* init our temps */
				1626	if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) {
				1627	return res;
				1628	}
				1629
				1630
				1631	mp_set(&tq, 1);
				1632	n = mp_count_bits(a) - mp_count_bits(b);
				1633	if (((res = mp_abs(a, &ta)) != MP_OKAY) \|\|
				1634	((res = mp_abs(b, &tb)) != MP_OKAY) \|\|
				1635	((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) \|\|
				1636	((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
				1637	goto LBL_ERR;
				1638	}
				1639
				1640	while (n-- >= 0) {
				1641	if (mp_cmp(&tb, &ta) != MP_GT) {
				1642	if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) \|\|
				1643	((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
				1644	goto LBL_ERR;
				1645	}
				1646	}
				1647	if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) \|\|
				1648	((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
				1649	goto LBL_ERR;
				1650	}
				1651	}
				1652
				1653	/* now q == quotient and ta == remainder */
				1654	n = a->sign;
				1655	n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
				1656	if (c != NULL) {
				1657	mp_exch(c, &q);
				1658	c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
				1659	}
				1660	if (d != NULL) {
				1661	mp_exch(d, &ta);
				1662	d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
				1663	}
				1664	LBL_ERR:
				1665	mp_clear_multi(&ta, &tb, &tq, &q, NULL);
				1666	return res;
				1667	}
				1668
				1669	#else
				1670
				1671	/* integer signed division.
				1672	* c*b + d == a [e.g. a/b, c=quotient, d=remainder]
				1673	* HAC pp.598 Algorithm 14.20
				1674	*
				1675	* Note that the description in HAC is horribly
				1676	* incomplete. For example, it doesn't consider
				1677	* the case where digits are removed from 'x' in
				1678	* the inner loop. It also doesn't consider the
				1679	* case that y has fewer than three digits, etc..
				1680	*
				1681	* The overall algorithm is as described as
				1682	* 14.20 from HAC but fixed to treat these cases.
				1683	*/
				1684	static int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
				1685	{
				1686	mp_int q, x, y, t1, t2;
				1687	int res, n, t, i, norm, neg;
				1688
				1689	/* is divisor zero ? */
				1690	if (mp_iszero (b) == 1) {
				1691	return MP_VAL;
				1692	}
				1693
				1694	/* if a < b then q=0, r = a */
				1695	if (mp_cmp_mag (a, b) == MP_LT) {
				1696	if (d != NULL) {
				1697	res = mp_copy (a, d);
				1698	} else {
				1699	res = MP_OKAY;
				1700	}
				1701	if (c != NULL) {
				1702	mp_zero (c);
				1703	}
				1704	return res;
				1705	}
				1706
				1707	if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
				1708	return res;
				1709	}
				1710	q.used = a->used + 2;
				1711
				1712	if ((res = mp_init (&t1)) != MP_OKAY) {
				1713	goto LBL_Q;
				1714	}
				1715
				1716	if ((res = mp_init (&t2)) != MP_OKAY) {
				1717	goto LBL_T1;
				1718	}
				1719
				1720	if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
				1721	goto LBL_T2;
				1722	}
				1723
				1724	if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
				1725	goto LBL_X;
				1726	}
				1727
				1728	/* fix the sign */
				1729	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
				1730	x.sign = y.sign = MP_ZPOS;
				1731
				1732	/* normalize both x and y, ensure that y >= b/2, [b == 2*DIGIT_BIT] /
				1733	norm = mp_count_bits(&y) % DIGIT_BIT;
				1734	if (norm < (int)(DIGIT_BIT-1)) {
				1735	norm = (DIGIT_BIT-1) - norm;
				1736	if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
				1737	goto LBL_Y;
				1738	}
				1739	if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
				1740	goto LBL_Y;
				1741	}
				1742	} else {
				1743	norm = 0;
				1744	}
				1745
				1746	/* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
				1747	n = x.used - 1;
				1748	t = y.used - 1;
				1749
				1750	/* while (x >= ybn-t) do { q[n-t] += 1; x -= yb*{n-t} } /
				1751	if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = yb{n-t} /
				1752	goto LBL_Y;
				1753	}
				1754
				1755	while (mp_cmp (&x, &y) != MP_LT) {
				1756	++(q.dp[n - t]);
				1757	if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
				1758	goto LBL_Y;
				1759	}
				1760	}
				1761
				1762	/* reset y by shifting it back down */
				1763	mp_rshd (&y, n - t);
				1764
				1765	/* step 3. for i from n down to (t + 1) */
				1766	for (i = n; i >= (t + 1); i--) {
				1767	if (i > x.used) {
				1768	continue;
				1769	}
				1770
				1771	/* step 3.1 if xi == yt then set q{i-t-1} to b-1,
				1772	* otherwise set q{i-t-1} to (xib + x{i-1})/yt /
				1773	if (x.dp[i] == y.dp[t]) {
				1774	q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
				1775	} else {
				1776	mp_word tmp;
				1777	tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
				1778	tmp \|= ((mp_word) x.dp[i - 1]);
				1779	tmp /= ((mp_word) y.dp[t]);
				1780	if (tmp > (mp_word) MP_MASK)
				1781	tmp = MP_MASK;
				1782	q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
				1783	}
				1784
				1785	/* while (q{i-t-1} * (yt * b + y{t-1})) >
				1786	xi * b*2 + xi-1 b + xi-2
				1787
				1788	do q{i-t-1} -= 1;
				1789	*/
				1790	q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
				1791	do {
				1792	q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
				1793
				1794	/* find left hand */
				1795	mp_zero (&t1);
				1796	t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
				1797	t1.dp[1] = y.dp[t];
				1798	t1.used = 2;
				1799	if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
				1800	goto LBL_Y;
				1801	}
				1802
				1803	/* find right hand */
				1804	t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
				1805	t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
				1806	t2.dp[2] = x.dp[i];
				1807	t2.used = 3;
				1808	} while (mp_cmp_mag(&t1, &t2) == MP_GT);
				1809
				1810	/* step 3.3 x = x - q{i-t-1} * y * b*{i-t-1} /
				1811	if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
				1812	goto LBL_Y;
				1813	}
				1814
				1815	if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
				1816	goto LBL_Y;
				1817	}
				1818
				1819	if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
				1820	goto LBL_Y;
				1821	}
				1822
				1823	/* if x < 0 then { x = x + yb{i-t-1}; q{i-t-1} -= 1; } /
				1824	if (x.sign == MP_NEG) {
				1825	if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
				1826	goto LBL_Y;
				1827	}
				1828	if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
				1829	goto LBL_Y;
				1830	}
				1831	if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
				1832	goto LBL_Y;
				1833	}
				1834
				1835	q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
				1836	}
				1837	}
				1838
				1839	/* now q is the quotient and x is the remainder
				1840	* [which we have to normalize]
				1841	*/
				1842
				1843	/* get sign before writing to c */
				1844	x.sign = x.used == 0 ? MP_ZPOS : a->sign;
				1845
				1846	if (c != NULL) {
				1847	mp_clamp (&q);
				1848	mp_exch (&q, c);
				1849	c->sign = neg;
				1850	}
				1851
				1852	if (d != NULL) {
				1853	mp_div_2d (&x, norm, &x, NULL);
				1854	mp_exch (&x, d);
				1855	}
				1856
				1857	res = MP_OKAY;
				1858
				1859	LBL_Y:mp_clear (&y);
				1860	LBL_X:mp_clear (&x);
				1861	LBL_T2:mp_clear (&t2);
				1862	LBL_T1:mp_clear (&t1);
				1863	LBL_Q:mp_clear (&q);
				1864	return res;
				1865	}
				1866
				1867	#endif
				1868
				1869
				1870	#ifdef MP_LOW_MEM
				1871	#define TAB_SIZE 32
				1872	#else
				1873	#define TAB_SIZE 256
				1874	#endif
				1875
				1876	static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
				1877	{
				1878	mp_int M[TAB_SIZE], res, mu;
				1879	mp_digit buf;
				1880	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
				1881	int (redux)(mp_int,mp_int,mp_int);
				1882
				1883	/* find window size */
				1884	x = mp_count_bits (X);
				1885	if (x <= 7) {
				1886	winsize = 2;
				1887	} else if (x <= 36) {
				1888	winsize = 3;
				1889	} else if (x <= 140) {
				1890	winsize = 4;
				1891	} else if (x <= 450) {
				1892	winsize = 5;
				1893	} else if (x <= 1303) {
				1894	winsize = 6;
				1895	} else if (x <= 3529) {
				1896	winsize = 7;
				1897	} else {
				1898	winsize = 8;
				1899	}
				1900
				1901	#ifdef MP_LOW_MEM
				1902	if (winsize > 5) {
				1903	winsize = 5;
				1904	}
				1905	#endif
				1906
				1907	/* init M array */
				1908	/* init first cell */
				1909	if ((err = mp_init(&M[1])) != MP_OKAY) {
				1910	return err;
				1911	}
				1912
				1913	/* now init the second half of the array */
				1914	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				1915	if ((err = mp_init(&M[x])) != MP_OKAY) {
				1916	for (y = 1<<(winsize-1); y < x; y++) {
				1917	mp_clear (&M[y]);
				1918	}
				1919	mp_clear(&M[1]);
				1920	return err;
				1921	}
				1922	}
				1923
				1924	/* create mu, used for Barrett reduction */
				1925	if ((err = mp_init (&mu)) != MP_OKAY) {
				1926	goto LBL_M;
				1927	}
				1928
				1929	if (redmode == 0) {
				1930	if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
				1931	goto LBL_MU;
				1932	}
				1933	redux = mp_reduce;
				1934	} else {
				1935	if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
				1936	goto LBL_MU;
				1937	}
				1938	redux = mp_reduce_2k_l;
				1939	}
				1940
				1941	/* create M table
				1942	*
				1943	* The M table contains powers of the base,
				1944	* e.g. M[x] = G**x mod P
				1945	*
				1946	* The first half of the table is not
				1947	* computed though accept for M[0] and M[1]
				1948	*/
				1949	if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
				1950	goto LBL_MU;
				1951	}
				1952
				1953	/* compute the value at M[1<<(winsize-1)] by squaring
				1954	* M[1] (winsize-1) times
				1955	*/
				1956	if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
				1957	goto LBL_MU;
				1958	}
				1959
				1960	for (x = 0; x < (winsize - 1); x++) {
				1961	/* square it */
				1962	if ((err = mp_sqr (&M[1 << (winsize - 1)],
				1963	&M[1 << (winsize - 1)])) != MP_OKAY) {
				1964	goto LBL_MU;
				1965	}
				1966
				1967	/* reduce modulo P */
				1968	if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
				1969	goto LBL_MU;
				1970	}
				1971	}
				1972
				1973	/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
				1974	* for x = (2(winsize - 1) + 1) to (2winsize - 1)
				1975	*/
				1976	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
				1977	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
				1978	goto LBL_MU;
				1979	}
				1980	if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
				1981	goto LBL_MU;
				1982	}
				1983	}
				1984
				1985	/* setup result */
				1986	if ((err = mp_init (&res)) != MP_OKAY) {
				1987	goto LBL_MU;
				1988	}
				1989	mp_set (&res, 1);
				1990
				1991	/* set initial mode and bit cnt */
				1992	mode = 0;
				1993	bitcnt = 1;
				1994	buf = 0;
				1995	digidx = X->used - 1;
				1996	bitcpy = 0;
				1997	bitbuf = 0;
				1998
				1999	for (;;) {
				2000	/* grab next digit as required */
				2001	if (--bitcnt == 0) {
				2002	/* if digidx == -1 we are out of digits */
				2003	if (digidx == -1) {
				2004	break;
				2005	}
				2006	/* read next digit and reset the bitcnt */
				2007	buf = X->dp[digidx--];
				2008	bitcnt = (int) DIGIT_BIT;
				2009	}
				2010
				2011	/* grab the next msb from the exponent */
				2012	y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
				2013	buf <<= (mp_digit)1;
				2014
				2015	/* if the bit is zero and mode == 0 then we ignore it
				2016	* These represent the leading zero bits before the first 1 bit
				2017	* in the exponent. Technically this opt is not required but it
				2018	* does lower the # of trivial squaring/reductions used
				2019	*/
				2020	if (mode == 0 && y == 0) {
				2021	continue;
				2022	}
				2023
				2024	/* if the bit is zero and mode == 1 then we square */
				2025	if (mode == 1 && y == 0) {
				2026	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				2027	goto LBL_RES;
				2028	}
				2029	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2030	goto LBL_RES;
				2031	}
				2032	continue;
				2033	}
				2034
				2035	/* else we add it to the window */
				2036	bitbuf \|= (y << (winsize - ++bitcpy));
				2037	mode = 2;
				2038
				2039	if (bitcpy == winsize) {
				2040	/* ok window is filled so square as required and multiply */
				2041	/* square first */
				2042	for (x = 0; x < winsize; x++) {
				2043	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				2044	goto LBL_RES;
				2045	}
				2046	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2047	goto LBL_RES;
				2048	}
				2049	}
				2050
				2051	/* then multiply */
				2052	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
				2053	goto LBL_RES;
				2054	}
				2055	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2056	goto LBL_RES;
				2057	}
				2058
				2059	/* empty window and reset */
				2060	bitcpy = 0;
				2061	bitbuf = 0;
				2062	mode = 1;
				2063	}
				2064	}
				2065
				2066	/* if bits remain then square/multiply */
				2067	if (mode == 2 && bitcpy > 0) {
				2068	/* square then multiply if the bit is set */
				2069	for (x = 0; x < bitcpy; x++) {
				2070	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				2071	goto LBL_RES;
				2072	}
				2073	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2074	goto LBL_RES;
				2075	}
				2076
				2077	bitbuf <<= 1;
				2078	if ((bitbuf & (1 << winsize)) != 0) {
				2079	/* then multiply */
				2080	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
				2081	goto LBL_RES;
				2082	}
				2083	if ((err = redux (&res, P, &mu)) != MP_OKAY) {
				2084	goto LBL_RES;
				2085	}
				2086	}
				2087	}
				2088	}
				2089
				2090	mp_exch (&res, Y);
				2091	err = MP_OKAY;
				2092	LBL_RES:mp_clear (&res);
				2093	LBL_MU:mp_clear (&mu);
				2094	LBL_M:
				2095	mp_clear(&M[1]);
				2096	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				2097	mp_clear (&M[x]);
				2098	}
				2099	return err;
				2100	}
				2101
				2102
				2103	/* computes b = aa /
				2104	static int mp_sqr (mp_int * a, mp_int * b)
				2105	{
				2106	int res;
				2107
				2108	#ifdef BN_MP_TOOM_SQR_C
				2109	/* use Toom-Cook? */
				2110	if (a->used >= TOOM_SQR_CUTOFF) {
				2111	res = mp_toom_sqr(a, b);
				2112	/* Karatsuba? */
				2113	} else
				2114	#endif
				2115	#ifdef BN_MP_KARATSUBA_SQR_C
				2116	if (a->used >= KARATSUBA_SQR_CUTOFF) {
				2117	res = mp_karatsuba_sqr (a, b);
				2118	} else
				2119	#endif
				2120	{
				2121	#ifdef BN_FAST_S_MP_SQR_C
				2122	/* can we use the fast comba multiplier? */
				2123	if ((a->used * 2 + 1) < MP_WARRAY &&
				2124	a->used <
				2125	(1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
				2126	res = fast_s_mp_sqr (a, b);
				2127	} else
				2128	#endif
				2129	#ifdef BN_S_MP_SQR_C
				2130	res = s_mp_sqr (a, b);
				2131	#else
				2132	#error mp_sqr could fail
				2133	res = MP_VAL;
				2134	#endif
				2135	}
				2136	b->sign = MP_ZPOS;
				2137	return res;
				2138	}
				2139
				2140
				2141	/* reduces a modulo n where n is of the form 2**p - d
				2142	This differs from reduce_2k since "d" can be larger
				2143	than a single digit.
				2144	*/
				2145	static int mp_reduce_2k_l(mp_int a, mp_int n, mp_int *d)
				2146	{
				2147	mp_int q;
				2148	int p, res;
				2149
				2150	if ((res = mp_init(&q)) != MP_OKAY) {
				2151	return res;
				2152	}
				2153
				2154	p = mp_count_bits(n);
				2155	top:
				2156	/* q = a/2p, a = a mod 2p */
				2157	if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
				2158	goto ERR;
				2159	}
				2160
				2161	/* q = q * d */
				2162	if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
				2163	goto ERR;
				2164	}
				2165
				2166	/* a = a + q */
				2167	if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
				2168	goto ERR;
				2169	}
				2170
				2171	if (mp_cmp_mag(a, n) != MP_LT) {
				2172	s_mp_sub(a, n, a);
				2173	goto top;
				2174	}
				2175
				2176	ERR:
				2177	mp_clear(&q);
				2178	return res;
				2179	}
				2180
				2181
				2182	/* determines the setup value */
				2183	static int mp_reduce_2k_setup_l(mp_int a, mp_int d)
				2184	{
				2185	int res;
				2186	mp_int tmp;
				2187
				2188	if ((res = mp_init(&tmp)) != MP_OKAY) {
				2189	return res;
				2190	}
				2191
				2192	if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
				2193	goto ERR;
				2194	}
				2195
				2196	if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
				2197	goto ERR;
				2198	}
				2199
				2200	ERR:
				2201	mp_clear(&tmp);
				2202	return res;
				2203	}
				2204
				2205
				2206	/* computes a = 2**b
				2207	*
				2208	* Simple algorithm which zeroes the int, grows it then just sets one bit
				2209	* as required.
				2210	*/
				2211	static int mp_2expt (mp_int * a, int b)
				2212	{
				2213	int res;
				2214
				2215	/* zero a as per default */
				2216	mp_zero (a);
				2217
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	2218	/* grow a to accommodate the single bit */
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2219	if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
				2220	return res;
				2221	}
				2222
				2223	/* set the used count of where the bit will go */
				2224	a->used = b / DIGIT_BIT + 1;
				2225
				2226	/* put the single bit in its place */
				2227	a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
				2228
				2229	return MP_OKAY;
				2230	}
				2231
				2232
				2233	/* pre-calculate the value required for Barrett reduction
				2234	* For a given modulus "b" it calulates the value required in "a"
				2235	*/
				2236	static int mp_reduce_setup (mp_int * a, mp_int * b)
				2237	{
				2238	int res;
				2239
				2240	if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
				2241	return res;
				2242	}
				2243	return mp_div (a, b, a, NULL);
				2244	}
				2245
				2246
				2247	/* reduces x mod m, assumes 0 < x < m**2, mu is
				2248	* precomputed via mp_reduce_setup.
				2249	* From HAC pp.604 Algorithm 14.42
				2250	*/
				2251	static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
				2252	{
				2253	mp_int q;
				2254	int res, um = m->used;
				2255
				2256	/* q = x */
				2257	if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
				2258	return res;
				2259	}
				2260
				2261	/* q1 = x / b*(k-1) /
				2262	mp_rshd (&q, um - 1);
				2263
				2264	/* according to HAC this optimization is ok */
				2265	if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
				2266	if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
				2267	goto CLEANUP;
				2268	}
				2269	} else {
				2270	#ifdef BN_S_MP_MUL_HIGH_DIGS_C
				2271	if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
				2272	goto CLEANUP;
				2273	}
				2274	#elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
				2275	if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
				2276	goto CLEANUP;
				2277	}
				2278	#else
				2279	{
				2280	#error mp_reduce would always fail
				2281	res = MP_VAL;
				2282	goto CLEANUP;
				2283	}
				2284	#endif
				2285	}
				2286
				2287	/* q3 = q2 / b*(k+1) /
				2288	mp_rshd (&q, um + 1);
				2289
				2290	/* x = x mod b*(k+1), quick (no division) /
				2291	if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
				2292	goto CLEANUP;
				2293	}
				2294
				2295	/* q = q * m mod b*(k+1), quick (no division) /
				2296	if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
				2297	goto CLEANUP;
				2298	}
				2299
				2300	/* x = x - q */
				2301	if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
				2302	goto CLEANUP;
				2303	}
				2304
				2305	/* If x < 0, add b*(k+1) to it /
				2306	if (mp_cmp_d (x, 0) == MP_LT) {
				2307	mp_set (&q, 1);
				2308	if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) {
				2309	goto CLEANUP;
				2310	}
				2311	if ((res = mp_add (x, &q, x)) != MP_OKAY) {
				2312	goto CLEANUP;
				2313	}
				2314	}
				2315
				2316	/* Back off if it's too big */
				2317	while (mp_cmp (x, m) != MP_LT) {
				2318	if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
				2319	goto CLEANUP;
				2320	}
				2321	}
				2322
				2323	CLEANUP:
				2324	mp_clear (&q);
				2325
				2326	return res;
				2327	}
				2328
				2329
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	2330	/* multiplies \|a\| * \|b\| and only computes up to digs digits of result
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2331	* HAC pp. 595, Algorithm 14.12 Modified so you can control how
				2332	* many digits of output are created.
				2333	*/
				2334	static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
				2335	{
				2336	mp_int t;
				2337	int res, pa, pb, ix, iy;
				2338	mp_digit u;
				2339	mp_word r;
				2340	mp_digit tmpx, tmpt, tmpy;
				2341
				2342	/* can we use the fast multiplier? */
				2343	if (((digs) < MP_WARRAY) &&
				2344	MIN (a->used, b->used) <
				2345	(1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				2346	return fast_s_mp_mul_digs (a, b, c, digs);
				2347	}
				2348
				2349	if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
				2350	return res;
				2351	}
				2352	t.used = digs;
				2353
				2354	/* compute the digits of the product directly */
				2355	pa = a->used;
				2356	for (ix = 0; ix < pa; ix++) {
				2357	/* set the carry to zero */
				2358	u = 0;
				2359
				2360	/* limit ourselves to making digs digits of output */
				2361	pb = MIN (b->used, digs - ix);
				2362
				2363	/* setup some aliases */
				2364	/* copy of the digit from a used within the nested loop */
				2365	tmpx = a->dp[ix];
				2366
				2367	/* an alias for the destination shifted ix places */
				2368	tmpt = t.dp + ix;
				2369
				2370	/* an alias for the digits of b */
				2371	tmpy = b->dp;
				2372
				2373	/* compute the columns of the output and propagate the carry */
				2374	for (iy = 0; iy < pb; iy++) {
				2375	/* compute the column as a mp_word */
				2376	r = ((mp_word)*tmpt) +
				2377	((mp_word)tmpx) * ((mp_word)*tmpy++) +
				2378	((mp_word) u);
				2379
				2380	/* the new column is the lower part of the result */
				2381	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2382
				2383	/* get the carry word from the result */
				2384	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
				2385	}
				2386	/* set carry if it is placed below digs */
				2387	if (ix + iy < digs) {
				2388	*tmpt = u;
				2389	}
				2390	}
				2391
				2392	mp_clamp (&t);
				2393	mp_exch (&t, c);
				2394
				2395	mp_clear (&t);
				2396	return MP_OKAY;
				2397	}
				2398
				2399
				2400	/* Fast (comba) multiplier
				2401	*
				2402	* This is the fast column-array [comba] multiplier. It is
				2403	* designed to compute the columns of the product first
				2404	* then handle the carries afterwards. This has the effect
				2405	* of making the nested loops that compute the columns very
				2406	* simple and schedulable on super-scalar processors.
				2407	*
				2408	* This has been modified to produce a variable number of
				2409	* digits of output so if say only a half-product is required
				2410	* you don't have to compute the upper half (a feature
				2411	* required for fast Barrett reduction).
				2412	*
				2413	* Based on Algorithm 14.12 on pp.595 of HAC.
				2414	*
				2415	*/
				2416	static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
				2417	{
				2418	int olduse, res, pa, ix, iz;
				2419	mp_digit W[MP_WARRAY];
				2420	register mp_word _W;
				2421
				2422	/* grow the destination as required */
				2423	if (c->alloc < digs) {
				2424	if ((res = mp_grow (c, digs)) != MP_OKAY) {
				2425	return res;
				2426	}
				2427	}
				2428
				2429	/* number of output digits to produce */
				2430	pa = MIN(digs, a->used + b->used);
				2431
				2432	/* clear the carry */
				2433	_W = 0;
				2434	for (ix = 0; ix < pa; ix++) {
				2435	int tx, ty;
				2436	int iy;
				2437	mp_digit tmpx, tmpy;
				2438
				2439	/* get offsets into the two bignums */
				2440	ty = MIN(b->used-1, ix);
				2441	tx = ix - ty;
				2442
				2443	/* setup temp aliases */
				2444	tmpx = a->dp + tx;
				2445	tmpy = b->dp + ty;
				2446
				2447	/* this is the number of times the loop will iterrate, essentially
				2448	while (tx++ < a->used && ty-- >= 0) { ... }
				2449	*/
				2450	iy = MIN(a->used-tx, ty+1);
				2451
				2452	/* execute loop */
				2453	for (iz = 0; iz < iy; ++iz) {
				2454	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
				2455
				2456	}
				2457
				2458	/* store term */
				2459	W[ix] = ((mp_digit)_W) & MP_MASK;
				2460
				2461	/* make next carry */
				2462	_W = _W >> ((mp_word)DIGIT_BIT);
				2463	}
				2464
				2465	/* setup dest */
				2466	olduse = c->used;
				2467	c->used = pa;
				2468
				2469	{
				2470	register mp_digit *tmpc;
				2471	tmpc = c->dp;
				2472	for (ix = 0; ix < pa+1; ix++) {
				2473	/* now extract the previous digit [below the carry] */
				2474	*tmpc++ = W[ix];
				2475	}
				2476
				2477	/* clear unused digits [that existed in the old copy of c] */
				2478	for (; ix < olduse; ix++) {
				2479	*tmpc++ = 0;
				2480	}
				2481	}
				2482	mp_clamp (c);
				2483	return MP_OKAY;
				2484	}
				2485
				2486
				2487	/* init an mp_init for a given size */
				2488	static int mp_init_size (mp_int * a, int size)
				2489	{
				2490	int x;
				2491
				2492	/* pad size so there are always extra digits */
				2493	size += (MP_PREC * 2) - (size % MP_PREC);
				2494
				2495	/* alloc mem */
				2496	a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
				2497	if (a->dp == NULL) {
				2498	return MP_MEM;
				2499	}
				2500
				2501	/* set the members */
				2502	a->used = 0;
				2503	a->alloc = size;
				2504	a->sign = MP_ZPOS;
				2505
				2506	/* zero the digits */
				2507	for (x = 0; x < size; x++) {
				2508	a->dp[x] = 0;
				2509	}
				2510
				2511	return MP_OKAY;
				2512	}
				2513
				2514
				2515	/* low level squaring, b = aa, HAC pp.596-597, Algorithm 14.16 /
				2516	static int s_mp_sqr (mp_int * a, mp_int * b)
				2517	{
				2518	mp_int t;
				2519	int res, ix, iy, pa;
				2520	mp_word r;
				2521	mp_digit u, tmpx, *tmpt;
				2522
				2523	pa = a->used;
				2524	if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
				2525	return res;
				2526	}
				2527
				2528	/* default used is maximum possible size */
				2529	t.used = 2*pa + 1;
				2530
				2531	for (ix = 0; ix < pa; ix++) {
				2532	/* first calculate the digit at 2ix /
				2533	/* calculate double precision result */
				2534	r = ((mp_word) t.dp[2*ix]) +
				2535	((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
				2536
				2537	/* store lower part in result */
				2538	t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
				2539
				2540	/* get the carry */
				2541	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
				2542
				2543	/* left hand side of A[ix] * A[iy] */
				2544	tmpx = a->dp[ix];
				2545
				2546	/* alias for where to store the results */
				2547	tmpt = t.dp + (2*ix + 1);
				2548
				2549	for (iy = ix + 1; iy < pa; iy++) {
				2550	/* first calculate the product */
				2551	r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
				2552
				2553	/* now calculate the double precision result, note we use
				2554	* addition instead of *2 since it's easier to optimize
				2555	*/
				2556	r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
				2557
				2558	/* store lower part */
				2559	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2560
				2561	/* get carry */
				2562	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
				2563	}
				2564	/* propagate upwards */
				2565	while (u != ((mp_digit) 0)) {
				2566	r = ((mp_word) *tmpt) + ((mp_word) u);
				2567	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2568	u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
				2569	}
				2570	}
				2571
				2572	mp_clamp (&t);
				2573	mp_exch (&t, b);
				2574	mp_clear (&t);
				2575	return MP_OKAY;
				2576	}
				2577
				2578
				2579	/* multiplies \|a\| * \|b\| and does not compute the lower digs digits
				2580	* [meant to get the higher part of the product]
				2581	*/
				2582	static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
				2583	{
				2584	mp_int t;
				2585	int res, pa, pb, ix, iy;
				2586	mp_digit u;
				2587	mp_word r;
				2588	mp_digit tmpx, tmpt, tmpy;
				2589
				2590	/* can we use the fast multiplier? */
				2591	#ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
				2592	if (((a->used + b->used + 1) < MP_WARRAY)
				2593	&& MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				2594	return fast_s_mp_mul_high_digs (a, b, c, digs);
				2595	}
				2596	#endif
				2597
				2598	if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
				2599	return res;
				2600	}
				2601	t.used = a->used + b->used + 1;
				2602
				2603	pa = a->used;
				2604	pb = b->used;
				2605	for (ix = 0; ix < pa; ix++) {
				2606	/* clear the carry */
				2607	u = 0;
				2608
				2609	/* left hand side of A[ix] * B[iy] */
				2610	tmpx = a->dp[ix];
				2611
				2612	/* alias to the address of where the digits will be stored */
				2613	tmpt = &(t.dp[digs]);
				2614
				2615	/* alias for where to read the right hand side from */
				2616	tmpy = b->dp + (digs - ix);
				2617
				2618	for (iy = digs - ix; iy < pb; iy++) {
				2619	/* calculate the double precision result */
				2620	r = ((mp_word)*tmpt) +
				2621	((mp_word)tmpx) * ((mp_word)*tmpy++) +
				2622	((mp_word) u);
				2623
				2624	/* get the lower part */
				2625	*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
				2626
				2627	/* carry the carry */
				2628	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
				2629	}
				2630	*tmpt = u;
				2631	}
				2632	mp_clamp (&t);
				2633	mp_exch (&t, c);
				2634	mp_clear (&t);
				2635	return MP_OKAY;
				2636	}
				2637
				2638
				2639	#ifdef BN_MP_MONTGOMERY_SETUP_C
				2640	/* setups the montgomery reduction stuff */
				2641	static int
				2642	mp_montgomery_setup (mp_int * n, mp_digit * rho)
				2643	{
				2644	mp_digit x, b;
				2645
				2646	/* fast inversion mod 2**k
				2647	*
				2648	* Based on the fact that
				2649	*
				2650	* XA = 1 (mod 2n) => (X(2-XA)) A = 1 (mod 22n)
				2651	* => 2XA - XXA*A = 1
				2652	* => 2*(1) - (1) = 1
				2653	*/
				2654	b = n->dp[0];
				2655
				2656	if ((b & 1) == 0) {
				2657	return MP_VAL;
				2658	}
				2659
				2660	x = (((b + 2) & 4) << 1) + b; /* here xa==1 mod 24 /
				2661	x = 2 - b x; /* here xa==1 mod 28 /
				2662	#if !defined(MP_8BIT)
				2663	x = 2 - b x; /* here xa==1 mod 216 /
				2664	#endif
				2665	#if defined(MP_64BIT) \|\| !(defined(MP_8BIT) \|\| defined(MP_16BIT))
				2666	x = 2 - b x; /* here xa==1 mod 232 /
				2667	#endif
				2668	#ifdef MP_64BIT
				2669	x = 2 - b x; /* here xa==1 mod 264 /
				2670	#endif
				2671
				2672	/* rho = -1/m mod b */
				2673	*rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
				2674
				2675	return MP_OKAY;
				2676	}
				2677	#endif
				2678
				2679
				2680	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
				2681	/* computes xR**-1 == x (mod N) via Montgomery Reduction
				2682	*
				2683	* This is an optimized implementation of montgomery_reduce
				2684	* which uses the comba method to quickly calculate the columns of the
				2685	* reduction.
				2686	*
				2687	* Based on Algorithm 14.32 on pp.601 of HAC.
				2688	*/
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	2689	static int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2690	{
				2691	int ix, res, olduse;
				2692	mp_word W[MP_WARRAY];
				2693
				2694	/* get old used count */
				2695	olduse = x->used;
				2696
				2697	/* grow a as required */
				2698	if (x->alloc < n->used + 1) {
				2699	if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
				2700	return res;
				2701	}
				2702	}
				2703
				2704	/* first we have to get the digits of the input into
				2705	* an array of double precision words W[...]
				2706	*/
				2707	{
				2708	register mp_word *_W;
				2709	register mp_digit *tmpx;
				2710
				2711	/* alias for the W[] array */
				2712	_W = W;
				2713
				2714	/* alias for the digits of x*/
				2715	tmpx = x->dp;
				2716
				2717	/* copy the digits of a into W[0..a->used-1] */
				2718	for (ix = 0; ix < x->used; ix++) {
				2719	_W++ = tmpx++;
				2720	}
				2721
				2722	/* zero the high words of W[a->used..m->used2] /
				2723	for (; ix < n->used * 2 + 1; ix++) {
				2724	*_W++ = 0;
				2725	}
				2726	}
				2727
				2728	/* now we proceed to zero successive digits
				2729	* from the least significant upwards
				2730	*/
				2731	for (ix = 0; ix < n->used; ix++) {
				2732	/* mu = ai * m' mod b
				2733	*
				2734	* We avoid a double precision multiplication (which isn't required)
				2735	* by casting the value down to a mp_digit. Note this requires
				2736	* that W[ix-1] have the carry cleared (see after the inner loop)
				2737	*/
				2738	register mp_digit mu;
				2739	mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
				2740
				2741	/* a = a + mu * m * b**i
				2742	*
				2743	* This is computed in place and on the fly. The multiplication
				2744	* by b**i is handled by offseting which columns the results
				2745	* are added to.
				2746	*
				2747	* Note the comba method normally doesn't handle carries in the
				2748	* inner loop In this case we fix the carry from the previous
				2749	* column since the Montgomery reduction requires digits of the
				2750	* result (so far) [see above] to work. This is
				2751	* handled by fixing up one carry after the inner loop. The
				2752	* carry fixups are done in order so after these loops the
				2753	* first m->used words of W[] have the carries fixed
				2754	*/
				2755	{
				2756	register int iy;
				2757	register mp_digit *tmpn;
				2758	register mp_word *_W;
				2759
				2760	/* alias for the digits of the modulus */
				2761	tmpn = n->dp;
				2762
				2763	/* Alias for the columns set by an offset of ix */
				2764	_W = W + ix;
				2765
				2766	/* inner loop */
				2767	for (iy = 0; iy < n->used; iy++) {
				2768	_W++ += ((mp_word)mu) ((mp_word)*tmpn++);
				2769	}
				2770	}
				2771
				2772	/* now fix carry for next digit, W[ix+1] */
				2773	W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
				2774	}
				2775
				2776	/* now we have to propagate the carries and
				2777	* shift the words downward [all those least
				2778	* significant digits we zeroed].
				2779	*/
				2780	{
				2781	register mp_digit *tmpx;
				2782	register mp_word _W, _W1;
				2783
				2784	/* nox fix rest of carries */
				2785
				2786	/* alias for current word */
				2787	_W1 = W + ix;
				2788
				2789	/* alias for next word, where the carry goes */
				2790	_W = W + ++ix;
				2791
				2792	for (; ix <= n->used * 2 + 1; ix++) {
				2793	_W++ += _W1++ >> ((mp_word) DIGIT_BIT);
				2794	}
				2795
				2796	/* copy out, A = A/b**n
				2797	*
				2798	* The result is A/b**n but instead of converting from an
				2799	* array of mp_word to mp_digit than calling mp_rshd
				2800	* we just copy them in the right order
				2801	*/
				2802
				2803	/* alias for destination word */
				2804	tmpx = x->dp;
				2805
				2806	/* alias for shifted double precision result */
				2807	_W = W + n->used;
				2808
				2809	for (ix = 0; ix < n->used + 1; ix++) {
				2810	tmpx++ = (mp_digit)(_W++ & ((mp_word) MP_MASK));
				2811	}
				2812
				2813	/* zero oldused digits, if the input a was larger than
				2814	* m->used+1 we'll have to clear the digits
				2815	*/
				2816	for (; ix < olduse; ix++) {
				2817	*tmpx++ = 0;
				2818	}
				2819	}
				2820
				2821	/* set the max used and clamp */
				2822	x->used = n->used + 1;
				2823	mp_clamp (x);
				2824
				2825	/* if A >= m then A = A - m */
				2826	if (mp_cmp_mag (x, n) != MP_LT) {
				2827	return s_mp_sub (x, n, x);
				2828	}
				2829	return MP_OKAY;
				2830	}
				2831	#endif
				2832
				2833
				2834	#ifdef BN_MP_MUL_2_C
				2835	/* b = a2 /
				2836	static int mp_mul_2(mp_int * a, mp_int * b)
				2837	{
				2838	int x, res, oldused;
				2839
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	2840	/* grow to accommodate result */
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2841	if (b->alloc < a->used + 1) {
				2842	if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
				2843	return res;
				2844	}
				2845	}
				2846
				2847	oldused = b->used;
				2848	b->used = a->used;
				2849
				2850	{
				2851	register mp_digit r, rr, tmpa, tmpb;
				2852
				2853	/* alias for source */
				2854	tmpa = a->dp;
				2855
				2856	/* alias for dest */
				2857	tmpb = b->dp;
				2858
				2859	/* carry */
				2860	r = 0;
				2861	for (x = 0; x < a->used; x++) {
				2862
				2863	/* get what will be the next carry bit from the
				2864	* MSB of the current digit
				2865	*/
				2866	rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
				2867
				2868	/* now shift up this digit, add in the carry [from the previous] */
				2869	tmpb++ = ((tmpa++ << ((mp_digit)1)) \| r) & MP_MASK;
				2870
				2871	/* copy the carry that would be from the source
				2872	* digit into the next iteration
				2873	*/
				2874	r = rr;
				2875	}
				2876
				2877	/* new leading digit? */
				2878	if (r != 0) {
				2879	/* add a MSB which is always 1 at this point */
				2880	*tmpb = 1;
				2881	++(b->used);
				2882	}
				2883
				2884	/* now zero any excess digits on the destination
				2885	* that we didn't write to
				2886	*/
				2887	tmpb = b->dp + b->used;
				2888	for (x = b->used; x < oldused; x++) {
				2889	*tmpb++ = 0;
				2890	}
				2891	}
				2892	b->sign = a->sign;
				2893	return MP_OKAY;
				2894	}
				2895	#endif
				2896
				2897
				2898	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
				2899	/*
				2900	* shifts with subtractions when the result is greater than b.
				2901	*
Dmitry Shmidt	1f69aa5	2012-01-24 16:10:04 -0800	[diff] [blame]	2902	* The method is slightly modified to shift B unconditionally up to just under
				2903	* the leading bit of b. This saves a lot of multiple precision shifting.
Dmitry Shmidt	8d520ff	2011-05-09 14:06:53 -0700	[diff] [blame]	2904	*/
				2905	static int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
				2906	{
				2907	int x, bits, res;
				2908
				2909	/* how many bits of last digit does b use */
				2910	bits = mp_count_bits (b) % DIGIT_BIT;
				2911
				2912	if (b->used > 1) {
				2913	if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
				2914	return res;
				2915	}
				2916	} else {
				2917	mp_set(a, 1);
				2918	bits = 1;
				2919	}
				2920
				2921
				2922	/* now compute C = A * B mod b */
				2923	for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
				2924	if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
				2925	return res;
				2926	}
				2927	if (mp_cmp_mag (a, b) != MP_LT) {
				2928	if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
				2929	return res;
				2930	}
				2931	}
				2932	}
				2933
				2934	return MP_OKAY;
				2935	}
				2936	#endif
				2937
				2938
				2939	#ifdef BN_MP_EXPTMOD_FAST_C
				2940	/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
				2941	*
				2942	* Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
				2943	* The value of k changes based on the size of the exponent.
				2944	*
				2945	* Uses Montgomery or Diminished Radix reduction [whichever appropriate]
				2946	*/
				2947
				2948	static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
				2949	{
				2950	mp_int M[TAB_SIZE], res;
				2951	mp_digit buf, mp;
				2952	int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
				2953
				2954	/* use a pointer to the reduction algorithm. This allows us to use
				2955	* one of many reduction algorithms without modding the guts of
				2956	* the code with if statements everywhere.
				2957	*/
				2958	int (redux)(mp_int,mp_int*,mp_digit);
				2959
				2960	/* find window size */
				2961	x = mp_count_bits (X);
				2962	if (x <= 7) {
				2963	winsize = 2;
				2964	} else if (x <= 36) {
				2965	winsize = 3;
				2966	} else if (x <= 140) {
				2967	winsize = 4;
				2968	} else if (x <= 450) {
				2969	winsize = 5;
				2970	} else if (x <= 1303) {
				2971	winsize = 6;
				2972	} else if (x <= 3529) {
				2973	winsize = 7;
				2974	} else {
				2975	winsize = 8;
				2976	}
				2977
				2978	#ifdef MP_LOW_MEM
				2979	if (winsize > 5) {
				2980	winsize = 5;
				2981	}
				2982	#endif
				2983
				2984	/* init M array */
				2985	/* init first cell */
				2986	if ((err = mp_init(&M[1])) != MP_OKAY) {
				2987	return err;
				2988	}
				2989
				2990	/* now init the second half of the array */
				2991	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				2992	if ((err = mp_init(&M[x])) != MP_OKAY) {
				2993	for (y = 1<<(winsize-1); y < x; y++) {
				2994	mp_clear (&M[y]);
				2995	}
				2996	mp_clear(&M[1]);
				2997	return err;
				2998	}
				2999	}
				3000
				3001	/* determine and setup reduction code */
				3002	if (redmode == 0) {
				3003	#ifdef BN_MP_MONTGOMERY_SETUP_C
				3004	/* now setup montgomery */
				3005	if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
				3006	goto LBL_M;
				3007	}
				3008	#else
				3009	err = MP_VAL;
				3010	goto LBL_M;
				3011	#endif
				3012
				3013	/* automatically pick the comba one if available (saves quite a few calls/ifs) */
				3014	#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
				3015	if (((P->used * 2 + 1) < MP_WARRAY) &&
				3016	P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
				3017	redux = fast_mp_montgomery_reduce;
				3018	} else
				3019	#endif
				3020	{
				3021	#ifdef BN_MP_MONTGOMERY_REDUCE_C
				3022	/* use slower baseline Montgomery method */
				3023	redux = mp_montgomery_reduce;
				3024	#else
				3025	err = MP_VAL;
				3026	goto LBL_M;
				3027	#endif
				3028	}
				3029	} else if (redmode == 1) {
				3030	#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
				3031	/* setup DR reduction for moduli of the form B*k - b /
				3032	mp_dr_setup(P, &mp);
				3033	redux = mp_dr_reduce;
				3034	#else
				3035	err = MP_VAL;
				3036	goto LBL_M;
				3037	#endif
				3038	} else {
				3039	#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
				3040	/* setup DR reduction for moduli of the form 2*k - b /
				3041	if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
				3042	goto LBL_M;
				3043	}
				3044	redux = mp_reduce_2k;
				3045	#else
				3046	err = MP_VAL;
				3047	goto LBL_M;
				3048	#endif
				3049	}
				3050
				3051	/* setup result */
				3052	if ((err = mp_init (&res)) != MP_OKAY) {
				3053	goto LBL_M;
				3054	}
				3055
				3056	/* create M table
				3057	*
				3058
				3059	*
				3060	* The first half of the table is not computed though accept for M[0] and M[1]
				3061	*/
				3062
				3063	if (redmode == 0) {
				3064	#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
				3065	/* now we need R mod m */
				3066	if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
				3067	goto LBL_RES;
				3068	}
				3069	#else
				3070	err = MP_VAL;
				3071	goto LBL_RES;
				3072	#endif
				3073
				3074	/* now set M[1] to G * R mod m */
				3075	if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
				3076	goto LBL_RES;
				3077	}
				3078	} else {
				3079	mp_set(&res, 1);
				3080	if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
				3081	goto LBL_RES;
				3082	}
				3083	}
				3084
				3085	/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
				3086	if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
				3087	goto LBL_RES;
				3088	}
				3089
				3090	for (x = 0; x < (winsize - 1); x++) {
				3091	if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
				3092	goto LBL_RES;
				3093	}
				3094	if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
				3095	goto LBL_RES;
				3096	}
				3097	}
				3098
				3099	/* create upper table */
				3100	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
				3101	if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
				3102	goto LBL_RES;
				3103	}
				3104	if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
				3105	goto LBL_RES;
				3106	}
				3107	}
				3108
				3109	/* set initial mode and bit cnt */
				3110	mode = 0;
				3111	bitcnt = 1;
				3112	buf = 0;
				3113	digidx = X->used - 1;
				3114	bitcpy = 0;
				3115	bitbuf = 0;
				3116
				3117	for (;;) {
				3118	/* grab next digit as required */
				3119	if (--bitcnt == 0) {
				3120	/* if digidx == -1 we are out of digits so break */
				3121	if (digidx == -1) {
				3122	break;
				3123	}
				3124	/* read next digit and reset bitcnt */
				3125	buf = X->dp[digidx--];
				3126	bitcnt = (int)DIGIT_BIT;
				3127	}
				3128
				3129	/* grab the next msb from the exponent */
				3130	y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
				3131	buf <<= (mp_digit)1;
				3132
				3133	/* if the bit is zero and mode == 0 then we ignore it
				3134	* These represent the leading zero bits before the first 1 bit
				3135	* in the exponent. Technically this opt is not required but it
				3136	* does lower the # of trivial squaring/reductions used
				3137	*/
				3138	if (mode == 0 && y == 0) {
				3139	continue;
				3140	}
				3141
				3142	/* if the bit is zero and mode == 1 then we square */
				3143	if (mode == 1 && y == 0) {
				3144	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				3145	goto LBL_RES;
				3146	}
				3147	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3148	goto LBL_RES;
				3149	}
				3150	continue;
				3151	}
				3152
				3153	/* else we add it to the window */
				3154	bitbuf \|= (y << (winsize - ++bitcpy));
				3155	mode = 2;
				3156
				3157	if (bitcpy == winsize) {
				3158	/* ok window is filled so square as required and multiply */
				3159	/* square first */
				3160	for (x = 0; x < winsize; x++) {
				3161	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				3162	goto LBL_RES;
				3163	}
				3164	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3165	goto LBL_RES;
				3166	}
				3167	}
				3168
				3169	/* then multiply */
				3170	if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
				3171	goto LBL_RES;
				3172	}
				3173	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3174	goto LBL_RES;
				3175	}
				3176
				3177	/* empty window and reset */
				3178	bitcpy = 0;
				3179	bitbuf = 0;
				3180	mode = 1;
				3181	}
				3182	}
				3183
				3184	/* if bits remain then square/multiply */
				3185	if (mode == 2 && bitcpy > 0) {
				3186	/* square then multiply if the bit is set */
				3187	for (x = 0; x < bitcpy; x++) {
				3188	if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
				3189	goto LBL_RES;
				3190	}
				3191	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3192	goto LBL_RES;
				3193	}
				3194
				3195	/* get next bit of the window */
				3196	bitbuf <<= 1;
				3197	if ((bitbuf & (1 << winsize)) != 0) {
				3198	/* then multiply */
				3199	if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
				3200	goto LBL_RES;
				3201	}
				3202	if ((err = redux (&res, P, mp)) != MP_OKAY) {
				3203	goto LBL_RES;
				3204	}
				3205	}
				3206	}
				3207	}
				3208
				3209	if (redmode == 0) {
				3210	/* fixup result if Montgomery reduction is used
				3211	* recall that any value in a Montgomery system is
				3212	* actually multiplied by R mod n. So we have
				3213	* to reduce one more time to cancel out the factor
				3214	* of R.
				3215	*/
				3216	if ((err = redux(&res, P, mp)) != MP_OKAY) {
				3217	goto LBL_RES;
				3218	}
				3219	}
				3220
				3221	/* swap res with Y */
				3222	mp_exch (&res, Y);
				3223	err = MP_OKAY;
				3224	LBL_RES:mp_clear (&res);
				3225	LBL_M:
				3226	mp_clear(&M[1]);
				3227	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
				3228	mp_clear (&M[x]);
				3229	}
				3230	return err;
				3231	}
				3232	#endif
				3233
				3234
				3235	#ifdef BN_FAST_S_MP_SQR_C
				3236	/* the jist of squaring...
				3237	* you do like mult except the offset of the tmpx [one that
				3238	* starts closer to zero] can't equal the offset of tmpy.
				3239	* So basically you set up iy like before then you min it with
				3240	* (ty-tx) so that it never happens. You double all those
				3241	* you add in the inner loop
				3242
				3243	After that loop you do the squares and add them in.
				3244	*/
				3245
				3246	static int fast_s_mp_sqr (mp_int * a, mp_int * b)
				3247	{
				3248	int olduse, res, pa, ix, iz;
				3249	mp_digit W[MP_WARRAY], *tmpx;
				3250	mp_word W1;
				3251
				3252	/* grow the destination as required */
				3253	pa = a->used + a->used;
				3254	if (b->alloc < pa) {
				3255	if ((res = mp_grow (b, pa)) != MP_OKAY) {
				3256	return res;
				3257	}
				3258	}
				3259
				3260	/* number of output digits to produce */
				3261	W1 = 0;
				3262	for (ix = 0; ix < pa; ix++) {
				3263	int tx, ty, iy;
				3264	mp_word _W;
				3265	mp_digit *tmpy;
				3266
				3267	/* clear counter */
				3268	_W = 0;
				3269
				3270	/* get offsets into the two bignums */
				3271	ty = MIN(a->used-1, ix);
				3272	tx = ix - ty;
				3273
				3274	/* setup temp aliases */
				3275	tmpx = a->dp + tx;
				3276	tmpy = a->dp + ty;
				3277
				3278	/* this is the number of times the loop will iterrate, essentially
				3279	while (tx++ < a->used && ty-- >= 0) { ... }
				3280	*/
				3281	iy = MIN(a->used-tx, ty+1);
				3282
				3283	/* now for squaring tx can never equal ty
				3284	* we halve the distance since they approach at a rate of 2x
				3285	* and we have to round because odd cases need to be executed
				3286	*/
				3287	iy = MIN(iy, (ty-tx+1)>>1);
				3288
				3289	/* execute loop */
				3290	for (iz = 0; iz < iy; iz++) {
				3291	_W += ((mp_word)tmpx++)((mp_word)*tmpy--);
				3292	}
				3293
				3294	/* double the inner product and add carry */
				3295	_W = _W + _W + W1;
				3296
				3297	/* even columns have the square term in them */
				3298	if ((ix&1) == 0) {
				3299	_W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
				3300	}
				3301
				3302	/* store it */
				3303	W[ix] = (mp_digit)(_W & MP_MASK);
				3304
				3305	/* make next carry */
				3306	W1 = _W >> ((mp_word)DIGIT_BIT);
				3307	}
				3308
				3309	/* setup dest */
				3310	olduse = b->used;
				3311	b->used = a->used+a->used;
				3312
				3313	{
				3314	mp_digit *tmpb;
				3315	tmpb = b->dp;
				3316	for (ix = 0; ix < pa; ix++) {
				3317	*tmpb++ = W[ix] & MP_MASK;
				3318	}
				3319
				3320	/* clear unused digits [that existed in the old copy of c] */
				3321	for (; ix < olduse; ix++) {
				3322	*tmpb++ = 0;
				3323	}
				3324	}
				3325	mp_clamp (b);
				3326	return MP_OKAY;
				3327	}
				3328	#endif
				3329
				3330
				3331	#ifdef BN_MP_MUL_D_C
				3332	/* multiply by a digit */
				3333	static int
				3334	mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
				3335	{
				3336	mp_digit u, tmpa, tmpc;
				3337	mp_word r;
				3338	int ix, res, olduse;
				3339
				3340	/* make sure c is big enough to hold ab /
				3341	if (c->alloc < a->used + 1) {
				3342	if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
				3343	return res;
				3344	}
				3345	}
				3346
				3347	/* get the original destinations used count */
				3348	olduse = c->used;
				3349
				3350	/* set the sign */
				3351	c->sign = a->sign;
				3352
				3353	/* alias for a->dp [source] */
				3354	tmpa = a->dp;
				3355
				3356	/* alias for c->dp [dest] */
				3357	tmpc = c->dp;
				3358
				3359	/* zero carry */
				3360	u = 0;
				3361
				3362	/* compute columns */
				3363	for (ix = 0; ix < a->used; ix++) {
				3364	/* compute product and carry sum for this term */
				3365	r = ((mp_word) u) + ((mp_word)tmpa++) ((mp_word)b);
				3366
				3367	/* mask off higher bits to get a single digit */
				3368	*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
				3369
				3370	/* send carry into next iteration */
				3371	u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
				3372	}
				3373
				3374	/* store final carry [if any] and increment ix offset */
				3375	*tmpc++ = u;
				3376	++ix;
				3377
				3378	/* now zero digits above the top */
				3379	while (ix++ < olduse) {
				3380	*tmpc++ = 0;
				3381	}
				3382
				3383	/* set used count */
				3384	c->used = a->used + 1;
				3385	mp_clamp(c);
				3386
				3387	return MP_OKAY;
				3388	}
				3389	#endif