WebSVN - shark - Blame - Rev 1083 - /shark/tags/rel_1_5

Rev	Author	Line No.	Line
2	pj	1	/*-
		2	* Copyright (c) 1992, 1993
		3	* The Regents of the University of California. All rights reserved.
		4	*
		5	* This software was developed by the Computer Systems Engineering group
		6	* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
		7	* contributed to Berkeley.
		8	*
		9	* Redistribution and use in source and binary forms, with or without
		10	* modification, are permitted provided that the following conditions
		11	* are met:
		12	* 1. Redistributions of source code must retain the above copyright
		13	* notice, this list of conditions and the following disclaimer.
		14	* 2. Redistributions in binary form must reproduce the above copyright
		15	* notice, this list of conditions and the following disclaimer in the
		16	* documentation and/or other materials provided with the distribution.
		17	* 3. All advertising materials mentioning features or use of this software
		18	* must display the following acknowledgement:
		19	* This product includes software developed by the University of
		20	* California, Berkeley and its contributors.
		21	* 4. Neither the name of the University nor the names of its contributors
		22	* may be used to endorse or promote products derived from this software
		23	* without specific prior written permission.
		24	*
		25	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
		26	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
		27	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
		28	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
		29	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
		30	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
		31	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
		32	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
		33	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
		34	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
		35	* SUCH DAMAGE.
		36	*/
		37
		38	#if defined(LIBC_SCCS) && !defined(lint)
		39	static char sccsid[] = "@(#)muldi3.c 8.1 (Berkeley) 6/4/93";
		40	#endif /* LIBC_SCCS and not lint */
		41
		42	#include "quad.h"
		43
		44	/*
		45	* Multiply two quads.
		46	*
		47	* Our algorithm is based on the following. Split incoming quad values
		48	* u and v (where u,v >= 0) into
		49	*
		50	* u = 2^n u1 * u0 (n = number of bits in `u_long', usu. 32)
		51	*
		52	* and
		53	*
		54	* v = 2^n v1 * v0
		55	*
		56	* Then
		57	*
		58	* uv = 2^2n u1 v1 + 2^n u1 v0 + 2^n v1 u0 + u0 v0
		59	* = 2^2n u1 v1 + 2^n (u1 v0 + v1 u0) + u0 v0
		60	*
		61	* Now add 2^n u1 v1 to the first term and subtract it from the middle,
		62	* and add 2^n u0 v0 to the last term and subtract it from the middle.
		63	* This gives:
		64	*
		65	* uv = (2^2n + 2^n) (u1 v1) +
		66	* (2^n) (u1 v0 - u1 v1 + u0 v1 - u0 v0) +
		67	* (2^n + 1) (u0 v0)
		68	*
		69	* Factoring the middle a bit gives us:
		70	*
		71	* uv = (2^2n + 2^n) (u1 v1) + [u1v1 = high]
		72	* (2^n) (u1 - u0) (v0 - v1) + [(u1-u0)... = mid]
		73	* (2^n + 1) (u0 v0) [u0v0 = low]
		74	*
		75	* The terms (u1 v1), (u1 - u0) (v0 - v1), and (u0 v0) can all be done
		76	* in just half the precision of the original. (Note that either or both
		77	* of (u1 - u0) or (v0 - v1) may be negative.)
		78	*
		79	* This algorithm is from Knuth vol. 2 (2nd ed), section 4.3.3, p. 278.
		80	*
		81	* Since C does not give us a `long * long = quad' operator, we split
		82	* our input quads into two longs, then split the two longs into two
		83	* shorts. We can then calculate `short * short = long' in native
		84	* arithmetic.
		85	*
		86	* Our product should, strictly speaking, be a `long quad', with 128
		87	* bits, but we are going to discard the upper 64. In other words,
		88	* we are not interested in uv, but rather in (uv mod 2^2n). This
		89	* makes some of the terms above vanish, and we get:
		90	*
		91	* (2^n)(high) + (2^n)(mid) + (2^n + 1)(low)
		92	*
		93	* or
		94	*
		95	* (2^n)(high + mid + low) + low
		96	*
		97	* Furthermore, `high' and `mid' can be computed mod 2^n, as any factor
		98	* of 2^n in either one will also vanish. Only `low' need be computed
		99	* mod 2^2n, and only because of the final term above.
		100	*/
		101	static quad_t __lmulq(u_long, u_long);
		102
		103	quad_t
		104	__muldi3(a, b)
		105	quad_t a, b;
		106	{
		107	union uu u, v, low, prod;
		108	register u_long high, mid, udiff, vdiff;
		109	register int negall, negmid;
		110	#define u1 u.ul[H]
		111	#define u0 u.ul[L]
		112	#define v1 v.ul[H]
		113	#define v0 v.ul[L]
		114
		115	/*
		116	* Get u and v such that u, v >= 0. When this is finished,
		117	* u1, u0, v1, and v0 will be directly accessible through the
		118	* longword fields.
		119	*/
		120	if (a >= 0)
		121	u.q = a, negall = 0;
		122	else
		123	u.q = -a, negall = 1;
		124	if (b >= 0)
		125	v.q = b;
		126	else
		127	v.q = -b, negall ^= 1;
		128
		129	if (u1 == 0 && v1 == 0) {
		130	/*
		131	* An (I hope) important optimization occurs when u1 and v1
		132	* are both 0. This should be common since most numbers
		133	* are small. Here the product is just u0*v0.
		134	*/
		135	prod.q = __lmulq(u0, v0);
		136	} else {
		137	/*
		138	* Compute the three intermediate products, remembering
		139	* whether the middle term is negative. We can discard
		140	* any upper bits in high and mid, so we can use native
		141	* u_long * u_long => u_long arithmetic.
		142	*/
		143	low.q = __lmulq(u0, v0);
		144
		145	if (u1 >= u0)
		146	negmid = 0, udiff = u1 - u0;
		147	else
		148	negmid = 1, udiff = u0 - u1;
		149	if (v0 >= v1)
		150	vdiff = v0 - v1;
		151	else
		152	vdiff = v1 - v0, negmid ^= 1;
		153	mid = udiff * vdiff;
		154
		155	high = u1 * v1;
		156
		157	/*
		158	* Assemble the final product.
		159	*/
		160	prod.ul[H] = high + (negmid ? -mid : mid) + low.ul[L] +
		161	low.ul[H];
		162	prod.ul[L] = low.ul[L];
		163	}
		164	return (negall ? -prod.q : prod.q);
		165	#undef u1
		166	#undef u0
		167	#undef v1
		168	#undef v0
		169	}
		170
		171	/*
		172	* Multiply two 2N-bit longs to produce a 4N-bit quad, where N is half
		173	* the number of bits in a long (whatever that is---the code below
		174	* does not care as long as quad.h does its part of the bargain---but
		175	* typically N==16).
		176	*
		177	* We use the same algorithm from Knuth, but this time the modulo refinement
		178	* does not apply. On the other hand, since N is half the size of a long,
		179	* we can get away with native multiplication---none of our input terms
		180	* exceeds (ULONG_MAX >> 1).
		181	*
		182	* Note that, for u_long l, the quad-precision result
		183	*
		184	* l << N
		185	*
		186	* splits into high and low longs as HHALF(l) and LHUP(l) respectively.
		187	*/
		188	static quad_t
		189	__lmulq(u_long u, u_long v)
		190	{
		191	u_long u1, u0, v1, v0, udiff, vdiff, high, mid, low;
		192	u_long prodh, prodl, was;
		193	union uu prod;
		194	int neg;
		195
		196	u1 = HHALF(u);
		197	u0 = LHALF(u);
		198	v1 = HHALF(v);
		199	v0 = LHALF(v);
		200
		201	low = u0 * v0;
		202
		203	/* This is the same small-number optimization as before. */
		204	if (u1 == 0 && v1 == 0)
		205	return (low);
		206
		207	if (u1 >= u0)
		208	udiff = u1 - u0, neg = 0;
		209	else
		210	udiff = u0 - u1, neg = 1;
		211	if (v0 >= v1)
		212	vdiff = v0 - v1;
		213	else
		214	vdiff = v1 - v0, neg ^= 1;
		215	mid = udiff * vdiff;
		216
		217	high = u1 * v1;
		218
		219	/* prod = (high << 2N) + (high << N); */
		220	prodh = high + HHALF(high);
		221	prodl = LHUP(high);
		222
		223	/* if (neg) prod -= mid << N; else prod += mid << N; */
		224	if (neg) {
		225	was = prodl;
		226	prodl -= LHUP(mid);
		227	prodh -= HHALF(mid) + (prodl > was);
		228	} else {
		229	was = prodl;
		230	prodl += LHUP(mid);
		231	prodh += HHALF(mid) + (prodl < was);
		232	}
		233
		234	/* prod += low << N */
		235	was = prodl;
		236	prodl += LHUP(low);
		237	prodh += HHALF(low) + (prodl < was);
		238	/* ... + low; */
		239	if ((prodl += low) < low)
		240	prodh++;
		241
		242	/* return 4N-bit product */
		243	prod.ul[H] = prodh;
		244	prod.ul[L] = prodl;
		245	return (prod.q);
		246	}

Subversion Repositories shark

(root)/shark/tags/rel_1_5_4/libc/quad/muldi3.c @ 1613 - Rev 1083