1/* $NetBSD: n_argred.S,v 1.1 1995/10/10 23:40:21 ragge Exp $ */ 2/* 3 * Copyright (c) 1985, 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by the University of 17 * California, Berkeley and its contributors. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)argred.s 8.1 (Berkeley) 6/4/93 35 */ 36 37/* 38 * libm$argred implements Bob Corbett's argument reduction and 39 * libm$sincos implements Peter Tang's double precision sin/cos. 40 * 41 * Note: The two entry points libm$argred and libm$sincos are meant 42 * to be used only by _sin, _cos and _tan. 43 * 44 * method: true range reduction to [-pi/4,pi/4], P. Tang & B. Corbett 45 * S. McDonald, April 4, 1985 46 */ 47 .globl libm$argred 48 .globl libm$sincos 49 .text 50 .align 1 51 52libm$argred: 53/* 54 * Compare the argument with the largest possible that can 55 * be reduced by table lookup. r3 := |x| will be used in table_lookup . 56 */ 57 movd r0,r3 58 bgeq abs1 59 mnegd r3,r3 60abs1: 61 cmpd r3,$0d+4.55530934770520019583e+01 62 blss small_arg 63 jsb trigred 64 rsb 65small_arg: 66 jsb table_lookup 67 rsb 68/* 69 * At this point, 70 * r0 contains the quadrant number, 0, 1, 2, or 3; 71 * r2/r1 contains the reduced argument as a D-format number; 72 * r3 contains a F-format extension to the reduced argument; 73 * r4 contains a 0 or 1 corresponding to a sin or cos entry. 74 */ 75libm$sincos: 76/* 77 * Compensate for a cosine entry by adding one to the quadrant number. 78 */ 79 addl2 r4,r0 80/* 81 * Polyd clobbers r5-r0 ; save X in r7/r6 . 82 * This can be avoided by rewriting trigred . 83 */ 84 movd r1,r6 85/* 86 * Likewise, save alpha in r8 . 87 * This can be avoided by rewriting trigred . 88 */ 89 movf r3,r8 90/* 91 * Odd or even quadrant? cosine if odd, sine otherwise. 92 * Save floor(quadrant/2) in r9 ; it determines the final sign. 93 */ 94 rotl $-1,r0,r9 95 blss cosine 96sine: 97 muld2 r1,r1 # Xsq = X * X 98 cmpw $0x2480,r1 # [zl] Xsq > 2^-56? 99 blss 1f # [zl] yes, go ahead and do polyd 100 clrq r1 # [zl] work around 11/780 FPA polyd bug 1011: 102 polyd r1,$7,sin_coef # Q = P(Xsq) , of deg 7 103 mulf3 $0f3.0,r8,r4 # beta = 3 * alpha 104 mulf2 r0,r4 # beta = Q * beta 105 addf2 r8,r4 # beta = alpha + beta 106 muld2 r6,r0 # S(X) = X * Q 107/* cvtfd r4,r4 ... r5 = 0 after a polyd. */ 108 addd2 r4,r0 # S(X) = beta + S(X) 109 addd2 r6,r0 # S(X) = X + S(X) 110 brb done 111cosine: 112 muld2 r6,r6 # Xsq = X * X 113 beql zero_arg 114 mulf2 r1,r8 # beta = X * alpha 115 polyd r6,$7,cos_coef /* Q = P'(Xsq) , of deg 7 */ 116 subd3 r0,r8,r0 # beta = beta - Q 117 subw2 $0x80,r6 # Xsq = Xsq / 2 118 addd2 r0,r6 # Xsq = Xsq + beta 119zero_arg: 120 subd3 r6,$0d1.0,r0 # C(X) = 1 - Xsq 121done: 122 blbc r9,even 123 mnegd r0,r0 124even: 125 rsb 126 127.data 128.align 2 129 130sin_coef: 131 .double 0d-7.53080332264191085773e-13 # s7 = 2^-29 -1.a7f2504ffc49f8.. 132 .double 0d+1.60573519267703489121e-10 # s6 = 2^-21 1.611adaede473c8.. 133 .double 0d-2.50520965150706067211e-08 # s5 = 2^-1a -1.ae644921ed8382.. 134 .double 0d+2.75573191800593885716e-06 # s4 = 2^-13 1.71de3a4b884278.. 135 .double 0d-1.98412698411850507950e-04 # s3 = 2^-0d -1.a01a01a0125e7d.. 136 .double 0d+8.33333333333325688985e-03 # s2 = 2^-07 1.11111111110e50 137 .double 0d-1.66666666666666664354e-01 # s1 = 2^-03 -1.55555555555554 138 .double 0d+0.00000000000000000000e+00 # s0 = 0 139 140cos_coef: 141 .double 0d-1.13006966202629430300e-11 # s7 = 2^-25 -1.8D9BA04D1374BE.. 142 .double 0d+2.08746646574796004700e-09 # s6 = 2^-1D 1.1EE632650350BA.. 143 .double 0d-2.75573073031284417300e-07 # s5 = 2^-16 -1.27E4F31411719E.. 144 .double 0d+2.48015872682668025200e-05 # s4 = 2^-10 1.A01A0196B902E8.. 145 .double 0d-1.38888888888464709200e-03 # s3 = 2^-0A -1.6C16C16C11FACE.. 146 .double 0d+4.16666666666664761400e-02 # s2 = 2^-05 1.5555555555539E 147 .double 0d+0.00000000000000000000e+00 # s1 = 0 148 .double 0d+0.00000000000000000000e+00 # s0 = 0 149 150/* 151 * Multiples of pi/2 expressed as the sum of three doubles, 152 * 153 * trailing: n * pi/2 , n = 0, 1, 2, ..., 29 154 * trailing[n] , 155 * 156 * middle: n * pi/2 , n = 0, 1, 2, ..., 29 157 * middle[n] , 158 * 159 * leading: n * pi/2 , n = 0, 1, 2, ..., 29 160 * leading[n] , 161 * 162 * where 163 * leading[n] := (n * pi/2) rounded, 164 * middle[n] := (n * pi/2 - leading[n]) rounded, 165 * trailing[n] := (( n * pi/2 - leading[n]) - middle[n]) rounded . 166 */ 167trailing: 168 .double 0d+0.00000000000000000000e+00 # 0 * pi/2 trailing 169 .double 0d+4.33590506506189049611e-35 # 1 * pi/2 trailing 170 .double 0d+8.67181013012378099223e-35 # 2 * pi/2 trailing 171 .double 0d+1.30077151951856714215e-34 # 3 * pi/2 trailing 172 .double 0d+1.73436202602475619845e-34 # 4 * pi/2 trailing 173 .double 0d-1.68390735624352669192e-34 # 5 * pi/2 trailing 174 .double 0d+2.60154303903713428430e-34 # 6 * pi/2 trailing 175 .double 0d-8.16726343231148352150e-35 # 7 * pi/2 trailing 176 .double 0d+3.46872405204951239689e-34 # 8 * pi/2 trailing 177 .double 0d+3.90231455855570147991e-34 # 9 * pi/2 trailing 178 .double 0d-3.36781471248705338384e-34 # 10 * pi/2 trailing 179 .double 0d-1.06379439835298071785e-33 # 11 * pi/2 trailing 180 .double 0d+5.20308607807426856861e-34 # 12 * pi/2 trailing 181 .double 0d+5.63667658458045770509e-34 # 13 * pi/2 trailing 182 .double 0d-1.63345268646229670430e-34 # 14 * pi/2 trailing 183 .double 0d-1.19986217995610764801e-34 # 15 * pi/2 trailing 184 .double 0d+6.93744810409902479378e-34 # 16 * pi/2 trailing 185 .double 0d-8.03640094449267300110e-34 # 17 * pi/2 trailing 186 .double 0d+7.80462911711140295982e-34 # 18 * pi/2 trailing 187 .double 0d-7.16921993148029483506e-34 # 19 * pi/2 trailing 188 .double 0d-6.73562942497410676769e-34 # 20 * pi/2 trailing 189 .double 0d-6.30203891846791677593e-34 # 21 * pi/2 trailing 190 .double 0d-2.12758879670596143570e-33 # 22 * pi/2 trailing 191 .double 0d+2.53800212047402350390e-33 # 23 * pi/2 trailing 192 .double 0d+1.04061721561485371372e-33 # 24 * pi/2 trailing 193 .double 0d+6.11729905311472319056e-32 # 25 * pi/2 trailing 194 .double 0d+1.12733531691609154102e-33 # 26 * pi/2 trailing 195 .double 0d-3.70049587943078297272e-34 # 27 * pi/2 trailing 196 .double 0d-3.26690537292459340860e-34 # 28 * pi/2 trailing 197 .double 0d-1.14812616507957271361e-34 # 29 * pi/2 trailing 198 199middle: 200 .double 0d+0.00000000000000000000e+00 # 0 * pi/2 middle 201 .double 0d+5.72118872610983179676e-18 # 1 * pi/2 middle 202 .double 0d+1.14423774522196635935e-17 # 2 * pi/2 middle 203 .double 0d-3.83475850529283316309e-17 # 3 * pi/2 middle 204 .double 0d+2.28847549044393271871e-17 # 4 * pi/2 middle 205 .double 0d-2.69052076007086676522e-17 # 5 * pi/2 middle 206 .double 0d-7.66951701058566632618e-17 # 6 * pi/2 middle 207 .double 0d-1.54628301484890040587e-17 # 7 * pi/2 middle 208 .double 0d+4.57695098088786543741e-17 # 8 * pi/2 middle 209 .double 0d+1.07001849766246313192e-16 # 9 * pi/2 middle 210 .double 0d-5.38104152014173353044e-17 # 10 * pi/2 middle 211 .double 0d-2.14622680169080983801e-16 # 11 * pi/2 middle 212 .double 0d-1.53390340211713326524e-16 # 12 * pi/2 middle 213 .double 0d-9.21580002543456677056e-17 # 13 * pi/2 middle 214 .double 0d-3.09256602969780081173e-17 # 14 * pi/2 middle 215 .double 0d+3.03066796603896507006e-17 # 15 * pi/2 middle 216 .double 0d+9.15390196177573087482e-17 # 16 * pi/2 middle 217 .double 0d+1.52771359575124969107e-16 # 17 * pi/2 middle 218 .double 0d+2.14003699532492626384e-16 # 18 * pi/2 middle 219 .double 0d-1.68853170360202329427e-16 # 19 * pi/2 middle 220 .double 0d-1.07620830402834670609e-16 # 20 * pi/2 middle 221 .double 0d+3.97700719404595604379e-16 # 21 * pi/2 middle 222 .double 0d-4.29245360338161967602e-16 # 22 * pi/2 middle 223 .double 0d-3.68013020380794313406e-16 # 23 * pi/2 middle 224 .double 0d-3.06780680423426653047e-16 # 24 * pi/2 middle 225 .double 0d-2.45548340466059054318e-16 # 25 * pi/2 middle 226 .double 0d-1.84316000508691335411e-16 # 26 * pi/2 middle 227 .double 0d-1.23083660551323675053e-16 # 27 * pi/2 middle 228 .double 0d-6.18513205939560162346e-17 # 28 * pi/2 middle 229 .double 0d-6.18980636588357585202e-19 # 29 * pi/2 middle 230 231leading: 232 .double 0d+0.00000000000000000000e+00 # 0 * pi/2 leading 233 .double 0d+1.57079632679489661351e+00 # 1 * pi/2 leading 234 .double 0d+3.14159265358979322702e+00 # 2 * pi/2 leading 235 .double 0d+4.71238898038468989604e+00 # 3 * pi/2 leading 236 .double 0d+6.28318530717958645404e+00 # 4 * pi/2 leading 237 .double 0d+7.85398163397448312306e+00 # 5 * pi/2 leading 238 .double 0d+9.42477796076937979208e+00 # 6 * pi/2 leading 239 .double 0d+1.09955742875642763501e+01 # 7 * pi/2 leading 240 .double 0d+1.25663706143591729081e+01 # 8 * pi/2 leading 241 .double 0d+1.41371669411540694661e+01 # 9 * pi/2 leading 242 .double 0d+1.57079632679489662461e+01 # 10 * pi/2 leading 243 .double 0d+1.72787595947438630262e+01 # 11 * pi/2 leading 244 .double 0d+1.88495559215387595842e+01 # 12 * pi/2 leading 245 .double 0d+2.04203522483336561422e+01 # 13 * pi/2 leading 246 .double 0d+2.19911485751285527002e+01 # 14 * pi/2 leading 247 .double 0d+2.35619449019234492582e+01 # 15 * pi/2 leading 248 .double 0d+2.51327412287183458162e+01 # 16 * pi/2 leading 249 .double 0d+2.67035375555132423742e+01 # 17 * pi/2 leading 250 .double 0d+2.82743338823081389322e+01 # 18 * pi/2 leading 251 .double 0d+2.98451302091030359342e+01 # 19 * pi/2 leading 252 .double 0d+3.14159265358979324922e+01 # 20 * pi/2 leading 253 .double 0d+3.29867228626928286062e+01 # 21 * pi/2 leading 254 .double 0d+3.45575191894877260523e+01 # 22 * pi/2 leading 255 .double 0d+3.61283155162826226103e+01 # 23 * pi/2 leading 256 .double 0d+3.76991118430775191683e+01 # 24 * pi/2 leading 257 .double 0d+3.92699081698724157263e+01 # 25 * pi/2 leading 258 .double 0d+4.08407044966673122843e+01 # 26 * pi/2 leading 259 .double 0d+4.24115008234622088423e+01 # 27 * pi/2 leading 260 .double 0d+4.39822971502571054003e+01 # 28 * pi/2 leading 261 .double 0d+4.55530934770520019583e+01 # 29 * pi/2 leading 262 263twoOverPi: 264 .double 0d+6.36619772367581343076e-01 265 .text 266 .align 1 267 268table_lookup: 269 muld3 r3,twoOverPi,r0 270 cvtrdl r0,r0 # n = nearest int to ((2/pi)*|x|) rnded 271 mull3 $8,r0,r5 272 subd2 leading(r5),r3 # p = (|x| - leading n*pi/2) exactly 273 subd3 middle(r5),r3,r1 # q = (p - middle n*pi/2) rounded 274 subd2 r1,r3 # r = (p - q) 275 subd2 middle(r5),r3 # r = r - middle n*pi/2 276 subd2 trailing(r5),r3 # r = r - trailing n*pi/2 rounded 277/* 278 * If the original argument was negative, 279 * negate the reduce argument and 280 * adjust the octant/quadrant number. 281 */ 282 tstw 4(ap) 283 bgeq abs2 284 mnegf r1,r1 285 mnegf r3,r3 286/* subb3 r0,$8,r0 ...used for pi/4 reduction -S.McD */ 287 subb3 r0,$4,r0 288abs2: 289/* 290 * Clear all unneeded octant/quadrant bits. 291 */ 292/* bicb2 $0xf8,r0 ...used for pi/4 reduction -S.McD */ 293 bicb2 $0xfc,r0 294 rsb 295/* 296 * p.0 297 */ 298 .text 299 .align 2 300/* 301 * Only 256 (actually 225) bits of 2/pi are needed for VAX double 302 * precision; this was determined by enumerating all the nearest 303 * machine integer multiples of pi/2 using continued fractions. 304 * (8a8d3673775b7ff7 required the most bits.) -S.McD 305 */ 306 .long 0 307 .long 0 308 .long 0xaef1586d 309 .long 0x9458eaf7 310 .long 0x10e4107f 311 .long 0xd8a5664f 312 .long 0x4d377036 313 .long 0x09d5f47d 314 .long 0x91054a7f 315 .long 0xbe60db93 316bits2opi: 317 .long 0x00000028 318 .long 0 319/* 320 * Note: wherever you see the word `octant', read `quadrant'. 321 * Currently this code is set up for pi/2 argument reduction. 322 * By uncommenting/commenting the appropriate lines, it will 323 * also serve as a pi/4 argument reduction code. 324 */ 325 326/* p.1 327 * Trigred preforms argument reduction 328 * for the trigonometric functions. It 329 * takes one input argument, a D-format 330 * number in r1/r0 . The magnitude of 331 * the input argument must be greater 332 * than or equal to 1/2 . Trigred produces 333 * three results: the number of the octant 334 * occupied by the argument, the reduced 335 * argument, and an extension of the 336 * reduced argument. The octant number is 337 * returned in r0 . The reduced argument 338 * is returned as a D-format number in 339 * r2/r1 . An 8 bit extension of the 340 * reduced argument is returned as an 341 * F-format number in r3. 342 * p.2 343 */ 344trigred: 345/* 346 * Save the sign of the input argument. 347 */ 348 movw r0,-(sp) 349/* 350 * Extract the exponent field. 351 */ 352 extzv $7,$7,r0,r2 353/* 354 * Convert the fraction part of the input 355 * argument into a quadword integer. 356 */ 357 bicw2 $0xff80,r0 358 bisb2 $0x80,r0 # -S.McD 359 rotl $16,r0,r0 360 rotl $16,r1,r1 361/* 362 * If r1 is negative, add 1 to r0 . This 363 * adjustment is made so that the two's 364 * complement multiplications done later 365 * will produce unsigned results. 366 */ 367 bgeq posmid 368 incl r0 369posmid: 370/* p.3 371 * 372 * Set r3 to the address of the first quadword 373 * used to obtain the needed portion of 2/pi . 374 * The address is longword aligned to ensure 375 * efficient access. 376 */ 377 ashl $-3,r2,r3 378 bicb2 $3,r3 379 subl3 r3,$bits2opi,r3 380/* 381 * Set r2 to the size of the shift needed to 382 * obtain the correct portion of 2/pi . 383 */ 384 bicb2 $0xe0,r2 385/* p.4 386 * 387 * Move the needed 128 bits of 2/pi into 388 * r11 - r8 . Adjust the numbers to allow 389 * for unsigned multiplication. 390 */ 391 ashq r2,(r3),r10 392 393 subl2 $4,r3 394 ashq r2,(r3),r9 395 bgeq signoff1 396 incl r11 397signoff1: 398 subl2 $4,r3 399 ashq r2,(r3),r8 400 bgeq signoff2 401 incl r10 402signoff2: 403 subl2 $4,r3 404 ashq r2,(r3),r7 405 bgeq signoff3 406 incl r9 407signoff3: 408/* p.5 409 * 410 * Multiply the contents of r0/r1 by the 411 * slice of 2/pi in r11 - r8 . 412 */ 413 emul r0,r8,$0,r4 414 emul r0,r9,r5,r5 415 emul r0,r10,r6,r6 416 417 emul r1,r8,$0,r7 418 emul r1,r9,r8,r8 419 emul r1,r10,r9,r9 420 emul r1,r11,r10,r10 421 422 addl2 r4,r8 423 adwc r5,r9 424 adwc r6,r10 425/* p.6 426 * 427 * If there are more than five leading zeros 428 * after the first two quotient bits or if there 429 * are more than five leading ones after the first 430 * two quotient bits, generate more fraction bits. 431 * Otherwise, branch to code to produce the result. 432 */ 433 bicl3 $0xc1ffffff,r10,r4 434 beql more1 435 cmpl $0x3e000000,r4 436 bneq result 437more1: 438/* p.7 439 * 440 * generate another 32 result bits. 441 */ 442 subl2 $4,r3 443 ashq r2,(r3),r5 444 bgeq signoff4 445 446 emul r1,r6,$0,r4 447 addl2 r1,r5 448 emul r0,r6,r5,r5 449 addl2 r0,r6 450 brb addbits1 451 452signoff4: 453 emul r1,r6,$0,r4 454 emul r0,r6,r5,r5 455 456addbits1: 457 addl2 r5,r7 458 adwc r6,r8 459 adwc $0,r9 460 adwc $0,r10 461/* p.8 462 * 463 * Check for massive cancellation. 464 */ 465 bicl3 $0xc0000000,r10,r6 466/* bneq more2 -S.McD Test was backwards */ 467 beql more2 468 cmpl $0x3fffffff,r6 469 bneq result 470more2: 471/* p.9 472 * 473 * If massive cancellation has occurred, 474 * generate another 24 result bits. 475 * Testing has shown there will always be 476 * enough bits after this point. 477 */ 478 subl2 $4,r3 479 ashq r2,(r3),r5 480 bgeq signoff5 481 482 emul r0,r6,r4,r5 483 addl2 r0,r6 484 brb addbits2 485 486signoff5: 487 emul r0,r6,r4,r5 488 489addbits2: 490 addl2 r6,r7 491 adwc $0,r8 492 adwc $0,r9 493 adwc $0,r10 494/* p.10 495 * 496 * The following code produces the reduced 497 * argument from the product bits contained 498 * in r10 - r7 . 499 */ 500result: 501/* 502 * Extract the octant number from r10 . 503 */ 504/* extzv $29,$3,r10,r0 ...used for pi/4 reduction -S.McD */ 505 extzv $30,$2,r10,r0 506/* 507 * Clear the octant bits in r10 . 508 */ 509/* bicl2 $0xe0000000,r10 ...used for pi/4 reduction -S.McD */ 510 bicl2 $0xc0000000,r10 511/* 512 * Zero the sign flag. 513 */ 514 clrl r5 515/* p.11 516 * 517 * Check to see if the fraction is greater than 518 * or equal to one-half. If it is, add one 519 * to the octant number, set the sign flag 520 * on, and replace the fraction with 1 minus 521 * the fraction. 522 */ 523/* bitl $0x10000000,r10 ...used for pi/4 reduction -S.McD */ 524 bitl $0x20000000,r10 525 beql small 526 incl r0 527 incl r5 528/* subl3 r10,$0x1fffffff,r10 ...used for pi/4 reduction -S.McD */ 529 subl3 r10,$0x3fffffff,r10 530 mcoml r9,r9 531 mcoml r8,r8 532 mcoml r7,r7 533small: 534/* p.12 535 * 536 * Test whether the first 29 bits of the ...used for pi/4 reduction -S.McD 537 * Test whether the first 30 bits of the 538 * fraction are zero. 539 */ 540 tstl r10 541 beql tiny 542/* 543 * Find the position of the first one bit in r10 . 544 */ 545 cvtld r10,r1 546 extzv $7,$7,r1,r1 547/* 548 * Compute the size of the shift needed. 549 */ 550 subl3 r1,$32,r6 551/* 552 * Shift up the high order 64 bits of the 553 * product. 554 */ 555 ashq r6,r9,r10 556 ashq r6,r8,r9 557 brb mult 558/* p.13 559 * 560 * Test to see if the sign bit of r9 is on. 561 */ 562tiny: 563 tstl r9 564 bgeq tinier 565/* 566 * If it is, shift the product bits up 32 bits. 567 */ 568 movl $32,r6 569 movq r8,r10 570 tstl r10 571 brb mult 572/* p.14 573 * 574 * Test whether r9 is zero. It is probably 575 * impossible for both r10 and r9 to be 576 * zero, but until proven to be so, the test 577 * must be made. 578 */ 579tinier: 580 beql zero 581/* 582 * Find the position of the first one bit in r9 . 583 */ 584 cvtld r9,r1 585 extzv $7,$7,r1,r1 586/* 587 * Compute the size of the shift needed. 588 */ 589 subl3 r1,$32,r1 590 addl3 $32,r1,r6 591/* 592 * Shift up the high order 64 bits of the 593 * product. 594 */ 595 ashq r1,r8,r10 596 ashq r1,r7,r9 597 brb mult 598/* p.15 599 * 600 * The following code sets the reduced 601 * argument to zero. 602 */ 603zero: 604 clrl r1 605 clrl r2 606 clrl r3 607 brw return 608/* p.16 609 * 610 * At this point, r0 contains the octant number, 611 * r6 indicates the number of bits the fraction 612 * has been shifted, r5 indicates the sign of 613 * the fraction, r11/r10 contain the high order 614 * 64 bits of the fraction, and the condition 615 * codes indicate where the sign bit of r10 616 * is on. The following code multiplies the 617 * fraction by pi/2 . 618 */ 619mult: 620/* 621 * Save r11/r10 in r4/r1 . -S.McD 622 */ 623 movl r11,r4 624 movl r10,r1 625/* 626 * If the sign bit of r10 is on, add 1 to r11 . 627 */ 628 bgeq signoff6 629 incl r11 630signoff6: 631/* p.17 632 * 633 * Move pi/2 into r3/r2 . 634 */ 635 movq $0xc90fdaa22168c235,r2 636/* 637 * Multiply the fraction by the portion of pi/2 638 * in r2 . 639 */ 640 emul r2,r10,$0,r7 641 emul r2,r11,r8,r7 642/* 643 * Multiply the fraction by the portion of pi/2 644 * in r3 . 645 */ 646 emul r3,r10,$0,r9 647 emul r3,r11,r10,r10 648/* 649 * Add the product bits together. 650 */ 651 addl2 r7,r9 652 adwc r8,r10 653 adwc $0,r11 654/* 655 * Compensate for not sign extending r8 above.-S.McD 656 */ 657 tstl r8 658 bgeq signoff6a 659 decl r11 660signoff6a: 661/* 662 * Compensate for r11/r10 being unsigned. -S.McD 663 */ 664 addl2 r2,r10 665 adwc r3,r11 666/* 667 * Compensate for r3/r2 being unsigned. -S.McD 668 */ 669 addl2 r1,r10 670 adwc r4,r11 671/* p.18 672 * 673 * If the sign bit of r11 is zero, shift the 674 * product bits up one bit and increment r6 . 675 */ 676 blss signon 677 incl r6 678 ashq $1,r10,r10 679 tstl r9 680 bgeq signoff7 681 incl r10 682signoff7: 683signon: 684/* p.19 685 * 686 * Shift the 56 most significant product 687 * bits into r9/r8 . The sign extension 688 * will be handled later. 689 */ 690 ashq $-8,r10,r8 691/* 692 * Convert the low order 8 bits of r10 693 * into an F-format number. 694 */ 695 cvtbf r10,r3 696/* 697 * If the result of the conversion was 698 * negative, add 1 to r9/r8 . 699 */ 700 bgeq chop 701 incl r8 702 adwc $0,r9 703/* 704 * If r9 is now zero, branch to special 705 * code to handle that possibility. 706 */ 707 beql carryout 708chop: 709/* p.20 710 * 711 * Convert the number in r9/r8 into 712 * D-format number in r2/r1 . 713 */ 714 rotl $16,r8,r2 715 rotl $16,r9,r1 716/* 717 * Set the exponent field to the appropriate 718 * value. Note that the extra bits created by 719 * sign extension are now eliminated. 720 */ 721 subw3 r6,$131,r6 722 insv r6,$7,$9,r1 723/* 724 * Set the exponent field of the F-format 725 * number in r3 to the appropriate value. 726 */ 727 tstf r3 728 beql return 729/* extzv $7,$8,r3,r4 -S.McD */ 730 extzv $7,$7,r3,r4 731 addw2 r4,r6 732/* subw2 $217,r6 -S.McD */ 733 subw2 $64,r6 734 insv r6,$7,$8,r3 735 brb return 736/* p.21 737 * 738 * The following code generates the appropriate 739 * result for the unlikely possibility that 740 * rounding the number in r9/r8 resulted in 741 * a carry out. 742 */ 743carryout: 744 clrl r1 745 clrl r2 746 subw3 r6,$132,r6 747 insv r6,$7,$9,r1 748 tstf r3 749 beql return 750 extzv $7,$8,r3,r4 751 addw2 r4,r6 752 subw2 $218,r6 753 insv r6,$7,$8,r3 754/* p.22 755 * 756 * The following code makes an needed 757 * adjustments to the signs of the 758 * results or to the octant number, and 759 * then returns. 760 */ 761return: 762/* 763 * Test if the fraction was greater than or 764 * equal to 1/2 . If so, negate the reduced 765 * argument. 766 */ 767 blbc r5,signoff8 768 mnegf r1,r1 769 mnegf r3,r3 770signoff8: 771/* p.23 772 * 773 * If the original argument was negative, 774 * negate the reduce argument and 775 * adjust the octant number. 776 */ 777 tstw (sp)+ 778 bgeq signoff9 779 mnegf r1,r1 780 mnegf r3,r3 781/* subb3 r0,$8,r0 ...used for pi/4 reduction -S.McD */ 782 subb3 r0,$4,r0 783signoff9: 784/* 785 * Clear all unneeded octant bits. 786 * 787 * bicb2 $0xf8,r0 ...used for pi/4 reduction -S.McD */ 788 bicb2 $0xfc,r0 789/* 790 * Return. 791 */ 792 rsb 793