! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
!
! Licensed under the Apache License 2.0 (the "License"). You may not use
! this file except in compliance with the License. You can obtain a copy
! in the file LICENSE in the source distribution or at
! https://www.openssl.org/source/license.html

#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64	/* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64	/* They've said -m64 at command line */
#endif

#ifdef ABI64
	.register	%g2,#scratch
	.register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
#else
# define	FRAME	-96
# define	BIAS	0
#endif

.text
.align	32
.global	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,#function
! Keep in mind that this does not excuse us from wiping the stack!
! This routine wipes registers, but not the backing store [which
! resides on the stack, toward lower addresses]. To facilitate for
! stack wiping I return pointer to the top of stack of the *caller*.
!
! Out:	%o0 = frame pointer of this routine plus the stack bias,
!	      i.e. the top of stack of the caller
OPENSSL_wipe_cpu:
	save	%sp,FRAME,%sp
	nop
#ifdef __sun
#include <sys/trap.h>
	ta	ST_CLEAN_WINDOWS
#else
	call	.walk.reg.wins
#endif
	nop
	! Position-independent address of .zero: the delay slot computes
	! the distance from the call instruction to .zero, the helper
	! adds the address of the call instruction (%o7).
	call	.PIC.zero.up
	mov	.zero-(.-4),%o0
	ld	[%o0],%f0			! %f0 = 0
	ld	[%o0],%f1			! %f1 = 0

	subcc	%g0,1,%o0
	! Following is V9 "rd %ccr,%o0" instruction. However! V8
	! specification says that it ("rd %asr2,%o0" in V8 terms) does
	! not cause illegal_instruction trap, however it may read from
	! the %y register. It therefore can be used to determine if the
	! CPU the code is executing on is V8- or V9-compliant, as V9
	! returns a distinct value of 0x99, "negative" and "borrow"
	! bits set in both %icc and %xcc. The %y register needs to be
	! set away from 0x99 to avoid false-positive matches.
	!
	! NOTE(review): V8 documents writes to %y as delayed; confirm
	! that no padding is needed between the write and the read below.
	wr	0, %y
	.word	0x91408000	!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.v8
	nop
	! Even though we do not use %fp register bank,
	! we wipe it as memcpy might have used it...
	.word	0xbfa00040	!fmovd	%f0,%f62
	.word	0xbba00040	!...
	.word	0xb7a00040
	.word	0xb3a00040
	.word	0xafa00040
	.word	0xaba00040
	.word	0xa7a00040
	.word	0xa3a00040
	.word	0x9fa00040
	.word	0x9ba00040
	.word	0x97a00040
	.word	0x93a00040
	.word	0x8fa00040
	.word	0x8ba00040
	.word	0x87a00040
	.word	0x83a00040	!fmovd	%f0,%f32
	! Zero the remaining single-precision registers, interleaved
	! with zeroing of the integer registers of the current window.
.v8:	fmovs	%f1,%f31
	clr	%o0
	fmovs	%f0,%f30
	clr	%o1
	fmovs	%f1,%f29
	clr	%o2
	fmovs	%f0,%f28
	clr	%o3
	fmovs	%f1,%f27
	clr	%o4
	fmovs	%f0,%f26
	clr	%o5
	fmovs	%f1,%f25
	clr	%o7
	fmovs	%f0,%f24
	clr	%l0
	fmovs	%f1,%f23
	clr	%l1
	fmovs	%f0,%f22
	clr	%l2
	fmovs	%f1,%f21
	clr	%l3
	fmovs	%f0,%f20
	clr	%l4
	fmovs	%f1,%f19
	clr	%l5
	fmovs	%f0,%f18
	clr	%l6
	fmovs	%f1,%f17
	clr	%l7
	fmovs	%f0,%f16
	clr	%i0
	fmovs	%f1,%f15
	clr	%i1
	fmovs	%f0,%f14
	clr	%i2
	fmovs	%f1,%f13
	clr	%i3
	fmovs	%f0,%f12
	clr	%i4
	fmovs	%f1,%f11
	clr	%i5
	fmovs	%f0,%f10
	clr	%g1
	fmovs	%f1,%f9
	clr	%g2
	fmovs	%f0,%f8
	clr	%g3
	fmovs	%f1,%f7
	clr	%g4
	fmovs	%f0,%f6
	clr	%g5
	fmovs	%f1,%f5
	fmovs	%f0,%f4
	fmovs	%f1,%f3
	fmovs	%f0,%f2

	add	%fp,BIAS,%i0	! return pointer to top of stack of the caller

	ret
	restore

! Source of zeros for the floating-point wipe above.
.zero:	.long	0x0,0x0
! Helper: %o0 += %o7, turning the offset passed in %o0 into an address.
.PIC.zero.up:
	retl
	add	%o0,%o7,%o0
#ifdef DEBUG
.global	walk_reg_wins
.type	walk_reg_wins,#function
walk_reg_wins:
#endif
! Recursive helper: opens a new register window, recurses unless a stop
! condition on %i7/%o7 is met, and clears the outs and locals of each
! window on the way back. The recursion depth is returned in %o0.
.walk.reg.wins:
	save	%sp,FRAME,%sp
	cmp	%i7,%o7
	be	2f
	clr	%o0
	cmp	%o7,0	! compiler never cleans %o7...
	be	1f	! could have been a leaf function...
	clr	%o1
	call	.walk.reg.wins
	nop
1:	clr	%o2
	clr	%o3
	clr	%o4
	clr	%o5
	clr	%o7
	clr	%l0
	clr	%l1
	clr	%l2
	clr	%l3
	clr	%l4
	clr	%l5
	clr	%l6
	clr	%l7
	add	%o0,1,%i0	! used for debugging
2:	ret
	restore
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu

! Atomically add a 32-bit value to a 32-bit word in memory.
! In:	%o0 = address of the word, %o1 = amount to add
! Out:	%o0 = new value, sign-extended
! On V9 the update is a compare-and-swap loop. On V8 (32-bit build
! only) the word itself doubles as a spin lock: a marker value is
! swapped in while the update is in progress.
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	wr	0, %y
	.word	0x95408000	!rd	%ccr,%o2, see comment above
	cmp	%o2,0x99
	be	.v9
	nop
	save	%sp,FRAME,%sp
	ba	.enter
	nop
#ifdef __sun
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define	YIELD_CPU	thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define	YIELD_CPU	sched_yield
#endif
/* Value stored in the word while a V8 update is in progress. */
#define	ATOMIC_LOCKED	-1
.spin:	call	YIELD_CPU
	nop
.enter:	ld	[%i0],%i2
	cmp	%i2,ATOMIC_LOCKED	! cheap check before the swap; must
	be	.spin			! test the same marker the swap stores
	mov	ATOMIC_LOCKED,%i2
	swap	[%i0],%i2		! take the lock, fetch old value
	cmp	%i2,ATOMIC_LOCKED	! somebody else got there first?
	be	.spin
	add	%i2,%i1,%i2		! new value (delay slot)
	stbar
	st	%i2,[%i0]		! publish new value, releasing the lock
	sra	%i2,%g0,%i0
	ret
	restore
.v9:
#endif
	ld	[%o0],%o2
1:	add	%o1,%o2,%o3
	.word	0xd7e2100a	!cas	[%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3
	bne	1b
	mov	%o3,%o2		! cas is always fetching to dest. register
	add	%o1,%o2,%o0	! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0	! we return signed int, remember?
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

! Read the V9 %tick counter.
! Out:	%o0 = %tick, %o1 = %tick shifted right by 32 bits;
!	%o0 = %o1 = 0 when the V8/V9 probe does not report V9.
.global	_sparcv9_rdtick
.align	32
_sparcv9_rdtick:
	subcc	%g0,1,%o0
	wr	0, %y
	.word	0x91408000	!rd	%ccr,%o0, see comment above
	cmp	%o0,0x99
	bne	.notick
	xor	%o0,%o0,%o0
	.word	0x91410000	!rd	%tick,%o0
	retl
	.word	0x93323020	!srlx	%o0,32,%o1
.notick:
	retl
	xor	%o1,%o1,%o1
.type	_sparcv9_rdtick,#function
.size	_sparcv9_rdtick,.-_sparcv9_rdtick

! Execute two VIS1 instructions and return.
! NOTE(review): presumably the caller catches the trap raised on CPUs
! without VIS1 - confirm against the caller.
.global	_sparcv9_vis1_probe
.align	8
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1
	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
.type	_sparcv9_vis1_probe,#function
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe

! Probe and instrument VIS1 instruction. Output is number of cycles it
! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
! is slow (documented to be 6 cycles on T2) and the core is in-order
! single-issue, it should be possible to distinguish Tx reliably...
! Observed return values are:
!
!	UltraSPARC IIe		7
!	UltraSPARC III		7
!	UltraSPARC T1		24
!	SPARC T4		65(*)
!
! (*)	result has lesser to do with VIS instruction latencies, rdtick
!	appears that slow, but it does the trick in sense that FP and
!	VIS code paths are still slower than integer-only ones.
!
! Numbers for T2 and SPARC64 V-VII are more than welcomed.
!
! It would be possible to detect specifically US-T1 by instrumenting
! fmul8ulx16, which is emulated on T1 and as such accounts for quite
! a lot of %tick-s, couple of thousand on Linux...
.global	_sparcv9_vis1_instrument
.align	8
_sparcv9_vis1_instrument:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x91410000	!rd	%tick,%o0
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x93410000	!rd	%tick,%o1
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x95410000	!rd	%tick,%o2
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x97410000	!rd	%tick,%o3
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	.word	0x99410000	!rd	%tick,%o4

	! calculate intervals
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! find minimum value; each annulled branch targets the instruction
	! right after its delay slot, so the mov runs only when %o0 is
	! the larger (unsigned) of the two
	cmp	%o0,%o1
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002	!bgu,a	%xcc,.+8
	mov	%o3,%o0

	retl
	nop
.type	_sparcv9_vis1_instrument,#function
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument

! Execute one VIS2 instruction (bshuffle) and return.
.global	_sparcv9_vis2_probe
.align	8
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
.type	_sparcv9_vis2_probe,#function
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe

! Execute one fused multiply-add (fmaddd) on zeroed operands and return.
.global	_sparcv9_fmadd_probe
.align	8
_sparcv9_fmadd_probe:
	.word	0x81b00d80	!fxor	%f0,%f0,%f0
	.word	0x85b08d82	!fxor	%f2,%f2,%f2
	retl
	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
.type	_sparcv9_fmadd_probe,#function
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe

! Out:	%o0 = contents of %asr26
.global	_sparcv9_rdcfr
.align	8
_sparcv9_rdcfr:
	retl
	.word	0x91468000	!rd	%asr26,%o0
.type	_sparcv9_rdcfr,#function
.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr

! Execute one VIS3 instruction (xmulx) and return.
.global	_sparcv9_vis3_probe
.align	8
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
.type	_sparcv9_vis3_probe,#function
.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe

! Out:	%o0 = result of the "random" instruction
.global	_sparcv9_random
.align	8
_sparcv9_random:
	retl
	.word	0x91b002a0	!random	%o0
.type	_sparcv9_random,#function
.size	_sparcv9_random,.-_sparcv9_random

! Execute one faesencx instruction and return.
.global	_sparcv9_fjaesx_probe
.align	8
_sparcv9_fjaesx_probe:
	.word	0x81b09206	!faesencx %f2,%f6,%f0
	retl
	nop
.type	_sparcv9_fjaesx_probe,#function
.size	_sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe

! Zero a memory region.
! In:	%o0 = start address, %o1 = length in bytes
! Lengths up to 14 are cleared byte by byte. Longer regions are first
! byte-cleared up to an 8-byte (V9) or 4-byte (V8) boundary, then
! cleared a word at a time, and the tail is finished byte by byte.
.global	OPENSSL_cleanse
.align	32
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lot
#else
	bgu	.Lot
#endif
	cmp	%o1,0
	bne	.Little
	nop
	retl
	nop

.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Little
	add	%o0,1,%o0
	retl
	nop
.align	32
.Lot:
#ifndef ABI64
	subcc	%g0,1,%g1
	! see above for explanation
	wr	0, %y
	.word	0x83408000	!rd	%ccr,%g1, see comment above
	cmp	%g1,0x99
	bne	.v8lot
	nop
#endif

.v9lot:	andcc	%o0,7,%g0
	bz	.v9aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v9lot
	add	%o0,1,%o0
.align	16,0x01000000
.v9aligned:
	.word	0xc0720000	!stx	%g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0
#ifdef ABI64
	.word	0x126ffffd	!bnz	%xcc,.v9aligned
#else
	.word	0x124ffffd	!bnz	%icc,.v9aligned
#endif
	add	%o0,8,%o0

	cmp	%o1,0
	bne	.Little
	nop
	retl
	nop
#ifndef ABI64
.v8lot:	andcc	%o0,3,%g0
	bz	.v8aligned
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.v8lot
	add	%o0,1,%o0
	nop
.v8aligned:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0
	bnz	.v8aligned
	add	%o0,4,%o0

	cmp	%o1,0
	bne	.Little
	nop
	retl
	nop
#endif
.type	OPENSSL_cleanse,#function
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

! Compare two byte strings without an early exit.
! In:	%o0, %o1 = addresses of the two buffers, %o2 = length in bytes
! Out:	%o0 = 0 when all bytes match or the length is 0, 1 otherwise
! Every byte pair is xor-ed and the results are or-ed into %g1, so the
! loop always runs over the whole length.
.global	CRYPTO_memcmp
.align	16
CRYPTO_memcmp:
	cmp	%o2,0
#ifdef ABI64
	beq,pn	%xcc,.Lno_data
#else
	beq	.Lno_data
#endif
	xor	%g1,%g1,%g1
	nop

.Loop_cmp:
	ldub	[%o0],%o3
	add	%o0,1,%o0
	ldub	[%o1],%o4
	add	%o1,1,%o1
	subcc	%o2,1,%o2
	xor	%o3,%o4,%o4
#ifdef ABI64
	bnz	%xcc,.Loop_cmp
#else
	bnz	.Loop_cmp
#endif
	or	%o4,%g1,%g1

	sub	%g0,%g1,%g1		! negative iff any byte differed
	srl	%g1,31,%g1		! move the sign bit down to bit 0
.Lno_data:
	retl
	mov	%g1,%o0
.type	CRYPTO_memcmp,#function
.size	CRYPTO_memcmp,.-CRYPTO_memcmp

! Store %tick deltas into an array of 32-bit words.
! In:	%o0 = output array, %o1 = number of entries
! Out:	%o0 = the entry count that was passed in
! Each iteration block-loads and block-stores the cache line holding
! the current entry, then adds the latest delta to it with cas.
.global	_sparcv9_vis1_instrument_bus
.align	8
_sparcv9_vis1_instrument_bus:
	mov	%o1,%o3					! save cnt
	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

.Loop:	.word	0x99410000	!rd	%tick,%o4
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick

	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
	subcc	%o1,1,%o1				! --$cnt
	bnz	.Loop
	add	%o0,4,%o0				! ++$out

	retl
	mov	%o3,%o0
.type	_sparcv9_vis1_instrument_bus,#function
.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus

! Variant of the routine above that advances to the next entry only
! when the latest %tick delta differs from the previous one.
! In:	%o0 = output array, %o1 = number of entries, %o2 = maximum
!	number of probes
! Out:	%o0 = number of entries advanced past
.global	_sparcv9_vis1_instrument_bus2
.align	8
_sparcv9_vis1_instrument_bus2:
	mov	%o1,%o3					! save cnt
	sll	%o1,2,%o1				! cnt*=4

	.word	0x99410000	!rd	%tick,%o4	! tick
	mov	%o4,%o5					! lasttick = tick
	set	0,%g4					! diff

	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	mov	%g4,%g5					! lastdiff=diff
.Loop2:
	andn	%o0,63,%g1
	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040	!membar	#Sync
	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040	!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c	!cas	[%o0],%o4,%g4

	subcc	%o2,1,%o2				! --max
	bz	.Ldone2
	nop

	.word	0x99410000	!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4				! diff=tick-lasttick
	mov	%o4,%o5					! lasttick=tick
	cmp	%g4,%g5
	mov	%g4,%g5					! lastdiff=diff

	! %g1 becomes 4 when diff differs from lastdiff, 0 otherwise
	.word	0x83408000	!rd	%ccr,%g1
	and	%g1,4,%g1				! isolate zero flag
	xor	%g1,4,%g1				! flip zero flag

	subcc	%o1,%g1,%o1				! conditional --$cnt
	bnz	.Loop2
	add	%o0,%g1,%o0				! conditional ++$out

.Ldone2:
	srl	%o1,2,%o1
	retl
	sub	%o3,%o1,%o0
.type	_sparcv9_vis1_instrument_bus2,#function
.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2

! Arrange for OPENSSL_cpuid_setup to be called from the .init section.
.section	".init",#alloc,#execinstr
	call	OPENSSL_cpuid_setup
	nop