; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s --check-prefix=AVX

; Codegen tests for the x86 SHA intrinsics.
;
; Each intrinsic is exercised twice: an "rr" function that passes both vector
; operands by value, and an "rm" function that loads the second operand from
; %b, where the expected assembly uses (%rdi) directly as the memory operand.
;
; The first llc invocation enables only +sha. The second adds +avx2 and
; --show-mc-encoding, so its expected output also pins the instruction bytes.
; Every function's output differs between the two configurations, so each
; invocation is checked with its own prefix only and no shared prefix is used.

declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone

; sha1rnds4 with an immediate of 3, register source operand.
define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; SSE-LABEL: test_sha1rnds4rr:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1rnds4 $3, %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1rnds4rr:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x0f,0x3a,0xcc,0xc1,0x03]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
  ret <4 x i32> %0
}

; sha1rnds4 with an immediate of 3, source operand loaded from memory.
define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
; SSE-LABEL: test_sha1rnds4rm:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1rnds4rm:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = load <4 x i32>, ptr %b
  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone

; sha1nexte, register source operand.
define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; SSE-LABEL: test_sha1nexterr:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1nexte %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1nexterr:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1nexte %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc8,0xc1]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; sha1nexte, source operand loaded from memory.
define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
; SSE-LABEL: test_sha1nexterm:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1nexte (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1nexterm:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1nexte (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc8,0x07]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = load <4 x i32>, ptr %b
  %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone

; sha1msg1, register source operand.
define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; SSE-LABEL: test_sha1msg1rr:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1msg1 %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1msg1rr:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc9,0xc1]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; sha1msg1, source operand loaded from memory.
define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
; SSE-LABEL: test_sha1msg1rm:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1msg1 (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1msg1rm:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc9,0x07]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = load <4 x i32>, ptr %b
  %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone

; sha1msg2, register source operand.
define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; SSE-LABEL: test_sha1msg2rr:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1msg2 %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1msg2rr:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xca,0xc1]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; sha1msg2, source operand loaded from memory.
define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
; SSE-LABEL: test_sha1msg2rm:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1msg2 (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1msg2rm:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xca,0x07]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = load <4 x i32>, ptr %b
  %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

; sha256rnds2 takes a third vector operand (%c). The expected output copies it
; into %xmm0 ahead of the instruction, accumulates into a scratch register, and
; copies the result back to %xmm0 for the return.
define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
; SSE-LABEL: test_sha256rnds2rr:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256rnds2rr:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
; AVX-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; AVX-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x0f,0x38,0xcb,0xd9]
; AVX-NEXT:    vmovaps %xmm3, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc3]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  ret <4 x i32> %0
}

; Same as above with the second operand loaded from memory; %c arrives in
; %xmm1 here and is still expected to be copied into %xmm0 first.
define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable {
; SSE-LABEL: test_sha256rnds2rm:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256rnds2rm:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm2 # encoding: [0xc5,0xf8,0x28,0xd0]
; AVX-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; AVX-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x0f,0x38,0xcb,0x17]
; AVX-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = load <4 x i32>, ptr %b
  %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone

; sha256msg1, register source operand.
define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; SSE-LABEL: test_sha256msg1rr:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha256msg1 %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256msg1rr:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha256msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcc,0xc1]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; sha256msg1, source operand loaded from memory.
define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
; SSE-LABEL: test_sha256msg1rm:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha256msg1 (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256msg1rm:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha256msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcc,0x07]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = load <4 x i32>, ptr %b
  %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone

; sha256msg2, register source operand.
define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; SSE-LABEL: test_sha256msg2rr:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha256msg2 %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256msg2rr:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha256msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcd,0xc1]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}

; sha256msg2, source operand loaded from memory.
define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
; SSE-LABEL: test_sha256msg2rm:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha256msg2 (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256msg2rm:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha256msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcd,0x07]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = load <4 x i32>, ptr %b
  %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}

; Make sure we don't forget that sha instructions have no VEX equivalents and
; thus don't zero YMM/ZMM. The shufflevector widens the 128-bit result to 256
; bits with zeros in the upper half, so the +avx2 output must keep an explicit
; vmovaps %xmm0, %xmm0 after the sha1rnds4 instead of dropping it.
define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwtable {
; SSE-LABEL: test_sha1rnds4_zero_extend:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1rnds4_zero_extend:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03]
; AVX-NEXT:    vmovaps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc0]
; AVX-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = load <4 x i32>, ptr %b
  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
  %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}