; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; Test vector extraction of byte-swapped value to memory.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s

declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

; Test v8i16 extraction from the first element.
; The extract + scalar bswap + store sequence is expected to become a
; single vstebrh with element index 0.
define void @f1(<8 x i16> %val, ptr %ptr) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrh %v24, 0(%r2), 0
; CHECK-NEXT: br %r14
  %element = extractelement <8 x i16> %val, i32 0
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, ptr %ptr
  ret void
}

; Test v8i16 extraction from the last element.
; Index 7 is the highest index of an <8 x i16> vector.
define void @f2(<8 x i16> %val, ptr %ptr) {
; CHECK-LABEL: f2:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrh %v24, 0(%r2), 7
; CHECK-NEXT: br %r14
  %element = extractelement <8 x i16> %val, i32 7
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, ptr %ptr
  ret void
}

; Test v8i16 extraction of an invalid element. This must compile,
; but we don't care what it does.
; Index 8 is one past the last element of an <8 x i16> vector; the expected
; output below contains only the return.
define void @f3(<8 x i16> %val, ptr %ptr) {
; CHECK-LABEL: f3:
; CHECK: # %bb.0:
; CHECK-NEXT: br %r14
  %element = extractelement <8 x i16> %val, i32 8
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, ptr %ptr
  ret void
}

; Test v8i16 extraction with the highest in-range offset.
define void @f4(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrh %v24, 4094(%r2), 5
; CHECK-NEXT: br %r14
  ; i16 index 2047 is byte offset 4094, which the expected output folds into
  ; the store's displacement.
  %ptr = getelementptr i16, ptr %base, i32 2047
  %element = extractelement <8 x i16> %val, i32 5
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, ptr %ptr
  ret void
}

; Test v8i16 extraction with the first out-of-range offset.
; i16 index 2048 is byte offset 4096; the expected output adds it to the base
; register with aghi and stores at displacement 0.
define void @f5(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: f5:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r2, 4096
; CHECK-NEXT: vstebrh %v24, 0(%r2), 1
; CHECK-NEXT: br %r14
  %ptr = getelementptr i16, ptr %base, i32 2048
  %element = extractelement <8 x i16> %val, i32 1
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, ptr %ptr
  ret void
}

; Test v8i16 extraction from a variable element.
; With a non-constant index the expected output is a vlgvh into a GPR
; followed by strvh, rather than vstebrh.
define void @f6(<8 x i16> %val, ptr %ptr, i32 %index) {
; CHECK-LABEL: f6:
; CHECK: # %bb.0:
; CHECK-NEXT: vlgvh %r0, %v24, 0(%r3)
; CHECK-NEXT: strvh %r0, 0(%r2)
; CHECK-NEXT: br %r14
  %element = extractelement <8 x i16> %val, i32 %index
  %swap = call i16 @llvm.bswap.i16(i16 %element)
  store i16 %swap, ptr %ptr
  ret void
}

; Test v8i16 extraction using a vector bswap.
; Here the whole vector is swapped before the extract; the expected output is
; the same single vstebrh as for the scalar-bswap form in @f1.
define void @f7(<8 x i16> %val, ptr %ptr) {
; CHECK-LABEL: f7:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrh %v24, 0(%r2), 0
; CHECK-NEXT: br %r14
  %swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
  %element = extractelement <8 x i16> %swap, i32 0
  store i16 %element, ptr %ptr
  ret void
}

; Test v4i32 extraction from the first element.
; The extract + scalar bswap + store sequence is expected to become a
; single vstebrf with element index 0.
define void @f8(<4 x i32> %val, ptr %ptr) {
; CHECK-LABEL: f8:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrf %v24, 0(%r2), 0
; CHECK-NEXT: br %r14
  %element = extractelement <4 x i32> %val, i32 0
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, ptr %ptr
  ret void
}

; Test v4i32 extraction from the last element.
define void @f9(<4 x i32> %val, ptr %ptr) {
; CHECK-LABEL: f9:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrf %v24, 0(%r2), 3
; CHECK-NEXT: br %r14
  ; Index 3 is the highest index of a <4 x i32> vector.
  %element = extractelement <4 x i32> %val, i32 3
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, ptr %ptr
  ret void
}

; Test v4i32 extraction of an invalid element. This must compile,
; but we don't care what it does.
; Index 4 is one past the last element of a <4 x i32> vector; the expected
; output below contains only the return.
define void @f10(<4 x i32> %val, ptr %ptr) {
; CHECK-LABEL: f10:
; CHECK: # %bb.0:
; CHECK-NEXT: br %r14
  %element = extractelement <4 x i32> %val, i32 4
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, ptr %ptr
  ret void
}

; Test v4i32 extraction with the highest in-range offset.
; i32 index 1023 is byte offset 4092, which the expected output folds into
; the store's displacement.
define void @f11(<4 x i32> %val, ptr %base) {
; CHECK-LABEL: f11:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrf %v24, 4092(%r2), 2
; CHECK-NEXT: br %r14
  %ptr = getelementptr i32, ptr %base, i32 1023
  %element = extractelement <4 x i32> %val, i32 2
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, ptr %ptr
  ret void
}

; Test v4i32 extraction with the first out-of-range offset.
; i32 index 1024 is byte offset 4096; the expected output adds it to the base
; register with aghi and stores at displacement 0.
define void @f12(<4 x i32> %val, ptr %base) {
; CHECK-LABEL: f12:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r2, 4096
; CHECK-NEXT: vstebrf %v24, 0(%r2), 1
; CHECK-NEXT: br %r14
  %ptr = getelementptr i32, ptr %base, i32 1024
  %element = extractelement <4 x i32> %val, i32 1
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, ptr %ptr
  ret void
}

; Test v4i32 extraction from a variable element.
define void @f13(<4 x i32> %val, ptr %ptr, i32 %index) {
; CHECK-LABEL: f13:
; CHECK: # %bb.0:
; CHECK-NEXT: vlgvf %r0, %v24, 0(%r3)
; CHECK-NEXT: strv %r0, 0(%r2)
; CHECK-NEXT: br %r14
  ; With a non-constant index the expected output is a vlgvf into a GPR
  ; followed by strv, rather than vstebrf.
  %element = extractelement <4 x i32> %val, i32 %index
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, ptr %ptr
  ret void
}

; Test v4i32 extraction using a vector bswap.
; Here the whole vector is swapped before the extract; the expected output is
; the same single vstebrf as for the scalar-bswap form in @f8.
define void @f14(<4 x i32> %val, ptr %ptr) {
; CHECK-LABEL: f14:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrf %v24, 0(%r2), 0
; CHECK-NEXT: br %r14
  %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
  %element = extractelement <4 x i32> %swap, i32 0
  store i32 %element, ptr %ptr
  ret void
}

; Test v2i64 extraction from the first element.
; The extract + scalar bswap + store sequence is expected to become a
; single vstebrg with element index 0.
define void @f15(<2 x i64> %val, ptr %ptr) {
; CHECK-LABEL: f15:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrg %v24, 0(%r2), 0
; CHECK-NEXT: br %r14
  %element = extractelement <2 x i64> %val, i32 0
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, ptr %ptr
  ret void
}

; Test v2i64 extraction from the last element.
; Index 1 is the highest index of a <2 x i64> vector.
define void @f16(<2 x i64> %val, ptr %ptr) {
; CHECK-LABEL: f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrg %v24, 0(%r2), 1
; CHECK-NEXT: br %r14
  %element = extractelement <2 x i64> %val, i32 1
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, ptr %ptr
  ret void
}

; Test v2i64 extraction of an invalid element. This must compile,
; but we don't care what it does.
; Index 2 is one past the last element of a <2 x i64> vector; the expected
; output below contains only the return.
define void @f17(<2 x i64> %val, ptr %ptr) {
; CHECK-LABEL: f17:
; CHECK: # %bb.0:
; CHECK-NEXT: br %r14
  %element = extractelement <2 x i64> %val, i32 2
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, ptr %ptr
  ret void
}

; Test v2i64 extraction with the highest in-range offset.
define void @f18(<2 x i64> %val, ptr %base) {
; CHECK-LABEL: f18:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrg %v24, 4088(%r2), 1
; CHECK-NEXT: br %r14
  ; i64 index 511 is byte offset 4088, which the expected output folds into
  ; the store's displacement.
  %ptr = getelementptr i64, ptr %base, i32 511
  %element = extractelement <2 x i64> %val, i32 1
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, ptr %ptr
  ret void
}

; Test v2i64 extraction with the first out-of-range offset.
; i64 index 512 is byte offset 4096; the expected output adds it to the base
; register with aghi and stores at displacement 0.
define void @f19(<2 x i64> %val, ptr %base) {
; CHECK-LABEL: f19:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r2, 4096
; CHECK-NEXT: vstebrg %v24, 0(%r2), 0
; CHECK-NEXT: br %r14
  %ptr = getelementptr i64, ptr %base, i32 512
  %element = extractelement <2 x i64> %val, i32 0
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, ptr %ptr
  ret void
}

; Test v2i64 extraction from a variable element.
; With a non-constant index the expected output is a vlgvg into a GPR
; followed by strvg, rather than vstebrg.
define void @f20(<2 x i64> %val, ptr %ptr, i32 %index) {
; CHECK-LABEL: f20:
; CHECK: # %bb.0:
; CHECK-NEXT: vlgvg %r0, %v24, 0(%r3)
; CHECK-NEXT: strvg %r0, 0(%r2)
; CHECK-NEXT: br %r14
  %element = extractelement <2 x i64> %val, i32 %index
  %swap = call i64 @llvm.bswap.i64(i64 %element)
  store i64 %swap, ptr %ptr
  ret void
}

; Test v2i64 extraction using a vector bswap.
; Here the whole vector is swapped before the extract; the expected output is
; a single vstebrg with element index 0.
define void @f21(<2 x i64> %val, ptr %ptr) {
; CHECK-LABEL: f21:
; CHECK: # %bb.0:
; CHECK-NEXT: vstebrg %v24, 0(%r2), 0
; CHECK-NEXT: br %r14
  %swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
  %element = extractelement <2 x i64> %swap, i32 0
  store i64 %element, ptr %ptr
  ret void
}