; Test stores of element-swapped vectors, i.e. vectors whose elements are
; stored in reverse element order.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s

; Test v16i8 stores.
define void @f1(<16 x i8> %val, ptr %ptr) {
; CHECK-LABEL: f1:
; CHECK: vstbrq %v24, 0(%r2)
; CHECK: br %r14
  ; The mask reverses all 16 elements and selects only from %val, so the
  ; second shuffle operand is an unused placeholder.
  %swap = shufflevector <16 x i8> %val, <16 x i8> poison,
                        <16 x i32> <i32 15, i32 14, i32 13, i32 12,
                                    i32 11, i32 10, i32 9, i32 8,
                                    i32 7, i32 6, i32 5, i32 4,
                                    i32 3, i32 2, i32 1, i32 0>
  store <16 x i8> %swap, ptr %ptr
  ret void
}

; Test v8i16 stores.
define void @f2(<8 x i16> %val, ptr %ptr) {
; CHECK-LABEL: f2:
; CHECK: vsterh %v24, 0(%r2)
; CHECK: br %r14
  ; Reverse the eight halfword elements of %val before storing.
  %swap = shufflevector <8 x i16> %val, <8 x i16> poison,
                        <8 x i32> <i32 7, i32 6, i32 5, i32 4,
                                   i32 3, i32 2, i32 1, i32 0>
  store <8 x i16> %swap, ptr %ptr
  ret void
}

; Test v4i32 stores.
define void @f3(<4 x i32> %val, ptr %ptr) {
; CHECK-LABEL: f3:
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  ; Reverse the four word elements of %val before storing.
  %swap = shufflevector <4 x i32> %val, <4 x i32> poison,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, ptr %ptr
  ret void
}

; Test v2i64 stores.
define void @f4(<2 x i64> %val, ptr %ptr) {
; CHECK-LABEL: f4:
; CHECK: vsterg %v24, 0(%r2)
; CHECK: br %r14
  ; Swap the two doubleword elements of %val before storing.
  %swap = shufflevector <2 x i64> %val, <2 x i64> poison,
                        <2 x i32> <i32 1, i32 0>
  store <2 x i64> %swap, ptr %ptr
  ret void
}

; Test v4f32 stores.
define void @f5(<4 x float> %val, ptr %ptr) {
; CHECK-LABEL: f5:
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  ; Same mask as the v4i32 case; the expected instruction is the same too.
  %swap = shufflevector <4 x float> %val, <4 x float> poison,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x float> %swap, ptr %ptr
  ret void
}

; Test v2f64 stores.
define void @f6(<2 x double> %val, ptr %ptr) {
; CHECK-LABEL: f6:
; CHECK: vsterg %v24, 0(%r2)
; CHECK: br %r14
  ; Swap the two double elements of %val; the mask selects only from %val,
  ; so the second shuffle operand is an unused placeholder.
  %swap = shufflevector <2 x double> %val, <2 x double> poison,
                        <2 x i32> <i32 1, i32 0>
  store <2 x double> %swap, ptr %ptr
  ret void
}

; Test the highest aligned in-range offset.
define void @f7(<4 x i32> %val, ptr %base) {
; CHECK-LABEL: f7:
; CHECK: vsterf %v24, 4080(%r2)
; CHECK: br %r14
  ; 255 elements of <4 x i32> (16 bytes each) give a byte offset of 4080.
  %ptr = getelementptr <4 x i32>, ptr %base, i64 255
  %swap = shufflevector <4 x i32> %val, <4 x i32> poison,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, ptr %ptr
  ret void
}

; Test the highest unaligned in-range offset.
define void @f8(<4 x i32> %val, ptr %base) {
; CHECK-LABEL: f8:
; CHECK: vsterf %v24, 4095(%r2)
; CHECK: br %r14
  ; Byte-granular offset of 4095; the store is marked align 1 to match.
  %addr = getelementptr i8, ptr %base, i64 4095
  %swap = shufflevector <4 x i32> %val, <4 x i32> poison,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, ptr %addr, align 1
  ret void
}

; Test the next offset up, which requires separate address logic.
define void @f9(<4 x i32> %val, ptr %base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  ; 256 elements of 16 bytes give a byte offset of 4096, which the checks
  ; expect to be added to the base register separately.
  %ptr = getelementptr <4 x i32>, ptr %base, i64 256
  %swap = shufflevector <4 x i32> %val, <4 x i32> poison,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, ptr %ptr
  ret void
}

; Test negative offsets, which also require separate address logic.
define void @f10(<4 x i32> %val, ptr %base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, -16
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  ; One 16-byte element below %base, i.e. a byte offset of -16.
  %ptr = getelementptr <4 x i32>, ptr %base, i64 -1
  %swap = shufflevector <4 x i32> %val, <4 x i32> poison,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, ptr %ptr
  ret void
}

; Check that indexes are allowed.
define void @f11(<4 x i32> %val, ptr %base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: vsterf %v24, 0(%r3,%r2)
; CHECK: br %r14
  ; The address is %base plus a variable byte index; the checks expect both
  ; registers to appear directly in the store's address operand.
  %addr = getelementptr i8, ptr %base, i64 %index
  ; The mask reverses the four elements and selects only from %val, so the
  ; second shuffle operand is an unused placeholder.
  %swap = shufflevector <4 x i32> %val, <4 x i32> poison,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, ptr %addr, align 1
  ret void
}