; Test loads of element-reversed vectors.
;
; Each function loads a vector and reverses the order of its elements with a
; shufflevector whose mask only selects from the loaded operand.  The load and
; the shuffle are expected to be selected as one element-reversing load.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s

; Test v16i8 loads.  Reversing all sixteen bytes is the same as byte-reversing
; the whole quadword, so the expected instruction is vlbrq.
define <16 x i8> @f1(ptr %ptr) {
; CHECK-LABEL: f1:
; CHECK: vlbrq %v24, 0(%r2)
; CHECK: br %r14
  %load = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %load, <16 x i8> poison,
                       <16 x i32> <i32 15, i32 14, i32 13, i32 12,
                                   i32 11, i32 10, i32 9, i32 8,
                                   i32 7, i32 6, i32 5, i32 4,
                                   i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %ret
}

; Test v8i16 loads.
define <8 x i16> @f2(ptr %ptr) {
; CHECK-LABEL: f2:
; CHECK: vlerh %v24, 0(%r2)
; CHECK: br %r14
  %load = load <8 x i16>, ptr %ptr
  %ret = shufflevector <8 x i16> %load, <8 x i16> poison,
                       <8 x i32> <i32 7, i32 6, i32 5, i32 4,
                                  i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %ret
}

; Test v4i32 loads.
define <4 x i32> @f3(ptr %ptr) {
; CHECK-LABEL: f3:
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
  %load = load <4 x i32>, ptr %ptr
  %ret = shufflevector <4 x i32> %load, <4 x i32> poison,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}

; Test v2i64 loads.
define <2 x i64> @f4(ptr %ptr) {
; CHECK-LABEL: f4:
; CHECK: vlerg %v24, 0(%r2)
; CHECK: br %r14
  %load = load <2 x i64>, ptr %ptr
  %ret = shufflevector <2 x i64> %load, <2 x i64> poison,
                       <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %ret
}

; Test v4f32 loads.  The expected instruction is the same as for v4i32.
define <4 x float> @f5(ptr %ptr) {
; CHECK-LABEL: f5:
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
  %load = load <4 x float>, ptr %ptr
  %ret = shufflevector <4 x float> %load, <4 x float> poison,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %ret
}

; Test v2f64 loads.  The expected instruction is the same as for v2i64.
define <2 x double> @f6(ptr %ptr) {
; CHECK-LABEL: f6:
; CHECK: vlerg %v24, 0(%r2)
; CHECK: br %r14
  %load = load <2 x double>, ptr %ptr
  %ret = shufflevector <2 x double> %load, <2 x double> poison,
                       <2 x i32> <i32 1, i32 0>
  ret <2 x double> %ret
}

; Test the highest aligned in-range offset.  Element 255 of a 16-byte vector
; type is at byte offset 255 * 16 = 4080.
define <4 x i32> @f7(ptr %base) {
; CHECK-LABEL: f7:
; CHECK: vlerf %v24, 4080(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, ptr %base, i64 255
  %load = load <4 x i32>, ptr %ptr
  %ret = shufflevector <4 x i32> %load, <4 x i32> poison,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}

; Test the highest unaligned in-range offset.  The address is formed with a
; byte-granular getelementptr so that the displacement is exactly 4095.
define <4 x i32> @f8(ptr %base) {
; CHECK-LABEL: f8:
; CHECK: vlerf %v24, 4095(%r2)
; CHECK: br %r14
  %addr = getelementptr i8, ptr %base, i64 4095
  %load = load <4 x i32>, ptr %addr
  %ret = shufflevector <4 x i32> %load, <4 x i32> poison,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}

; Test the next offset up, which requires separate address logic.
; Element 256 is at byte offset 256 * 16 = 4096, which is expected to be added
; to the base register first, leaving a zero displacement on the load.
define <4 x i32> @f9(ptr %base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, ptr %base, i64 256
  %load = load <4 x i32>, ptr %ptr
  %ret = shufflevector <4 x i32> %load, <4 x i32> poison,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}

; Test negative offsets, which also require separate address logic.
; Element -1 is at byte offset -16, which is expected to be added to the base
; register first, leaving a zero displacement on the load.
define <4 x i32> @f10(ptr %base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, -16
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, ptr %base, i64 -1
  %load = load <4 x i32>, ptr %ptr
  %ret = shufflevector <4 x i32> %load, <4 x i32> poison,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}

; Check that indexes are allowed.
; The byte offset is a run-time value passed as the second argument; it is
; expected to be used directly as the index register of the load's address
; (%r3 alongside the base in %r2) with no separate address arithmetic.
define <4 x i32> @f11(ptr %base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: vlerf %v24, 0(%r3,%r2)
; CHECK: br %r14
  %addr = getelementptr i8, ptr %base, i64 %index
  %load = load <4 x i32>, ptr %addr
  %ret = shufflevector <4 x i32> %load, <4 x i32> poison,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}