; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; Test replications of a byte-swapped scalar memory value.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s

declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

; Test a v8i16 replicating load with no offset.
; The scalar load, scalar bswap and splat are expected to fold into a single
; VLBRREPH.  The don't-care vector operands use poison rather than undef.
define <8 x i16> @f1(ptr %ptr) {
; CHECK-LABEL: f1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlbrreph %v24, 0(%r2)
; CHECK-NEXT:    br %r14
  %scalar = load i16, ptr %ptr
  %swap = call i16 @llvm.bswap.i16(i16 %scalar)
  %val = insertelement <8 x i16> poison, i16 %swap, i32 0
  %ret = shufflevector <8 x i16> %val, <8 x i16> poison,
                       <8 x i32> zeroinitializer
  ret <8 x i16> %ret
}

; Test a v8i16 replicating load with the maximum in-range offset.
; Element 2047 of an i16 array is byte offset 4094, which is still encoded
; directly as the displacement of the load.
define <8 x i16> @f2(ptr %base) {
; CHECK-LABEL: f2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlbrreph %v24, 4094(%r2)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i16, ptr %base, i64 2047
  %scalar = load i16, ptr %ptr
  %swap = call i16 @llvm.bswap.i16(i16 %scalar)
  %val = insertelement <8 x i16> poison, i16 %swap, i32 0
  %ret = shufflevector <8 x i16> %val, <8 x i16> poison,
                       <8 x i32> zeroinitializer
  ret <8 x i16> %ret
}

; Test a v8i16 replicating load with the first out-of-range offset.
; Element 2048 of an i16 array is byte offset 4096; the expected code adds it
; to the base register with AGHI and then loads with a zero displacement.
define <8 x i16> @f3(ptr %base) {
; CHECK-LABEL: f3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r2, 4096
; CHECK-NEXT:    vlbrreph %v24, 0(%r2)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i16, ptr %base, i64 2048
  %scalar = load i16, ptr %ptr
  %swap = call i16 @llvm.bswap.i16(i16 %scalar)
  %val = insertelement <8 x i16> poison, i16 %swap, i32 0
  %ret = shufflevector <8 x i16> %val, <8 x i16> poison,
                       <8 x i32> zeroinitializer
  ret <8 x i16> %ret
}

; Test a v8i16 replicating load using a vector bswap.
; Here the splat happens first and the byte swap is applied to the whole
; vector; the same single VLBRREPH is expected.
define <8 x i16> @f4(ptr %ptr) {
; CHECK-LABEL: f4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlbrreph %v24, 0(%r2)
; CHECK-NEXT:    br %r14
  %scalar = load i16, ptr %ptr
  %val = insertelement <8 x i16> poison, i16 %scalar, i32 0
  %rep = shufflevector <8 x i16> %val, <8 x i16> poison,
                       <8 x i32> zeroinitializer
  %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %rep)
  ret <8 x i16> %ret
}

; Test a v4i32 replicating load with no offset.
define <4 x i32> @f5(ptr %ptr) {
; CHECK-LABEL: f5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlbrrepf %v24, 0(%r2)
; CHECK-NEXT:    br %r14
  %scalar = load i32, ptr %ptr
  %swap = call i32 @llvm.bswap.i32(i32 %scalar)
  %val = insertelement <4 x i32> poison, i32 %swap, i32 0
  %ret = shufflevector <4 x i32> %val, <4 x i32> poison,
                       <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Test a v4i32 replicating load with the maximum in-range offset.
; Element 1023 of an i32 array is byte offset 4092, which is still encoded
; directly as the displacement of the load.
define <4 x i32> @f6(ptr %base) {
; CHECK-LABEL: f6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlbrrepf %v24, 4092(%r2)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i32, ptr %base, i64 1023
  %scalar = load i32, ptr %ptr
  %swap = call i32 @llvm.bswap.i32(i32 %scalar)
  %val = insertelement <4 x i32> poison, i32 %swap, i32 0
  %ret = shufflevector <4 x i32> %val, <4 x i32> poison,
                       <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Test a v4i32 replicating load with the first out-of-range offset.
; Element 1024 of an i32 array is byte offset 4096; the expected code adds it
; to the base register with AGHI and then loads with a zero displacement.
define <4 x i32> @f7(ptr %base) {
; CHECK-LABEL: f7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r2, 4096
; CHECK-NEXT:    vlbrrepf %v24, 0(%r2)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i32, ptr %base, i64 1024
  %scalar = load i32, ptr %ptr
  %swap = call i32 @llvm.bswap.i32(i32 %scalar)
  %val = insertelement <4 x i32> poison, i32 %swap, i32 0
  %ret = shufflevector <4 x i32> %val, <4 x i32> poison,
                       <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Test a v4i32 replicating load using a vector bswap.
; Here the splat happens first and the byte swap is applied to the whole
; vector; the same single VLBRREPF is expected.
define <4 x i32> @f8(ptr %ptr) {
; CHECK-LABEL: f8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlbrrepf %v24, 0(%r2)
; CHECK-NEXT:    br %r14
  %scalar = load i32, ptr %ptr
  %val = insertelement <4 x i32> poison, i32 %scalar, i32 0
  %rep = shufflevector <4 x i32> %val, <4 x i32> poison,
                       <4 x i32> zeroinitializer
  %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %rep)
  ret <4 x i32> %ret
}

; Test a v2i64 replicating load with no offset.
define <2 x i64> @f9(ptr %ptr) {
; CHECK-LABEL: f9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlbrrepg %v24, 0(%r2)
; CHECK-NEXT:    br %r14
  %scalar = load i64, ptr %ptr
  %swap = call i64 @llvm.bswap.i64(i64 %scalar)
  %val = insertelement <2 x i64> poison, i64 %swap, i32 0
  %ret = shufflevector <2 x i64> %val, <2 x i64> poison,
                       <2 x i32> zeroinitializer
  ret <2 x i64> %ret
}

; Test a v2i64 replicating load with the maximum in-range offset.
; Element 511 of an i64 array is byte offset 4088, which is still encoded
; directly as the displacement of the load.
define <2 x i64> @f10(ptr %base) {
; CHECK-LABEL: f10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlbrrepg %v24, 4088(%r2)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i64, ptr %base, i64 511
  %scalar = load i64, ptr %ptr
  %swap = call i64 @llvm.bswap.i64(i64 %scalar)
  %val = insertelement <2 x i64> poison, i64 %swap, i32 0
  %ret = shufflevector <2 x i64> %val, <2 x i64> poison,
                       <2 x i32> zeroinitializer
  ret <2 x i64> %ret
}

; Test a v2i64 replicating load with the first out-of-range offset.
; Element 512 of an i64 array is byte offset 4096; the expected code adds it
; to the base register with AGHI and then loads with a zero displacement.
define <2 x i64> @f11(ptr %base) {
; CHECK-LABEL: f11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r2, 4096
; CHECK-NEXT:    vlbrrepg %v24, 0(%r2)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i64, ptr %base, i64 512
  %scalar = load i64, ptr %ptr
  %swap = call i64 @llvm.bswap.i64(i64 %scalar)
  %val = insertelement <2 x i64> poison, i64 %swap, i32 0
  %ret = shufflevector <2 x i64> %val, <2 x i64> poison,
                       <2 x i32> zeroinitializer
  ret <2 x i64> %ret
}

; Test a v2i64 replicating load using a vector bswap.
; Here the splat happens first and the byte swap is applied to the whole
; vector; the same single VLBRREPG is expected.
define <2 x i64> @f12(ptr %ptr) {
; CHECK-LABEL: f12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlbrrepg %v24, 0(%r2)
; CHECK-NEXT:    br %r14
  %scalar = load i64, ptr %ptr
  %val = insertelement <2 x i64> poison, i64 %scalar, i32 0
  %rep = shufflevector <2 x i64> %val, <2 x i64> poison,
                       <2 x i32> zeroinitializer
  %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %rep)
  ret <2 x i64> %ret
}

; Test a v8i16 replicating load with an index.
; The variable index is scaled to bytes with SLLG (shift left by 1) and used
; as the index register, while the constant 1023-element offset appears as the
; 2046-byte displacement of the load.
define <8 x i16> @f13(ptr %base, i64 %index) {
; CHECK-LABEL: f13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sllg %r1, %r3, 1
; CHECK-NEXT:    vlbrreph %v24, 2046(%r1,%r2)
; CHECK-NEXT:    br %r14
  %ptr1 = getelementptr i16, ptr %base, i64 %index
  %ptr = getelementptr i16, ptr %ptr1, i64 1023
  %scalar = load i16, ptr %ptr
  %swap = call i16 @llvm.bswap.i16(i16 %scalar)
  %val = insertelement <8 x i16> poison, i16 %swap, i32 0
  %ret = shufflevector <8 x i16> %val, <8 x i16> poison,
                       <8 x i32> zeroinitializer
  ret <8 x i16> %ret
}
