; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
;
; Verify that stores of replicated values are emitted using the vector
; replicate type instructions.

;; Replicated registers

; A byte replicated twice: multiplying the zero-extended byte by 257 (0x0101)
; copies it into both bytes of the i16 that is stored.
define void @fun_2x1b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_2x1b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlrepb %v0, 0(%r2)
; CHECK-NEXT:    vsteh %v0, 0(%r3), 0
; CHECK-NEXT:    br %r14
  %Elt = load i8, ptr %Src
  %Wide = zext i8 %Elt to i16
  %Rep = mul i16 %Wide, 257
  store i16 %Rep, ptr %Dst
  ret void
}

; The same replicated value (byte x 0x01010101) is stored to two destinations.
define void @fun_4x1b(ptr %Src, ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_4x1b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlrepb %v0, 0(%r2)
; CHECK-NEXT:    vstef %v0, 0(%r3), 0
; CHECK-NEXT:    vstef %v0, 0(%r4), 0
; CHECK-NEXT:    br %r14
  %Elt = load i8, ptr %Src
  %Wide = zext i8 %Elt to i32
  %Rep = mul i32 %Wide, 16843009
  store i32 %Rep, ptr %Dst
  store i32 %Rep, ptr %Dst2
  ret void
}

; A byte replicated eight times (multiplier 0x0101010101010101) into an i64.
define void @fun_8x1b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_8x1b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlrepb %v0, 0(%r2)
; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
; CHECK-NEXT:    br %r14
  %Elt = load i8, ptr %Src
  %Wide = zext i8 %Elt to i64
  %Rep = mul i64 %Wide, 72340172838076673
  store i64 %Rep, ptr %Dst
  ret void
}

; A second truncated store of same value.
define void @fun_8x1b_4x1b(ptr %Src, ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_8x1b_4x1b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlrepb %v0, 0(%r2)
; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
; CHECK-NEXT:    vstef %v0, 0(%r4), 0
; CHECK-NEXT:    br %r14
  %Elt = load i8, ptr %Src
  %Wide = zext i8 %Elt to i64
  %Rep = mul i64 %Wide, 72340172838076673
  store i64 %Rep, ptr %Dst
  ; The low 32 bits of the replicated i64 are stored as well.
  %RepLo = trunc i64 %Rep to i32
  store i32 %RepLo, ptr %Dst2
  ret void
}

; A halfword replicated twice (multiplier 65537 = 0x00010001) into an i32.
define void @fun_2x2b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_2x2b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlreph %v0, 0(%r2)
; CHECK-NEXT:    vstef %v0, 0(%r3), 0
; CHECK-NEXT:    br %r14
  %Elt = load i16, ptr %Src
  %Wide = zext i16 %Elt to i32
  %Rep = mul i32 %Wide, 65537
  store i32 %Rep, ptr %Dst
  ret void
}

; A halfword replicated four times (multiplier 0x0001000100010001) into an i64.
define void @fun_4x2b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_4x2b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlreph %v0, 0(%r2)
; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
; CHECK-NEXT:    br %r14
  %Elt = load i16, ptr %Src
  %Wide = zext i16 %Elt to i64
  %Rep = mul i64 %Wide, 281479271743489
  store i64 %Rep, ptr %Dst
  ret void
}

; A word replicated twice (multiplier 4294967297 = 0x0000000100000001) into
; an i64.
define void @fun_2x4b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_2x4b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlrepf %v0, 0(%r2)
; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
; CHECK-NEXT:    br %r14
  %Elt = load i32, ptr %Src
  %Wide = zext i32 %Elt to i64
  %Rep = mul i64 %Wide, 4294967297
  store i64 %Rep, ptr %Dst
  ret void
}

;; Replicated registers already in a vector.

; Test multiple stores of same value.
define void @fun_2Eltsx8x1b(ptr %Src, ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_2Eltsx8x1b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlrepb %v0, 0(%r2)
; CHECK-NEXT:    vst %v0, 0(%r3), 3
; CHECK-NEXT:    vst %v0, 0(%r4), 3
; CHECK-NEXT:    br %r14
  %Elt = load i8, ptr %Src
  %Wide = zext i8 %Elt to i64
  %Rep = mul i64 %Wide, 72340172838076673
  ; Splat the replicated scalar across both lanes.
  %Ins = insertelement <2 x i64> undef, i64 %Rep, i32 0
  %Splat = shufflevector <2 x i64> %Ins, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %Splat, ptr %Dst
  store <2 x i64> %Splat, ptr %Dst2
  ret void
}

; Four i32 lanes, each holding a halfword replicated twice.
define void @fun_4Eltsx2x2b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_4Eltsx2x2b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlreph %v0, 0(%r2)
; CHECK-NEXT:    vst %v0, 0(%r3), 3
; CHECK-NEXT:    br %r14
  %Elt = load i16, ptr %Src
  %Wide = zext i16 %Elt to i32
  %Rep = mul i32 %Wide, 65537
  %Ins = insertelement <4 x i32> undef, i32 %Rep, i32 0
  %Splat = shufflevector <4 x i32> %Ins, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %Splat, ptr %Dst
  ret void
}

; Six i32 lanes (a 24-byte vector), each holding a halfword replicated twice.
define void @fun_6Eltsx2x2b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_6Eltsx2x2b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlreph %v0, 0(%r2)
; CHECK-NEXT:    vsteg %v0, 16(%r3), 0
; CHECK-NEXT:    vst %v0, 0(%r3), 4
; CHECK-NEXT:    br %r14
  %Elt = load i16, ptr %Src
  %Wide = zext i16 %Elt to i32
  %Rep = mul i32 %Wide, 65537
  %Ins = insertelement <6 x i32> undef, i32 %Rep, i32 0
  %Splat = shufflevector <6 x i32> %Ins, <6 x i32> undef, <6 x i32> zeroinitializer
  store <6 x i32> %Splat, ptr %Dst
  ret void
}

; Two i64 lanes, each holding a word replicated twice.
define void @fun_2Eltsx2x4b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_2Eltsx2x4b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlrepf %v0, 0(%r2)
; CHECK-NEXT:    vst %v0, 0(%r3), 3
; CHECK-NEXT:    br %r14
  %Elt = load i32, ptr %Src
  %Wide = zext i32 %Elt to i64
  %Rep = mul i64 %Wide, 4294967297
  %Ins = insertelement <2 x i64> undef, i64 %Rep, i32 0
  %Splat = shufflevector <2 x i64> %Ins, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %Splat, ptr %Dst
  ret void
}

; Five i64 lanes (a 40-byte vector), each holding a word replicated twice.
define void @fun_5Eltsx2x4b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_5Eltsx2x4b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlrepf %v0, 0(%r2)
; CHECK-NEXT:    vsteg %v0, 32(%r3), 0
; CHECK-NEXT:    vst %v0, 16(%r3), 4
; CHECK-NEXT:    vst %v0, 0(%r3), 4
; CHECK-NEXT:    br %r14
  %Elt = load i32, ptr %Src
  %Wide = zext i32 %Elt to i64
  %Rep = mul i64 %Wide, 4294967297
  %Ins = insertelement <5 x i64> undef, i64 %Rep, i32 0
  %Splat = shufflevector <5 x i64> %Ins, <5 x i64> undef, <5 x i32> zeroinitializer
  store <5 x i64> %Splat, ptr %Dst
  ret void
}

; Test replicating an incoming argument.
define void @fun_8x1b_arg(i8 %Arg, ptr %Dst) {
; CHECK-LABEL: fun_8x1b_arg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlvgp %v0, %r2, %r2
; CHECK-NEXT:    vrepb %v0, %v0, 7
; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
; CHECK-NEXT:    br %r14
  %Wide = zext i8 %Arg to i64
  %Rep = mul i64 %Wide, 72340172838076673
  store i64 %Rep, ptr %Dst
  ret void
}

; A replication of a non-local value (ISD::AssertZext case).
define void @fun_nonlocalval() {
; CHECK-LABEL: fun_nonlocalval:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lhi %r0, 0
; CHECK-NEXT:    ciblh %r0, 0, 0(%r14)
; CHECK-NEXT:  .LBB13_1: # %bb2
; CHECK-NEXT:    llgf %r0, 0(%r1)
; CHECK-NEXT:    vlvgp %v0, %r0, %r0
; CHECK-NEXT:    vrepf %v0, %v0, 1
; CHECK-NEXT:    vst %v0, 0(%r1), 3
; CHECK-NEXT:    br %r14
  ; The word is loaded in the entry block but replicated in %bb2.
  %word = load i32, ptr undef, align 4
  br i1 undef, label %bb2, label %bb7

bb2:                                              ; preds = %0
  %wide = zext i32 %word to i64
  %rep = mul nuw i64 %wide, 4294967297
  %ins = insertelement <2 x i64> poison, i64 %rep, i64 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %splat, ptr undef, align 8
  ret void

bb7:
  ret void
}

;; Replicated immediates

; Cases where a scalar store-immediate instruction is the better choice.
define void @fun_8x1i_zero(ptr %Dst) {
; CHECK-LABEL: fun_8x1i_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mvghi 0(%r2), 0
; CHECK-NEXT:    br %r14
  store i64 0, ptr %Dst
  ret void
}

define void @fun_4x1i_minus1(ptr %Dst) {
; CHECK-LABEL: fun_4x1i_minus1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mvhi 0(%r2), -1
; CHECK-NEXT:    br %r14
  store i32 -1, ptr %Dst
  ret void
}

; 4294967295 is the unsigned spelling of the same all-ones i32 pattern as -1.
define void @fun_4x1i_allones(ptr %Dst) {
; CHECK-LABEL: fun_4x1i_allones:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mvhi 0(%r2), -1
; CHECK-NEXT:    br %r14
  store i32 4294967295, ptr %Dst
  ret void
}

define void @fun_2i(ptr %Dst) {
; CHECK-LABEL: fun_2i:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mvhhi 0(%r2), 1
; CHECK-NEXT:    br %r14
  store i16 1, ptr %Dst
  ret void
}

; 65537 = 0x00010001: the halfword 1 replicated twice.
define void @fun_2x2i(ptr %Dst) {
; CHECK-LABEL: fun_2x2i:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepih %v0, 1
; CHECK-NEXT:    vstef %v0, 0(%r2), 0
; CHECK-NEXT:    br %r14
  store i32 65537, ptr %Dst
  ret void
}

; 281479271743489 = 0x0001000100010001: the halfword 1 replicated four times.
define void @fun_4x2i(ptr %Dst) {
; CHECK-LABEL: fun_4x2i:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepih %v0, 1
; CHECK-NEXT:    vsteg %v0, 0(%r2), 0
; CHECK-NEXT:    br %r14
  store i64 281479271743489, ptr %Dst
  ret void
}

; 4294967297 = 0x0000000100000001: the word 1 replicated twice.
define void @fun_2x4i(ptr %Dst) {
; CHECK-LABEL: fun_2x4i:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepif %v0, 1
; CHECK-NEXT:    vsteg %v0, 0(%r2), 0
; CHECK-NEXT:    br %r14
  store i64 4294967297, ptr %Dst
  ret void
}

; Store replicated immediate twice using the same vector.
; 50529027 = 0x03030303: the byte 3 replicated four times.
define void @fun_4x1i(ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_4x1i:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepib %v0, 3
; CHECK-NEXT:    vstef %v0, 0(%r2), 0
; CHECK-NEXT:    vstef %v0, 0(%r3), 0
; CHECK-NEXT:    br %r14
  store i32 50529027, ptr %Dst
  store i32 50529027, ptr %Dst2
  ret void
}

; 72340172838076673 = 0x0101010101010101: the byte 1 replicated eight times.
define void @fun_8x1i(ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_8x1i:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepib %v0, 1
; CHECK-NEXT:    vsteg %v0, 0(%r2), 0
; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
; CHECK-NEXT:    br %r14
  store i64 72340172838076673, ptr %Dst
  store i64 72340172838076673, ptr %Dst2
  ret void
}

; Similar, but with vectors.
define void @fun_4Eltsx4x1i_2Eltsx4x1i(ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_4Eltsx4x1i_2Eltsx4x1i:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepib %v0, 3
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
; CHECK-NEXT:    br %r14
  %Ins = insertelement <4 x i32> undef, i32 50529027, i32 0
  %Splat = shufflevector <4 x i32> %Ins, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %Splat, ptr %Dst
  %Ins2 = insertelement <2 x i32> undef, i32 50529027, i32 0
  %Splat2 = shufflevector <2 x i32> %Ins2, <2 x i32> undef, <2 x i32> zeroinitializer
  store <2 x i32> %Splat2, ptr %Dst2
  ret void
}

; Same, but 64-bit store is scalar.
define void @fun_4Eltsx4x1i_8x1i(ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_4Eltsx4x1i_8x1i:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepib %v0, 3
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
; CHECK-NEXT:    br %r14
  %Ins = insertelement <4 x i32> undef, i32 50529027, i32 0
  %Splat = shufflevector <4 x i32> %Ins, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %Splat, ptr %Dst
  ; 217020518514230019 = 0x0303030303030303: the byte 3 replicated eight times.
  store i64 217020518514230019, ptr %Dst2
  ret void
}

; Three i64 lanes (a 24-byte vector), each holding the word 1 replicated twice.
define void @fun_3Eltsx2x4i(ptr %Dst) {
; CHECK-LABEL: fun_3Eltsx2x4i:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepif %v0, 1
; CHECK-NEXT:    vsteg %v0, 16(%r2), 0
; CHECK-NEXT:    vst %v0, 0(%r2), 4
; CHECK-NEXT:    br %r14
  %Ins = insertelement <3 x i64> undef, i64 4294967297, i32 0
  %Splat = shufflevector <3 x i64> %Ins, <3 x i64> undef, <3 x i32> zeroinitializer
  store <3 x i64> %Splat, ptr %Dst
  ret void
}

; The i128 constant is 0x01 repeated in all sixteen bytes.
define void @fun_16x1i(ptr %Dst) {
; CHECK-LABEL: fun_16x1i:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepib %v0, 1
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
  store i128 1334440654591915542993625911497130241, ptr %Dst
  ret void
}
