; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P9
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P8
; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P7
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
; RUN:   -check-prefix=P9-AIX32
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
; RUN:   -check-prefix=P8-AIX32
; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
; RUN:   -check-prefix=P7-AIX32

; v2f64
; Loads the double at element 3 of %a, splats it across <2 x double> and
; stores the vector to %c.
define dso_local void @test(ptr nocapture %c, ptr nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 24
; P9-NEXT:    lxvdsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 24
; P8-NEXT:    lxvdsx vs0, 0, r4
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: test:
; P7:       # %bb.0: # %entry
; P7-NEXT:    addi r4, r4, 24
; P7-NEXT:    lxvdsx vs0, 0, r4
; P7-NEXT:    stxvd2x vs0, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r4, r4, 24
; P9-AIX32-NEXT:    lxvdsx vs0, 0, r4
; P9-AIX32-NEXT:    stxv vs0, 0(r3)
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    addi r4, r4, 24
; P8-AIX32-NEXT:    lxvdsx vs0, 0, r4
; P8-AIX32-NEXT:    stxvd2x vs0, 0, r3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    addi r4, r4, 24
; P7-AIX32-NEXT:    lxvdsx vs0, 0, r4
; P7-AIX32-NEXT:    stxvd2x vs0, 0, r3
; P7-AIX32-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds double, ptr %a, i64 3
  %0 = load double, ptr %arrayidx, align 8
  %splat.splatinsert.i = insertelement <2 x double> poison, double %0, i32 0
  %splat.splat.i = shufflevector <2 x double> %splat.splatinsert.i, <2 x double> poison, <2 x i32> zeroinitializer
  store <2 x double> %splat.splat.i, ptr %c, align 16
  ret void
}

; v4f32
; Loads the float at element 3 of %a, splats it across <4 x float> and
; stores the vector to %c.
define dso_local void @test2(ptr nocapture %c, ptr nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 12
; P9-NEXT:    lxvwsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 12
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxspltw vs0, vs0, 1
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: test2:
; P7:       # %bb.0: # %entry
; P7-NEXT:    addi r4, r4, 12
; P7-NEXT:    lfiwzx f0, 0, r4
; P7-NEXT:    xxspltw vs0, vs0, 1
; P7-NEXT:    stxvw4x vs0, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test2:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r4, r4, 12
; P9-AIX32-NEXT:    lxvwsx vs0, 0, r4
; P9-AIX32-NEXT:    stxv vs0, 0(r3)
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test2:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    addi r4, r4, 12
; P8-AIX32-NEXT:    lfiwzx f0, 0, r4
; P8-AIX32-NEXT:    xxspltw vs0, vs0, 1
; P8-AIX32-NEXT:    stxvw4x vs0, 0, r3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test2:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    addi r4, r4, 12
; P7-AIX32-NEXT:    lfiwzx f0, 0, r4
; P7-AIX32-NEXT:    xxspltw vs0, vs0, 1
; P7-AIX32-NEXT:    stxvw4x vs0, 0, r3
; P7-AIX32-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds float, ptr %a, i64 3
  %0 = load float, ptr %arrayidx, align 4
  %splat.splatinsert.i = insertelement <4 x float> poison, float %0, i32 0
  %splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> poison, <4 x i32> zeroinitializer
  store <4 x float> %splat.splat.i, ptr %c, align 16
  ret void
}

; v4i32
; Loads the i32 at element 3 of %a, splats it across <4 x i32> and stores
; the vector to %c.
define dso_local void @test3(ptr nocapture %c, ptr nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test3:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 12
; P9-NEXT:    lxvwsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test3:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 12
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxspltw vs0, vs0, 1
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: test3:
; P7:       # %bb.0: # %entry
; P7-NEXT:    addi r4, r4, 12
; P7-NEXT:    lfiwzx f0, 0, r4
; P7-NEXT:    xxspltw vs0, vs0, 1
; P7-NEXT:    stxvw4x vs0, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test3:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r4, r4, 12
; P9-AIX32-NEXT:    lxvwsx vs0, 0, r4
; P9-AIX32-NEXT:    stxv vs0, 0(r3)
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test3:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    addi r4, r4, 12
; P8-AIX32-NEXT:    lfiwzx f0, 0, r4
; P8-AIX32-NEXT:    xxspltw vs0, vs0, 1
; P8-AIX32-NEXT:    stxvw4x vs0, 0, r3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test3:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    addi r4, r4, 12
; P7-AIX32-NEXT:    lfiwzx f0, 0, r4
; P7-AIX32-NEXT:    xxspltw vs0, vs0, 1
; P7-AIX32-NEXT:    stxvw4x vs0, 0, r3
; P7-AIX32-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 3
  %0 = load i32, ptr %arrayidx, align 4
  %splat.splatinsert.i = insertelement <4 x i32> poison, i32 %0, i32 0
  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> poison, <4 x i32> zeroinitializer
  store <4 x i32> %splat.splat.i, ptr %c, align 16
  ret void
}


; v2i64
; Loads the i64 at element 3 of %a, splats it across <2 x i64> and stores
; the vector to %c.
define dso_local void @test4(ptr nocapture %c, ptr nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test4:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 24
; P9-NEXT:    lxvdsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test4:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 24
; P8-NEXT:    lxvdsx vs0, 0, r4
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: test4:
; P7:       # %bb.0: # %entry
; P7-NEXT:    addi r4, r4, 24
; P7-NEXT:    lxvdsx vs0, 0, r4
; P7-NEXT:    stxvd2x vs0, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test4:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    li r5, 28
; P9-AIX32-NEXT:    lxvwsx vs0, r4, r5
; P9-AIX32-NEXT:    li r5, 24
; P9-AIX32-NEXT:    lxvwsx vs1, r4, r5
; P9-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
; P9-AIX32-NEXT:    lxv vs2, 0(r4)
; P9-AIX32-NEXT:    xxperm vs0, vs1, vs2
; P9-AIX32-NEXT:    stxv vs0, 0(r3)
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test4:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    li r5, 28
; P8-AIX32-NEXT:    lfiwzx f0, r4, r5
; P8-AIX32-NEXT:    li r5, 24
; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
; P8-AIX32-NEXT:    lfiwzx f0, r4, r5
; P8-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
; P8-AIX32-NEXT:    xxspltw v3, vs0, 1
; P8-AIX32-NEXT:    vperm v2, v3, v2, v4
; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test4:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    li r5, 28
; P7-AIX32-NEXT:    lfiwzx f0, r4, r5
; P7-AIX32-NEXT:    li r5, 24
; P7-AIX32-NEXT:    xxspltw v2, vs0, 1
; P7-AIX32-NEXT:    lfiwzx f0, r4, r5
; P7-AIX32-NEXT:    lwz r4, L..C0(r2) # %const.0
; P7-AIX32-NEXT:    lxvw4x v4, 0, r4
; P7-AIX32-NEXT:    xxspltw v3, vs0, 1
; P7-AIX32-NEXT:    vperm v2, v3, v2, v4
; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
; P7-AIX32-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 3
  %0 = load i64, ptr %arrayidx, align 8
  %splat.splatinsert.i = insertelement <2 x i64> poison, i64 %0, i32 0
  %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %splat.splat.i, ptr %c, align 16
  ret void
}

; sext v2i64
; Loads an i32 from %in, sign-extends it to i64, splats it across <2 x i64>
; and stores the vector to %a.
define void @test5(ptr %a, ptr %in) {
; P9-LABEL: test5:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lfiwax f0, 0, r4
; P9-NEXT:    xxspltd vs0, f0, 0
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test5:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lfiwax f0, 0, r4
; P8-NEXT:    xxspltd vs0, f0, 0
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: test5:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lfiwax f0, 0, r4
; P7-NEXT:    xxspltd vs0, f0, 0
; P7-NEXT:    stxvd2x vs0, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test5:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    lwz r4, 0(r4)
; P9-AIX32-NEXT:    srawi r5, r4, 31
; P9-AIX32-NEXT:    stw r4, -16(r1)
; P9-AIX32-NEXT:    lwz r4, L..C1(r2) # %const.0
; P9-AIX32-NEXT:    lxv vs1, -16(r1)
; P9-AIX32-NEXT:    stw r5, -32(r1)
; P9-AIX32-NEXT:    lxv vs2, -32(r1)
; P9-AIX32-NEXT:    lxv vs0, 0(r4)
; P9-AIX32-NEXT:    xxperm vs1, vs2, vs0
; P9-AIX32-NEXT:    stxv vs1, 0(r3)
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test5:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lwz r4, 0(r4)
; P8-AIX32-NEXT:    srawi r5, r4, 31
; P8-AIX32-NEXT:    stw r4, -16(r1)
; P8-AIX32-NEXT:    lwz r4, L..C1(r2) # %const.0
; P8-AIX32-NEXT:    stw r5, -32(r1)
; P8-AIX32-NEXT:    lxvw4x v2, 0, r4
; P8-AIX32-NEXT:    addi r4, r1, -16
; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
; P8-AIX32-NEXT:    addi r4, r1, -32
; P8-AIX32-NEXT:    lxvw4x v4, 0, r4
; P8-AIX32-NEXT:    vperm v2, v4, v3, v2
; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test5:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lwz r4, 0(r4)
; P7-AIX32-NEXT:    stw r4, -16(r1)
; P7-AIX32-NEXT:    srawi r4, r4, 31
; P7-AIX32-NEXT:    stw r4, -32(r1)
; P7-AIX32-NEXT:    lwz r4, L..C1(r2) # %const.0
; P7-AIX32-NEXT:    lxvw4x v2, 0, r4
; P7-AIX32-NEXT:    addi r4, r1, -16
; P7-AIX32-NEXT:    lxvw4x v3, 0, r4
; P7-AIX32-NEXT:    addi r4, r1, -32
; P7-AIX32-NEXT:    lxvw4x v4, 0, r4
; P7-AIX32-NEXT:    vperm v2, v4, v3, v2
; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
; P7-AIX32-NEXT:    blr
entry:
  %0 = load i32, ptr %in, align 4
  %conv = sext i32 %0 to i64
  %splat.splatinsert.i = insertelement <2 x i64> poison, i64 %conv, i32 0
  %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %splat.splat.i, ptr %a, align 16
  ret void
}

; zext v2i64
; Loads an i32 from %in, zero-extends it to i64, splats it across <2 x i64>
; and stores the vector to %a.
define void @test6(ptr %a, ptr %in) {
; P9-LABEL: test6:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lfiwzx f0, 0, r4
; P9-NEXT:    xxspltd vs0, f0, 0
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test6:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxspltd vs0, f0, 0
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: test6:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lfiwzx f0, 0, r4
; P7-NEXT:    xxspltd vs0, f0, 0
; P7-NEXT:    stxvd2x vs0, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test6:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    lwz r5, L..C2(r2) # %const.0
; P9-AIX32-NEXT:    lxvwsx vs1, 0, r4
; P9-AIX32-NEXT:    xxlxor vs2, vs2, vs2
; P9-AIX32-NEXT:    lxv vs0, 0(r5)
; P9-AIX32-NEXT:    xxperm vs1, vs2, vs0
; P9-AIX32-NEXT:    stxv vs1, 0(r3)
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test6:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lfiwzx f0, 0, r4
; P8-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
; P8-AIX32-NEXT:    xxlxor v4, v4, v4
; P8-AIX32-NEXT:    lxvw4x v3, 0, r4
; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
; P8-AIX32-NEXT:    vperm v2, v4, v2, v3
; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test6:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lfiwzx f0, 0, r4
; P7-AIX32-NEXT:    lwz r4, L..C2(r2) # %const.0
; P7-AIX32-NEXT:    xxlxor v4, v4, v4
; P7-AIX32-NEXT:    lxvw4x v3, 0, r4
; P7-AIX32-NEXT:    xxspltw v2, vs0, 1
; P7-AIX32-NEXT:    vperm v2, v4, v2, v3
; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
; P7-AIX32-NEXT:    blr
entry:
  %0 = load i32, ptr %in, align 4
  %conv = zext i32 %0 to i64
  %splat.splatinsert.i = insertelement <2 x i64> poison, i64 %conv, i32 0
  %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %splat.splat.i, ptr %a, align 16
  ret void
}

; v8i16
; Loads an i16 from %in, splats it across <8 x i16> and stores the vector
; to %a.
define void @test7(ptr %a, ptr %in) {
; P9-LABEL: test7:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxsihzx v2, 0, r4
; P9-NEXT:    vsplth v2, v2, 3
; P9-NEXT:    stxv v2, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test7:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lhzx r4, 0, r4
; P8-NEXT:    mtvsrwz v2, r4
; P8-NEXT:    vsplth v2, v2, 3
; P8-NEXT:    xxswapd vs0, v2
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: test7:
; P7:       # %bb.0: # %entry
; P7-NEXT:    li r5, 1
; P7-NEXT:    lvx v2, 0, r4
; P7-NEXT:    lvsl v4, 0, r4
; P7-NEXT:    lvx v3, r5, r4
; P7-NEXT:    vperm v2, v2, v3, v4
; P7-NEXT:    vsplth v2, v2, 0
; P7-NEXT:    stxvw4x v2, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test7:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    lxsihzx v2, 0, r4
; P9-AIX32-NEXT:    vsplth v2, v2, 3
; P9-AIX32-NEXT:    stxv v2, 0(r3)
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test7:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lhzx r4, 0, r4
; P8-AIX32-NEXT:    mtvsrwz v2, r4
; P8-AIX32-NEXT:    vsplth v2, v2, 3
; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test7:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    li r5, 1
; P7-AIX32-NEXT:    lvx v2, 0, r4
; P7-AIX32-NEXT:    lvsl v4, 0, r4
; P7-AIX32-NEXT:    lvx v3, r5, r4
; P7-AIX32-NEXT:    vperm v2, v2, v3, v4
; P7-AIX32-NEXT:    vsplth v2, v2, 0
; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
; P7-AIX32-NEXT:    blr
entry:
  %0 = load i16, ptr %in, align 2
  %splat.splatinsert.i = insertelement <8 x i16> poison, i16 %0, i32 0
  %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> poison, <8 x i32> zeroinitializer
  store <8 x i16> %splat.splat.i, ptr %a, align 16
  ret void
}

; v16i8
; Loads an i8 from %in, splats it across <16 x i8> and stores the vector
; to %a.
define void @test8(ptr %a, ptr %in) {
; P9-LABEL: test8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxsibzx v2, 0, r4
; P9-NEXT:    vspltb v2, v2, 7
; P9-NEXT:    stxv v2, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lbzx r4, 0, r4
; P8-NEXT:    mtvsrwz v2, r4
; P8-NEXT:    vspltb v2, v2, 7
; P8-NEXT:    xxswapd vs0, v2
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: test8:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lvsl v2, 0, r4
; P7-NEXT:    lvx v3, 0, r4
; P7-NEXT:    vperm v2, v3, v3, v2
; P7-NEXT:    vspltb v2, v2, 0
; P7-NEXT:    stxvw4x v2, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test8:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    lxsibzx v2, 0, r4
; P9-AIX32-NEXT:    vspltb v2, v2, 7
; P9-AIX32-NEXT:    stxv v2, 0(r3)
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test8:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lbzx r4, 0, r4
; P8-AIX32-NEXT:    mtvsrwz v2, r4
; P8-AIX32-NEXT:    vspltb v2, v2, 7
; P8-AIX32-NEXT:    stxvw4x v2, 0, r3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test8:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lvsl v2, 0, r4
; P7-AIX32-NEXT:    lvx v3, 0, r4
; P7-AIX32-NEXT:    vperm v2, v3, v3, v2
; P7-AIX32-NEXT:    vspltb v2, v2, 0
; P7-AIX32-NEXT:    stxvw4x v2, 0, r3
; P7-AIX32-NEXT:    blr
entry:
  %0 = load i8, ptr %in, align 1
  %splat.splatinsert.i = insertelement <16 x i8> poison, i8 %0, i32 0
  %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> poison, <16 x i32> zeroinitializer
  store <16 x i8> %splat.splat.i, ptr %a, align 16
  ret void
}

; Loads <4 x i8> from %s and repeats bytes 0-3 four times to form the
; <16 x i8> result. %t is unused.
define <16 x i8> @unadjusted_lxvwsx(ptr %s, ptr %t) {
; P9-LABEL: unadjusted_lxvwsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvwsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lfiwzx f0, 0, r3
; P8-NEXT:    xxspltw v2, vs0, 1
; P8-NEXT:    blr
;
; P7-LABEL: unadjusted_lxvwsx:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lfiwzx f0, 0, r3
; P7-NEXT:    xxspltw v2, vs0, 1
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: unadjusted_lxvwsx:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    lxvwsx v2, 0, r3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: unadjusted_lxvwsx:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lfiwzx f0, 0, r3
; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: unadjusted_lxvwsx:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lfiwzx f0, 0, r3
; P7-AIX32-NEXT:    xxspltw v2, vs0, 1
; P7-AIX32-NEXT:    blr
entry:
  %0 = load <4 x i8>, ptr %s, align 4
  %1 = shufflevector <4 x i8> %0, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %1
}

; Loads <8 x i8> from %s and repeats bytes 4-7 four times to form the
; <16 x i8> result. %t is unused.
define <16 x i8> @adjusted_lxvwsx(ptr %s, ptr %t) {
; P9-LABEL: adjusted_lxvwsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 4
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lfdx f0, 0, r3
; P8-NEXT:    xxspltw v2, vs0, 0
; P8-NEXT:    blr
;
; P7-LABEL: adjusted_lxvwsx:
; P7:       # %bb.0: # %entry
; P7-NEXT:    ld r3, 0(r3)
; P7-NEXT:    std r3, -16(r1)
; P7-NEXT:    std r3, -8(r1)
; P7-NEXT:    addi r3, r1, -16
; P7-NEXT:    lxvw4x vs0, 0, r3
; P7-NEXT:    xxspltw v2, vs0, 1
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: adjusted_lxvwsx:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r3, r3, 4
; P9-AIX32-NEXT:    lxvwsx v2, 0, r3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: adjusted_lxvwsx:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    addi r3, r3, 4
; P8-AIX32-NEXT:    lfiwzx f0, 0, r3
; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: adjusted_lxvwsx:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    addi r3, r3, 4
; P7-AIX32-NEXT:    lfiwzx f0, 0, r3
; P7-AIX32-NEXT:    xxspltw v2, vs0, 1
; P7-AIX32-NEXT:    blr
entry:
  %0 = load <8 x i8>, ptr %s, align 8
  %1 = shufflevector <8 x i8> %0, <8 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %1
}

; Loads <16 x i8> from %s and repeats bytes 0-3 four times. %t is unused.
define <16 x i8> @unadjusted_lxvwsx_v16i8(ptr %s, <16 x i8> %t) {
; P9-LABEL: unadjusted_lxvwsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvwsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvd2x vs0, 0, r3
; P8-NEXT:    xxswapd v2, vs0
; P8-NEXT:    xxspltw v2, v2, 3
; P8-NEXT:    blr
;
; P7-LABEL: unadjusted_lxvwsx_v16i8:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lxvw4x vs0, 0, r3
; P7-NEXT:    xxspltw v2, vs0, 0
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: unadjusted_lxvwsx_v16i8:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    lxvwsx v2, 0, r3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: unadjusted_lxvwsx_v16i8:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lxvw4x vs0, 0, r3
; P8-AIX32-NEXT:    xxspltw v2, vs0, 0
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: unadjusted_lxvwsx_v16i8:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lxvw4x vs0, 0, r3
; P7-AIX32-NEXT:    xxspltw v2, vs0, 0
; P7-AIX32-NEXT:    blr
entry:
  %0 = load <16 x i8>, ptr %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %1
}

; Loads <16 x i8> from %s and repeats bytes 4-7 four times. %t is unused.
define <16 x i8> @adjusted_lxvwsx_v16i8(ptr %s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 4
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvd2x vs0, 0, r3
; P8-NEXT:    xxswapd v2, vs0
; P8-NEXT:    xxspltw v2, v2, 2
; P8-NEXT:    blr
;
; P7-LABEL: adjusted_lxvwsx_v16i8:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lxvw4x vs0, 0, r3
; P7-NEXT:    xxspltw v2, vs0, 1
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: adjusted_lxvwsx_v16i8:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r3, r3, 4
; P9-AIX32-NEXT:    lxvwsx v2, 0, r3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: adjusted_lxvwsx_v16i8:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lxvw4x vs0, 0, r3
; P8-AIX32-NEXT:    xxspltw v2, vs0, 1
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: adjusted_lxvwsx_v16i8:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lxvw4x vs0, 0, r3
; P7-AIX32-NEXT:    xxspltw v2, vs0, 1
; P7-AIX32-NEXT:    blr
entry:
  %0 = load <16 x i8>, ptr %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %1
}

; Loads <16 x i8> from %s and repeats bytes 8-11 four times. %t is unused.
define <16 x i8> @adjusted_lxvwsx_v16i8_2(ptr %s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8_2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 8
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8_2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvd2x vs0, 0, r3
; P8-NEXT:    xxswapd v2, vs0
; P8-NEXT:    xxspltw v2, v2, 1
; P8-NEXT:    blr
;
; P7-LABEL: adjusted_lxvwsx_v16i8_2:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lxvw4x vs0, 0, r3
; P7-NEXT:    xxspltw v2, vs0, 2
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: adjusted_lxvwsx_v16i8_2:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r3, r3, 8
; P9-AIX32-NEXT:    lxvwsx v2, 0, r3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: adjusted_lxvwsx_v16i8_2:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lxvw4x vs0, 0, r3
; P8-AIX32-NEXT:    xxspltw v2, vs0, 2
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: adjusted_lxvwsx_v16i8_2:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lxvw4x vs0, 0, r3
; P7-AIX32-NEXT:    xxspltw v2, vs0, 2
; P7-AIX32-NEXT:    blr
entry:
  %0 = load <16 x i8>, ptr %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
  ret <16 x i8> %1
}

; Loads <16 x i8> from %s and repeats bytes 12-15 four times. %t is unused.
define <16 x i8> @adjusted_lxvwsx_v16i8_3(ptr %s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8_3:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 12
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8_3:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvd2x vs0, 0, r3
; P8-NEXT:    xxswapd v2, vs0
; P8-NEXT:    xxspltw v2, v2, 0
; P8-NEXT:    blr
;
; P7-LABEL: adjusted_lxvwsx_v16i8_3:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lxvw4x vs0, 0, r3
; P7-NEXT:    xxspltw v2, vs0, 3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: adjusted_lxvwsx_v16i8_3:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r3, r3, 12
; P9-AIX32-NEXT:    lxvwsx v2, 0, r3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: adjusted_lxvwsx_v16i8_3:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lxvw4x vs0, 0, r3
; P8-AIX32-NEXT:    xxspltw v2, vs0, 3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: adjusted_lxvwsx_v16i8_3:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lxvw4x vs0, 0, r3
; P7-AIX32-NEXT:    xxspltw v2, vs0, 3
; P7-AIX32-NEXT:    blr
entry:
  %0 = load <16 x i8>, ptr %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %1
}

; Loads <8 x i8> from %s and repeats bytes 0-7 twice to form the <16 x i8>
; result. %t is unused.
define <16 x i8> @unadjusted_lxvdsx(ptr %s, ptr %t) {
; P9-LABEL: unadjusted_lxvdsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvdsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: unadjusted_lxvdsx:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lxvdsx v2, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: unadjusted_lxvdsx:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    li r4, 4
; P9-AIX32-NEXT:    lxvwsx vs1, 0, r3
; P9-AIX32-NEXT:    lxvwsx vs0, r3, r4
; P9-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
; P9-AIX32-NEXT:    xxmrghd v2, vs0, vs0
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: unadjusted_lxvdsx:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    li r4, 4
; P8-AIX32-NEXT:    lfiwzx f1, 0, r3
; P8-AIX32-NEXT:    lfiwzx f0, r3, r4
; P8-AIX32-NEXT:    xxspltw vs1, vs1, 1
; P8-AIX32-NEXT:    xxspltw vs0, vs0, 1
; P8-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
; P8-AIX32-NEXT:    xxmrghd v2, vs0, vs0
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: unadjusted_lxvdsx:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    li r4, 4
; P7-AIX32-NEXT:    lfiwzx f1, 0, r3
; P7-AIX32-NEXT:    lfiwzx f0, r3, r4
; P7-AIX32-NEXT:    xxspltw vs1, vs1, 1
; P7-AIX32-NEXT:    xxspltw vs0, vs0, 1
; P7-AIX32-NEXT:    xxmrghw vs0, vs1, vs0
; P7-AIX32-NEXT:    xxmrghd v2, vs0, vs0
; P7-AIX32-NEXT:    blr
entry:
  %0 = load <8 x i8>, ptr %s, align 8
  %1 = shufflevector <8 x i8> %0, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %1
}

; Loads <16 x i8> from %s and repeats bytes 0-7 twice. %t is unused.
define <16 x i8> @unadjusted_lxvdsx_v16i8(ptr %s, <16 x i8> %t) {
; P9-LABEL: unadjusted_lxvdsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvdsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: unadjusted_lxvdsx_v16i8:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lxvdsx v2, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: unadjusted_lxvdsx_v16i8:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    lxvdsx v2, 0, r3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: unadjusted_lxvdsx_v16i8:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lxvdsx v2, 0, r3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: unadjusted_lxvdsx_v16i8:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lxvdsx v2, 0, r3
; P7-AIX32-NEXT:    blr
entry:
  %0 = load <16 x i8>, ptr %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %1
}

; Loads <16 x i8> from %s and repeats bytes 8-15 twice. %t is unused.
define <16 x i8> @adjusted_lxvdsx_v16i8(ptr %s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvdsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 8
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvdsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r3, r3, 8
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
;
; P7-LABEL: adjusted_lxvdsx_v16i8:
; P7:       # %bb.0: # %entry
; P7-NEXT:    addi r3, r3, 8
; P7-NEXT:    lxvdsx v2, 0, r3
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: adjusted_lxvdsx_v16i8:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r3, r3, 8
; P9-AIX32-NEXT:    lxvdsx v2, 0, r3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: adjusted_lxvdsx_v16i8:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    addi r3, r3, 8
; P8-AIX32-NEXT:    lxvdsx v2, 0, r3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: adjusted_lxvdsx_v16i8:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    addi r3, r3, 8
; P7-AIX32-NEXT:    lxvdsx v2, 0, r3
; P7-AIX32-NEXT:    blr
entry:
  %0 = load <16 x i8>, ptr %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %1
}

; Splats the i16 at element 3 of %Ptr (byte offset 6) across <8 x i16>.
; The scalar load is annotated align 16.
define <8 x i16> @test_unaligned_v8i16(ptr %Ptr) {
; P9-LABEL: test_unaligned_v8i16:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 6
; P9-NEXT:    lxsihzx v2, 0, r3
; P9-NEXT:    vsplth v2, v2, 3
; P9-NEXT:    blr
;
; P8-LABEL: test_unaligned_v8i16:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lhz r3, 6(r3)
; P8-NEXT:    mtvsrwz v2, r3
; P8-NEXT:    vsplth v2, v2, 3
; P8-NEXT:    blr
;
; P7-LABEL: test_unaligned_v8i16:
; P7:       # %bb.0: # %entry
; P7-NEXT:    addi r3, r3, 6
; P7-NEXT:    li r4, 1
; P7-NEXT:    lvx v2, 0, r3
; P7-NEXT:    lvx v3, r4, r3
; P7-NEXT:    lvsl v4, 0, r3
; P7-NEXT:    vperm v2, v2, v3, v4
; P7-NEXT:    vsplth v2, v2, 0
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test_unaligned_v8i16:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r3, r3, 6
; P9-AIX32-NEXT:    lxsihzx v2, 0, r3
; P9-AIX32-NEXT:    vsplth v2, v2, 3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test_unaligned_v8i16:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lhz r3, 6(r3)
; P8-AIX32-NEXT:    mtvsrwz v2, r3
; P8-AIX32-NEXT:    vsplth v2, v2, 3
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test_unaligned_v8i16:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    addi r3, r3, 6
; P7-AIX32-NEXT:    li r4, 1
; P7-AIX32-NEXT:    lvx v2, 0, r3
; P7-AIX32-NEXT:    lvx v3, r4, r3
; P7-AIX32-NEXT:    lvsl v4, 0, r3
; P7-AIX32-NEXT:    vperm v2, v2, v3, v4
; P7-AIX32-NEXT:    vsplth v2, v2, 0
; P7-AIX32-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i16, ptr %Ptr, i64 3
  %0 = load i16, ptr %add.ptr, align 16
  %splat.splatinsert = insertelement <8 x i16> poison, i16 %0, i32 0
  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat.splat
}

; Splats the i8 at byte offset 3 of %Ptr across <16 x i8>.
; The scalar load is annotated align 16.
define <16 x i8> @test_unaligned_v16i8(ptr %Ptr) {
; P9-LABEL: test_unaligned_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 3
; P9-NEXT:    lxsibzx v2, 0, r3
; P9-NEXT:    vspltb v2, v2, 7
; P9-NEXT:    blr
;
; P8-LABEL: test_unaligned_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lbz r3, 3(r3)
; P8-NEXT:    mtvsrwz v2, r3
; P8-NEXT:    vspltb v2, v2, 7
; P8-NEXT:    blr
;
; P7-LABEL: test_unaligned_v16i8:
; P7:       # %bb.0: # %entry
; P7-NEXT:    addi r3, r3, 3
; P7-NEXT:    lvsl v2, 0, r3
; P7-NEXT:    lvx v3, 0, r3
; P7-NEXT:    vperm v2, v3, v3, v2
; P7-NEXT:    vspltb v2, v2, 0
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test_unaligned_v16i8:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r3, r3, 3
; P9-AIX32-NEXT:    lxsibzx v2, 0, r3
; P9-AIX32-NEXT:    vspltb v2, v2, 7
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test_unaligned_v16i8:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lbz r3, 3(r3)
; P8-AIX32-NEXT:    mtvsrwz v2, r3
; P8-AIX32-NEXT:    vspltb v2, v2, 7
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test_unaligned_v16i8:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    addi r3, r3, 3
; P7-AIX32-NEXT:    lvsl v2, 0, r3
; P7-AIX32-NEXT:    lvx v3, 0, r3
; P7-AIX32-NEXT:    vperm v2, v3, v3, v2
; P7-AIX32-NEXT:    vspltb v2, v2, 0
; P7-AIX32-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i8, ptr %Ptr, i64 3
  %0 = load i8, ptr %add.ptr, align 16
  %splat.splatinsert = insertelement <16 x i8> poison, i8 %0, i32 0
  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> poison, <16 x i32> zeroinitializer
  ret <16 x i8> %splat.splat
}

; Splats the i16 loaded directly from %Ptr (align 16) across <8 x i16>.
define <8 x i16> @test_aligned_v8i16_1(ptr %Ptr) {
; P9-LABEL: test_aligned_v8i16_1:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxsihzx v2, 0, r3
; P9-NEXT:    vsplth v2, v2, 3
; P9-NEXT:    blr
;
; P8-LABEL: test_aligned_v8i16_1:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    vsplth v2, v2, 7
; P8-NEXT:    blr
;
; P7-LABEL: test_aligned_v8i16_1:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lvx v2, 0, r3
; P7-NEXT:    vsplth v2, v2, 0
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test_aligned_v8i16_1:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    lxsihzx v2, 0, r3
; P9-AIX32-NEXT:    vsplth v2, v2, 3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test_aligned_v8i16_1:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lvx v2, 0, r3
; P8-AIX32-NEXT:    vsplth v2, v2, 0
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test_aligned_v8i16_1:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lvx v2, 0, r3
; P7-AIX32-NEXT:    vsplth v2, v2, 0
; P7-AIX32-NEXT:    blr
entry:
  %0 = load i16, ptr %Ptr, align 16
  %splat.splatinsert = insertelement <8 x i16> poison, i16 %0, i32 0
  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat.splat
}

; Splats the i16 at element 16 of %Ptr (byte offset 32, align 16) across
; <8 x i16>.
define <8 x i16> @test_aligned_v8i16_2(ptr %Ptr) {
; P9-LABEL: test_aligned_v8i16_2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 32
; P9-NEXT:    lxsihzx v2, 0, r3
; P9-NEXT:    vsplth v2, v2, 3
; P9-NEXT:    blr
;
; P8-LABEL: test_aligned_v8i16_2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r3, r3, 32
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    vsplth v2, v2, 7
; P8-NEXT:    blr
;
; P7-LABEL: test_aligned_v8i16_2:
; P7:       # %bb.0: # %entry
; P7-NEXT:    addi r3, r3, 32
; P7-NEXT:    lvx v2, 0, r3
; P7-NEXT:    vsplth v2, v2, 0
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test_aligned_v8i16_2:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r3, r3, 32
; P9-AIX32-NEXT:    lxsihzx v2, 0, r3
; P9-AIX32-NEXT:    vsplth v2, v2, 3
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test_aligned_v8i16_2:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    addi r3, r3, 32
; P8-AIX32-NEXT:    lvx v2, 0, r3
; P8-AIX32-NEXT:    vsplth v2, v2, 0
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test_aligned_v8i16_2:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    addi r3, r3, 32
; P7-AIX32-NEXT:    lvx v2, 0, r3
; P7-AIX32-NEXT:    vsplth v2, v2, 0
; P7-AIX32-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i16, ptr %Ptr, i64 16
  %0 = load i16, ptr %add.ptr, align 16
  %splat.splatinsert = insertelement <8 x i16> poison, i16 %0, i32 0
  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat.splat
}

; Splats the i8 loaded directly from %Ptr (align 16) across <16 x i8>.
define <16 x i8> @test_aligned_v16i8_1(ptr %Ptr) {
; P9-LABEL: test_aligned_v16i8_1:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxsibzx v2, 0, r3
; P9-NEXT:    vspltb v2, v2, 7
; P9-NEXT:    blr
;
; P8-LABEL: test_aligned_v16i8_1:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    vspltb v2, v2, 15
; P8-NEXT:    blr
;
; P7-LABEL: test_aligned_v16i8_1:
; P7:       # %bb.0: # %entry
; P7-NEXT:    lvx v2, 0, r3
; P7-NEXT:    vspltb v2, v2, 0
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test_aligned_v16i8_1:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    lxsibzx v2, 0, r3
; P9-AIX32-NEXT:    vspltb v2, v2, 7
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test_aligned_v16i8_1:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    lvx v2, 0, r3
; P8-AIX32-NEXT:    vspltb v2, v2, 0
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test_aligned_v16i8_1:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    lvx v2, 0, r3
; P7-AIX32-NEXT:    vspltb v2, v2, 0
; P7-AIX32-NEXT:    blr
entry:
  %0 = load i8, ptr %Ptr, align 16
  %splat.splatinsert = insertelement <16 x i8> poison, i8 %0, i32 0
  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> poison, <16 x i32> zeroinitializer
  ret <16 x i8> %splat.splat
}

; Splats the i8 at byte offset 16 of %Ptr (align 16) across <16 x i8>.
define <16 x i8> @test_aligned_v16i8_2(ptr %Ptr) {
; P9-LABEL: test_aligned_v16i8_2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 16
; P9-NEXT:    lxsibzx v2, 0, r3
; P9-NEXT:    vspltb v2, v2, 7
; P9-NEXT:    blr
;
; P8-LABEL: test_aligned_v16i8_2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r3, r3, 16
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    vspltb v2, v2, 15
; P8-NEXT:    blr
;
; P7-LABEL: test_aligned_v16i8_2:
; P7:       # %bb.0: # %entry
; P7-NEXT:    addi r3, r3, 16
; P7-NEXT:    lvx v2, 0, r3
; P7-NEXT:    vspltb v2, v2, 0
; P7-NEXT:    blr
;
; P9-AIX32-LABEL: test_aligned_v16i8_2:
; P9-AIX32:       # %bb.0: # %entry
; P9-AIX32-NEXT:    addi r3, r3, 16
; P9-AIX32-NEXT:    lxsibzx v2, 0, r3
; P9-AIX32-NEXT:    vspltb v2, v2, 7
; P9-AIX32-NEXT:    blr
;
; P8-AIX32-LABEL: test_aligned_v16i8_2:
; P8-AIX32:       # %bb.0: # %entry
; P8-AIX32-NEXT:    addi r3, r3, 16
; P8-AIX32-NEXT:    lvx v2, 0, r3
; P8-AIX32-NEXT:    vspltb v2, v2, 0
; P8-AIX32-NEXT:    blr
;
; P7-AIX32-LABEL: test_aligned_v16i8_2:
; P7-AIX32:       # %bb.0: # %entry
; P7-AIX32-NEXT:    addi r3, r3, 16
; P7-AIX32-NEXT:    lvx v2, 0, r3
; P7-AIX32-NEXT:    vspltb v2, v2, 0
; P7-AIX32-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i8, ptr %Ptr, i64 16
  %0 = load i8, ptr %add.ptr, align 16
  %splat.splatinsert = insertelement <16 x i8> poison, i8 %0, i32 0
  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> poison, <16 x i32> zeroinitializer
  ret <16 x i8> %splat.splat
}

; The following test case should not produce a load and splat node,
; as we cannot handle extending loads (from f32 to f64), and this test
; shows that there are 
multiple uses of the extending load (other than 1206; a build vector node). `lxvdsx` should not be produced in this case. 1207define <2 x double> @test_v2f64_multiple_use(ptr nocapture readonly %a, ptr nocapture %b, ptr nocapture %c) { 1208; P9-LABEL: test_v2f64_multiple_use: 1209; P9: # %bb.0: # %entry 1210; P9-NEXT: lfs f0, 0(r3) 1211; P9-NEXT: xxspltd v2, vs0, 0 1212; P9-NEXT: lfd f1, 0(r4) 1213; P9-NEXT: xsadddp f1, f1, f0 1214; P9-NEXT: stfd f1, 0(r4) 1215; P9-NEXT: lfd f1, 0(r5) 1216; P9-NEXT: xsadddp f1, f1, f0 1217; P9-NEXT: stfd f1, 0(r5) 1218; P9-NEXT: blr 1219; 1220; P8-LABEL: test_v2f64_multiple_use: 1221; P8: # %bb.0: # %entry 1222; P8-NEXT: lfs f0, 0(r3) 1223; P8-NEXT: lfd f1, 0(r4) 1224; P8-NEXT: xsadddp f1, f1, f0 1225; P8-NEXT: stfd f1, 0(r4) 1226; P8-NEXT: lfd f1, 0(r5) 1227; P8-NEXT: xxspltd v2, vs0, 0 1228; P8-NEXT: xsadddp f0, f1, f0 1229; P8-NEXT: stfd f0, 0(r5) 1230; P8-NEXT: blr 1231; 1232; P7-LABEL: test_v2f64_multiple_use: 1233; P7: # %bb.0: # %entry 1234; P7-NEXT: lfs f0, 0(r3) 1235; P7-NEXT: lfd f1, 0(r4) 1236; P7-NEXT: xsadddp f1, f1, f0 1237; P7-NEXT: stfd f1, 0(r4) 1238; P7-NEXT: lfd f1, 0(r5) 1239; P7-NEXT: xxspltd v2, vs0, 0 1240; P7-NEXT: xsadddp f0, f1, f0 1241; P7-NEXT: stfd f0, 0(r5) 1242; P7-NEXT: blr 1243; 1244; P9-AIX32-LABEL: test_v2f64_multiple_use: 1245; P9-AIX32: # %bb.0: # %entry 1246; P9-AIX32-NEXT: lfs f0, 0(r3) 1247; P9-AIX32-NEXT: xxmrghd v2, vs0, vs0 1248; P9-AIX32-NEXT: lfd f1, 0(r4) 1249; P9-AIX32-NEXT: xsadddp f1, f1, f0 1250; P9-AIX32-NEXT: stfd f1, 0(r4) 1251; P9-AIX32-NEXT: lfd f1, 0(r5) 1252; P9-AIX32-NEXT: xsadddp f1, f1, f0 1253; P9-AIX32-NEXT: stfd f1, 0(r5) 1254; P9-AIX32-NEXT: blr 1255; 1256; P8-AIX32-LABEL: test_v2f64_multiple_use: 1257; P8-AIX32: # %bb.0: # %entry 1258; P8-AIX32-NEXT: lfs f0, 0(r3) 1259; P8-AIX32-NEXT: lfd f1, 0(r4) 1260; P8-AIX32-NEXT: xsadddp f1, f1, f0 1261; P8-AIX32-NEXT: stfd f1, 0(r4) 1262; P8-AIX32-NEXT: lfd f1, 0(r5) 1263; P8-AIX32-NEXT: xxmrghd v2, vs0, vs0 1264; 
P8-AIX32-NEXT: xsadddp f0, f1, f0 1265; P8-AIX32-NEXT: stfd f0, 0(r5) 1266; P8-AIX32-NEXT: blr 1267; 1268; P7-AIX32-LABEL: test_v2f64_multiple_use: 1269; P7-AIX32: # %bb.0: # %entry 1270; P7-AIX32-NEXT: lfs f0, 0(r3) 1271; P7-AIX32-NEXT: lfd f1, 0(r4) 1272; P7-AIX32-NEXT: xsadddp f1, f1, f0 1273; P7-AIX32-NEXT: stfd f1, 0(r4) 1274; P7-AIX32-NEXT: lfd f1, 0(r5) 1275; P7-AIX32-NEXT: xxmrghd v2, vs0, vs0 1276; P7-AIX32-NEXT: xsadddp f0, f1, f0 1277; P7-AIX32-NEXT: stfd f0, 0(r5) 1278; P7-AIX32-NEXT: blr 1279entry: 1280 %0 = load float, ptr %a, align 4 1281 %conv = fpext float %0 to double 1282 %1 = load double, ptr %b, align 8 1283 %add = fadd double %1, %conv 1284 store double %add, ptr %b, align 8 1285 %2 = load double, ptr %c, align 8 1286 %add2 = fadd double %2, %conv 1287 store double %add2, ptr %c, align 8 1288 %vecinit = insertelement <2 x double> undef, double %conv, i64 0 1289 %vecinit5 = shufflevector <2 x double> %vecinit, <2 x double> poison, <2 x i32> zeroinitializer 1290 ret <2 x double> %vecinit5 1291} 1292 1293define <4 x i32> @test_splatW(ptr %ptr) { 1294; P9-LABEL: test_splatW: 1295; P9: # %bb.0: # %entry 1296; P9-NEXT: lxvwsx v2, 0, r3 1297; P9-NEXT: blr 1298; 1299; P8-LABEL: test_splatW: 1300; P8: # %bb.0: # %entry 1301; P8-NEXT: lxvd2x vs0, 0, r3 1302; P8-NEXT: xxswapd v2, vs0 1303; P8-NEXT: xxspltw v2, v2, 3 1304; P8-NEXT: blr 1305; 1306; P7-LABEL: test_splatW: 1307; P7: # %bb.0: # %entry 1308; P7-NEXT: lxvw4x vs0, 0, r3 1309; P7-NEXT: xxspltw v2, vs0, 0 1310; P7-NEXT: blr 1311; 1312; P9-AIX32-LABEL: test_splatW: 1313; P9-AIX32: # %bb.0: # %entry 1314; P9-AIX32-NEXT: lxvwsx v2, 0, r3 1315; P9-AIX32-NEXT: blr 1316; 1317; P8-AIX32-LABEL: test_splatW: 1318; P8-AIX32: # %bb.0: # %entry 1319; P8-AIX32-NEXT: lxvw4x vs0, 0, r3 1320; P8-AIX32-NEXT: xxspltw v2, vs0, 0 1321; P8-AIX32-NEXT: blr 1322; 1323; P7-AIX32-LABEL: test_splatW: 1324; P7-AIX32: # %bb.0: # %entry 1325; P7-AIX32-NEXT: lxvw4x vs0, 0, r3 1326; P7-AIX32-NEXT: xxspltw v2, vs0, 0 1327; 
P7-AIX32-NEXT: blr 1328entry: 1329 %0 = load <8 x i16>, ptr %ptr, align 16 1330 %1 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1331 %2 = bitcast<8 x i16> %1 to <4 x i32> 1332 ret <4 x i32> %2 1333} 1334 1335define <4 x i32> @test_splatD(ptr %ptr) { 1336; P9-LABEL: test_splatD: 1337; P9: # %bb.0: # %entry 1338; P9-NEXT: lxvdsx v2, 0, r3 1339; P9-NEXT: blr 1340; 1341; P8-LABEL: test_splatD: 1342; P8: # %bb.0: # %entry 1343; P8-NEXT: lxvdsx v2, 0, r3 1344; P8-NEXT: blr 1345; 1346; P7-LABEL: test_splatD: 1347; P7: # %bb.0: # %entry 1348; P7-NEXT: lxvdsx v2, 0, r3 1349; P7-NEXT: blr 1350; 1351; P9-AIX32-LABEL: test_splatD: 1352; P9-AIX32: # %bb.0: # %entry 1353; P9-AIX32-NEXT: lxvdsx v2, 0, r3 1354; P9-AIX32-NEXT: blr 1355; 1356; P8-AIX32-LABEL: test_splatD: 1357; P8-AIX32: # %bb.0: # %entry 1358; P8-AIX32-NEXT: lxvdsx v2, 0, r3 1359; P8-AIX32-NEXT: blr 1360; 1361; P7-AIX32-LABEL: test_splatD: 1362; P7-AIX32: # %bb.0: # %entry 1363; P7-AIX32-NEXT: lxvdsx v2, 0, r3 1364; P7-AIX32-NEXT: blr 1365entry: 1366 %0 = load <8 x i16>, ptr %ptr, align 16 1367 %1 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1368 %2 = bitcast<8 x i16> %1 to <4 x i32> 1369 ret <4 x i32> %2 1370} 1371