; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE

; Signed remainder of a <4 x i16> by four different constants
; (95, -124, 98, -1003). For every prefix the expected code handles each
; lane with a mulhw by a per-lane constant, shifts, a mulli by the divisor
; and a final sub; no divide instruction appears in the checks.
define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
; P9LE-LABEL: fold_srem_vec_1:
; P9LE:       # %bb.0:
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    lis r4, -21386
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    ori r4, r4, 37253
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r4, r3, r4
; P9LE-NEXT:    add r4, r4, r3
; P9LE-NEXT:    srwi r5, r4, 31
; P9LE-NEXT:    srawi r4, r4, 6
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    mulli r4, r4, 95
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    lis r4, 31710
; P9LE-NEXT:    mtvsrd v3, r3
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    ori r4, r4, 63421
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r4, r3, r4
; P9LE-NEXT:    sub r4, r4, r3
; P9LE-NEXT:    srwi r5, r4, 31
; P9LE-NEXT:    srawi r4, r4, 6
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    mulli r4, r4, -124
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    lis r4, 21399
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 4
; P9LE-NEXT:    ori r4, r4, 33437
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r4, r3, r4
; P9LE-NEXT:    srwi r5, r4, 31
; P9LE-NEXT:    srawi r4, r4, 5
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    mulli r4, r4, 98
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    lis r4, -16728
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 6
; P9LE-NEXT:    ori r4, r4, 63249
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r4, r3, r4
; P9LE-NEXT:    srwi r5, r4, 31
; P9LE-NEXT:    srawi r4, r4, 8
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    mulli r4, r4, -1003
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v2, r3
; P9LE-NEXT:    vmrghh v2, v2, v4
; P9LE-NEXT:    xxmrglw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: fold_srem_vec_1:
; P9BE:       # %bb.0:
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    lis r4, 31710
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    ori r4, r4, 63421
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    mulhw r4, r3, r4
; P9BE-NEXT:    sub r4, r4, r3
; P9BE-NEXT:    srwi r5, r4, 31
; P9BE-NEXT:    srawi r4, r4, 6
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    mulli r4, r4, -124
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    lis r4, -21386
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    ori r4, r4, 37253
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    mulhw r4, r3, r4
; P9BE-NEXT:    add r4, r4, r3
; P9BE-NEXT:    srwi r5, r4, 31
; P9BE-NEXT:    srawi r4, r4, 6
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    mulli r4, r4, 95
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    lis r4, -16728
; P9BE-NEXT:    mtfprwz f1, r3
; P9BE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
; P9BE-NEXT:    ori r4, r4, 63249
; P9BE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
; P9BE-NEXT:    lxv vs2, 0(r3)
; P9BE-NEXT:    li r3, 6
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    xxperm vs0, vs1, vs2
; P9BE-NEXT:    mulhw r4, r3, r4
; P9BE-NEXT:    srwi r5, r4, 31
; P9BE-NEXT:    srawi r4, r4, 8
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    mulli r4, r4, -1003
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    lis r4, 21399
; P9BE-NEXT:    mtfprwz f1, r3
; P9BE-NEXT:    li r3, 4
; P9BE-NEXT:    ori r4, r4, 33437
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    mulhw r4, r3, r4
; P9BE-NEXT:    srwi r5, r4, 31
; P9BE-NEXT:    srawi r4, r4, 5
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    mulli r4, r4, 98
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    mtfprwz f3, r3
; P9BE-NEXT:    xxperm vs1, vs3, vs2
; P9BE-NEXT:    xxmrghw v2, vs0, vs1
; P9BE-NEXT:    blr
;
; P8LE-LABEL: fold_srem_vec_1:
; P8LE:       # %bb.0:
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    lis r4, 21399
; P8LE-NEXT:    lis r5, -16728
; P8LE-NEXT:    lis r6, -21386
; P8LE-NEXT:    mffprd r3, f0
; P8LE-NEXT:    ori r4, r4, 33437
; P8LE-NEXT:    ori r5, r5, 63249
; P8LE-NEXT:    ori r6, r6, 37253
; P8LE-NEXT:    rldicl r7, r3, 32, 48
; P8LE-NEXT:    rldicl r8, r3, 16, 48
; P8LE-NEXT:    clrldi r9, r3, 48
; P8LE-NEXT:    rldicl r3, r3, 48, 48
; P8LE-NEXT:    extsh r7, r7
; P8LE-NEXT:    extsh r8, r8
; P8LE-NEXT:    extsh r9, r9
; P8LE-NEXT:    extsh r3, r3
; P8LE-NEXT:    mulhw r4, r7, r4
; P8LE-NEXT:    mulhw r5, r8, r5
; P8LE-NEXT:    mulhw r6, r9, r6
; P8LE-NEXT:    srwi r10, r4, 31
; P8LE-NEXT:    srawi r4, r4, 5
; P8LE-NEXT:    add r6, r6, r9
; P8LE-NEXT:    add r4, r4, r10
; P8LE-NEXT:    srwi r10, r5, 31
; P8LE-NEXT:    srawi r5, r5, 8
; P8LE-NEXT:    mulli r4, r4, 98
; P8LE-NEXT:    add r5, r5, r10
; P8LE-NEXT:    srwi r10, r6, 31
; P8LE-NEXT:    srawi r6, r6, 6
; P8LE-NEXT:    add r6, r6, r10
; P8LE-NEXT:    mulli r5, r5, -1003
; P8LE-NEXT:    sub r4, r7, r4
; P8LE-NEXT:    mtvsrd v2, r4
; P8LE-NEXT:    mulli r4, r6, 95
; P8LE-NEXT:    sub r5, r8, r5
; P8LE-NEXT:    mtvsrd v3, r5
; P8LE-NEXT:    sub r4, r9, r4
; P8LE-NEXT:    mtvsrd v4, r4
; P8LE-NEXT:    lis r4, 31710
; P8LE-NEXT:    ori r4, r4, 63421
; P8LE-NEXT:    mulhw r4, r3, r4
; P8LE-NEXT:    sub r4, r4, r3
; P8LE-NEXT:    srwi r5, r4, 31
; P8LE-NEXT:    srawi r4, r4, 6
; P8LE-NEXT:    add r4, r4, r5
; P8LE-NEXT:    mulli r4, r4, -124
; P8LE-NEXT:    sub r3, r3, r4
; P8LE-NEXT:    vmrghh v2, v3, v2
; P8LE-NEXT:    mtvsrd v3, r3
; P8LE-NEXT:    vmrghh v3, v3, v4
; P8LE-NEXT:    xxmrglw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: fold_srem_vec_1:
; P8BE:       # %bb.0:
; P8BE-NEXT:    mfvsrd r3, v2
; P8BE-NEXT:    addis r6, r2, .LCPI0_0@toc@ha
; P8BE-NEXT:    lis r4, -16728
; P8BE-NEXT:    lis r5, 21399
; P8BE-NEXT:    lis r7, 31710
; P8BE-NEXT:    addi r6, r6, .LCPI0_0@toc@l
; P8BE-NEXT:    ori r4, r4, 63249
; P8BE-NEXT:    ori r5, r5, 33437
; P8BE-NEXT:    ori r7, r7, 63421
; P8BE-NEXT:    lxvw4x v2, 0, r6
; P8BE-NEXT:    clrldi r6, r3, 48
; P8BE-NEXT:    rldicl r8, r3, 48, 48
; P8BE-NEXT:    rldicl r9, r3, 32, 48
; P8BE-NEXT:    rldicl r3, r3, 16, 48
; P8BE-NEXT:    extsh r6, r6
; P8BE-NEXT:    extsh r8, r8
; P8BE-NEXT:    extsh r9, r9
; P8BE-NEXT:    extsh r3, r3
; P8BE-NEXT:    mulhw r4, r6, r4
; P8BE-NEXT:    mulhw r5, r8, r5
; P8BE-NEXT:    mulhw r7, r9, r7
; P8BE-NEXT:    srwi r10, r4, 31
; P8BE-NEXT:    srawi r4, r4, 8
; P8BE-NEXT:    sub r7, r7, r9
; P8BE-NEXT:    add r4, r4, r10
; P8BE-NEXT:    srwi r10, r5, 31
; P8BE-NEXT:    srawi r5, r5, 5
; P8BE-NEXT:    mulli r4, r4, -1003
; P8BE-NEXT:    add r5, r5, r10
; P8BE-NEXT:    srwi r10, r7, 31
; P8BE-NEXT:    srawi r7, r7, 6
; P8BE-NEXT:    add r7, r7, r10
; P8BE-NEXT:    mulli r5, r5, 98
; P8BE-NEXT:    sub r4, r6, r4
; P8BE-NEXT:    mtvsrwz v3, r4
; P8BE-NEXT:    mulli r4, r7, -124
; P8BE-NEXT:    sub r5, r8, r5
; P8BE-NEXT:    mtvsrwz v4, r5
; P8BE-NEXT:    sub r4, r9, r4
; P8BE-NEXT:    mtvsrwz v5, r4
; P8BE-NEXT:    lis r4, -21386
; P8BE-NEXT:    ori r4, r4, 37253
; P8BE-NEXT:    mulhw r4, r3, r4
; P8BE-NEXT:    add r4, r4, r3
; P8BE-NEXT:    srwi r5, r4, 31
; P8BE-NEXT:    srawi r4, r4, 6
; P8BE-NEXT:    add r4, r4, r5
; P8BE-NEXT:    mulli r4, r4, 95
; P8BE-NEXT:    sub r3, r3, r4
; P8BE-NEXT:    vperm v3, v4, v3, v2
; P8BE-NEXT:    mtvsrwz v4, r3
; P8BE-NEXT:    vperm v2, v4, v5, v2
; P8BE-NEXT:    xxmrghw v2, v2, v3
; P8BE-NEXT:    blr
  %1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
  ret <4 x i16> %1
}

; Signed remainder of a <4 x i16> by the same constant, 95, in every lane.
; For every prefix the expected code builds the multiplier once
; (lis -21386 / ori 37253) and reuses it for all four lanes.
define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
; P9LE-LABEL: fold_srem_vec_2:
; P9LE:       # %bb.0:
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    lis r4, -21386
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    ori r4, r4, 37253
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r5, r3, r4
; P9LE-NEXT:    add r5, r5, r3
; P9LE-NEXT:    srwi r6, r5, 31
; P9LE-NEXT:    srawi r5, r5, 6
; P9LE-NEXT:    add r5, r5, r6
; P9LE-NEXT:    mulli r5, r5, 95
; P9LE-NEXT:    sub r3, r3, r5
; P9LE-NEXT:    mtvsrd v3, r3
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r5, r3, r4
; P9LE-NEXT:    add r5, r5, r3
; P9LE-NEXT:    srwi r6, r5, 31
; P9LE-NEXT:    srawi r5, r5, 6
; P9LE-NEXT:    add r5, r5, r6
; P9LE-NEXT:    mulli r5, r5, 95
; P9LE-NEXT:    sub r3, r3, r5
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 4
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r5, r3, r4
; P9LE-NEXT:    add r5, r5, r3
; P9LE-NEXT:    srwi r6, r5, 31
; P9LE-NEXT:    srawi r5, r5, 6
; P9LE-NEXT:    add r5, r5, r6
; P9LE-NEXT:    mulli r5, r5, 95
; P9LE-NEXT:    sub r3, r3, r5
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 6
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r4, r3, r4
; P9LE-NEXT:    add r4, r4, r3
; P9LE-NEXT:    srwi r5, r4, 31
; P9LE-NEXT:    srawi r4, r4, 6
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    mulli r4, r4, 95
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v2, r3
; P9LE-NEXT:    vmrghh v2, v2, v4
; P9LE-NEXT:    xxmrglw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: fold_srem_vec_2:
; P9BE:       # %bb.0:
; P9BE-NEXT:    li r3, 6
; P9BE-NEXT:    lis r4, -21386
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    ori r4, r4, 37253
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    mulhw r5, r3, r4
; P9BE-NEXT:    add r5, r5, r3
; P9BE-NEXT:    srwi r6, r5, 31
; P9BE-NEXT:    srawi r5, r5, 6
; P9BE-NEXT:    add r5, r5, r6
; P9BE-NEXT:    mulli r5, r5, 95
; P9BE-NEXT:    sub r3, r3, r5
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    li r3, 4
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    mulhw r5, r3, r4
; P9BE-NEXT:    add r5, r5, r3
; P9BE-NEXT:    srwi r6, r5, 31
; P9BE-NEXT:    srawi r5, r5, 6
; P9BE-NEXT:    add r5, r5, r6
; P9BE-NEXT:    mulli r5, r5, 95
; P9BE-NEXT:    sub r3, r3, r5
; P9BE-NEXT:    mtfprwz f1, r3
; P9BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
; P9BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
; P9BE-NEXT:    lxv vs2, 0(r3)
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    xxperm vs0, vs1, vs2
; P9BE-NEXT:    mulhw r5, r3, r4
; P9BE-NEXT:    add r5, r5, r3
; P9BE-NEXT:    srwi r6, r5, 31
; P9BE-NEXT:    srawi r5, r5, 6
; P9BE-NEXT:    add r5, r5, r6
; P9BE-NEXT:    mulli r5, r5, 95
; P9BE-NEXT:    sub r3, r3, r5
; P9BE-NEXT:    mtfprwz f1, r3
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    mulhw r4, r3, r4
; P9BE-NEXT:    add r4, r4, r3
; P9BE-NEXT:    srwi r5, r4, 31
; P9BE-NEXT:    srawi r4, r4, 6
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    mulli r4, r4, 95
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    mtfprwz f3, r3
; P9BE-NEXT:    xxperm vs1, vs3, vs2
; P9BE-NEXT:    xxmrghw v2, vs1, vs0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: fold_srem_vec_2:
; P8LE:       # %bb.0:
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    lis r4, -21386
; P8LE-NEXT:    mffprd r3, f0
; P8LE-NEXT:    ori r4, r4, 37253
; P8LE-NEXT:    clrldi r5, r3, 48
; P8LE-NEXT:    rldicl r6, r3, 48, 48
; P8LE-NEXT:    rldicl r7, r3, 32, 48
; P8LE-NEXT:    rldicl r3, r3, 16, 48
; P8LE-NEXT:    extsh r5, r5
; P8LE-NEXT:    extsh r6, r6
; P8LE-NEXT:    extsh r7, r7
; P8LE-NEXT:    extsh r3, r3
; P8LE-NEXT:    mulhw r8, r5, r4
; P8LE-NEXT:    mulhw r9, r6, r4
; P8LE-NEXT:    mulhw r10, r7, r4
; P8LE-NEXT:    mulhw r4, r3, r4
; P8LE-NEXT:    add r8, r8, r5
; P8LE-NEXT:    add r9, r9, r6
; P8LE-NEXT:    add r10, r10, r7
; P8LE-NEXT:    add r4, r4, r3
; P8LE-NEXT:    srwi r11, r8, 31
; P8LE-NEXT:    srawi r8, r8, 6
; P8LE-NEXT:    add r8, r8, r11
; P8LE-NEXT:    srwi r11, r9, 31
; P8LE-NEXT:    srawi r9, r9, 6
; P8LE-NEXT:    mulli r8, r8, 95
; P8LE-NEXT:    add r9, r9, r11
; P8LE-NEXT:    srwi r11, r10, 31
; P8LE-NEXT:    srawi r10, r10, 6
; P8LE-NEXT:    add r10, r10, r11
; P8LE-NEXT:    srwi r11, r4, 31
; P8LE-NEXT:    srawi r4, r4, 6
; P8LE-NEXT:    add r4, r4, r11
; P8LE-NEXT:    sub r5, r5, r8
; P8LE-NEXT:    mulli r8, r9, 95
; P8LE-NEXT:    mulli r4, r4, 95
; P8LE-NEXT:    mtvsrd v2, r5
; P8LE-NEXT:    sub r6, r6, r8
; P8LE-NEXT:    mulli r8, r10, 95
; P8LE-NEXT:    sub r3, r3, r4
; P8LE-NEXT:    mtvsrd v3, r6
; P8LE-NEXT:    sub r7, r7, r8
; P8LE-NEXT:    mtvsrd v4, r7
; P8LE-NEXT:    vmrghh v2, v3, v2
; P8LE-NEXT:    mtvsrd v3, r3
; P8LE-NEXT:    vmrghh v3, v3, v4
; P8LE-NEXT:    xxmrglw v2, v3, v2
; P8LE-NEXT:    blr
;
; P8BE-LABEL: fold_srem_vec_2:
; P8BE:       # %bb.0:
; P8BE-NEXT:    mfvsrd r3, v2
; P8BE-NEXT:    lis r4, -21386
; P8BE-NEXT:    ori r4, r4, 37253
; P8BE-NEXT:    clrldi r5, r3, 48
; P8BE-NEXT:    rldicl r6, r3, 48, 48
; P8BE-NEXT:    rldicl r7, r3, 32, 48
; P8BE-NEXT:    rldicl r3, r3, 16, 48
; P8BE-NEXT:    extsh r5, r5
; P8BE-NEXT:    extsh r6, r6
; P8BE-NEXT:    extsh r7, r7
; P8BE-NEXT:    extsh r3, r3
; P8BE-NEXT:    mulhw r8, r5, r4
; P8BE-NEXT:    mulhw r9, r6, r4
; P8BE-NEXT:    mulhw r10, r7, r4
; P8BE-NEXT:    mulhw r4, r3, r4
; P8BE-NEXT:    add r8, r8, r5
; P8BE-NEXT:    add r9, r9, r6
; P8BE-NEXT:    add r10, r10, r7
; P8BE-NEXT:    add r4, r4, r3
; P8BE-NEXT:    srwi r11, r8, 31
; P8BE-NEXT:    srawi r8, r8, 6
; P8BE-NEXT:    add r8, r8, r11
; P8BE-NEXT:    srwi r11, r9, 31
; P8BE-NEXT:    srawi r9, r9, 6
; P8BE-NEXT:    mulli r8, r8, 95
; P8BE-NEXT:    add r9, r9, r11
; P8BE-NEXT:    srwi r11, r10, 31
; P8BE-NEXT:    srawi r10, r10, 6
; P8BE-NEXT:    add r10, r10, r11
; P8BE-NEXT:    srwi r11, r4, 31
; P8BE-NEXT:    srawi r4, r4, 6
; P8BE-NEXT:    add r4, r4, r11
; P8BE-NEXT:    addis r11, r2, .LCPI1_0@toc@ha
; P8BE-NEXT:    sub r5, r5, r8
; P8BE-NEXT:    mulli r8, r9, 95
; P8BE-NEXT:    addi r11, r11, .LCPI1_0@toc@l
; P8BE-NEXT:    mulli r4, r4, 95
; P8BE-NEXT:    mtvsrwz v3, r5
; P8BE-NEXT:    lxvw4x v2, 0, r11
; P8BE-NEXT:    sub r6, r6, r8
; P8BE-NEXT:    mulli r8, r10, 95
; P8BE-NEXT:    sub r3, r3, r4
; P8BE-NEXT:    mtvsrwz v4, r6
; P8BE-NEXT:    sub r7, r7, r8
; P8BE-NEXT:    mtvsrwz v5, r7
; P8BE-NEXT:    vperm v3, v4, v3, v2
; P8BE-NEXT:    mtvsrwz v4, r3
; P8BE-NEXT:    vperm v2, v4, v5, v2
; P8BE-NEXT:    xxmrghw v2, v2, v3
; P8BE-NEXT:    blr
  %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  ret <4 x i16> %1
}


; Don't fold if we can combine srem with sdiv.
; The IR computes both srem and sdiv of %x by 95 and adds the results. The
; expected code keeps each lane's quotient in a register, derives the
; remainder from it with mulli/sub, and moves both into vectors before the
; final vadduhm.
define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
; P9LE-LABEL: combine_srem_sdiv:
; P9LE:       # %bb.0:
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    lis r4, -21386
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    ori r4, r4, 37253
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r5, r3, r4
; P9LE-NEXT:    add r5, r5, r3
; P9LE-NEXT:    srwi r6, r5, 31
; P9LE-NEXT:    srawi r5, r5, 6
; P9LE-NEXT:    add r5, r5, r6
; P9LE-NEXT:    mulli r6, r5, 95
; P9LE-NEXT:    sub r3, r3, r6
; P9LE-NEXT:    mtvsrd v3, r3
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    extsh r6, r3
; P9LE-NEXT:    mulhw r7, r6, r4
; P9LE-NEXT:    add r6, r7, r6
; P9LE-NEXT:    srwi r7, r6, 31
; P9LE-NEXT:    srawi r6, r6, 6
; P9LE-NEXT:    add r6, r6, r7
; P9LE-NEXT:    mulli r7, r6, 95
; P9LE-NEXT:    sub r3, r3, r7
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 4
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    extsh r7, r3
; P9LE-NEXT:    mulhw r8, r7, r4
; P9LE-NEXT:    add r7, r8, r7
; P9LE-NEXT:    srwi r8, r7, 31
; P9LE-NEXT:    srawi r7, r7, 6
; P9LE-NEXT:    add r7, r7, r8
; P9LE-NEXT:    mulli r8, r7, 95
; P9LE-NEXT:    sub r3, r3, r8
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 6
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    extsh r8, r3
; P9LE-NEXT:    mulhw r4, r8, r4
; P9LE-NEXT:    add r4, r4, r8
; P9LE-NEXT:    srwi r8, r4, 31
; P9LE-NEXT:    srawi r4, r4, 6
; P9LE-NEXT:    add r4, r4, r8
; P9LE-NEXT:    mulli r8, r4, 95
; P9LE-NEXT:    mtvsrd v5, r4
; P9LE-NEXT:    sub r3, r3, r8
; P9LE-NEXT:    mtvsrd v2, r3
; P9LE-NEXT:    vmrghh v2, v2, v4
; P9LE-NEXT:    mtvsrd v4, r6
; P9LE-NEXT:    xxmrglw v2, v2, v3
; P9LE-NEXT:    mtvsrd v3, r5
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    mtvsrd v4, r7
; P9LE-NEXT:    vmrghh v4, v5, v4
; P9LE-NEXT:    xxmrglw v3, v4, v3
; P9LE-NEXT:    vadduhm v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: combine_srem_sdiv:
; P9BE:       # %bb.0:
; P9BE-NEXT:    li r3, 6
; P9BE-NEXT:    lis r5, -21386
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    ori r5, r5, 37253
; P9BE-NEXT:    extsh r4, r3
; P9BE-NEXT:    mulhw r6, r4, r5
; P9BE-NEXT:    add r4, r6, r4
; P9BE-NEXT:    srwi r6, r4, 31
; P9BE-NEXT:    srawi r4, r4, 6
; P9BE-NEXT:    add r4, r4, r6
; P9BE-NEXT:    mulli r6, r4, 95
; P9BE-NEXT:    sub r3, r3, r6
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    li r3, 4
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r6, r3
; P9BE-NEXT:    mulhw r7, r6, r5
; P9BE-NEXT:    add r6, r7, r6
; P9BE-NEXT:    srwi r7, r6, 31
; P9BE-NEXT:    srawi r6, r6, 6
; P9BE-NEXT:    add r6, r6, r7
; P9BE-NEXT:    mulli r7, r6, 95
; P9BE-NEXT:    sub r3, r3, r7
; P9BE-NEXT:    mtfprwz f1, r3
; P9BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; P9BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
; P9BE-NEXT:    lxv vs2, 0(r3)
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r7, r3
; P9BE-NEXT:    xxperm vs0, vs1, vs2
; P9BE-NEXT:    mulhw r8, r7, r5
; P9BE-NEXT:    add r7, r8, r7
; P9BE-NEXT:    srwi r8, r7, 31
; P9BE-NEXT:    srawi r7, r7, 6
; P9BE-NEXT:    add r7, r7, r8
; P9BE-NEXT:    mulli r8, r7, 95
; P9BE-NEXT:    sub r3, r3, r8
; P9BE-NEXT:    mtfprwz f1, r3
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    mulhw r5, r3, r5
; P9BE-NEXT:    add r5, r5, r3
; P9BE-NEXT:    srwi r8, r5, 31
; P9BE-NEXT:    srawi r5, r5, 6
; P9BE-NEXT:    add r5, r5, r8
; P9BE-NEXT:    mulli r8, r5, 95
; P9BE-NEXT:    sub r3, r3, r8
; P9BE-NEXT:    mtfprwz f3, r3
; P9BE-NEXT:    xxperm vs1, vs3, vs2
; P9BE-NEXT:    mtfprwz f3, r5
; P9BE-NEXT:    xxmrghw v2, vs1, vs0
; P9BE-NEXT:    mtfprwz f0, r4
; P9BE-NEXT:    mtfprwz f1, r6
; P9BE-NEXT:    xxperm vs0, vs1, vs2
; P9BE-NEXT:    mtfprwz f1, r7
; P9BE-NEXT:    xxperm vs1, vs3, vs2
; P9BE-NEXT:    xxmrghw v3, vs1, vs0
; P9BE-NEXT:    vadduhm v2, v2, v3
; P9BE-NEXT:    blr
;
; P8LE-LABEL: combine_srem_sdiv:
; P8LE:       # %bb.0:
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    lis r4, -21386
; P8LE-NEXT:    mffprd r3, f0
; P8LE-NEXT:    ori r4, r4, 37253
; P8LE-NEXT:    rldicl r6, r3, 48, 48
; P8LE-NEXT:    rldicl r7, r3, 32, 48
; P8LE-NEXT:    clrldi r5, r3, 48
; P8LE-NEXT:    rldicl r3, r3, 16, 48
; P8LE-NEXT:    extsh r8, r6
; P8LE-NEXT:    extsh r9, r7
; P8LE-NEXT:    extsh r5, r5
; P8LE-NEXT:    extsh r10, r3
; P8LE-NEXT:    mulhw r11, r8, r4
; P8LE-NEXT:    add r8, r11, r8
; P8LE-NEXT:    mulhw r11, r9, r4
; P8LE-NEXT:    add r9, r11, r9
; P8LE-NEXT:    mulhw r11, r5, r4
; P8LE-NEXT:    mulhw r4, r10, r4
; P8LE-NEXT:    add r11, r11, r5
; P8LE-NEXT:    add r4, r4, r10
; P8LE-NEXT:    srwi r10, r11, 31
; P8LE-NEXT:    srawi r11, r11, 6
; P8LE-NEXT:    add r10, r11, r10
; P8LE-NEXT:    srwi r11, r8, 31
; P8LE-NEXT:    srawi r8, r8, 6
; P8LE-NEXT:    add r8, r8, r11
; P8LE-NEXT:    srwi r11, r9, 31
; P8LE-NEXT:    srawi r9, r9, 6
; P8LE-NEXT:    mtvsrd v2, r10
; P8LE-NEXT:    add r9, r9, r11
; P8LE-NEXT:    srwi r11, r4, 31
; P8LE-NEXT:    srawi r4, r4, 6
; P8LE-NEXT:    mtvsrd v3, r8
; P8LE-NEXT:    add r4, r4, r11
; P8LE-NEXT:    mulli r11, r10, 95
; P8LE-NEXT:    sub r5, r5, r11
; P8LE-NEXT:    mulli r11, r8, 95
; P8LE-NEXT:    mtvsrd v4, r5
; P8LE-NEXT:    sub r6, r6, r11
; P8LE-NEXT:    mulli r11, r9, 95
; P8LE-NEXT:    mtvsrd v5, r6
; P8LE-NEXT:    sub r7, r7, r11
; P8LE-NEXT:    mulli r11, r4, 95
; P8LE-NEXT:    mtvsrd v0, r7
; P8LE-NEXT:    sub r3, r3, r11
; P8LE-NEXT:    vmrghh v2, v3, v2
; P8LE-NEXT:    mtvsrd v3, r9
; P8LE-NEXT:    vmrghh v4, v5, v4
; P8LE-NEXT:    mtvsrd v5, r3
; P8LE-NEXT:    vmrghh v5, v5, v0
; P8LE-NEXT:    mtvsrd v0, r4
; P8LE-NEXT:    xxmrglw v4, v5, v4
; P8LE-NEXT:    vmrghh v3, v0, v3
; P8LE-NEXT:    xxmrglw v2, v3, v2
; P8LE-NEXT:    vadduhm v2, v4, v2
; P8LE-NEXT:    blr
;
; P8BE-LABEL: combine_srem_sdiv:
; P8BE:       # %bb.0:
; P8BE-NEXT:    mfvsrd r4, v2
; P8BE-NEXT:    lis r5, -21386
; P8BE-NEXT:    ori r5, r5, 37253
; P8BE-NEXT:    clrldi r3, r4, 48
; P8BE-NEXT:    rldicl r6, r4, 48, 48
; P8BE-NEXT:    rldicl r7, r4, 32, 48
; P8BE-NEXT:    rldicl r4, r4, 16, 48
; P8BE-NEXT:    extsh r8, r3
; P8BE-NEXT:    extsh r9, r6
; P8BE-NEXT:    extsh r10, r7
; P8BE-NEXT:    extsh r4, r4
; P8BE-NEXT:    mulhw r11, r8, r5
; P8BE-NEXT:    add r8, r11, r8
; P8BE-NEXT:    mulhw r11, r9, r5
; P8BE-NEXT:    add r9, r11, r9
; P8BE-NEXT:    mulhw r11, r10, r5
; P8BE-NEXT:    mulhw r5, r4, r5
; P8BE-NEXT:    add r10, r11, r10
; P8BE-NEXT:    srwi r11, r8, 31
; P8BE-NEXT:    srawi r8, r8, 6
; P8BE-NEXT:    add r5, r5, r4
; P8BE-NEXT:    add r8, r8, r11
; P8BE-NEXT:    srwi r11, r9, 31
; P8BE-NEXT:    srawi r9, r9, 6
; P8BE-NEXT:    add r9, r9, r11
; P8BE-NEXT:    srwi r11, r10, 31
; P8BE-NEXT:    srawi r10, r10, 6
; P8BE-NEXT:    mtvsrwz v3, r8
; P8BE-NEXT:    add r10, r10, r11
; P8BE-NEXT:    srwi r11, r5, 31
; P8BE-NEXT:    srawi r5, r5, 6
; P8BE-NEXT:    mtvsrwz v4, r9
; P8BE-NEXT:    add r5, r5, r11
; P8BE-NEXT:    mulli r11, r8, 95
; P8BE-NEXT:    sub r3, r3, r11
; P8BE-NEXT:    mulli r11, r9, 95
; P8BE-NEXT:    mtvsrwz v5, r3
; P8BE-NEXT:    sub r6, r6, r11
; P8BE-NEXT:    mulli r11, r10, 95
; P8BE-NEXT:    mtvsrwz v0, r6
; P8BE-NEXT:    sub r7, r7, r11
; P8BE-NEXT:    mulli r11, r5, 95
; P8BE-NEXT:    mtvsrwz v1, r7
; P8BE-NEXT:    sub r4, r4, r11
; P8BE-NEXT:    addis r11, r2, .LCPI2_0@toc@ha
; P8BE-NEXT:    addi r11, r11, .LCPI2_0@toc@l
; P8BE-NEXT:    lxvw4x v2, 0, r11
; P8BE-NEXT:    vperm v5, v0, v5, v2
; P8BE-NEXT:    mtvsrwz v0, r4
; P8BE-NEXT:    vperm v3, v4, v3, v2
; P8BE-NEXT:    mtvsrwz v4, r10
; P8BE-NEXT:    vperm v0, v0, v1, v2
; P8BE-NEXT:    mtvsrwz v1, r5
; P8BE-NEXT:    vperm v2, v1, v4, v2
; P8BE-NEXT:    xxmrghw v4, v0, v5
; P8BE-NEXT:    xxmrghw v2, v2, v3
; P8BE-NEXT:    vadduhm v2, v4, v2
; P8BE-NEXT:    blr
  %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %3 = add <4 x i16> %1, %2
  ret <4 x i16> %3
}

; Don't fold for divisors that are a power of two.
; Divisors are 64, 32, 8 and 95. In the expected code the three
; power-of-two lanes use a srawi/addze/slwi/sub sequence with shift amounts
; 6, 5 and 3; only the lane divided by 95 uses the mulhw-based sequence.
define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
; P9LE-LABEL: dont_fold_srem_power_of_two:
; P9LE:       # %bb.0:
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    srawi r4, r3, 6
; P9LE-NEXT:    addze r4, r4
; P9LE-NEXT:    slwi r4, r4, 6
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v3, r3
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    srawi r4, r3, 5
; P9LE-NEXT:    addze r4, r4
; P9LE-NEXT:    slwi r4, r4, 5
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    lis r4, -21386
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 6
; P9LE-NEXT:    ori r4, r4, 37253
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    vmrghh v3, v4, v3
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r4, r3, r4
; P9LE-NEXT:    add r4, r4, r3
; P9LE-NEXT:    srwi r5, r4, 31
; P9LE-NEXT:    srawi r4, r4, 6
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    mulli r4, r4, 95
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 4
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    srawi r4, r3, 3
; P9LE-NEXT:    addze r4, r4
; P9LE-NEXT:    slwi r4, r4, 3
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v2, r3
; P9LE-NEXT:    vmrghh v2, v4, v2
; P9LE-NEXT:    xxmrglw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: dont_fold_srem_power_of_two:
; P9BE:       # %bb.0:
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    srawi r4, r3, 5
; P9BE-NEXT:    addze r4, r4
; P9BE-NEXT:    slwi r4, r4, 5
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    srawi r4, r3, 6
; P9BE-NEXT:    addze r4, r4
; P9BE-NEXT:    slwi r4, r4, 6
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    lis r4, -21386
; P9BE-NEXT:    mtfprwz f1, r3
; P9BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
; P9BE-NEXT:    ori r4, r4, 37253
; P9BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
; P9BE-NEXT:    lxv vs2, 0(r3)
; P9BE-NEXT:    li r3, 6
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    xxperm vs0, vs1, vs2
; P9BE-NEXT:    mulhw r4, r3, r4
; P9BE-NEXT:    add r4, r4, r3
; P9BE-NEXT:    srwi r5, r4, 31
; P9BE-NEXT:    srawi r4, r4, 6
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    mulli r4, r4, 95
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    mtfprwz f1, r3
; P9BE-NEXT:    li r3, 4
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    srawi r4, r3, 3
; P9BE-NEXT:    addze r4, r4
; P9BE-NEXT:    slwi r4, r4, 3
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    mtfprwz f3, r3
; P9BE-NEXT:    xxperm vs1, vs3, vs2
; P9BE-NEXT:    xxmrghw v2, vs0, vs1
; P9BE-NEXT:    blr
;
; P8LE-LABEL: dont_fold_srem_power_of_two:
; P8LE:       # %bb.0:
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    mffprd r3, f0
; P8LE-NEXT:    clrldi r4, r3, 48
; P8LE-NEXT:    extsh r4, r4
; P8LE-NEXT:    srawi r5, r4, 6
; P8LE-NEXT:    addze r5, r5
; P8LE-NEXT:    slwi r5, r5, 6
; P8LE-NEXT:    sub r4, r4, r5
; P8LE-NEXT:    mtvsrd v2, r4
; P8LE-NEXT:    rldicl r4, r3, 48, 48
; P8LE-NEXT:    extsh r4, r4
; P8LE-NEXT:    srawi r5, r4, 5
; P8LE-NEXT:    addze r5, r5
; P8LE-NEXT:    slwi r5, r5, 5
; P8LE-NEXT:    sub r4, r4, r5
; P8LE-NEXT:    lis r5, -21386
; P8LE-NEXT:    mtvsrd v3, r4
; P8LE-NEXT:    rldicl r4, r3, 16, 48
; P8LE-NEXT:    ori r5, r5, 37253
; P8LE-NEXT:    rldicl r3, r3, 32, 48
; P8LE-NEXT:    extsh r4, r4
; P8LE-NEXT:    extsh r3, r3
; P8LE-NEXT:    mulhw r5, r4, r5
; P8LE-NEXT:    add r5, r5, r4
; P8LE-NEXT:    srwi r6, r5, 31
; P8LE-NEXT:    srawi r5, r5, 6
; P8LE-NEXT:    add r5, r5, r6
; P8LE-NEXT:    mulli r5, r5, 95
; P8LE-NEXT:    sub r4, r4, r5
; P8LE-NEXT:    vmrghh v2, v3, v2
; P8LE-NEXT:    mtvsrd v3, r4
; P8LE-NEXT:    srawi r4, r3, 3
; P8LE-NEXT:    addze r4, r4
; P8LE-NEXT:    slwi r4, r4, 3
; P8LE-NEXT:    sub r3, r3, r4
; P8LE-NEXT:    mtvsrd v4, r3
; P8LE-NEXT:    vmrghh v3, v3, v4
; P8LE-NEXT:    xxmrglw v2, v3, v2
; P8LE-NEXT:    blr
;
; P8BE-LABEL: dont_fold_srem_power_of_two:
; P8BE:       # %bb.0:
; P8BE-NEXT:    mfvsrd r3, v2
; P8BE-NEXT:    rldicl r4, r3, 32, 48
; P8BE-NEXT:    extsh r4, r4
; P8BE-NEXT:    srawi r5, r4, 5
; P8BE-NEXT:    addze r5, r5
; P8BE-NEXT:    slwi r5, r5, 5
; P8BE-NEXT:    sub r4, r4, r5
; P8BE-NEXT:    mtvsrwz v2, r4
; P8BE-NEXT:    rldicl r4, r3, 16, 48
; P8BE-NEXT:    extsh r4, r4
; P8BE-NEXT:    srawi r5, r4, 6
; P8BE-NEXT:    addze r5, r5
; P8BE-NEXT:    slwi r5, r5, 6
; P8BE-NEXT:    sub r4, r4, r5
; P8BE-NEXT:    lis r5, -21386
; P8BE-NEXT:    mtvsrwz v3, r4
; P8BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
; P8BE-NEXT:    ori r5, r5, 37253
; P8BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
; P8BE-NEXT:    lxvw4x v4, 0, r4
; P8BE-NEXT:    clrldi r4, r3, 48
; P8BE-NEXT:    rldicl r3, r3, 48, 48
; P8BE-NEXT:    extsh r4, r4
; P8BE-NEXT:    extsh r3, r3
; P8BE-NEXT:    mulhw r5, r4, r5
; P8BE-NEXT:    add r5, r5, r4
; P8BE-NEXT:    srwi r6, r5, 31
; P8BE-NEXT:    srawi r5, r5, 6
; P8BE-NEXT:    add r5, r5, r6
; P8BE-NEXT:    mulli r5, r5, 95
; P8BE-NEXT:    sub r4, r4, r5
; P8BE-NEXT:    vperm v2, v3, v2, v4
; P8BE-NEXT:    mtvsrwz v3, r4
; P8BE-NEXT:    srawi r4, r3, 3
; P8BE-NEXT:    addze r4, r4
; P8BE-NEXT:    slwi r4, r4, 3
; P8BE-NEXT:    sub r3, r3, r4
; P8BE-NEXT:    mtvsrwz v5, r3
; P8BE-NEXT:    vperm v3, v5, v3, v4
; P8BE-NEXT:    xxmrghw v2, v2, v3
; P8BE-NEXT:    blr
  %1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
  ret <4 x i16> %1
}

; Don't fold if the divisor is one.
; Divisors are 1, 654, 23 and 5423. In the expected code the lane divided
; by 1 is never extracted from the input; a literal zero (li r3, 0) is
; inserted for it instead. The other three lanes use the mulhw-based
; sequence with per-lane constants.
define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; P9LE-LABEL: dont_fold_srem_one:
; P9LE:       # %bb.0:
; P9LE-NEXT:    li r3, 2
; P9LE-NEXT:    lis r4, -14230
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    ori r4, r4, 30865
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r4, r3, r4
; P9LE-NEXT:    add r4, r4, r3
; P9LE-NEXT:    srwi r5, r4, 31
; P9LE-NEXT:    srawi r4, r4, 9
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    mulli r4, r4, 654
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    lis r4, -19946
; P9LE-NEXT:    mtvsrd v3, r3
; P9LE-NEXT:    li r3, 0
; P9LE-NEXT:    ori r4, r4, 17097
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 4
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    vmrghh v3, v3, v4
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r4, r3, r4
; P9LE-NEXT:    add r4, r4, r3
; P9LE-NEXT:    srwi r5, r4, 31
; P9LE-NEXT:    srawi r4, r4, 4
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    mulli r4, r4, 23
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    lis r4, 24749
; P9LE-NEXT:    mtvsrd v4, r3
; P9LE-NEXT:    li r3, 6
; P9LE-NEXT:    ori r4, r4, 47143
; P9LE-NEXT:    vextuhrx r3, r3, v2
; P9LE-NEXT:    extsh r3, r3
; P9LE-NEXT:    mulhw r4, r3, r4
; P9LE-NEXT:    srwi r5, r4, 31
; P9LE-NEXT:    srawi r4, r4, 11
; P9LE-NEXT:    add r4, r4, r5
; P9LE-NEXT:    mulli r4, r4, 5423
; P9LE-NEXT:    sub r3, r3, r4
; P9LE-NEXT:    mtvsrd v2, r3
; P9LE-NEXT:    vmrghh v2, v2, v4
; P9LE-NEXT:    xxmrglw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: dont_fold_srem_one:
; P9BE:       # %bb.0:
; P9BE-NEXT:    li r3, 4
; P9BE-NEXT:    lis r4, -19946
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    ori r4, r4, 17097
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    mulhw r4, r3, r4
; P9BE-NEXT:    add r4, r4, r3
; P9BE-NEXT:    srwi r5, r4, 31
; P9BE-NEXT:    srawi r4, r4, 4
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    mulli r4, r4, 23
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    lis r4, 24749
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    li r3, 6
; P9BE-NEXT:    ori r4, r4, 47143
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    mulhw r4, r3, r4
; P9BE-NEXT:    srwi r5, r4, 31
; P9BE-NEXT:    srawi r4, r4, 11
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    mulli r4, r4, 5423
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    lis r4, -14230
; P9BE-NEXT:    mtfprwz f1, r3
; P9BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
; P9BE-NEXT:    ori r4, r4, 30865
; P9BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
; P9BE-NEXT:    lxv vs2, 0(r3)
; P9BE-NEXT:    li r3, 2
; P9BE-NEXT:    vextuhlx r3, r3, v2
; P9BE-NEXT:    extsh r3, r3
; P9BE-NEXT:    xxperm vs1, vs0, vs2
; P9BE-NEXT:    mulhw r4, r3, r4
; P9BE-NEXT:    add r4, r4, r3
; P9BE-NEXT:    srwi r5, r4, 31
; P9BE-NEXT:    srawi r4, r4, 9
; P9BE-NEXT:    add r4, r4, r5
; P9BE-NEXT:    mulli r4, r4, 654
; P9BE-NEXT:    sub r3, r3, r4
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    li r3, 0
; P9BE-NEXT:    mtfprwz f3, r3
; P9BE-NEXT:    xxperm vs0, vs3, vs2
; P9BE-NEXT:    xxmrghw v2, vs0, vs1
; P9BE-NEXT:    blr
;
; P8LE-LABEL: dont_fold_srem_one:
; P8LE:       # %bb.0:
; P8LE-NEXT:    xxswapd vs0, v2
; P8LE-NEXT:    lis r8, 24749
; P8LE-NEXT:    lis r4, -19946
; P8LE-NEXT:    lis r5, -14230
; P8LE-NEXT:    mffprd r3, f0
; P8LE-NEXT:    ori r8, r8, 47143
; P8LE-NEXT:    ori r4, r4, 17097
; P8LE-NEXT:    ori r5, r5, 30865
; P8LE-NEXT:    rldicl r6, r3, 32, 48
; P8LE-NEXT:    rldicl r7, r3, 48, 48
; P8LE-NEXT:    rldicl r3, r3, 16, 48
; P8LE-NEXT:    extsh r3, r3
; P8LE-NEXT:    extsh r6, r6
; P8LE-NEXT:    extsh r7, r7
; P8LE-NEXT:    mulhw r8, r3, r8
; P8LE-NEXT:    mulhw r4, r6, r4
; P8LE-NEXT:    mulhw r5, r7, r5
; P8LE-NEXT:    srwi r9, r8, 31
; P8LE-NEXT:    srawi r8, r8, 11
; P8LE-NEXT:    add r4, r4, r6
; P8LE-NEXT:    add r5, r5, r7
; P8LE-NEXT:    add r8, r8, r9
; P8LE-NEXT:    srwi r9, r4, 31
; P8LE-NEXT:    srawi r4, r4, 4
; P8LE-NEXT:    mulli r8, r8, 5423
; P8LE-NEXT:    add r4, r4, r9
; P8LE-NEXT:    srwi r9, r5, 31
; P8LE-NEXT:    srawi r5, r5, 9
; P8LE-NEXT:    add r5, r5, r9
; P8LE-NEXT:    sub r3, r3, r8
; P8LE-NEXT:    mtvsrd v2, r3
; P8LE-NEXT:    mulli r3, r4, 23
; P8LE-NEXT:    mulli r4, r5, 654
; P8LE-NEXT:    sub r3, r6, r3
; P8LE-NEXT:    sub r4, r7, r4
; P8LE-NEXT:    mtvsrd v3, r3
; P8LE-NEXT:    li r3, 0
; P8LE-NEXT:    mtvsrd v4, r3
; P8LE-NEXT:    vmrghh v2, v2, v3
; P8LE-NEXT:    mtvsrd v3, r4
; P8LE-NEXT:    vmrghh v3, v3, v4
; P8LE-NEXT:    xxmrglw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: dont_fold_srem_one:
; P8BE:       # %bb.0:
; P8BE-NEXT:    mfvsrd r3, v2
; P8BE-NEXT:    lis r4, -19946
; P8BE-NEXT:    lis r8, 24749
; P8BE-NEXT:    lis r5, -14230
; P8BE-NEXT:    ori r4, r4, 17097
; P8BE-NEXT:    ori r8, r8, 47143
; P8BE-NEXT:    ori r5, r5, 30865
; P8BE-NEXT:    rldicl r6, r3, 48, 48
; P8BE-NEXT:    rldicl r7, r3, 32, 48
; P8BE-NEXT:    clrldi r3, r3, 48
; P8BE-NEXT:    extsh r6, r6
; P8BE-NEXT:    extsh r3, r3
; P8BE-NEXT:    extsh r7, r7
; P8BE-NEXT:    mulhw r4, r6, r4
; P8BE-NEXT:    mulhw r8, r3, r8
; P8BE-NEXT:    mulhw r5, r7, r5
; P8BE-NEXT:    add r4, r4, r6
; P8BE-NEXT:    srwi r9, r8, 31
; P8BE-NEXT:    srawi r8, r8, 11
; P8BE-NEXT:    add r5, r5, r7
; P8BE-NEXT:    add r8, r8, r9
; P8BE-NEXT:    srwi r9, r4, 31
; P8BE-NEXT:    srawi r4, r4, 4
; P8BE-NEXT:    add r4, r4, r9
; P8BE-NEXT:    mulli r8, r8, 5423
; P8BE-NEXT:    srwi r9, r5, 31
; P8BE-NEXT:    srawi r5, r5, 9
; P8BE-NEXT:    mulli r4, r4, 23
; P8BE-NEXT:    add r5, r5, r9
; P8BE-NEXT:    addis r9, r2, .LCPI4_0@toc@ha
; P8BE-NEXT:    addi r9, r9, .LCPI4_0@toc@l
; P8BE-NEXT:    mulli r5, r5, 654
; P8BE-NEXT:    sub r3, r3, r8
; P8BE-NEXT:    lxvw4x v2, 0, r9
; P8BE-NEXT:    sub r4, r6, r4
; P8BE-NEXT:    mtvsrwz v3, r3
; P8BE-NEXT:    mtvsrwz v4, r4
; P8BE-NEXT:    sub r3, r7, r5
; P8BE-NEXT:    vperm v3, v4, v3, v2
; P8BE-NEXT:    mtvsrwz v4, r3
; P8BE-NEXT:    li r3, 0
; P8BE-NEXT:    mtvsrwz v5, r3
; P8BE-NEXT:    vperm v2, v5, v4, v2
; P8BE-NEXT:    xxmrghw v2, v2, v3
; P8BE-NEXT:    blr
  %1 = srem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
  ret <4 x i16> %1
}
1091 1092; Don't fold if the divisor is 2^15. 1093define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { 1094; P9LE-LABEL: dont_fold_urem_i16_smax: 1095; P9LE: # %bb.0: 1096; P9LE-NEXT: li r3, 4 1097; P9LE-NEXT: lis r4, -19946 1098; P9LE-NEXT: vextuhrx r3, r3, v2 1099; P9LE-NEXT: ori r4, r4, 17097 1100; P9LE-NEXT: extsh r3, r3 1101; P9LE-NEXT: mulhw r4, r3, r4 1102; P9LE-NEXT: add r4, r4, r3 1103; P9LE-NEXT: srwi r5, r4, 31 1104; P9LE-NEXT: srawi r4, r4, 4 1105; P9LE-NEXT: add r4, r4, r5 1106; P9LE-NEXT: mulli r4, r4, 23 1107; P9LE-NEXT: sub r3, r3, r4 1108; P9LE-NEXT: lis r4, 24749 1109; P9LE-NEXT: mtvsrd v3, r3 1110; P9LE-NEXT: li r3, 6 1111; P9LE-NEXT: ori r4, r4, 47143 1112; P9LE-NEXT: vextuhrx r3, r3, v2 1113; P9LE-NEXT: extsh r3, r3 1114; P9LE-NEXT: mulhw r4, r3, r4 1115; P9LE-NEXT: srwi r5, r4, 31 1116; P9LE-NEXT: srawi r4, r4, 11 1117; P9LE-NEXT: add r4, r4, r5 1118; P9LE-NEXT: mulli r4, r4, 5423 1119; P9LE-NEXT: sub r3, r3, r4 1120; P9LE-NEXT: mtvsrd v4, r3 1121; P9LE-NEXT: li r3, 2 1122; P9LE-NEXT: vextuhrx r3, r3, v2 1123; P9LE-NEXT: vmrghh v3, v4, v3 1124; P9LE-NEXT: extsh r3, r3 1125; P9LE-NEXT: srawi r4, r3, 15 1126; P9LE-NEXT: addze r4, r4 1127; P9LE-NEXT: slwi r4, r4, 15 1128; P9LE-NEXT: sub r3, r3, r4 1129; P9LE-NEXT: mtvsrd v2, r3 1130; P9LE-NEXT: li r3, 0 1131; P9LE-NEXT: mtvsrd v4, r3 1132; P9LE-NEXT: vmrghh v2, v2, v4 1133; P9LE-NEXT: xxmrglw v2, v3, v2 1134; P9LE-NEXT: blr 1135; 1136; P9BE-LABEL: dont_fold_urem_i16_smax: 1137; P9BE: # %bb.0: 1138; P9BE-NEXT: li r3, 4 1139; P9BE-NEXT: lis r4, -19946 1140; P9BE-NEXT: vextuhlx r3, r3, v2 1141; P9BE-NEXT: ori r4, r4, 17097 1142; P9BE-NEXT: extsh r3, r3 1143; P9BE-NEXT: mulhw r4, r3, r4 1144; P9BE-NEXT: add r4, r4, r3 1145; P9BE-NEXT: srwi r5, r4, 31 1146; P9BE-NEXT: srawi r4, r4, 4 1147; 
P9BE-NEXT: add r4, r4, r5 1148; P9BE-NEXT: mulli r4, r4, 23 1149; P9BE-NEXT: sub r3, r3, r4 1150; P9BE-NEXT: lis r4, 24749 1151; P9BE-NEXT: mtfprwz f0, r3 1152; P9BE-NEXT: li r3, 6 1153; P9BE-NEXT: ori r4, r4, 47143 1154; P9BE-NEXT: vextuhlx r3, r3, v2 1155; P9BE-NEXT: extsh r3, r3 1156; P9BE-NEXT: mulhw r4, r3, r4 1157; P9BE-NEXT: srwi r5, r4, 31 1158; P9BE-NEXT: srawi r4, r4, 11 1159; P9BE-NEXT: add r4, r4, r5 1160; P9BE-NEXT: mulli r4, r4, 5423 1161; P9BE-NEXT: sub r3, r3, r4 1162; P9BE-NEXT: mtfprwz f1, r3 1163; P9BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha 1164; P9BE-NEXT: addi r3, r3, .LCPI5_0@toc@l 1165; P9BE-NEXT: lxv vs2, 0(r3) 1166; P9BE-NEXT: li r3, 2 1167; P9BE-NEXT: vextuhlx r3, r3, v2 1168; P9BE-NEXT: extsh r3, r3 1169; P9BE-NEXT: xxperm vs1, vs0, vs2 1170; P9BE-NEXT: srawi r4, r3, 15 1171; P9BE-NEXT: addze r4, r4 1172; P9BE-NEXT: slwi r4, r4, 15 1173; P9BE-NEXT: sub r3, r3, r4 1174; P9BE-NEXT: mtfprwz f0, r3 1175; P9BE-NEXT: li r3, 0 1176; P9BE-NEXT: mtfprwz f3, r3 1177; P9BE-NEXT: xxperm vs0, vs3, vs2 1178; P9BE-NEXT: xxmrghw v2, vs0, vs1 1179; P9BE-NEXT: blr 1180; 1181; P8LE-LABEL: dont_fold_urem_i16_smax: 1182; P8LE: # %bb.0: 1183; P8LE-NEXT: xxswapd vs0, v2 1184; P8LE-NEXT: lis r4, -19946 1185; P8LE-NEXT: lis r6, 24749 1186; P8LE-NEXT: li r8, 0 1187; P8LE-NEXT: mffprd r3, f0 1188; P8LE-NEXT: ori r4, r4, 17097 1189; P8LE-NEXT: ori r6, r6, 47143 1190; P8LE-NEXT: mtvsrd v2, r8 1191; P8LE-NEXT: rldicl r5, r3, 32, 48 1192; P8LE-NEXT: rldicl r7, r3, 16, 48 1193; P8LE-NEXT: rldicl r3, r3, 48, 48 1194; P8LE-NEXT: extsh r5, r5 1195; P8LE-NEXT: extsh r7, r7 1196; P8LE-NEXT: extsh r3, r3 1197; P8LE-NEXT: mulhw r4, r5, r4 1198; P8LE-NEXT: mulhw r6, r7, r6 1199; P8LE-NEXT: add r4, r4, r5 1200; P8LE-NEXT: srwi r8, r6, 31 1201; P8LE-NEXT: srawi r6, r6, 11 1202; P8LE-NEXT: add r6, r6, r8 1203; P8LE-NEXT: srwi r8, r4, 31 1204; P8LE-NEXT: srawi r4, r4, 4 1205; P8LE-NEXT: add r4, r4, r8 1206; P8LE-NEXT: mulli r6, r6, 5423 1207; P8LE-NEXT: mulli r4, r4, 23 1208; 
P8LE-NEXT: sub r6, r7, r6 1209; P8LE-NEXT: sub r4, r5, r4 1210; P8LE-NEXT: srawi r5, r3, 15 1211; P8LE-NEXT: mtvsrd v3, r6 1212; P8LE-NEXT: addze r5, r5 1213; P8LE-NEXT: mtvsrd v4, r4 1214; P8LE-NEXT: slwi r4, r5, 15 1215; P8LE-NEXT: sub r3, r3, r4 1216; P8LE-NEXT: vmrghh v3, v3, v4 1217; P8LE-NEXT: mtvsrd v4, r3 1218; P8LE-NEXT: vmrghh v2, v4, v2 1219; P8LE-NEXT: xxmrglw v2, v3, v2 1220; P8LE-NEXT: blr 1221; 1222; P8BE-LABEL: dont_fold_urem_i16_smax: 1223; P8BE: # %bb.0: 1224; P8BE-NEXT: mfvsrd r3, v2 1225; P8BE-NEXT: lis r4, -19946 1226; P8BE-NEXT: lis r6, 24749 1227; P8BE-NEXT: ori r4, r4, 17097 1228; P8BE-NEXT: ori r6, r6, 47143 1229; P8BE-NEXT: rldicl r5, r3, 48, 48 1230; P8BE-NEXT: clrldi r7, r3, 48 1231; P8BE-NEXT: rldicl r3, r3, 32, 48 1232; P8BE-NEXT: extsh r5, r5 1233; P8BE-NEXT: extsh r7, r7 1234; P8BE-NEXT: extsh r3, r3 1235; P8BE-NEXT: mulhw r4, r5, r4 1236; P8BE-NEXT: mulhw r6, r7, r6 1237; P8BE-NEXT: add r4, r4, r5 1238; P8BE-NEXT: srwi r8, r6, 31 1239; P8BE-NEXT: srawi r6, r6, 11 1240; P8BE-NEXT: add r6, r6, r8 1241; P8BE-NEXT: srwi r8, r4, 31 1242; P8BE-NEXT: srawi r4, r4, 4 1243; P8BE-NEXT: add r4, r4, r8 1244; P8BE-NEXT: addis r8, r2, .LCPI5_0@toc@ha 1245; P8BE-NEXT: mulli r6, r6, 5423 1246; P8BE-NEXT: mulli r4, r4, 23 1247; P8BE-NEXT: addi r8, r8, .LCPI5_0@toc@l 1248; P8BE-NEXT: lxvw4x v2, 0, r8 1249; P8BE-NEXT: srawi r8, r3, 15 1250; P8BE-NEXT: sub r6, r7, r6 1251; P8BE-NEXT: addze r8, r8 1252; P8BE-NEXT: sub r4, r5, r4 1253; P8BE-NEXT: mtvsrwz v3, r6 1254; P8BE-NEXT: slwi r8, r8, 15 1255; P8BE-NEXT: mtvsrwz v4, r4 1256; P8BE-NEXT: sub r3, r3, r8 1257; P8BE-NEXT: vperm v3, v4, v3, v2 1258; P8BE-NEXT: mtvsrwz v4, r3 1259; P8BE-NEXT: li r3, 0 1260; P8BE-NEXT: mtvsrwz v5, r3 1261; P8BE-NEXT: vperm v2, v5, v4, v2 1262; P8BE-NEXT: xxmrghw v2, v2, v3 1263; P8BE-NEXT: blr 1264 %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423> 1265 ret <4 x i16> %1 1266} 1267 1268; Don't fold i64 srem. 
; srem of a <4 x i64> by the constant vector <1, 654, 23, 5423>.
; Expected lowering in all four runs below: each lane with a divisor other
; than 1 is handled as a scalar, using mulhd by a 64-bit constant assembled
; with lis/ori/rldic/oris/ori, an rldicl + sradi pair, and a final mulli + sub.
; The lane divided by 1 is a zero: loaded with li in three of the runs, and
; given as the literal 0 operand of mtvsrdd in the Power9 big-endian run.
define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) {
; P9LE-LABEL: dont_fold_srem_i64:
; P9LE: # %bb.0:
; P9LE-NEXT: lis r4, 12374
; P9LE-NEXT: mfvsrd r3, v3
; P9LE-NEXT: ori r4, r4, 56339
; P9LE-NEXT: rldic r4, r4, 33, 1
; P9LE-NEXT: oris r4, r4, 58853
; P9LE-NEXT: ori r4, r4, 6055
; P9LE-NEXT: mulhd r4, r3, r4
; P9LE-NEXT: rldicl r5, r4, 1, 63
; P9LE-NEXT: sradi r4, r4, 11
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: lis r5, 5698
; P9LE-NEXT: mulli r4, r4, 5423
; P9LE-NEXT: ori r5, r5, 51289
; P9LE-NEXT: rldic r5, r5, 35, 0
; P9LE-NEXT: oris r5, r5, 22795
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mfvsrld r4, v3
; P9LE-NEXT: ori r5, r5, 8549
; P9LE-NEXT: mulhd r5, r4, r5
; P9LE-NEXT: add r5, r5, r4
; P9LE-NEXT: rldicl r6, r5, 1, 63
; P9LE-NEXT: sradi r5, r5, 4
; P9LE-NEXT: add r5, r5, r6
; P9LE-NEXT: mulli r5, r5, 23
; P9LE-NEXT: sub r4, r4, r5
; P9LE-NEXT: mtvsrdd v3, r3, r4
; P9LE-NEXT: lis r4, 3206
; P9LE-NEXT: mfvsrd r3, v2
; P9LE-NEXT: ori r4, r4, 42889
; P9LE-NEXT: rldic r4, r4, 35, 1
; P9LE-NEXT: oris r4, r4, 1603
; P9LE-NEXT: ori r4, r4, 21445
; P9LE-NEXT: mulhd r4, r3, r4
; P9LE-NEXT: rldicl r5, r4, 1, 63
; P9LE-NEXT: sradi r4, r4, 8
; P9LE-NEXT: add r4, r4, r5
; P9LE-NEXT: mulli r4, r4, 654
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: li r4, 0
; P9LE-NEXT: mtvsrdd v2, r3, r4
; P9LE-NEXT: blr
;
; P9BE-LABEL: dont_fold_srem_i64:
; P9BE: # %bb.0:
; P9BE-NEXT: lis r4, 12374
; P9BE-NEXT: mfvsrld r3, v3
; P9BE-NEXT: ori r4, r4, 56339
; P9BE-NEXT: rldic r4, r4, 33, 1
; P9BE-NEXT: oris r4, r4, 58853
; P9BE-NEXT: ori r4, r4, 6055
; P9BE-NEXT: mulhd r4, r3, r4
; P9BE-NEXT: rldicl r5, r4, 1, 63
; P9BE-NEXT: sradi r4, r4, 11
; P9BE-NEXT: add r4, r4, r5
; P9BE-NEXT: lis r5, 5698
; P9BE-NEXT: ori r5, r5, 51289
; P9BE-NEXT: mulli r4, r4, 5423
; P9BE-NEXT: rldic r5, r5, 35, 0
; P9BE-NEXT: oris r5, r5, 22795
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: mfvsrd r4, v3
; P9BE-NEXT: ori r5, r5, 8549
; P9BE-NEXT: mulhd r5, r4, r5
; P9BE-NEXT: add r5, r5, r4
; P9BE-NEXT: rldicl r6, r5, 1, 63
; P9BE-NEXT: sradi r5, r5, 4
; P9BE-NEXT: add r5, r5, r6
; P9BE-NEXT: mulli r5, r5, 23
; P9BE-NEXT: sub r4, r4, r5
; P9BE-NEXT: mtvsrdd v3, r4, r3
; P9BE-NEXT: lis r4, 3206
; P9BE-NEXT: mfvsrld r3, v2
; P9BE-NEXT: ori r4, r4, 42889
; P9BE-NEXT: rldic r4, r4, 35, 1
; P9BE-NEXT: oris r4, r4, 1603
; P9BE-NEXT: ori r4, r4, 21445
; P9BE-NEXT: mulhd r4, r3, r4
; P9BE-NEXT: rldicl r5, r4, 1, 63
; P9BE-NEXT: sradi r4, r4, 8
; P9BE-NEXT: add r4, r4, r5
; P9BE-NEXT: mulli r4, r4, 654
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: mtvsrdd v2, 0, r3
; P9BE-NEXT: blr
;
; P8LE-LABEL: dont_fold_srem_i64:
; P8LE: # %bb.0:
; P8LE-NEXT: lis r4, 12374
; P8LE-NEXT: lis r3, 5698
; P8LE-NEXT: lis r5, 3206
; P8LE-NEXT: xxswapd vs0, v3
; P8LE-NEXT: mfvsrd r7, v3
; P8LE-NEXT: mfvsrd r8, v2
; P8LE-NEXT: ori r4, r4, 56339
; P8LE-NEXT: ori r3, r3, 51289
; P8LE-NEXT: ori r5, r5, 42889
; P8LE-NEXT: mffprd r6, f0
; P8LE-NEXT: rldic r4, r4, 33, 1
; P8LE-NEXT: rldic r3, r3, 35, 0
; P8LE-NEXT: rldic r5, r5, 35, 1
; P8LE-NEXT: oris r4, r4, 58853
; P8LE-NEXT: oris r3, r3, 22795
; P8LE-NEXT: oris r5, r5, 1603
; P8LE-NEXT: ori r4, r4, 6055
; P8LE-NEXT: ori r3, r3, 8549
; P8LE-NEXT: ori r5, r5, 21445
; P8LE-NEXT: mulhd r4, r7, r4
; P8LE-NEXT: mulhd r3, r6, r3
; P8LE-NEXT: mulhd r5, r8, r5
; P8LE-NEXT: rldicl r9, r4, 1, 63
; P8LE-NEXT: sradi r4, r4, 11
; P8LE-NEXT: add r3, r3, r6
; P8LE-NEXT: add r4, r4, r9
; P8LE-NEXT: rldicl r9, r5, 1, 63
; P8LE-NEXT: sradi r5, r5, 8
; P8LE-NEXT: add r5, r5, r9
; P8LE-NEXT: rldicl r9, r3, 1, 63
; P8LE-NEXT: sradi r3, r3, 4
; P8LE-NEXT: mulli r4, r4, 5423
; P8LE-NEXT: add r3, r3, r9
; P8LE-NEXT: mulli r5, r5, 654
; P8LE-NEXT: mulli r3, r3, 23
; P8LE-NEXT: sub r4, r7, r4
; P8LE-NEXT: mtfprd f0, r4
; P8LE-NEXT: sub r5, r8, r5
; P8LE-NEXT: sub r3, r6, r3
; P8LE-NEXT: mtfprd f1, r3
; P8LE-NEXT: li r3, 0
; P8LE-NEXT: xxmrghd v3, vs0, vs1
; P8LE-NEXT: mtfprd f0, r5
; P8LE-NEXT: mtfprd f1, r3
; P8LE-NEXT: xxmrghd v2, vs0, vs1
; P8LE-NEXT: blr
;
; P8BE-LABEL: dont_fold_srem_i64:
; P8BE: # %bb.0:
; P8BE-NEXT: lis r4, 12374
; P8BE-NEXT: lis r3, 5698
; P8BE-NEXT: lis r5, 3206
; P8BE-NEXT: xxswapd vs0, v3
; P8BE-NEXT: xxswapd vs1, v2
; P8BE-NEXT: mfvsrd r6, v3
; P8BE-NEXT: ori r4, r4, 56339
; P8BE-NEXT: ori r3, r3, 51289
; P8BE-NEXT: ori r5, r5, 42889
; P8BE-NEXT: mffprd r7, f0
; P8BE-NEXT: mffprd r8, f1
; P8BE-NEXT: rldic r4, r4, 33, 1
; P8BE-NEXT: rldic r3, r3, 35, 0
; P8BE-NEXT: rldic r5, r5, 35, 1
; P8BE-NEXT: oris r4, r4, 58853
; P8BE-NEXT: oris r3, r3, 22795
; P8BE-NEXT: oris r5, r5, 1603
; P8BE-NEXT: ori r4, r4, 6055
; P8BE-NEXT: ori r3, r3, 8549
; P8BE-NEXT: ori r5, r5, 21445
; P8BE-NEXT: mulhd r4, r7, r4
; P8BE-NEXT: mulhd r3, r6, r3
; P8BE-NEXT: mulhd r5, r8, r5
; P8BE-NEXT: rldicl r9, r4, 1, 63
; P8BE-NEXT: sradi r4, r4, 11
; P8BE-NEXT: add r3, r3, r6
; P8BE-NEXT: add r4, r4, r9
; P8BE-NEXT: rldicl r9, r5, 1, 63
; P8BE-NEXT: sradi r5, r5, 8
; P8BE-NEXT: add r5, r5, r9
; P8BE-NEXT: rldicl r9, r3, 1, 63
; P8BE-NEXT: sradi r3, r3, 4
; P8BE-NEXT: mulli r4, r4, 5423
; P8BE-NEXT: add r3, r3, r9
; P8BE-NEXT: mulli r5, r5, 654
; P8BE-NEXT: mulli r3, r3, 23
; P8BE-NEXT: sub r4, r7, r4
; P8BE-NEXT: mtfprd f0, r4
; P8BE-NEXT: sub r5, r8, r5
; P8BE-NEXT: sub r3, r6, r3
; P8BE-NEXT: mtfprd f1, r3
; P8BE-NEXT: li r3, 0
; P8BE-NEXT: xxmrghd v3, vs1, vs0
; P8BE-NEXT: mtfprd f0, r5
; P8BE-NEXT: mtfprd f1, r3
; P8BE-NEXT: xxmrghd v2, vs1, vs0
; P8BE-NEXT: blr
  %1 = srem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
  ret <4 x i64> %1
}