; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P9BE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8LE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,P8BE

; urem of a <4 x i16> by four different constant divisors (95, 124, 98, 1003).
; The checks expect every lane to be reduced with its own
; mulhwu / mulli <divisor> / sub sequence, each using a separate lis+ori constant.
define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
; P9LE-LABEL: fold_urem_vec_1:
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: lis r4, 689
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: ori r4, r4, 55879
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: lis r4, 528
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: ori r4, r4, 33826
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: mulli r4, r4, 124
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: lis r4, 668
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: ori r4, r4, 48149
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: mulli r4, r4, 98
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: lis r4, 65
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: ori r4, r4, 22281
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: mulli r4, r4, 1003
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: xxmrglw v2, v2, v3
; P9LE-NEXT: blr
;
; P9BE-LABEL: fold_urem_vec_1:
; P9BE: # %bb.0:
; P9BE-NEXT: li r3, 6
; P9BE-NEXT: lis r4, 65
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: ori r4, r4, 22281
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: mulhwu r4, r3, r4
; P9BE-NEXT: mulli r4, r4, 1003
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: lis r4, 668
; P9BE-NEXT: mtfprwz f0, r3
; P9BE-NEXT: li r3, 4
; P9BE-NEXT: ori r4, r4, 48149
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: mulhwu r4, r3, r4
; P9BE-NEXT: mulli r4, r4, 98
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: lis r4, 528
; P9BE-NEXT: mtfprwz f1, r3
; P9BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; P9BE-NEXT: ori r4, r4, 33826
; P9BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; P9BE-NEXT: lxv vs2, 0(r3)
; P9BE-NEXT: li r3, 2
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: xxperm vs0, vs1, vs2
; P9BE-NEXT: mulhwu r4, r3, r4
; P9BE-NEXT: mulli r4, r4, 124
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: lis r4, 689
; P9BE-NEXT: mtfprwz f1, r3
; P9BE-NEXT: li r3, 0
; P9BE-NEXT: ori r4, r4, 55879
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: mulhwu r4, r3, r4
; P9BE-NEXT: mulli r4, r4, 95
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: mtfprwz f3, r3
; P9BE-NEXT: xxperm vs1, vs3, vs2
; P9BE-NEXT: xxmrghw v2, vs1, vs0
; P9BE-NEXT: blr
;
; P8LE-LABEL: fold_urem_vec_1:
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r4, 689
; P8LE-NEXT: lis r5, 528
; P8LE-NEXT: lis r6, 668
; P8LE-NEXT: mffprd r3, f0
; P8LE-NEXT: ori r4, r4, 55879
; P8LE-NEXT: ori r5, r5, 33826
; P8LE-NEXT: ori r6, r6, 48149
; P8LE-NEXT: clrldi r7, r3, 48
; P8LE-NEXT: clrlwi r7, r7, 16
; P8LE-NEXT: mulhwu r4, r7, r4
; P8LE-NEXT: mulli r4, r4, 95
; P8LE-NEXT: sub r4, r7, r4
; P8LE-NEXT: rldicl r7, r3, 48, 48
; P8LE-NEXT: clrlwi r7, r7, 16
; P8LE-NEXT: mtvsrd v2, r4
; P8LE-NEXT: lis r4, 65
; P8LE-NEXT: mulhwu r5, r7, r5
; P8LE-NEXT: ori r4, r4, 22281
; P8LE-NEXT: mulli r5, r5, 124
; P8LE-NEXT: sub r5, r7, r5
; P8LE-NEXT: rldicl r7, r3, 32, 48
; P8LE-NEXT: rldicl r3, r3, 16, 48
; P8LE-NEXT: clrlwi r7, r7, 16
; P8LE-NEXT: clrlwi r3, r3, 16
; P8LE-NEXT: mtvsrd v3, r5
; P8LE-NEXT: mulhwu r6, r7, r6
; P8LE-NEXT: mulhwu r4, r3, r4
; P8LE-NEXT: mulli r6, r6, 98
; P8LE-NEXT: mulli r4, r4, 1003
; P8LE-NEXT: sub r6, r7, r6
; P8LE-NEXT: sub r3, r3, r4
; P8LE-NEXT: mtvsrd v4, r3
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: mtvsrd v3, r6
; P8LE-NEXT: vmrghh v3, v4, v3
; P8LE-NEXT: xxmrglw v2, v3, v2
; P8LE-NEXT: blr
;
; P8BE-LABEL: fold_urem_vec_1:
; P8BE: # %bb.0:
; P8BE-NEXT: mfvsrd r3, v2
; P8BE-NEXT: addis r6, r2, .LCPI0_0@toc@ha
; P8BE-NEXT: lis r4, 65
; P8BE-NEXT: lis r5, 668
; P8BE-NEXT: lis r7, 528
; P8BE-NEXT: addi r6, r6, .LCPI0_0@toc@l
; P8BE-NEXT: ori r4, r4, 22281
; P8BE-NEXT: ori r5, r5, 48149
; P8BE-NEXT: ori r7, r7, 33826
; P8BE-NEXT: lxvw4x v2, 0, r6
; P8BE-NEXT: clrldi r6, r3, 48
; P8BE-NEXT: clrlwi r6, r6, 16
; P8BE-NEXT: mulhwu r4, r6, r4
; P8BE-NEXT: mulli r4, r4, 1003
; P8BE-NEXT: sub r4, r6, r4
; P8BE-NEXT: rldicl r6, r3, 48, 48
; P8BE-NEXT: clrlwi r6, r6, 16
; P8BE-NEXT: mtvsrwz v3, r4
; P8BE-NEXT: lis r4, 689
; P8BE-NEXT: mulhwu r5, r6, r5
; P8BE-NEXT: ori r4, r4, 55879
; P8BE-NEXT: mulli r5, r5, 98
; P8BE-NEXT: sub r5, r6, r5
; P8BE-NEXT: rldicl r6, r3, 32, 48
; P8BE-NEXT: rldicl r3, r3, 16, 48
; P8BE-NEXT: clrlwi r6, r6, 16
; P8BE-NEXT: clrlwi r3, r3, 16
; P8BE-NEXT: mtvsrwz v4, r5
; P8BE-NEXT: mulhwu r7, r6, r7
; P8BE-NEXT: mulhwu r4, r3, r4
; P8BE-NEXT: mulli r7, r7, 124
; P8BE-NEXT: mulli r4, r4, 95
; P8BE-NEXT: sub r6, r6, r7
; P8BE-NEXT: sub r3, r3, r4
; P8BE-NEXT: mtvsrwz v5, r3
; P8BE-NEXT: vperm v3, v4, v3, v2
; P8BE-NEXT: mtvsrwz v4, r6
; P8BE-NEXT: vperm v2, v5, v4, v2
; P8BE-NEXT: xxmrghw v2, v2, v3
; P8BE-NEXT: blr
  %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
  ret <4 x i16> %1
}

; urem of a <4 x i16> by a splat of 95.
; The checks expect the multiplier built by lis 689 / ori 55879 to be
; materialized once per function and reused by the mulhwu of all four lanes.
define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) {
; P9LE-LABEL: fold_urem_vec_2:
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: lis r4, 689
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: ori r4, r4, 55879
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r5, r3, r4
; P9LE-NEXT: mulli r5, r5, 95
; P9LE-NEXT: sub r3, r3, r5
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r5, r3, r4
; P9LE-NEXT: mulli r5, r5, 95
; P9LE-NEXT: sub r3, r3, r5
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r5, r3, r4
; P9LE-NEXT: mulli r5, r5, 95
; P9LE-NEXT: sub r3, r3, r5
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: xxmrglw v2, v2, v3
; P9LE-NEXT: blr
;
; P9BE-LABEL: fold_urem_vec_2:
; P9BE: # %bb.0:
; P9BE-NEXT: li r3, 6
; P9BE-NEXT: lis r4, 689
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: ori r4, r4, 55879
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: mulhwu r5, r3, r4
; P9BE-NEXT: mulli r5, r5, 95
; P9BE-NEXT: sub r3, r3, r5
; P9BE-NEXT: mtfprwz f0, r3
; P9BE-NEXT: li r3, 4
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: mulhwu r5, r3, r4
; P9BE-NEXT: mulli r5, r5, 95
; P9BE-NEXT: sub r3, r3, r5
; P9BE-NEXT: mtfprwz f1, r3
; P9BE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
; P9BE-NEXT: addi r3, r3, .LCPI1_0@toc@l
; P9BE-NEXT: lxv vs2, 0(r3)
; P9BE-NEXT: li r3, 2
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: xxperm vs0, vs1, vs2
; P9BE-NEXT: mulhwu r5, r3, r4
; P9BE-NEXT: mulli r5, r5, 95
; P9BE-NEXT: sub r3, r3, r5
; P9BE-NEXT: mtfprwz f1, r3
; P9BE-NEXT: li r3, 0
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: mulhwu r4, r3, r4
; P9BE-NEXT: mulli r4, r4, 95
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: mtfprwz f3, r3
; P9BE-NEXT: xxperm vs1, vs3, vs2
; P9BE-NEXT: xxmrghw v2, vs1, vs0
; P9BE-NEXT: blr
;
; P8LE-LABEL: fold_urem_vec_2:
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r4, 689
; P8LE-NEXT: mffprd r3, f0
; P8LE-NEXT: ori r4, r4, 55879
; P8LE-NEXT: clrldi r5, r3, 48
; P8LE-NEXT: rldicl r6, r3, 48, 48
; P8LE-NEXT: rldicl r7, r3, 32, 48
; P8LE-NEXT: rldicl r3, r3, 16, 48
; P8LE-NEXT: clrlwi r5, r5, 16
; P8LE-NEXT: clrlwi r6, r6, 16
; P8LE-NEXT: clrlwi r7, r7, 16
; P8LE-NEXT: clrlwi r3, r3, 16
; P8LE-NEXT: mulhwu r8, r5, r4
; P8LE-NEXT: mulli r8, r8, 95
; P8LE-NEXT: sub r5, r5, r8
; P8LE-NEXT: mulhwu r8, r6, r4
; P8LE-NEXT: mtvsrd v2, r5
; P8LE-NEXT: mulli r8, r8, 95
; P8LE-NEXT: sub r6, r6, r8
; P8LE-NEXT: mulhwu r8, r7, r4
; P8LE-NEXT: mulhwu r4, r3, r4
; P8LE-NEXT: mtvsrd v3, r6
; P8LE-NEXT: mulli r8, r8, 95
; P8LE-NEXT: mulli r4, r4, 95
; P8LE-NEXT: sub r7, r7, r8
; P8LE-NEXT: sub r3, r3, r4
; P8LE-NEXT: mtvsrd v4, r7
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: mtvsrd v3, r3
; P8LE-NEXT: vmrghh v3, v3, v4
; P8LE-NEXT: xxmrglw v2, v3, v2
; P8LE-NEXT: blr
;
; P8BE-LABEL: fold_urem_vec_2:
; P8BE: # %bb.0:
; P8BE-NEXT: mfvsrd r3, v2
; P8BE-NEXT: addis r5, r2, .LCPI1_0@toc@ha
; P8BE-NEXT: lis r4, 689
; P8BE-NEXT: addi r5, r5, .LCPI1_0@toc@l
; P8BE-NEXT: ori r4, r4, 55879
; P8BE-NEXT: lxvw4x v2, 0, r5
; P8BE-NEXT: clrldi r5, r3, 48
; P8BE-NEXT: rldicl r6, r3, 48, 48
; P8BE-NEXT: rldicl r7, r3, 32, 48
; P8BE-NEXT: rldicl r3, r3, 16, 48
; P8BE-NEXT: clrlwi r5, r5, 16
; P8BE-NEXT: clrlwi r6, r6, 16
; P8BE-NEXT: clrlwi r7, r7, 16
; P8BE-NEXT: clrlwi r3, r3, 16
; P8BE-NEXT: mulhwu r8, r5, r4
; P8BE-NEXT: mulli r8, r8, 95
; P8BE-NEXT: sub r5, r5, r8
; P8BE-NEXT: mulhwu r8, r6, r4
; P8BE-NEXT: mtvsrwz v3, r5
; P8BE-NEXT: mulli r8, r8, 95
; P8BE-NEXT: sub r6, r6, r8
; P8BE-NEXT: mulhwu r8, r7, r4
; P8BE-NEXT: mulhwu r4, r3, r4
; P8BE-NEXT: mtvsrwz v4, r6
; P8BE-NEXT: mulli r8, r8, 95
; P8BE-NEXT: mulli r4, r4, 95
; P8BE-NEXT: sub r7, r7, r8
; P8BE-NEXT: sub r3, r3, r4
; P8BE-NEXT: mtvsrwz v5, r7
; P8BE-NEXT: vperm v3, v4, v3, v2
; P8BE-NEXT: mtvsrwz v4, r3
; P8BE-NEXT: vperm v2, v4, v5, v2
; P8BE-NEXT: xxmrghw v2, v2, v3
; P8BE-NEXT: blr
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  ret <4 x i16> %1
}


; Don't fold if we can combine urem with udiv.
; Computes (x urem 95) + (x udiv 95) on a <4 x i16>.
; The checks expect each lane's mulhwu result to be used twice: as the input
; of the mulli/sub remainder computation and as the udiv result that is
; inserted into a second vector, with a final vadduhm adding the two vectors.
define <4 x i16> @combine_urem_udiv(<4 x i16> %x) {
; P9LE-LABEL: combine_urem_udiv:
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: lis r4, 689
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: ori r4, r4, 55879
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r5, r3, r4
; P9LE-NEXT: mulli r6, r5, 95
; P9LE-NEXT: sub r3, r3, r6
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r6, r3, 16
; P9LE-NEXT: mulhwu r6, r6, r4
; P9LE-NEXT: mulli r7, r6, 95
; P9LE-NEXT: sub r3, r3, r7
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: clrlwi r7, r3, 16
; P9LE-NEXT: mulhwu r7, r7, r4
; P9LE-NEXT: mulli r8, r7, 95
; P9LE-NEXT: sub r3, r3, r8
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r8, r3, 16
; P9LE-NEXT: mulhwu r4, r8, r4
; P9LE-NEXT: mulli r8, r4, 95
; P9LE-NEXT: mtvsrd v5, r4
; P9LE-NEXT: sub r3, r3, r8
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: mtvsrd v4, r6
; P9LE-NEXT: xxmrglw v2, v2, v3
; P9LE-NEXT: mtvsrd v3, r5
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: mtvsrd v4, r7
; P9LE-NEXT: vmrghh v4, v5, v4
; P9LE-NEXT: xxmrglw v3, v4, v3
; P9LE-NEXT: vadduhm v2, v2, v3
; P9LE-NEXT: blr
;
; P9BE-LABEL: combine_urem_udiv:
; P9BE: # %bb.0:
; P9BE-NEXT: li r3, 6
; P9BE-NEXT: lis r5, 689
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: ori r5, r5, 55879
; P9BE-NEXT: clrlwi r4, r3, 16
; P9BE-NEXT: mulhwu r4, r4, r5
; P9BE-NEXT: mulli r6, r4, 95
; P9BE-NEXT: sub r3, r3, r6
; P9BE-NEXT: mtfprwz f0, r3
; P9BE-NEXT: li r3, 4
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r6, r3, 16
; P9BE-NEXT: mulhwu r6, r6, r5
; P9BE-NEXT: mulli r7, r6, 95
; P9BE-NEXT: sub r3, r3, r7
; P9BE-NEXT: mtfprwz f1, r3
; P9BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; P9BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
; P9BE-NEXT: lxv vs2, 0(r3)
; P9BE-NEXT: li r3, 2
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r7, r3, 16
; P9BE-NEXT: xxperm vs0, vs1, vs2
; P9BE-NEXT: mulhwu r7, r7, r5
; P9BE-NEXT: mulli r8, r7, 95
; P9BE-NEXT: sub r3, r3, r8
; P9BE-NEXT: mtfprwz f1, r3
; P9BE-NEXT: li r3, 0
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: mulhwu r5, r3, r5
; P9BE-NEXT: mulli r8, r5, 95
; P9BE-NEXT: sub r3, r3, r8
; P9BE-NEXT: mtfprwz f3, r3
; P9BE-NEXT: xxperm vs1, vs3, vs2
; P9BE-NEXT: mtfprwz f3, r5
; P9BE-NEXT: xxmrghw v2, vs1, vs0
; P9BE-NEXT: mtfprwz f0, r4
; P9BE-NEXT: mtfprwz f1, r6
; P9BE-NEXT: xxperm vs0, vs1, vs2
; P9BE-NEXT: mtfprwz f1, r7
; P9BE-NEXT: xxperm vs1, vs3, vs2
; P9BE-NEXT: xxmrghw v3, vs1, vs0
; P9BE-NEXT: vadduhm v2, v2, v3
; P9BE-NEXT: blr
;
; P8LE-LABEL: combine_urem_udiv:
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: lis r4, 689
; P8LE-NEXT: mffprd r3, f0
; P8LE-NEXT: ori r4, r4, 55879
; P8LE-NEXT: clrldi r5, r3, 48
; P8LE-NEXT: rldicl r6, r3, 48, 48
; P8LE-NEXT: rldicl r7, r3, 32, 48
; P8LE-NEXT: rldicl r3, r3, 16, 48
; P8LE-NEXT: clrlwi r5, r5, 16
; P8LE-NEXT: clrlwi r8, r6, 16
; P8LE-NEXT: clrlwi r9, r7, 16
; P8LE-NEXT: clrlwi r10, r3, 16
; P8LE-NEXT: mulhwu r11, r5, r4
; P8LE-NEXT: mulhwu r8, r8, r4
; P8LE-NEXT: mulhwu r9, r9, r4
; P8LE-NEXT: mulhwu r4, r10, r4
; P8LE-NEXT: mulli r10, r11, 95
; P8LE-NEXT: mtvsrd v2, r11
; P8LE-NEXT: mtvsrd v3, r8
; P8LE-NEXT: sub r5, r5, r10
; P8LE-NEXT: mulli r10, r8, 95
; P8LE-NEXT: mtvsrd v4, r5
; P8LE-NEXT: sub r6, r6, r10
; P8LE-NEXT: mulli r10, r9, 95
; P8LE-NEXT: mtvsrd v5, r6
; P8LE-NEXT: sub r7, r7, r10
; P8LE-NEXT: mulli r10, r4, 95
; P8LE-NEXT: mtvsrd v0, r7
; P8LE-NEXT: sub r3, r3, r10
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: mtvsrd v3, r9
; P8LE-NEXT: vmrghh v4, v5, v4
; P8LE-NEXT: mtvsrd v5, r3
; P8LE-NEXT: vmrghh v5, v5, v0
; P8LE-NEXT: mtvsrd v0, r4
; P8LE-NEXT: xxmrglw v4, v5, v4
; P8LE-NEXT: vmrghh v3, v0, v3
; P8LE-NEXT: xxmrglw v2, v3, v2
; P8LE-NEXT: vadduhm v2, v4, v2
; P8LE-NEXT: blr
;
; P8BE-LABEL: combine_urem_udiv:
; P8BE: # %bb.0:
; P8BE-NEXT: mfvsrd r3, v2
; P8BE-NEXT: lis r4, 689
; P8BE-NEXT: ori r4, r4, 55879
; P8BE-NEXT: clrldi r5, r3, 48
; P8BE-NEXT: rldicl r6, r3, 48, 48
; P8BE-NEXT: rldicl r7, r3, 32, 48
; P8BE-NEXT: rldicl r3, r3, 16, 48
; P8BE-NEXT: clrlwi r8, r5, 16
; P8BE-NEXT: clrlwi r9, r6, 16
; P8BE-NEXT: clrlwi r10, r7, 16
; P8BE-NEXT: clrlwi r3, r3, 16
; P8BE-NEXT: mulhwu r8, r8, r4
; P8BE-NEXT: mulhwu r9, r9, r4
; P8BE-NEXT: mulhwu r10, r10, r4
; P8BE-NEXT: mulhwu r4, r3, r4
; P8BE-NEXT: mulli r11, r8, 95
; P8BE-NEXT: mtvsrwz v3, r8
; P8BE-NEXT: mtvsrwz v4, r9
; P8BE-NEXT: sub r5, r5, r11
; P8BE-NEXT: mulli r11, r9, 95
; P8BE-NEXT: mtvsrwz v5, r5
; P8BE-NEXT: sub r6, r6, r11
; P8BE-NEXT: mulli r11, r10, 95
; P8BE-NEXT: mtvsrwz v0, r6
; P8BE-NEXT: sub r7, r7, r11
; P8BE-NEXT: mulli r11, r4, 95
; P8BE-NEXT: mtvsrwz v1, r7
; P8BE-NEXT: sub r3, r3, r11
; P8BE-NEXT: addis r11, r2, .LCPI2_0@toc@ha
; P8BE-NEXT: addi r11, r11, .LCPI2_0@toc@l
; P8BE-NEXT: lxvw4x v2, 0, r11
; P8BE-NEXT: vperm v5, v0, v5, v2
; P8BE-NEXT: mtvsrwz v0, r3
; P8BE-NEXT: vperm v3, v4, v3, v2
; P8BE-NEXT: mtvsrwz v4, r10
; P8BE-NEXT: vperm v0, v0, v1, v2
; P8BE-NEXT: mtvsrwz v1, r4
; P8BE-NEXT: vperm v2, v1, v4, v2
; P8BE-NEXT: xxmrghw v4, v0, v5
; P8BE-NEXT: xxmrghw v2, v2, v3
; P8BE-NEXT: vadduhm v2, v4, v2
; P8BE-NEXT: blr
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %3 = add <4 x i16> %1, %2
  ret <4 x i16> %3
}

; Don't fold for divisors that are a power of two.
; The checks expect the 64, 32 and 8 lanes to be reduced with a single clrlwi
; mask each (clearing 26, 27 and 29 leading bits respectively); only the 95
; lane uses the mulhwu / mulli / sub sequence.
define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
; P9LE-LABEL: dont_fold_urem_power_of_two:
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: lis r4, 689
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: ori r4, r4, 55879
; P9LE-NEXT: clrlwi r3, r3, 26
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r3, r3, 27
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: mulli r4, r4, 95
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r3, r3, 29
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: vmrghh v2, v4, v2
; P9LE-NEXT: xxmrglw v2, v2, v3
; P9LE-NEXT: blr
;
; P9BE-LABEL: dont_fold_urem_power_of_two:
; P9BE: # %bb.0:
; P9BE-NEXT: li r3, 2
; P9BE-NEXT: lis r4, 689
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: ori r4, r4, 55879
; P9BE-NEXT: clrlwi r3, r3, 27
; P9BE-NEXT: mtfprwz f0, r3
; P9BE-NEXT: li r3, 0
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 26
; P9BE-NEXT: mtfprwz f1, r3
; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
; P9BE-NEXT: lxv vs2, 0(r3)
; P9BE-NEXT: li r3, 6
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: xxperm vs0, vs1, vs2
; P9BE-NEXT: mulhwu r4, r3, r4
; P9BE-NEXT: mulli r4, r4, 95
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: mtfprwz f1, r3
; P9BE-NEXT: li r3, 4
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 29
; P9BE-NEXT: mtfprwz f3, r3
; P9BE-NEXT: xxperm vs1, vs3, vs2
; P9BE-NEXT: xxmrghw v2, vs0, vs1
; P9BE-NEXT: blr
;
; P8LE-LABEL: dont_fold_urem_power_of_two:
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: mffprd r3, f0
; P8LE-NEXT: clrldi r4, r3, 48
; P8LE-NEXT: clrlwi r4, r4, 26
; P8LE-NEXT: mtvsrd v2, r4
; P8LE-NEXT: rldicl r4, r3, 48, 48
; P8LE-NEXT: clrlwi r4, r4, 27
; P8LE-NEXT: mtvsrd v3, r4
; P8LE-NEXT: rldicl r4, r3, 32, 48
; P8LE-NEXT: rldicl r3, r3, 16, 48
; P8LE-NEXT: clrlwi r4, r4, 29
; P8LE-NEXT: clrlwi r3, r3, 16
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: mtvsrd v3, r4
; P8LE-NEXT: lis r4, 689
; P8LE-NEXT: ori r4, r4, 55879
; P8LE-NEXT: mulhwu r4, r3, r4
; P8LE-NEXT: mulli r4, r4, 95
; P8LE-NEXT: sub r3, r3, r4
; P8LE-NEXT: mtvsrd v4, r3
; P8LE-NEXT: vmrghh v3, v4, v3
; P8LE-NEXT: xxmrglw v2, v3, v2
; P8LE-NEXT: blr
;
; P8BE-LABEL: dont_fold_urem_power_of_two:
; P8BE: # %bb.0:
; P8BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
; P8BE-NEXT: lis r5, 689
; P8BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
; P8BE-NEXT: ori r5, r5, 55879
; P8BE-NEXT: lxvw4x v3, 0, r3
; P8BE-NEXT: mfvsrd r3, v2
; P8BE-NEXT: rldicl r4, r3, 32, 48
; P8BE-NEXT: clrlwi r4, r4, 27
; P8BE-NEXT: mtvsrwz v2, r4
; P8BE-NEXT: rldicl r4, r3, 16, 48
; P8BE-NEXT: clrlwi r4, r4, 26
; P8BE-NEXT: mtvsrwz v4, r4
; P8BE-NEXT: clrldi r4, r3, 48
; P8BE-NEXT: rldicl r3, r3, 48, 48
; P8BE-NEXT: clrlwi r4, r4, 16
; P8BE-NEXT: clrlwi r3, r3, 29
; P8BE-NEXT: mulhwu r5, r4, r5
; P8BE-NEXT: mtvsrwz v5, r3
; P8BE-NEXT: mulli r5, r5, 95
; P8BE-NEXT: sub r4, r4, r5
; P8BE-NEXT: vperm v2, v4, v2, v3
; P8BE-NEXT: mtvsrwz v4, r4
; P8BE-NEXT: vperm v3, v5, v4, v3
; P8BE-NEXT: xxmrghw v2, v2, v3
; P8BE-NEXT: blr
  %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
  ret <4 x i16> %1
}

; Don't fold if the divisor is one.
; urem of a <4 x i16> by <1, 654, 23, 5423>.
; The checks expect the lane whose divisor is 1 to be materialized as a
; constant zero (li ..., 0 moved into the vector), while the 654, 23 and 5423
; lanes use the mulhwu / mulli / sub sequence.
define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; P9LE-LABEL: dont_fold_urem_one:
; P9LE: # %bb.0:
; P9LE-NEXT: li r3, 4
; P9LE-NEXT: lis r4, 2849
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: ori r4, r4, 25645
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: mulli r4, r4, 23
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: lis r4, 12
; P9LE-NEXT: mtvsrd v3, r3
; P9LE-NEXT: li r3, 6
; P9LE-NEXT: ori r4, r4, 5560
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: mulli r4, r4, 5423
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: lis r4, 100
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: li r3, 2
; P9LE-NEXT: ori r4, r4, 13629
; P9LE-NEXT: vextuhrx r3, r3, v2
; P9LE-NEXT: vmrghh v3, v4, v3
; P9LE-NEXT: clrlwi r3, r3, 16
; P9LE-NEXT: mulhwu r4, r3, r4
; P9LE-NEXT: mulli r4, r4, 654
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mtvsrd v2, r3
; P9LE-NEXT: li r3, 0
; P9LE-NEXT: mtvsrd v4, r3
; P9LE-NEXT: vmrghh v2, v2, v4
; P9LE-NEXT: xxmrglw v2, v3, v2
; P9LE-NEXT: blr
;
; P9BE-LABEL: dont_fold_urem_one:
; P9BE: # %bb.0:
; P9BE-NEXT: li r3, 6
; P9BE-NEXT: lis r4, 12
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: ori r4, r4, 5560
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: mulhwu r4, r3, r4
; P9BE-NEXT: mulli r4, r4, 5423
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: lis r4, 2849
; P9BE-NEXT: mtfprwz f0, r3
; P9BE-NEXT: li r3, 4
; P9BE-NEXT: ori r4, r4, 25645
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: mulhwu r4, r3, r4
; P9BE-NEXT: mulli r4, r4, 23
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: lis r4, 100
; P9BE-NEXT: mtfprwz f1, r3
; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
; P9BE-NEXT: ori r4, r4, 13629
; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
; P9BE-NEXT: lxv vs2, 0(r3)
; P9BE-NEXT: li r3, 2
; P9BE-NEXT: vextuhlx r3, r3, v2
; P9BE-NEXT: clrlwi r3, r3, 16
; P9BE-NEXT: xxperm vs0, vs1, vs2
; P9BE-NEXT: mulhwu r4, r3, r4
; P9BE-NEXT: mulli r4, r4, 654
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: mtfprwz f1, r3
; P9BE-NEXT: li r3, 0
; P9BE-NEXT: mtfprwz f3, r3
; P9BE-NEXT: xxperm vs1, vs3, vs2
; P9BE-NEXT: xxmrghw v2, vs1, vs0
; P9BE-NEXT: blr
;
; P8LE-LABEL: dont_fold_urem_one:
; P8LE: # %bb.0:
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: li r4, 0
; P8LE-NEXT: lis r5, 100
; P8LE-NEXT: lis r6, 2849
; P8LE-NEXT: mffprd r3, f0
; P8LE-NEXT: mtvsrd v2, r4
; P8LE-NEXT: ori r4, r5, 13629
; P8LE-NEXT: ori r5, r6, 25645
; P8LE-NEXT: rldicl r6, r3, 48, 48
; P8LE-NEXT: clrlwi r6, r6, 16
; P8LE-NEXT: mulhwu r4, r6, r4
; P8LE-NEXT: mulli r4, r4, 654
; P8LE-NEXT: sub r4, r6, r4
; P8LE-NEXT: rldicl r6, r3, 32, 48
; P8LE-NEXT: rldicl r3, r3, 16, 48
; P8LE-NEXT: mtvsrd v3, r4
; P8LE-NEXT: lis r4, 12
; P8LE-NEXT: clrlwi r6, r6, 16
; P8LE-NEXT: clrlwi r3, r3, 16
; P8LE-NEXT: ori r4, r4, 5560
; P8LE-NEXT: mulhwu r5, r6, r5
; P8LE-NEXT: mulhwu r4, r3, r4
; P8LE-NEXT: mulli r5, r5, 23
; P8LE-NEXT: mulli r4, r4, 5423
; P8LE-NEXT: sub r5, r6, r5
; P8LE-NEXT: sub r3, r3, r4
; P8LE-NEXT: mtvsrd v4, r3
; P8LE-NEXT: vmrghh v2, v3, v2
; P8LE-NEXT: mtvsrd v3, r5
; P8LE-NEXT: vmrghh v3, v4, v3
; P8LE-NEXT: xxmrglw v2, v3, v2
; P8LE-NEXT: blr
;
; P8BE-LABEL: dont_fold_urem_one:
; P8BE: # %bb.0:
; P8BE-NEXT: mfvsrd r3, v2
; P8BE-NEXT: addis r6, r2, .LCPI4_0@toc@ha
; P8BE-NEXT: lis r4, 12
; P8BE-NEXT: lis r5, 2849
; P8BE-NEXT: addi r6, r6, .LCPI4_0@toc@l
; P8BE-NEXT: ori r4, r4, 5560
; P8BE-NEXT: ori r5, r5, 25645
; P8BE-NEXT: lxvw4x v2, 0, r6
; P8BE-NEXT: clrldi r6, r3, 48
; P8BE-NEXT: clrlwi r6, r6, 16
; P8BE-NEXT: mulhwu r4, r6, r4
; P8BE-NEXT: mulli r4, r4, 5423
; P8BE-NEXT: sub r4, r6, r4
; P8BE-NEXT: rldicl r6, r3, 48, 48
; P8BE-NEXT: rldicl r3, r3, 32, 48
; P8BE-NEXT: clrlwi r6, r6, 16
; P8BE-NEXT: clrlwi r3, r3, 16
; P8BE-NEXT: mtvsrwz v3, r4
; P8BE-NEXT: mulhwu r5, r6, r5
; P8BE-NEXT: mulli r5, r5, 23
; P8BE-NEXT: sub r5, r6, r5
; P8BE-NEXT: lis r6, 100
; P8BE-NEXT: ori r6, r6, 13629
; P8BE-NEXT: mtvsrwz v4, r5
; P8BE-NEXT: mulhwu r6, r3, r6
; P8BE-NEXT: mulli r6, r6, 654
; P8BE-NEXT: sub r3, r3, r6
; P8BE-NEXT: vperm v3, v4, v3, v2
; P8BE-NEXT: mtvsrwz v4, r3
; P8BE-NEXT: li r3, 0
; P8BE-NEXT: mtvsrwz v5, r3
; P8BE-NEXT: vperm v2, v5, v4, v2
; P8BE-NEXT: xxmrghw v2, v2, v3
; P8BE-NEXT: blr
  %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
  ret <4 x i16> %1
}

; Don't fold if the divisor is 2^16.
; NOTE(review): 65536 (2^16) is not representable in i16 (the largest unsigned
; i16 value is 65535), and the function name says "smax" while the comment says
; 2^16. Presumably the literal is truncated to 0 when the IR is parsed, which
; would make this a urem by zero and explain why all four run lines only check
; for a bare `blr` -- confirm the intended divisor, and whether current parsers
; still accept this literal, before regenerating the assertions.
define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) {
; CHECK-LABEL: dont_fold_urem_i16_smax:
; CHECK: # %bb.0:
; CHECK-NEXT: blr
  %1 = urem <4 x i16> %x, <i16 1, i16 65536, i16 23, i16 5423>
  ret <4 x i16> %1
}

; Don't fold i64 urem.
; urem of a <4 x i64> by <1, 654, 23, 5423>.
; The checks expect the 654, 23 and 5423 lanes to be computed in GPRs with
; 64-bit mulhdu / mulli / sub sequences, and the divisor-1 lane to be a
; constant zero when the result vector is rebuilt.
define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) {
; P9LE-LABEL: dont_fold_urem_i64:
; P9LE: # %bb.0:
; P9LE-NEXT: lis r4, 1602
; P9LE-NEXT: mfvsrld r3, v3
; P9LE-NEXT: ori r4, r4, 51289
; P9LE-NEXT: rldic r4, r4, 36, 1
; P9LE-NEXT: oris r4, r4, 45590
; P9LE-NEXT: ori r4, r4, 17097
; P9LE-NEXT: mulhdu r4, r3, r4
; P9LE-NEXT: sub r5, r3, r4
; P9LE-NEXT: rldicl r5, r5, 63, 1
; P9LE-NEXT: add r4, r5, r4
; P9LE-NEXT: lis r5, -16037
; P9LE-NEXT: rldicl r4, r4, 60, 4
; P9LE-NEXT: ori r5, r5, 28749
; P9LE-NEXT: mulli r4, r4, 23
; P9LE-NEXT: rldic r5, r5, 32, 0
; P9LE-NEXT: oris r5, r5, 52170
; P9LE-NEXT: ori r5, r5, 12109
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: mfvsrd r4, v3
; P9LE-NEXT: mulhdu r5, r4, r5
; P9LE-NEXT: rldicl r5, r5, 52, 12
; P9LE-NEXT: mulli r5, r5, 5423
; P9LE-NEXT: sub r4, r4, r5
; P9LE-NEXT: lis r5, 3206
; P9LE-NEXT: ori r5, r5, 42889
; P9LE-NEXT: mtvsrdd v3, r4, r3
; P9LE-NEXT: mfvsrd r3, v2
; P9LE-NEXT: rldic r5, r5, 35, 1
; P9LE-NEXT: rldicl r4, r3, 63, 1
; P9LE-NEXT: oris r5, r5, 1603
; P9LE-NEXT: ori r5, r5, 21445
; P9LE-NEXT: mulhdu r4, r4, r5
; P9LE-NEXT: rldicl r4, r4, 57, 7
; P9LE-NEXT: mulli r4, r4, 654
; P9LE-NEXT: sub r3, r3, r4
; P9LE-NEXT: li r4, 0
; P9LE-NEXT: mtvsrdd v2, r3, r4
; P9LE-NEXT: blr
;
; P9BE-LABEL: dont_fold_urem_i64:
; P9BE: # %bb.0:
; P9BE-NEXT: lis r4, 1602
; P9BE-NEXT: mfvsrd r3, v3
; P9BE-NEXT: ori r4, r4, 51289
; P9BE-NEXT: rldic r4, r4, 36, 1
; P9BE-NEXT: oris r4, r4, 45590
; P9BE-NEXT: ori r4, r4, 17097
; P9BE-NEXT: mulhdu r4, r3, r4
; P9BE-NEXT: sub r5, r3, r4
; P9BE-NEXT: rldicl r5, r5, 63, 1
; P9BE-NEXT: add r4, r5, r4
; P9BE-NEXT: lis r5, -16037
; P9BE-NEXT: rldicl r4, r4, 60, 4
; P9BE-NEXT: ori r5, r5, 28749
; P9BE-NEXT: mulli r4, r4, 23
; P9BE-NEXT: rldic r5, r5, 32, 0
; P9BE-NEXT: oris r5, r5, 52170
; P9BE-NEXT: ori r5, r5, 12109
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: mfvsrld r4, v3
; P9BE-NEXT: mulhdu r5, r4, r5
; P9BE-NEXT: rldicl r5, r5, 52, 12
; P9BE-NEXT: mulli r5, r5, 5423
; P9BE-NEXT: sub r4, r4, r5
; P9BE-NEXT: lis r5, 3206
; P9BE-NEXT: ori r5, r5, 42889
; P9BE-NEXT: mtvsrdd v3, r3, r4
; P9BE-NEXT: mfvsrld r3, v2
; P9BE-NEXT: rldic r5, r5, 35, 1
; P9BE-NEXT: rldicl r4, r3, 63, 1
; P9BE-NEXT: oris r5, r5, 1603
; P9BE-NEXT: ori r5, r5, 21445
; P9BE-NEXT: mulhdu r4, r4, r5
; P9BE-NEXT: rldicl r4, r4, 57, 7
; P9BE-NEXT: mulli r4, r4, 654
; P9BE-NEXT: sub r3, r3, r4
; P9BE-NEXT: mtvsrdd v2, 0, r3
; P9BE-NEXT: blr
;
; P8LE-LABEL: dont_fold_urem_i64:
; P8LE: # %bb.0:
; P8LE-NEXT: lis r3, 1602
; P8LE-NEXT: xxswapd vs0, v3
; P8LE-NEXT: lis r5, 3206
; P8LE-NEXT: mfvsrd r6, v2
; P8LE-NEXT: mfvsrd r8, v3
; P8LE-NEXT: ori r3, r3, 51289
; P8LE-NEXT: ori r5, r5, 42889
; P8LE-NEXT: rldic r4, r3, 36, 1
; P8LE-NEXT: mffprd r3, f0
; P8LE-NEXT: rldic r5, r5, 35, 1
; P8LE-NEXT: rldicl r7, r6, 63, 1
; P8LE-NEXT: oris r4, r4, 45590
; P8LE-NEXT: oris r5, r5, 1603
; P8LE-NEXT: ori r4, r4, 17097
; P8LE-NEXT: ori r5, r5, 21445
; P8LE-NEXT: mulhdu r4, r3, r4
; P8LE-NEXT: mulhdu r5, r7, r5
; P8LE-NEXT: sub r7, r3, r4
; P8LE-NEXT: rldicl r5, r5, 57, 7
; P8LE-NEXT: rldicl r7, r7, 63, 1
; P8LE-NEXT: mulli r5, r5, 654
; P8LE-NEXT: add r4, r7, r4
; P8LE-NEXT: lis r7, -16037
; P8LE-NEXT: ori r7, r7, 28749
; P8LE-NEXT: rldicl r4, r4, 60, 4
; P8LE-NEXT: sub r5, r6, r5
; P8LE-NEXT: rldic r7, r7, 32, 0
; P8LE-NEXT: mulli r4, r4, 23
; P8LE-NEXT: oris r7, r7, 52170
; P8LE-NEXT: ori r7, r7, 12109
; P8LE-NEXT: sub r3, r3, r4
; P8LE-NEXT: mulhdu r7, r8, r7
; P8LE-NEXT: mtfprd f1, r3
; P8LE-NEXT: li r3, 0
; P8LE-NEXT: rldicl r7, r7, 52, 12
; P8LE-NEXT: mulli r7, r7, 5423
; P8LE-NEXT: sub r7, r8, r7
; P8LE-NEXT: mtfprd f0, r7
; P8LE-NEXT: xxmrghd v3, vs0, vs1
; P8LE-NEXT: mtfprd f0, r5
; P8LE-NEXT: mtfprd f1, r3
; P8LE-NEXT: xxmrghd v2, vs0, vs1
; P8LE-NEXT: blr
;
; P8BE-LABEL: dont_fold_urem_i64:
; P8BE: # %bb.0:
; P8BE-NEXT: lis r3, 1602
; P8BE-NEXT: mfvsrd r4, v3
; P8BE-NEXT: lis r5, 3206
; P8BE-NEXT: xxswapd vs0, v2
; P8BE-NEXT: xxswapd vs1, v3
; P8BE-NEXT: ori r3, r3, 51289
; P8BE-NEXT: ori r5, r5, 42889
; P8BE-NEXT: mffprd r6, f0
; P8BE-NEXT: mffprd r8, f1
; P8BE-NEXT: rldic r3, r3, 36, 1
; P8BE-NEXT: rldic r5, r5, 35, 1
; P8BE-NEXT: oris r3, r3, 45590
; P8BE-NEXT: oris r5, r5, 1603
; P8BE-NEXT: rldicl r7, r6, 63, 1
; P8BE-NEXT: ori r3, r3, 17097
; P8BE-NEXT: ori r5, r5, 21445
; P8BE-NEXT: mulhdu r3, r4, r3
; P8BE-NEXT: mulhdu r5, r7, r5
; P8BE-NEXT: sub r7, r4, r3
; P8BE-NEXT: rldicl r5, r5, 57, 7
; P8BE-NEXT: rldicl r7, r7, 63, 1
; P8BE-NEXT: mulli r5, r5, 654
; P8BE-NEXT: add r3, r7, r3
; P8BE-NEXT: lis r7, -16037
; P8BE-NEXT: ori r7, r7, 28749
; P8BE-NEXT: rldicl r3, r3, 60, 4
; P8BE-NEXT: sub r5, r6, r5
; P8BE-NEXT: rldic r7, r7, 32, 0
; P8BE-NEXT: mulli r3, r3, 23
; P8BE-NEXT: oris r7, r7, 52170
; P8BE-NEXT: ori r7, r7, 12109
; P8BE-NEXT: sub r3, r4, r3
; P8BE-NEXT: mulhdu r7, r8, r7
; P8BE-NEXT: mtfprd f1, r3
; P8BE-NEXT: li r3, 0
; P8BE-NEXT: rldicl r7, r7, 52, 12
; P8BE-NEXT: mulli r7, r7, 5423
; P8BE-NEXT: sub r7, r8, r7
; P8BE-NEXT: mtfprd f0, r7
; P8BE-NEXT: xxmrghd v3, vs1, vs0
; P8BE-NEXT: mtfprd f0, r5
; P8BE-NEXT: mtfprd f1, r3
; P8BE-NEXT: xxmrghd v2, vs1, vs0
; P8BE-NEXT: blr
  %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
  ret <4 x i64> %1
}