; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE \
; RUN:   --implicit-check-not xxswapd

; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE

; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE

; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX

; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX

; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
; RUN:   --implicit-check-not xxswapd

; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX

; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX

; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | \
; RUN:   FileCheck %s -check-prefix=CHECK-LE-NOVSX --implicit-check-not xxswapd

; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s -check-prefix=CHECK-P9 --implicit-check-not xxswapd

; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s -check-prefix=CHECK-P9

; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
; RUN:   --implicit-check-not xxswapd

; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
; RUN:   -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX

; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \
; RUN:   FileCheck %s -check-prefix=CHECK-LE --implicit-check-not xxswapd

; Tests code generation for 128-bit integer addition and argument passing on
; 64-bit PowerPC, both as scalar i128 and as <1 x i128> vectors. The
; invocations above cover little-endian Linux, big-endian Linux and AIX, for
; pwr8 and pwr9, with and without VSX.

@x = common global <1 x i128> zeroinitializer, align 16
@y = common global <1 x i128> zeroinitializer, align 16
@a = common global i128 zeroinitializer, align 16
@b = common global i128 zeroinitializer, align 16

; VSX:
;   %a is passed in register 34
;   The value of 1 is stored in the TOC.
;   On LE, ensure the value of 1 is swapped before being used (using xxswapd).
; VMX (no VSX):
;   %a is passed in register 2
;   The value of 1 is stored in the TOC.
;   No swaps are necessary when using P8 Vector instructions on LE
define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
  %tmp = add <1 x i128> %a, <i128 1>
  ret <1 x i128> %tmp

; FIXME: Seems a 128-bit literal is materialized by loading from the TOC. There
;        should be a better way of doing this.

; CHECK-LE-LABEL: @v1i128_increment_by_one
; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE: xxswapd 35, [[VAL]]
; CHECK-LE: vadduqm 2, 2, 3
; CHECK-LE: blr

; CHECK-P9-LABEL: @v1i128_increment_by_one
; The below FIXME is due to the lowering for BUILD_VECTOR that will be fixed
; in a subsequent patch.
; FIXME: li [[R1:r[0-9]+]], 1
; FIXME: li [[R2:r[0-9]+]], 0
; FIXME: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
; CHECK-P9: lxv [[V1:v[0-9]+]]
; CHECK-P9: vadduqm v2, v2, [[V1]]
; CHECK-P9: blr

; CHECK-BE-LABEL: @v1i128_increment_by_one
; CHECK-BE: lxvd2x 35, {{[0-9]+}}, {{[0-9]+}}
; CHECK-BE-NOT: xxswapd
; CHECK-BE: vadduqm 2, 2, 3
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-BE: blr

; CHECK-NOVSX-LABEL: @v1i128_increment_by_one
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-NOT: stxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX: lvx [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-NOT: lxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX: vadduqm 2, 2, [[VAL]]
; CHECK-NOVSX: blr
}

; VSX:
;   %a is passed in register 34
;   %b is passed in register 35
;   No swaps are necessary when using P8 Vector instructions on LE
; VMX (no VSX):
;   %a is passed in register 2
;   %b is passed in register 3
;   On LE, do not need to swap contents of 2 and 3 because the lvx/stvx
;   instructions do not swap elements
define <1 x i128> @v1i128_increment_by_val(<1 x i128> %a, <1 x i128> %b) nounwind {
  %tmp = add <1 x i128> %a, %b
  ret <1 x i128> %tmp

; The full vadduqm mnemonic is checked here, matching the checks in
; v1i128_increment_by_one.

; CHECK-LE-LABEL: @v1i128_increment_by_val
; CHECK-LE-NOT: xxswapd
; CHECK-LE: vadduqm 2, 2, 3
; CHECK-LE: blr

; CHECK-BE-LABEL: @v1i128_increment_by_val
; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34
; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 35
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-BE: vadduqm 2, 2, 3
; CHECK-BE: blr

; CHECK-NOVSX-LABEL: @v1i128_increment_by_val
; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-NOVSX: vadduqm 2, 2, 3
; CHECK-NOVSX: blr
}

; Little Endian (VSX and VMX):
;   Lower 64-bits of %a are passed in register 3
;   Upper 64-bits of %a are passed in register 4
;   Increment lower 64-bits using addic (immediate value of 1)
;   Increment upper 64-bits using addze (add to zero extended)
;   Results are placed in registers 3 and 4
; Big Endian (VSX and VMX):
;   Lower 64-bits of %a are passed in register 4
;   Upper 64-bits of %a are passed in register 3
;   Increment lower 64-bits using addic (immediate value of 1)
;   Increment upper 64-bits using addze (add to zero extended)
;   Results are placed in registers 3 and 4
define i128 @i128_increment_by_one(i128 %a) nounwind {
  %tmp = add i128 %a, 1
  ret i128 %tmp
; CHECK-LE-LABEL: @i128_increment_by_one
; CHECK-LE: addic 3, 3, 1
; CHECK-LE-NEXT: addze 4, 4
; CHECK-LE: blr

; CHECK-BE-LABEL: @i128_increment_by_one
; CHECK-BE: addic 4, 4, 1
; CHECK-BE-NEXT: addze 3, 3
; CHECK-BE: blr

; CHECK-LE-NOVSX-LABEL: @i128_increment_by_one
; CHECK-LE-NOVSX: addic 3, 3, 1
; CHECK-LE-NOVSX-NEXT: addze 4, 4
; CHECK-LE-NOVSX: blr

; CHECK-BE-NOVSX-LABEL: @i128_increment_by_one
; CHECK-BE-NOVSX: addic 4, 4, 1
; CHECK-BE-NOVSX-NEXT: addze 3, 3
; CHECK-BE-NOVSX: blr
}

; Little Endian (VSX and VMX):
;   Lower 64-bits of %a are passed in register 3
;   Upper 64-bits of %a are passed in register 4
;   Lower 64-bits of %b are passed in register 5
;   Upper 64-bits of %b are passed in register 6
;   Add the lower 64-bits using addc on registers 3 and 5
;   Add the upper 64-bits using adde on registers 4 and 6
;   Registers 3 and 4 should hold the result
; Big Endian (VSX and VMX):
;   Upper 64-bits of %a are passed in register 3
;   Lower 64-bits of %a are passed in register 4
;   Upper 64-bits of %b are passed in register 5
;   Lower 64-bits of %b are passed in register 6
;   Add the lower 64-bits using addc on registers 4 and 6
;   Add the upper 64-bits using adde on registers 3 and 5
;   Registers 3 and 4 should hold the result
define i128 @i128_increment_by_val(i128 %a, i128 %b) nounwind {
  %tmp = add i128 %a, %b
  ret i128 %tmp
; CHECK-LE-LABEL: @i128_increment_by_val
; CHECK-LE: addc 3, 3, 5
; CHECK-LE-NEXT: adde 4, 4, 6
; CHECK-LE: blr

; CHECK-BE-LABEL: @i128_increment_by_val
; CHECK-BE: addc 4, 4, 6
; CHECK-BE-NEXT: adde 3, 3, 5
; CHECK-BE: blr

; CHECK-LE-NOVSX-LABEL: @i128_increment_by_val
; CHECK-LE-NOVSX: addc 3, 3, 5
; CHECK-LE-NOVSX-NEXT: adde 4, 4, 6
; CHECK-LE-NOVSX: blr

; CHECK-BE-NOVSX-LABEL: @i128_increment_by_val
; CHECK-BE-NOVSX: addc 4, 4, 6
; CHECK-BE-NOVSX-NEXT: adde 3, 3, 5
; CHECK-BE-NOVSX: blr
}


; Callsites for the routines defined above.
; Ensure the parameters are loaded in the same order that is expected by the
; callee. See comments for individual functions above for details on registers
; used for parameters.
define <1 x i128> @call_v1i128_increment_by_one() nounwind {
  %tmp = load <1 x i128>, ptr @x, align 16
  %ret = call <1 x i128> @v1i128_increment_by_one(<1 x i128> %tmp)
  ret <1 x i128> %ret

; CHECK-LE-LABEL: @call_v1i128_increment_by_one
; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE: xxswapd 34, [[VAL]]
; CHECK-LE: bl v1i128_increment_by_one
; CHECK-LE: blr

; CHECK-P9-LABEL: @call_v1i128_increment_by_one
; CHECK-P9: lxv
; CHECK-P9: bl {{.?}}v1i128_increment_by_one
; CHECK-P9: blr

; CHECK-BE-LABEL: @call_v1i128_increment_by_one
; CHECK-BE: lxvw4x 34, {{[0-9]+}}, {{[0-9]+}}
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-BE: bl {{.?}}v1i128_increment_by_one
; CHECK-BE: blr

; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_one
; CHECK-NOVSX: lvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX: bl {{.?}}v1i128_increment_by_one
; CHECK-NOVSX: blr
}

; Loads both vector globals and passes them to v1i128_increment_by_val.
define <1 x i128> @call_v1i128_increment_by_val() nounwind {
  %tmp = load <1 x i128>, ptr @x, align 16
  %tmp2 = load <1 x i128>, ptr @y, align 16
  %ret = call <1 x i128> @v1i128_increment_by_val(<1 x i128> %tmp, <1 x i128> %tmp2)
  ret <1 x i128> %ret

; CHECK-LE-LABEL: @call_v1i128_increment_by_val
; CHECK-LE: lxvd2x [[VAL1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE-DAG: lxvd2x [[VAL2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
; CHECK-LE-DAG: xxswapd 34, [[VAL1]]
; CHECK-LE: xxswapd 35, [[VAL2]]
; CHECK-LE: bl v1i128_increment_by_val
; CHECK-LE: blr

; CHECK-P9-LABEL: @call_v1i128_increment_by_val
; CHECK-P9-DAG: lxv v2
; CHECK-P9-DAG: lxv v3
; CHECK-P9: bl {{.?}}v1i128_increment_by_val
; CHECK-P9: blr

; NOTE(review): for big endian only the load into register 35 is checked
; below; confirm whether the load into register 34 is left unchecked on
; purpose.
; CHECK-BE-LABEL: @call_v1i128_increment_by_val


; CHECK-BE-DAG: lxvw4x 35, {{[0-9]+}}, {{[0-9]+}}
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-BE-NOT: xxswapd 35, {{[0-9]+}}
; CHECK-BE: bl {{.?}}v1i128_increment_by_val
; CHECK-BE: blr

; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_val
; CHECK-NOVSX-DAG: lvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-DAG: lvx 3, {{[0-9]+}}, {{[0-9]+}}
; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
; CHECK-NOVSX-NOT: xxswapd 35, {{[0-9]+}}
; CHECK-NOVSX: bl {{.?}}v1i128_increment_by_val
; CHECK-NOVSX: blr

}

; Loads the scalar global @a and passes it to i128_increment_by_one. The two
; 64-bit halves are expected in registers 3 and 4, loaded from offsets 0 and 8
; of the same base register.
define i128 @call_i128_increment_by_one() nounwind {
  %tmp = load i128, ptr @a, align 16
  %ret = call i128 @i128_increment_by_one(i128 %tmp)
  ret i128 %ret
; CHECK-LE-LABEL: @call_i128_increment_by_one
; CHECK-LE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
; CHECK-LE-DAG: ld 4, 8([[BASEREG]])
; CHECK-LE: bl i128_increment_by_one
; CHECK-LE: blr

; CHECK-BE-LABEL: @call_i128_increment_by_one
; CHECK-BE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
; CHECK-BE-DAG: ld 4, 8([[BASEREG]])
; CHECK-BE: bl {{.?}}i128_increment_by_one
; CHECK-BE: blr

; CHECK-NOVSX-LABEL: @call_i128_increment_by_one
; CHECK-NOVSX-DAG: ld 3, 0([[BASEREG:[0-9]+]])
; CHECK-NOVSX-DAG: ld 4, 8([[BASEREG]])
; CHECK-NOVSX: bl {{.?}}i128_increment_by_one
; CHECK-NOVSX: blr
}

; Loads the scalar globals @a and @b and passes them to i128_increment_by_val.
; @a is expected in registers 3 and 4, @b in registers 5 and 6.
define i128 @call_i128_increment_by_val() nounwind {
  %tmp = load i128, ptr @a, align 16
  %tmp2 = load i128, ptr @b, align 16
  %ret = call i128 @i128_increment_by_val(i128 %tmp, i128 %tmp2)
  ret i128 %ret
; CHECK-LE-LABEL: @call_i128_increment_by_val
; CHECK-LE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
; CHECK-LE-DAG: ld 4, 8([[P1BASEREG]])
; CHECK-LE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
; CHECK-LE-DAG: ld 6, 8([[P2BASEREG]])
; CHECK-LE: bl i128_increment_by_val
; CHECK-LE: blr

; CHECK-BE-LABEL: @call_i128_increment_by_val
; CHECK-BE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
; CHECK-BE-DAG: ld 4, 8([[P1BASEREG]])
; CHECK-BE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
; CHECK-BE-DAG: ld 6, 8([[P2BASEREG]])
; CHECK-BE: bl {{.?}}i128_increment_by_val
; CHECK-BE: blr

; CHECK-NOVSX-LABEL: @call_i128_increment_by_val
; CHECK-NOVSX-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
; CHECK-NOVSX-DAG: ld 4, 8([[P1BASEREG]])
; CHECK-NOVSX-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
; CHECK-NOVSX-DAG: ld 6, 8([[P2BASEREG]])
; CHECK-NOVSX: bl {{.?}}i128_increment_by_val
; CHECK-NOVSX: blr
}

; Sums three i128 arguments that are interleaved with i32 arguments. Per the
; checks below, %i1280 arrives in registers 4 and 5, one half of %i1281 arrives
; in register 10 while its other half is loaded from a slot addressed off
; register 1, and both halves of %i1282 are loaded from slots addressed off
; register 1.
define i128 @callee_i128_split(i32 %i, i128 %i1280, i32 %i4, i32 %i5,
                               i32 %i6, i32 %i7, i128 %i1281, i32 %i8, i128 %i1282){
entry:
  %tmp = add i128 %i1280, %i1281
  %tmp1 = add i128 %tmp, %i1282

  ret i128 %tmp1
}
; CHECK-LE-LABEL: @callee_i128_split
; CHECK-LE-DAG: ld [[TMPREG:[0-9]+]], [[OFFSET:[0-9]+]](1)
; CHECK-LE-DAG: addc [[TMPREG2:[0-9]+]], 4, 10
; CHECK-LE-DAG: adde [[TMPREG3:[0-9]+]], 5, [[TMPREG]]

; CHECK-LE-DAG: ld [[TMPREG4:[0-9]+]], [[OFFSET2:[0-9]+]](1)
; CHECK-LE-DAG: ld [[TMPREG5:[0-9]+]], [[OFFSET3:[0-9]+]](1)
; CHECK-LE-DAG: addc 3, [[TMPREG2]], [[TMPREG4]]
; CHECK-LE-DAG: adde 4, [[TMPREG3]], [[TMPREG5]]

; CHECK-BE-LABEL: @callee_i128_split
; CHECK-BE-DAG: ld [[TMPREG:[0-9]+]], [[OFFSET:[0-9]+]](1)
; CHECK-BE-DAG: addc [[TMPREG3:[0-9]+]], 5, [[TMPREG]]
; CHECK-BE-DAG: adde [[TMPREG2:[0-9]+]], 4, 10

; CHECK-BE-DAG: ld [[TMPREG4:[0-9]+]], [[OFFSET2:[0-9]+]](1)
; CHECK-BE-DAG: ld [[TMPREG5:[0-9]+]], [[OFFSET3:[0-9]+]](1)
; CHECK-BE-DAG: addc 4, [[TMPREG3]], [[TMPREG4]]
; CHECK-BE-DAG: adde 3, [[TMPREG2]], [[TMPREG5]]

; Caller side of callee_i128_split. Per the checks below, the i32 constants go
; in registers 3 and 6 through 9, @a goes in registers 4 and 5, and @b is split
; between register 10 and a store to a slot addressed off register 1.
define i128 @i128_split() {
entry:
  %0 = load i128, ptr @a, align 16
  %1 = load i128, ptr @b, align 16
  %call = tail call i128 @callee_i128_split(i32 1, i128 %0, i32 4, i32 5,
                                           i32 6, i32 7, i128 %1, i32 8, i128 9)
  ret i128 %call
}

; CHECK-LE-LABEL: @i128_split
; CHECK-LE-DAG: li 3, 1
; CHECK-LE-DAG: ld 4, 0([[P2BASEREG:[0-9]+]])
; CHECK-LE-DAG: ld 5, 8([[P2BASEREG]])
; CHECK-LE-DAG: li 6, 4
; CHECK-LE-DAG: li 7, 5
; CHECK-LE-DAG: li 8, 6
; CHECK-LE-DAG: li 9, 7
; CHECK-LE-DAG: ld 10, 0([[P7BASEREG:[0-9]+]])
; CHECK-LE-DAG: ld [[TMPREG:[0-9]+]], 8([[P7BASEREG]])
; CHECK-LE-DAG: std [[TMPREG]], [[OFFSET:[0-9]+]](1)
; CHECK-LE: bl callee_i128_split


; CHECK-BE-LABEL: @i128_split
; CHECK-BE-DAG: li 3, 1
; CHECK-BE-DAG: ld 4, 0([[P2BASEREG:[0-9]+]])
; CHECK-BE-DAG: ld 5, 8([[P2BASEREG]])
; CHECK-BE-DAG: li 6, 4
; CHECK-BE-DAG: li 7, 5
; CHECK-BE-DAG: li 8, 6
; CHECK-BE-DAG: li 9, 7
; CHECK-BE-DAG: ld 10, 0([[P7BASEREG:[0-9]+]])
; CHECK-BE-DAG: ld [[TMPREG:[0-9]+]], 8([[P7BASEREG]])
; CHECK-BE-DAG: std [[TMPREG]], [[OFFSET:[0-9]+]](1)
; CHECK-BE: bl {{.?}}callee_i128_split

; CHECK-NOVSX-LABEL: @i128_split
; CHECK-NOVSX-DAG: li 3, 1
; CHECK-NOVSX-DAG: ld 4, 0([[P2BASEREG:[0-9]+]])
; CHECK-NOVSX-DAG: ld 5, 8([[P2BASEREG]])
; CHECK-NOVSX-DAG: li 6, 4
; CHECK-NOVSX-DAG: li 7, 5
; CHECK-NOVSX-DAG: li 8, 6
; CHECK-NOVSX-DAG: li 9, 7
; CHECK-NOVSX-DAG: ld 10, 0([[P7BASEREG:[0-9]+]])
; CHECK-NOVSX-DAG: ld [[TMPREG:[0-9]+]], 8([[P7BASEREG]])
; CHECK-NOVSX-DAG: std [[TMPREG]], [[OFFSET:[0-9]+]](1)
; CHECK-NOVSX: bl {{.?}}callee_i128_split