; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 < %s | FileCheck %s

; Every out_* function in this group computes
;   (%x & %mask) | (%y & ~%mask)
; on a vector type, where ~%mask is written as an xor of %mask with an
; all-ones constant. The *_undef variants replace lane 2 of that constant
; with undef. Functions are grouped by total vector width (8 to 128 bits).
; The assertion lines record the PTX that llc is expected to emit.

; ============================================================================ ;
; 8-bit vector width
; ============================================================================ ;

; <1 x i8>: expected on 16-bit registers, with u8 loads and a b8 return store.
define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-LABEL: out_v1i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<8>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u8 %rs1, [out_v1i8_param_0];
; CHECK-NEXT:    ld.param.u8 %rs2, [out_v1i8_param_2];
; CHECK-NEXT:    and.b16 %rs3, %rs1, %rs2;
; CHECK-NEXT:    ld.param.u8 %rs4, [out_v1i8_param_1];
; CHECK-NEXT:    not.b16 %rs5, %rs2;
; CHECK-NEXT:    and.b16 %rs6, %rs4, %rs5;
; CHECK-NEXT:    or.b16 %rs7, %rs3, %rs6;
; CHECK-NEXT:    st.param.b8 [func_retval0], %rs7;
; CHECK-NEXT:    ret;
  %mx = and <1 x i8> %x, %mask
  %notmask = xor <1 x i8> %mask, <i8 -1>
  %my = and <1 x i8> %y, %notmask
  %r = or <1 x i8> %mx, %my
  ret <1 x i8> %r
}

; ============================================================================ ;
; 16-bit vector width
; ============================================================================ ;

; <1 x i16>: expected on 16-bit registers, with not.b16 for the inverted mask.
define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
; CHECK-LABEL: out_v1i16(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<8>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u16 %rs1, [out_v1i16_param_0];
; CHECK-NEXT:    ld.param.u16 %rs2, [out_v1i16_param_2];
; CHECK-NEXT:    and.b16 %rs3, %rs1, %rs2;
; CHECK-NEXT:    ld.param.u16 %rs4, [out_v1i16_param_1];
; CHECK-NEXT:    not.b16 %rs5, %rs2;
; CHECK-NEXT:    and.b16 %rs6, %rs4, %rs5;
; CHECK-NEXT:    or.b16 %rs7, %rs3, %rs6;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs7;
; CHECK-NEXT:    ret;
  %mx = and <1 x i16> %x, %mask
  %notmask = xor <1 x i16> %mask, <i16 -1>
  %my = and <1 x i16> %y, %notmask
  %r = or <1 x i16> %mx, %my
  ret <1 x i16> %r
}

; ============================================================================ ;
; 32-bit vector width
; ============================================================================ ;

; <4 x i8>: expected as one 32-bit register; the inverted mask is an
; xor.b32 with -1.
define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-LABEL: out_v4i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [out_v4i8_param_1];
; CHECK-NEXT:    ld.param.u32 %r2, [out_v4i8_param_0];
; CHECK-NEXT:    ld.param.u32 %r3, [out_v4i8_param_2];
; CHECK-NEXT:    and.b32 %r4, %r2, %r3;
; CHECK-NEXT:    xor.b32 %r5, %r3, -1;
; CHECK-NEXT:    and.b32 %r6, %r1, %r5;
; CHECK-NEXT:    or.b32 %r7, %r4, %r6;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
; CHECK-NEXT:    ret;
  %mx = and <4 x i8> %x, %mask
  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <4 x i8> %y, %notmask
  %r = or <4 x i8> %mx, %my
  ret <4 x i8> %r
}

; As out_v4i8, but lane 2 of the xor constant is undef. The expected xor
; immediate is -16711681 (0xFF00FFFF), i.e. byte lane 2 is left uninverted.
define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-LABEL: out_v4i8_undef(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [out_v4i8_undef_param_1];
; CHECK-NEXT:    ld.param.u32 %r2, [out_v4i8_undef_param_0];
; CHECK-NEXT:    ld.param.u32 %r3, [out_v4i8_undef_param_2];
; CHECK-NEXT:    and.b32 %r4, %r2, %r3;
; CHECK-NEXT:    xor.b32 %r5, %r3, -16711681;
; CHECK-NEXT:    and.b32 %r6, %r1, %r5;
; CHECK-NEXT:    or.b32 %r7, %r4, %r6;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
; CHECK-NEXT:    ret;
  %mx = and <4 x i8> %x, %mask
  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1>
  %my = and <4 x i8> %y, %notmask
  %r = or <4 x i8> %mx, %my
  ret <4 x i8> %r
}

; <2 x i16>: same expected shape as out_v4i8 (one 32-bit register, xor.b32
; with -1).
define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
; CHECK-LABEL: out_v2i16(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [out_v2i16_param_1];
; CHECK-NEXT:    ld.param.u32 %r2, [out_v2i16_param_0];
; CHECK-NEXT:    ld.param.u32 %r3, [out_v2i16_param_2];
; CHECK-NEXT:    and.b32 %r4, %r2, %r3;
; CHECK-NEXT:    xor.b32 %r5, %r3, -1;
; CHECK-NEXT:    and.b32 %r6, %r1, %r5;
; CHECK-NEXT:    or.b32 %r7, %r4, %r6;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
; CHECK-NEXT:    ret;
  %mx = and <2 x i16> %x, %mask
  %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1>
  %my = and <2 x i16> %y, %notmask
  %r = or <2 x i16> %mx, %my
  ret <2 x i16> %r
}

; <1 x i32>: expected to use not.b32 for the inverted mask.
define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
; CHECK-LABEL: out_v1i32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [out_v1i32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [out_v1i32_param_2];
; CHECK-NEXT:    and.b32 %r3, %r1, %r2;
; CHECK-NEXT:    ld.param.u32 %r4, [out_v1i32_param_1];
; CHECK-NEXT:    not.b32 %r5, %r2;
; CHECK-NEXT:    and.b32 %r6, %r4, %r5;
; CHECK-NEXT:    or.b32 %r7, %r3, %r6;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r7;
; CHECK-NEXT:    ret;
  %mx = and <1 x i32> %x, %mask
  %notmask = xor <1 x i32> %mask, <i32 -1>
  %my = and <1 x i32> %y, %notmask
  %r = or <1 x i32> %mx, %my
  ret <1 x i32> %r
}

; ============================================================================ ;
; 64-bit vector width
; ============================================================================ ;

; <8 x i8>: expected as two 32-bit registers moved with v2 loads/stores;
; each inverted mask half is an xor.b32 with -1.
define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
; CHECK-LABEL: out_v8i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<15>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [out_v8i8_param_0];
; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [out_v8i8_param_2];
; CHECK-NEXT:    and.b32 %r5, %r1, %r3;
; CHECK-NEXT:    and.b32 %r6, %r2, %r4;
; CHECK-NEXT:    ld.param.v2.u32 {%r7, %r8}, [out_v8i8_param_1];
; CHECK-NEXT:    xor.b32 %r9, %r4, -1;
; CHECK-NEXT:    xor.b32 %r10, %r3, -1;
; CHECK-NEXT:    and.b32 %r11, %r7, %r10;
; CHECK-NEXT:    and.b32 %r12, %r8, %r9;
; CHECK-NEXT:    or.b32 %r13, %r6, %r12;
; CHECK-NEXT:    or.b32 %r14, %r5, %r11;
; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r14, %r13};
; CHECK-NEXT:    ret;
  %mx = and <8 x i8> %x, %mask
  %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <8 x i8> %y, %notmask
  %r = or <8 x i8> %mx, %my
  ret <8 x i8> %r
}

; <4 x i16>: same expected shape as out_v8i8 (two 32-bit registers).
define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-LABEL: out_v4i16(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<15>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [out_v4i16_param_0];
; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [out_v4i16_param_2];
; CHECK-NEXT:    and.b32 %r5, %r1, %r3;
; CHECK-NEXT:    and.b32 %r6, %r2, %r4;
; CHECK-NEXT:    ld.param.v2.u32 {%r7, %r8}, [out_v4i16_param_1];
; CHECK-NEXT:    xor.b32 %r9, %r4, -1;
; CHECK-NEXT:    xor.b32 %r10, %r3, -1;
; CHECK-NEXT:    and.b32 %r11, %r7, %r10;
; CHECK-NEXT:    and.b32 %r12, %r8, %r9;
; CHECK-NEXT:    or.b32 %r13, %r6, %r12;
; CHECK-NEXT:    or.b32 %r14, %r5, %r11;
; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r14, %r13};
; CHECK-NEXT:    ret;
  %mx = and <4 x i16> %x, %mask
  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1>
  %my = and <4 x i16> %y, %notmask
  %r = or <4 x i16> %mx, %my
  ret <4 x i16> %r
}

; As out_v4i16, but lane 2 of the xor constant is undef. The expected xor
; immediate for the second 32-bit register is -65536 (0xFFFF0000), i.e. its
; low 16-bit lane is left uninverted.
define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-LABEL: out_v4i16_undef(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<15>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [out_v4i16_undef_param_0];
; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [out_v4i16_undef_param_2];
; CHECK-NEXT:    and.b32 %r5, %r1, %r3;
; CHECK-NEXT:    and.b32 %r6, %r2, %r4;
; CHECK-NEXT:    ld.param.v2.u32 {%r7, %r8}, [out_v4i16_undef_param_1];
; CHECK-NEXT:    xor.b32 %r9, %r4, -65536;
; CHECK-NEXT:    xor.b32 %r10, %r3, -1;
; CHECK-NEXT:    and.b32 %r11, %r7, %r10;
; CHECK-NEXT:    and.b32 %r12, %r8, %r9;
; CHECK-NEXT:    or.b32 %r13, %r6, %r12;
; CHECK-NEXT:    or.b32 %r14, %r5, %r11;
; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r14, %r13};
; CHECK-NEXT:    ret;
  %mx = and <4 x i16> %x, %mask
  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 -1>
  %my = and <4 x i16> %y, %notmask
  %r = or <4 x i16> %mx, %my
  ret <4 x i16> %r
}

; <2 x i32>: two 32-bit registers, with not.b32 per lane for the inverted mask.
define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
; CHECK-LABEL: out_v2i32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<15>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [out_v2i32_param_0];
; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [out_v2i32_param_2];
; CHECK-NEXT:    and.b32 %r5, %r1, %r3;
; CHECK-NEXT:    and.b32 %r6, %r2, %r4;
; CHECK-NEXT:    ld.param.v2.u32 {%r7, %r8}, [out_v2i32_param_1];
; CHECK-NEXT:    not.b32 %r9, %r4;
; CHECK-NEXT:    not.b32 %r10, %r3;
; CHECK-NEXT:    and.b32 %r11, %r7, %r10;
; CHECK-NEXT:    and.b32 %r12, %r8, %r9;
; CHECK-NEXT:    or.b32 %r13, %r6, %r12;
; CHECK-NEXT:    or.b32 %r14, %r5, %r11;
; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r14, %r13};
; CHECK-NEXT:    ret;
  %mx = and <2 x i32> %x, %mask
  %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1>
  %my = and <2 x i32> %y, %notmask
  %r = or <2 x i32> %mx, %my
  ret <2 x i32> %r
}

; <1 x i64>: expected on 64-bit registers, with not.b64 for the inverted mask.
define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
; CHECK-LABEL: out_v1i64(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<8>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [out_v1i64_param_0];
; CHECK-NEXT:    ld.param.u64 %rd2, [out_v1i64_param_2];
; CHECK-NEXT:    and.b64 %rd3, %rd1, %rd2;
; CHECK-NEXT:    ld.param.u64 %rd4, [out_v1i64_param_1];
; CHECK-NEXT:    not.b64 %rd5, %rd2;
; CHECK-NEXT:    and.b64 %rd6, %rd4, %rd5;
; CHECK-NEXT:    or.b64 %rd7, %rd3, %rd6;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd7;
; CHECK-NEXT:    ret;
  %mx = and <1 x i64> %x, %mask
  %notmask = xor <1 x i64> %mask, <i64 -1>
  %my = and <1 x i64> %y, %notmask
  %r = or <1 x i64> %mx, %my
  ret <1 x i64> %r
}

; ============================================================================ ;
; 128-bit vector width
; ============================================================================ ;

; <16 x i8>: expected as four 32-bit registers moved with v4 loads/stores;
; each inverted mask word is an xor.b32 with -1.
define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
; CHECK-LABEL: out_v16i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<29>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v16i8_param_0];
; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [out_v16i8_param_2];
; CHECK-NEXT:    and.b32 %r9, %r1, %r5;
; CHECK-NEXT:    and.b32 %r10, %r2, %r6;
; CHECK-NEXT:    and.b32 %r11, %r3, %r7;
; CHECK-NEXT:    and.b32 %r12, %r4, %r8;
; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [out_v16i8_param_1];
; CHECK-NEXT:    xor.b32 %r17, %r8, -1;
; CHECK-NEXT:    xor.b32 %r18, %r7, -1;
; CHECK-NEXT:    xor.b32 %r19, %r6, -1;
; CHECK-NEXT:    xor.b32 %r20, %r5, -1;
; CHECK-NEXT:    and.b32 %r21, %r13, %r20;
; CHECK-NEXT:    and.b32 %r22, %r14, %r19;
; CHECK-NEXT:    and.b32 %r23, %r15, %r18;
; CHECK-NEXT:    and.b32 %r24, %r16, %r17;
; CHECK-NEXT:    or.b32 %r25, %r12, %r24;
; CHECK-NEXT:    or.b32 %r26, %r11, %r23;
; CHECK-NEXT:    or.b32 %r27, %r10, %r22;
; CHECK-NEXT:    or.b32 %r28, %r9, %r21;
; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r28, %r27, %r26, %r25};
; CHECK-NEXT:    ret;
  %mx = and <16 x i8> %x, %mask
  %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <16 x i8> %y, %notmask
  %r = or <16 x i8> %mx, %my
  ret <16 x i8> %r
}

; <8 x i16>: same expected shape as out_v16i8 (four 32-bit registers).
define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
; CHECK-LABEL: out_v8i16(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<29>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v8i16_param_0];
; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [out_v8i16_param_2];
; CHECK-NEXT:    and.b32 %r9, %r1, %r5;
; CHECK-NEXT:    and.b32 %r10, %r2, %r6;
; CHECK-NEXT:    and.b32 %r11, %r3, %r7;
; CHECK-NEXT:    and.b32 %r12, %r4, %r8;
; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [out_v8i16_param_1];
; CHECK-NEXT:    xor.b32 %r17, %r8, -1;
; CHECK-NEXT:    xor.b32 %r18, %r7, -1;
; CHECK-NEXT:    xor.b32 %r19, %r6, -1;
; CHECK-NEXT:    xor.b32 %r20, %r5, -1;
; CHECK-NEXT:    and.b32 %r21, %r13, %r20;
; CHECK-NEXT:    and.b32 %r22, %r14, %r19;
; CHECK-NEXT:    and.b32 %r23, %r15, %r18;
; CHECK-NEXT:    and.b32 %r24, %r16, %r17;
; CHECK-NEXT:    or.b32 %r25, %r12, %r24;
; CHECK-NEXT:    or.b32 %r26, %r11, %r23;
; CHECK-NEXT:    or.b32 %r27, %r10, %r22;
; CHECK-NEXT:    or.b32 %r28, %r9, %r21;
; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r28, %r27, %r26, %r25};
; CHECK-NEXT:    ret;
  %mx = and <8 x i16> %x, %mask
  %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %my = and <8 x i16> %y, %notmask
  %r = or <8 x i16> %mx, %my
  ret <8 x i16> %r
}

; <4 x i32>: four 32-bit registers, with not.b32 per lane for the inverted mask.
define <4 x i32> @out_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
; CHECK-LABEL: out_v4i32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<29>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v4i32_param_0];
; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [out_v4i32_param_2];
; CHECK-NEXT:    and.b32 %r9, %r1, %r5;
; CHECK-NEXT:    and.b32 %r10, %r2, %r6;
; CHECK-NEXT:    and.b32 %r11, %r3, %r7;
; CHECK-NEXT:    and.b32 %r12, %r4, %r8;
; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [out_v4i32_param_1];
; CHECK-NEXT:    not.b32 %r17, %r8;
; CHECK-NEXT:    not.b32 %r18, %r7;
; CHECK-NEXT:    not.b32 %r19, %r6;
; CHECK-NEXT:    not.b32 %r20, %r5;
; CHECK-NEXT:    and.b32 %r21, %r13, %r20;
; CHECK-NEXT:    and.b32 %r22, %r14, %r19;
; CHECK-NEXT:    and.b32 %r23, %r15, %r18;
; CHECK-NEXT:    and.b32 %r24, %r16, %r17;
; CHECK-NEXT:    or.b32 %r25, %r12, %r24;
; CHECK-NEXT:    or.b32 %r26, %r11, %r23;
; CHECK-NEXT:    or.b32 %r27, %r10, %r22;
; CHECK-NEXT:    or.b32 %r28, %r9, %r21;
; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r28, %r27, %r26, %r25};
; CHECK-NEXT:    ret;
  %mx = and <4 x i32> %x, %mask
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %y, %notmask
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; As out_v4i32, but lane 2 of the xor constant is undef. In the expected
; output that lane is stored as just x & mask (%r9); no not/and/or is
; emitted for it, so only 25 registers are used instead of 28.
define <4 x i32> @out_v4i32_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
; CHECK-LABEL: out_v4i32_undef(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<26>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [out_v4i32_undef_param_0];
; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [out_v4i32_undef_param_2];
; CHECK-NEXT:    and.b32 %r9, %r3, %r7;
; CHECK-NEXT:    and.b32 %r10, %r1, %r5;
; CHECK-NEXT:    and.b32 %r11, %r2, %r6;
; CHECK-NEXT:    and.b32 %r12, %r4, %r8;
; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [out_v4i32_undef_param_1];
; CHECK-NEXT:    not.b32 %r17, %r8;
; CHECK-NEXT:    not.b32 %r18, %r6;
; CHECK-NEXT:    not.b32 %r19, %r5;
; CHECK-NEXT:    and.b32 %r20, %r13, %r19;
; CHECK-NEXT:    and.b32 %r21, %r14, %r18;
; CHECK-NEXT:    and.b32 %r22, %r16, %r17;
; CHECK-NEXT:    or.b32 %r23, %r12, %r22;
; CHECK-NEXT:    or.b32 %r24, %r11, %r21;
; CHECK-NEXT:    or.b32 %r25, %r10, %r20;
; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r25, %r24, %r9, %r23};
; CHECK-NEXT:    ret;
  %mx = and <4 x i32> %x, %mask
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1>
  %my = and <4 x i32> %y, %notmask
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; <2 x i64>: two 64-bit registers moved with v2 loads/stores, with not.b64
; per lane for the inverted mask.
define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
; CHECK-LABEL: out_v2i64(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<15>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [out_v2i64_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd3, %rd4}, [out_v2i64_param_2];
; CHECK-NEXT:    and.b64 %rd5, %rd1, %rd3;
; CHECK-NEXT:    and.b64 %rd6, %rd2, %rd4;
; CHECK-NEXT:    ld.param.v2.u64 {%rd7, %rd8}, [out_v2i64_param_1];
; CHECK-NEXT:    not.b64 %rd9, %rd4;
; CHECK-NEXT:    not.b64 %rd10, %rd3;
; CHECK-NEXT:    and.b64 %rd11, %rd7, %rd10;
; CHECK-NEXT:    and.b64 %rd12, %rd8, %rd9;
; CHECK-NEXT:    or.b64 %rd13, %rd6, %rd12;
; CHECK-NEXT:    or.b64 %rd14, %rd5, %rd11;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd14, %rd13};
; CHECK-NEXT:    ret;
  %mx = and <2 x i64> %x, %mask
  %notmask = xor <2 x i64> %mask, <i64 -1, i64 -1>
  %my = and <2 x i64> %y, %notmask
  %r = or <2 x i64> %mx, %my
  ret <2 x i64> %r
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Should be the same as the previous one.
; (Presumably "the previous one" is the out_* group above: the in_* functions
; that follow express the same bitwise select as ((x ^ y) & mask) ^ y.)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Every in_* function below computes
;   ((%x ^ %y) & %mask) ^ %y
; on a vector type. Bit for bit this yields %x where %mask is 1 and %y where
; %mask is 0. Functions are grouped by total vector width (8 to 128 bits).
; The assertion lines record the PTX that llc is expected to emit: an
; xor / and / xor sequence per register, with no inverted mask.

; ============================================================================ ;
; 8-bit vector width
; ============================================================================ ;

; <1 x i8>: expected on 16-bit registers, with u8 loads and a b8 return store.
define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-LABEL: in_v1i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u8 %rs1, [in_v1i8_param_0];
; CHECK-NEXT:    ld.param.u8 %rs2, [in_v1i8_param_1];
; CHECK-NEXT:    xor.b16 %rs3, %rs1, %rs2;
; CHECK-NEXT:    ld.param.u8 %rs4, [in_v1i8_param_2];
; CHECK-NEXT:    and.b16 %rs5, %rs3, %rs4;
; CHECK-NEXT:    xor.b16 %rs6, %rs5, %rs2;
; CHECK-NEXT:    st.param.b8 [func_retval0], %rs6;
; CHECK-NEXT:    ret;
  %n0 = xor <1 x i8> %x, %y
  %n1 = and <1 x i8> %n0, %mask
  %r = xor <1 x i8> %n1, %y
  ret <1 x i8> %r
}

; ============================================================================ ;
; 16-bit vector width
; ============================================================================ ;

; <1 x i16>: expected on 16-bit registers.
define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
; CHECK-LABEL: in_v1i16(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u16 %rs1, [in_v1i16_param_0];
; CHECK-NEXT:    ld.param.u16 %rs2, [in_v1i16_param_1];
; CHECK-NEXT:    xor.b16 %rs3, %rs1, %rs2;
; CHECK-NEXT:    ld.param.u16 %rs4, [in_v1i16_param_2];
; CHECK-NEXT:    and.b16 %rs5, %rs3, %rs4;
; CHECK-NEXT:    xor.b16 %rs6, %rs5, %rs2;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs6;
; CHECK-NEXT:    ret;
  %n0 = xor <1 x i16> %x, %y
  %n1 = and <1 x i16> %n0, %mask
  %r = xor <1 x i16> %n1, %y
  ret <1 x i16> %r
}

; ============================================================================ ;
; 32-bit vector width
; ============================================================================ ;

; <4 x i8>: expected as one 32-bit register.
define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-LABEL: in_v4i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [in_v4i8_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [in_v4i8_param_1];
; CHECK-NEXT:    xor.b32 %r3, %r1, %r2;
; CHECK-NEXT:    ld.param.u32 %r4, [in_v4i8_param_2];
; CHECK-NEXT:    and.b32 %r5, %r3, %r4;
; CHECK-NEXT:    xor.b32 %r6, %r5, %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
; CHECK-NEXT:    ret;
  %n0 = xor <4 x i8> %x, %y
  %n1 = and <4 x i8> %n0, %mask
  %r = xor <4 x i8> %n1, %y
  ret <4 x i8> %r
}

; <2 x i16>: same expected shape as in_v4i8 (one 32-bit register).
define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
; CHECK-LABEL: in_v2i16(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [in_v2i16_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [in_v2i16_param_1];
; CHECK-NEXT:    xor.b32 %r3, %r1, %r2;
; CHECK-NEXT:    ld.param.u32 %r4, [in_v2i16_param_2];
; CHECK-NEXT:    and.b32 %r5, %r3, %r4;
; CHECK-NEXT:    xor.b32 %r6, %r5, %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
; CHECK-NEXT:    ret;
  %n0 = xor <2 x i16> %x, %y
  %n1 = and <2 x i16> %n0, %mask
  %r = xor <2 x i16> %n1, %y
  ret <2 x i16> %r
}

; <1 x i32>: same expected shape as in_v4i8 (one 32-bit register).
define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
; CHECK-LABEL: in_v1i32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [in_v1i32_param_0];
; CHECK-NEXT:    ld.param.u32 %r2, [in_v1i32_param_1];
; CHECK-NEXT:    xor.b32 %r3, %r1, %r2;
; CHECK-NEXT:    ld.param.u32 %r4, [in_v1i32_param_2];
; CHECK-NEXT:    and.b32 %r5, %r3, %r4;
; CHECK-NEXT:    xor.b32 %r6, %r5, %r2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
; CHECK-NEXT:    ret;
  %n0 = xor <1 x i32> %x, %y
  %n1 = and <1 x i32> %n0, %mask
  %r = xor <1 x i32> %n1, %y
  ret <1 x i32> %r
}

; ============================================================================ ;
; 64-bit vector width
; ============================================================================ ;

; <8 x i8>: expected as two 32-bit registers moved with v2 loads/stores.
define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
; CHECK-LABEL: in_v8i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<13>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [in_v8i8_param_0];
; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [in_v8i8_param_1];
; CHECK-NEXT:    ld.param.v2.u32 {%r5, %r6}, [in_v8i8_param_2];
; CHECK-NEXT:    xor.b32 %r7, %r2, %r4;
; CHECK-NEXT:    and.b32 %r8, %r7, %r6;
; CHECK-NEXT:    xor.b32 %r9, %r8, %r4;
; CHECK-NEXT:    xor.b32 %r10, %r1, %r3;
; CHECK-NEXT:    and.b32 %r11, %r10, %r5;
; CHECK-NEXT:    xor.b32 %r12, %r11, %r3;
; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r12, %r9};
; CHECK-NEXT:    ret;
  %n0 = xor <8 x i8> %x, %y
  %n1 = and <8 x i8> %n0, %mask
  %r = xor <8 x i8> %n1, %y
  ret <8 x i8> %r
}

; <4 x i16>: same expected shape as in_v8i8 (two 32-bit registers).
define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-LABEL: in_v4i16(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<13>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [in_v4i16_param_0];
; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [in_v4i16_param_1];
; CHECK-NEXT:    ld.param.v2.u32 {%r5, %r6}, [in_v4i16_param_2];
; CHECK-NEXT:    xor.b32 %r7, %r2, %r4;
; CHECK-NEXT:    and.b32 %r8, %r7, %r6;
; CHECK-NEXT:    xor.b32 %r9, %r8, %r4;
; CHECK-NEXT:    xor.b32 %r10, %r1, %r3;
; CHECK-NEXT:    and.b32 %r11, %r10, %r5;
; CHECK-NEXT:    xor.b32 %r12, %r11, %r3;
; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r12, %r9};
; CHECK-NEXT:    ret;
  %n0 = xor <4 x i16> %x, %y
  %n1 = and <4 x i16> %n0, %mask
  %r = xor <4 x i16> %n1, %y
  ret <4 x i16> %r
}

; <2 x i32>: two 32-bit registers; here the expected output loads the mask
; after the first pair of xors rather than up front.
define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
; CHECK-LABEL: in_v2i32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<13>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [in_v2i32_param_0];
; CHECK-NEXT:    ld.param.v2.u32 {%r3, %r4}, [in_v2i32_param_1];
; CHECK-NEXT:    xor.b32 %r5, %r2, %r4;
; CHECK-NEXT:    xor.b32 %r6, %r1, %r3;
; CHECK-NEXT:    ld.param.v2.u32 {%r7, %r8}, [in_v2i32_param_2];
; CHECK-NEXT:    and.b32 %r9, %r6, %r7;
; CHECK-NEXT:    and.b32 %r10, %r5, %r8;
; CHECK-NEXT:    xor.b32 %r11, %r10, %r4;
; CHECK-NEXT:    xor.b32 %r12, %r9, %r3;
; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r12, %r11};
; CHECK-NEXT:    ret;
  %n0 = xor <2 x i32> %x, %y
  %n1 = and <2 x i32> %n0, %mask
  %r = xor <2 x i32> %n1, %y
  ret <2 x i32> %r
}

; <1 x i64>: expected on 64-bit registers.
define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
; CHECK-LABEL: in_v1i64(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [in_v1i64_param_0];
; CHECK-NEXT:    ld.param.u64 %rd2, [in_v1i64_param_1];
; CHECK-NEXT:    xor.b64 %rd3, %rd1, %rd2;
; CHECK-NEXT:    ld.param.u64 %rd4, [in_v1i64_param_2];
; CHECK-NEXT:    and.b64 %rd5, %rd3, %rd4;
; CHECK-NEXT:    xor.b64 %rd6, %rd5, %rd2;
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd6;
; CHECK-NEXT:    ret;
  %n0 = xor <1 x i64> %x, %y
  %n1 = and <1 x i64> %n0, %mask
  %r = xor <1 x i64> %n1, %y
  ret <1 x i64> %r
}

; ============================================================================ ;
; 128-bit vector width
; ============================================================================ ;

; <16 x i8>: expected as four 32-bit registers moved with v4 loads/stores.
define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
; CHECK-LABEL: in_v16i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<25>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [in_v16i8_param_0];
; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [in_v16i8_param_1];
; CHECK-NEXT:    xor.b32 %r9, %r4, %r8;
; CHECK-NEXT:    xor.b32 %r10, %r3, %r7;
; CHECK-NEXT:    xor.b32 %r11, %r2, %r6;
; CHECK-NEXT:    xor.b32 %r12, %r1, %r5;
; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [in_v16i8_param_2];
; CHECK-NEXT:    and.b32 %r17, %r12, %r13;
; CHECK-NEXT:    and.b32 %r18, %r11, %r14;
; CHECK-NEXT:    and.b32 %r19, %r10, %r15;
; CHECK-NEXT:    and.b32 %r20, %r9, %r16;
; CHECK-NEXT:    xor.b32 %r21, %r20, %r8;
; CHECK-NEXT:    xor.b32 %r22, %r19, %r7;
; CHECK-NEXT:    xor.b32 %r23, %r18, %r6;
; CHECK-NEXT:    xor.b32 %r24, %r17, %r5;
; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r24, %r23, %r22, %r21};
; CHECK-NEXT:    ret;
  %n0 = xor <16 x i8> %x, %y
  %n1 = and <16 x i8> %n0, %mask
  %r = xor <16 x i8> %n1, %y
  ret <16 x i8> %r
}

; <8 x i16>: same expected shape as in_v16i8 (four 32-bit registers).
define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
; CHECK-LABEL: in_v8i16(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<25>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [in_v8i16_param_0];
; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [in_v8i16_param_1];
; CHECK-NEXT:    xor.b32 %r9, %r4, %r8;
; CHECK-NEXT:    xor.b32 %r10, %r3, %r7;
; CHECK-NEXT:    xor.b32 %r11, %r2, %r6;
; CHECK-NEXT:    xor.b32 %r12, %r1, %r5;
; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [in_v8i16_param_2];
; CHECK-NEXT:    and.b32 %r17, %r12, %r13;
; CHECK-NEXT:    and.b32 %r18, %r11, %r14;
; CHECK-NEXT:    and.b32 %r19, %r10, %r15;
; CHECK-NEXT:    and.b32 %r20, %r9, %r16;
; CHECK-NEXT:    xor.b32 %r21, %r20, %r8;
; CHECK-NEXT:    xor.b32 %r22, %r19, %r7;
; CHECK-NEXT:    xor.b32 %r23, %r18, %r6;
; CHECK-NEXT:    xor.b32 %r24, %r17, %r5;
; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r24, %r23, %r22, %r21};
; CHECK-NEXT:    ret;
  %n0 = xor <8 x i16> %x, %y
  %n1 = and <8 x i16> %n0, %mask
  %r = xor <8 x i16> %n1, %y
  ret <8 x i16> %r
}

; <4 x i32>: same expected shape as in_v16i8 (four 32-bit registers).
define <4 x i32> @in_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
; CHECK-LABEL: in_v4i32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<25>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v4.u32 {%r1, %r2, %r3, %r4}, [in_v4i32_param_0];
; CHECK-NEXT:    ld.param.v4.u32 {%r5, %r6, %r7, %r8}, [in_v4i32_param_1];
; CHECK-NEXT:    xor.b32 %r9, %r4, %r8;
; CHECK-NEXT:    xor.b32 %r10, %r3, %r7;
; CHECK-NEXT:    xor.b32 %r11, %r2, %r6;
; CHECK-NEXT:    xor.b32 %r12, %r1, %r5;
; CHECK-NEXT:    ld.param.v4.u32 {%r13, %r14, %r15, %r16}, [in_v4i32_param_2];
; CHECK-NEXT:    and.b32 %r17, %r12, %r13;
; CHECK-NEXT:    and.b32 %r18, %r11, %r14;
; CHECK-NEXT:    and.b32 %r19, %r10, %r15;
; CHECK-NEXT:    and.b32 %r20, %r9, %r16;
; CHECK-NEXT:    xor.b32 %r21, %r20, %r8;
; CHECK-NEXT:    xor.b32 %r22, %r19, %r7;
; CHECK-NEXT:    xor.b32 %r23, %r18, %r6;
; CHECK-NEXT:    xor.b32 %r24, %r17, %r5;
; CHECK-NEXT:    st.param.v4.b32 [func_retval0], {%r24, %r23, %r22, %r21};
; CHECK-NEXT:    ret;
  %n0 = xor <4 x i32> %x, %y
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; <2 x i64>: two 64-bit registers moved with v2 loads/stores.
define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
; CHECK-LABEL: in_v2i64(
; CHECK:       {
; CHECK-NEXT:    .reg .b64 %rd<13>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [in_v2i64_param_0];
; CHECK-NEXT:    ld.param.v2.u64 {%rd3, %rd4}, [in_v2i64_param_1];
; CHECK-NEXT:    xor.b64 %rd5, %rd2, %rd4;
; CHECK-NEXT:    xor.b64 %rd6, %rd1, %rd3;
; CHECK-NEXT:    ld.param.v2.u64 {%rd7, %rd8}, [in_v2i64_param_2];
; CHECK-NEXT:    and.b64 %rd9, %rd6, %rd7;
; CHECK-NEXT:    and.b64 %rd10, %rd5, %rd8;
; CHECK-NEXT:    xor.b64 %rd11, %rd10, %rd4;
; CHECK-NEXT:    xor.b64 %rd12, %rd9, %rd3;
; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd12, %rd11};
; CHECK-NEXT:    ret;
  %n0 = xor <2 x i64> %x, %y
  %n1 = and <2 x i64> %n0, %mask
  %r = xor <2 x i64> %n1, %y
  ret <2 x i64> %r
}