; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -amdgpu-codegenprepare-widen-16-bit-ops=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

define amdgpu_ps i32 @s_andn2_i32(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_andn2_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b32 s0, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor i32 %src1, -1
  %and = and i32 %src0, %not.src1
  ret i32 %and
}

define amdgpu_ps i32 @s_andn2_i32_commute(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_andn2_i32_commute:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b32 s0, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i32_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i32_commute:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor i32 %src1, -1
  %and = and i32 %not.src1, %src0
  ret i32 %and
}

define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_use(i32 inreg %src0, i32 inreg %src1) {
; GCN-LABEL: s_andn2_i32_multi_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_not_b32 s1, s3
; GCN-NEXT: s_andn2_b32 s0, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i32_multi_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s3
; GFX10-NEXT: s_not_b32 s1, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i32_multi_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
; GFX11-NEXT: s_not_b32 s1, s3
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor i32 %src1, -1
  %and = and i32 %src0, %not.src1
  %insert.0 = insertvalue { i32, i32 } undef, i32 %and, 0
  %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %not.src1, 1
  ret { i32, i32 } %insert.1
}

define amdgpu_ps { i32, i32 } @s_andn2_i32_multi_foldable_use(i32 inreg %src0, i32 inreg %src1, i32 inreg %src2) {
; GCN-LABEL: s_andn2_i32_multi_foldable_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b32 s0, s2, s4
; GCN-NEXT: s_andn2_b32 s1, s3, s4
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i32_multi_foldable_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s4
; GFX10-NEXT: s_andn2_b32 s1, s3, s4
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i32_multi_foldable_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s4
; GFX11-NEXT: s_and_not1_b32 s1, s3, s4
; GFX11-NEXT: ; return to shader part epilog
  %not.src2 = xor i32 %src2, -1
  %and0 = and i32 %src0, %not.src2
  %and1 = and i32 %src1, %not.src2
  %insert.0 = insertvalue { i32, i32 } undef, i32 %and0, 0
  %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %and1, 1
  ret { i32, i32 } %insert.1
}

define i32 @v_andn2_i32(i32 %src0, i32 %src1) {
; GCN-LABEL: v_andn2_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_not_b32_e32 v1, v1
; GCN-NEXT: v_and_b32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_andn2_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %not.src1 = xor i32 %src1, -1
  %and = and i32 %src0, %not.src1
  ret i32 %and
}

define amdgpu_ps float @v_andn2_i32_sv(i32 inreg %src0, i32 %src1) {
; GCN-LABEL: v_andn2_i32_sv:
; GCN: ; %bb.0:
; GCN-NEXT: v_not_b32_e32 v0, v0
; GCN-NEXT: v_and_b32_e32 v0, s2, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i32_sv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor i32 %src1, -1
  %and = and i32 %src0, %not.src1
  %cast = bitcast i32 %and to float
  ret float %cast
}

define amdgpu_ps float @v_andn2_i32_vs(i32 %src0, i32 inreg %src1) {
; GCN-LABEL: v_andn2_i32_vs:
; GCN: ; %bb.0:
; GCN-NEXT: s_not_b32 s0, s2
; GCN-NEXT: v_and_b32_e32 v0, s0, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i32_vs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_not_b32 s0, s2
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s0, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor i32 %src1, -1
  %and = and i32 %src0, %not.src1
  %cast = bitcast i32 %and to float
  ret float %cast
}

define amdgpu_ps i64 @s_andn2_i64(i64 inreg %src0, i64 inreg %src1) {
; GCN-LABEL: s_andn2_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[4:5]
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor i64 %src1, -1
  %and = and i64 %src0, %not.src1
  ret i64 %and
}

define amdgpu_ps i64 @s_andn2_i64_commute(i64 inreg %src0, i64 inreg %src1) {
; GCN-LABEL: s_andn2_i64_commute:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i64_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i64_commute:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[4:5]
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor i64 %src1, -1
  %and = and i64 %not.src1, %src0
  ret i64 %and
}

define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_foldable_use(i64 inreg %src0, i64 inreg %src1, i64 inreg %src2) {
; GCN-LABEL: s_andn2_i64_multi_foldable_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[6:7]
; GCN-NEXT: s_andn2_b64 s[2:3], s[4:5], s[6:7]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i64_multi_foldable_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[6:7]
; GFX10-NEXT: s_andn2_b64 s[2:3], s[4:5], s[6:7]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i64_multi_foldable_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[6:7]
; GFX11-NEXT: s_and_not1_b64 s[2:3], s[4:5], s[6:7]
; GFX11-NEXT: ; return to shader part epilog
  %not.src2 = xor i64 %src2, -1
  %and0 = and i64 %src0, %not.src2
  %and1 = and i64 %src1, %not.src2
  %insert.0 = insertvalue { i64, i64 } undef, i64 %and0, 0
  %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %and1, 1
  ret { i64, i64 } %insert.1
}

define amdgpu_ps { i64, i64 } @s_andn2_i64_multi_use(i64 inreg %src0, i64 inreg %src1) {
; GCN-LABEL: s_andn2_i64_multi_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_not_b64 s[6:7], s[4:5]
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT: s_mov_b32 s2, s6
; GCN-NEXT: s_mov_b32 s3, s7
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i64_multi_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
; GFX10-NEXT: s_not_b64 s[2:3], s[4:5]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i64_multi_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[4:5]
; GFX11-NEXT: s_not_b64 s[2:3], s[4:5]
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor i64 %src1, -1
  %and = and i64 %src0, %not.src1
  %insert.0 = insertvalue { i64, i64 } undef, i64 %and, 0
  %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %not.src1, 1
  ret { i64, i64 } %insert.1
}

define i64 @v_andn2_i64(i64 %src0, i64 %src1) {
; GCN-LABEL: v_andn2_i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_not_b32_e32 v2, v2
; GCN-NEXT: v_not_b32_e32 v3, v3
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: v_and_b32_e32 v1, v1, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_andn2_i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_not_b32_e32 v2, v2
; GFX10PLUS-NEXT: v_not_b32_e32 v3, v3
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %not.src1 = xor i64 %src1, -1
  %and = and i64 %src0, %not.src1
  ret i64 %and
}

define amdgpu_ps <2 x float> @v_andn2_i64_sv(i64 inreg %src0, i64 %src1) {
; GCN-LABEL: v_andn2_i64_sv:
; GCN: ; %bb.0:
; GCN-NEXT: v_not_b32_e32 v0, v0
; GCN-NEXT: v_not_b32_e32 v1, v1
; GCN-NEXT: v_and_b32_e32 v0, s2, v0
; GCN-NEXT: v_and_b32_e32 v1, s3, v1
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i64_sv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_not_b32_e32 v0, v0
; GFX10PLUS-NEXT: v_not_b32_e32 v1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, s3, v1
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor i64 %src1, -1
  %and = and i64 %src0, %not.src1
  %cast = bitcast i64 %and to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @v_andn2_i64_vs(i64 %src0, i64 inreg %src1) {
; GCN-LABEL: v_andn2_i64_vs:
; GCN: ; %bb.0:
; GCN-NEXT: s_not_b64 s[0:1], s[2:3]
; GCN-NEXT: v_and_b32_e32 v0, s0, v0
; GCN-NEXT: v_and_b32_e32 v1, s1, v1
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i64_vs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_not_b64 s[0:1], s[2:3]
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s0, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v1, s1, v1
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor i64 %src1, -1
  %and = and i64 %src0, %not.src1
  %cast = bitcast i64 %and to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x i32> @s_andn2_v2i32(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GCN-LABEL: s_andn2_v2i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[4:5]
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor <2 x i32> %src1, <i32 -1, i32 -1>
  %and = and <2 x i32> %src0, %not.src1
  ret <2 x i32> %and
}

define amdgpu_ps <2 x i32> @s_andn2_v2i32_commute(<2 x i32> inreg %src0, <2 x i32> inreg %src1) {
; GCN-LABEL: s_andn2_v2i32_commute:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_v2i32_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b64 s[0:1], s[2:3], s[4:5]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_v2i32_commute:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b64 s[0:1], s[2:3], s[4:5]
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor <2 x i32> %src1, <i32 -1, i32 -1>
  %and = and <2 x i32> %not.src1, %src0
  ret <2 x i32> %and
}

define amdgpu_ps i16 @s_andn2_i16(i16 inreg %src0, i16 inreg %src1) {
; GCN-LABEL: s_andn2_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b32 s0, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor i16 %src1, -1
  %and = and i16 %src0, %not.src1
  ret i16 %and
}

define amdgpu_ps i16 @s_andn2_i16_commute(i16 inreg %src0, i16 inreg %src1) {
; GCN-LABEL: s_andn2_i16_commute:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b32 s0, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i16_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i16_commute:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor i16 %src1, -1
  %and = and i16 %not.src1, %src0
  ret i16 %and
}

define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_use(i16 inreg %src0, i16 inreg %src1) {
; GCN-LABEL: s_andn2_i16_multi_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_xor_b32 s1, s3, -1
; GCN-NEXT: s_andn2_b32 s0, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i16_multi_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s3
; GFX10-NEXT: s_xor_b32 s1, s3, -1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i16_multi_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
; GFX11-NEXT: s_xor_b32 s1, s3, -1
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor i16 %src1, -1
  %and = and i16 %src0, %not.src1
  %insert.0 = insertvalue { i16, i16 } undef, i16 %and, 0
  %insert.1 = insertvalue { i16, i16 } %insert.0, i16 %not.src1, 1
  ret { i16, i16 } %insert.1
}

define amdgpu_ps { i16, i16 } @s_andn2_i16_multi_foldable_use(i16 inreg %src0, i16 inreg %src1, i16 inreg %src2) {
; GCN-LABEL: s_andn2_i16_multi_foldable_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_andn2_b32 s0, s2, s4
; GCN-NEXT: s_andn2_b32 s1, s3, s4
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_i16_multi_foldable_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s4
; GFX10-NEXT: s_andn2_b32 s1, s3, s4
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_i16_multi_foldable_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s4
; GFX11-NEXT: s_and_not1_b32 s1, s3, s4
; GFX11-NEXT: ; return to shader part epilog
  %not.src2 = xor i16 %src2, -1
  %and0 = and i16 %src0, %not.src2
  %and1 = and i16 %src1, %not.src2
  %insert.0 = insertvalue { i16, i16 } undef, i16 %and0, 0
  %insert.1 = insertvalue { i16, i16 } %insert.0, i16 %and1, 1
  ret { i16, i16 } %insert.1
}

define i16 @v_andn2_i16(i16 %src0, i16 %src1) {
; GCN-LABEL: v_andn2_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
; GCN-NEXT: v_and_b32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_andn2_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %not.src1 = xor i16 %src1, -1
  %and = and i16 %src0, %not.src1
  ret i16 %and
}

define amdgpu_ps float @v_andn2_i16_sv(i16 inreg %src0, i16 %src1) {
; GCN-LABEL: v_andn2_i16_sv:
; GCN: ; %bb.0:
; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
; GCN-NEXT: v_and_b32_e32 v0, s2, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i16_sv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s2, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor i16 %src1, -1
  %and = and i16 %src0, %not.src1
  %zext = zext i16 %and to i32
  %cast.zext = bitcast i32 %zext to float
  ret float %cast.zext
}

define amdgpu_ps float @v_andn2_i16_vs(i16 %src0, i16 inreg %src1) {
; GCN-LABEL: v_andn2_i16_vs:
; GCN: ; %bb.0:
; GCN-NEXT: s_xor_b32 s0, s2, -1
; GCN-NEXT: v_and_b32_e32 v0, s0, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: v_andn2_i16_vs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_xor_b32 s0, s2, -1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, s0, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor i16 %src1, -1
  %and = and i16 %src0, %not.src1
  %zext = zext i16 %and to i32
  %cast.zext = bitcast i32 %zext to float
  ret float %cast.zext
}

define amdgpu_ps i32 @s_andn2_v2i16(<2 x i16> inreg %src0, <2 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v2i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s3, 16
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s5, 16
; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_xor_b32 s1, s1, -1
; GFX6-NEXT: s_and_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_andn2_b32 s0, s2, s3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1>
  %and = and <2 x i16> %src0, %not.src1
  %cast = bitcast <2 x i16> %and to i32
  ret i32 %cast
}

define amdgpu_ps i32 @s_andn2_v2i16_commute(<2 x i16> inreg %src0, <2 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v2i16_commute:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s3, 16
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s5, 16
; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_xor_b32 s1, s1, -1
; GFX6-NEXT: s_and_b32 s0, s1, s0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v2i16_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_andn2_b32 s0, s2, s3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_v2i16_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s3
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_v2i16_commute:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1>
  %and = and <2 x i16> %not.src1, %src0
  %cast = bitcast <2 x i16> %and to i32
  ret i32 %cast
}

define amdgpu_ps { i32, i32 } @s_andn2_v2i16_multi_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v2i16_multi_use:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s3, 16
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s5, 16
; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_xor_b32 s1, s1, -1
; GFX6-NEXT: s_and_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v2i16_multi_use:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_xor_b32 s1, s3, -1
; GFX9-NEXT: s_andn2_b32 s0, s2, s3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_v2i16_multi_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s3
; GFX10-NEXT: s_xor_b32 s1, s3, -1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_v2i16_multi_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s3
; GFX11-NEXT: s_xor_b32 s1, s3, -1
; GFX11-NEXT: ; return to shader part epilog
  %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1>
  %and = and <2 x i16> %src0, %not.src1

  %cast.0 = bitcast <2 x i16> %and to i32
  %cast.1 = bitcast <2 x i16> %not.src1 to i32
  %insert.0 = insertvalue { i32, i32 } undef, i32 %cast.0, 0
  %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %cast.1, 1
  ret { i32, i32 } %insert.1
}

define amdgpu_ps { i32, i32 } @s_andn2_v2i16_multi_foldable_use(<2 x i16> inreg %src0, <2 x i16> inreg %src1, <2 x i16> inreg %src2) {
; GFX6-LABEL: s_andn2_v2i16_multi_foldable_use:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s3, 16
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s5, 16
; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s7, 16
; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_xor_b32 s2, s2, -1
; GFX6-NEXT: s_and_b32 s0, s0, s2
; GFX6-NEXT: s_and_b32 s1, s1, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v2i16_multi_foldable_use:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_andn2_b32 s0, s2, s4
; GFX9-NEXT: s_andn2_b32 s1, s3, s4
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_andn2_v2i16_multi_foldable_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_andn2_b32 s0, s2, s4
; GFX10-NEXT: s_andn2_b32 s1, s3, s4
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_andn2_v2i16_multi_foldable_use:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_and_not1_b32 s0, s2, s4
; GFX11-NEXT: s_and_not1_b32 s1, s3, s4
; GFX11-NEXT: ; return to shader part epilog
  %not.src2 = xor <2 x i16> %src2, <i16 -1, i16 -1>
  %and0 = and <2 x i16> %src0, %not.src2
  %and1 = and <2 x i16> %src1, %not.src2

  %cast.0 = bitcast <2 x i16> %and0 to i32
  %cast.1 = bitcast <2 x i16> %and1 to i32
  %insert.0 = insertvalue { i32, i32 } undef, i32 %cast.0, 0
  %insert.1 = insertvalue { i32, i32 } %insert.0, i32 %cast.1, 1
  ret { i32, i32 } %insert.1
}

define <2 x i16> @v_andn2_v2i16(<2 x i16> %src0, <2 x i16> %src1) {
; GFX6-LABEL: v_andn2_v2i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX6-NEXT: v_and_b32_e32 v0, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_andn2_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX9-NEXT: v_and_b32_e32 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_andn2_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %not.src1 = xor <2 x i16> %src1, <i16 -1, i16 -1>
  %and = and <2 x i16> %src0, %not.src1
  ret <2 x i16> %and
}


define amdgpu_ps i48 @s_andn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v3i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s4, 0xffff
; GFX6-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GFX6-NEXT: s_lshr_b32 s2, s0, 16
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s0, s0, s2
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v3i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b64 s[0:1], -1
; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GFX9-NEXT: s_lshr_b32 s2, s0, 16
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
; GFX9-NEXT: s_lshl_b32 s2, s2, 16
; GFX9-NEXT: s_or_b32 s0, s0, s2
; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_andn2_v3i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b64 s[0:1], -1
; GFX10PLUS-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 16
; GFX10PLUS-NEXT: s_and_b32 s1, s1, 0xffff
; GFX10PLUS-NEXT: s_or_b32 s0, s0, s2
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -1>
  %and = and <3 x i16> %src0, %not.src1
  %cast = bitcast <3 x i16> %and to i48
  ret i48 %cast
}

define amdgpu_ps i48 @s_andn2_v3i16_commute(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v3i16_commute:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
; GFX6-NEXT: s_lshl_b32 s3, s3, 16
; GFX6-NEXT: s_xor_b64 s[0:1], s[6:7], s[0:1]
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s4, 0xffff
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX6-NEXT: s_lshr_b32 s2, s0, 16
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s0, s0, s2
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v3i16_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b64 s[0:1], -1
; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
; GFX9-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX9-NEXT: s_lshr_b32 s2, s0, 16
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
; GFX9-NEXT: s_lshl_b32 s2, s2, 16
; GFX9-NEXT: s_or_b32 s0, s0, s2
; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_andn2_v3i16_commute:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b64 s[0:1], -1
; GFX10PLUS-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 16
; GFX10PLUS-NEXT: s_and_b32 s1, s1, 0xffff
; GFX10PLUS-NEXT: s_or_b32 s0, s0, s2
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -1>
  %and = and <3 x i16> %not.src1, %src0
  %cast = bitcast <3 x i16> %and to i48
  ret i48 %cast
}

define amdgpu_ps { i48, i48 } @s_andn2_v3i16_multi_use(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v3i16_multi_use:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
; GFX6-NEXT: s_and_b32 s1, s3, 0xffff
; GFX6-NEXT: s_and_b32 s0, s2, 0xffff
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s4, 0xffff
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; GFX6-NEXT: s_lshr_b32 s2, s0, 16
; GFX6-NEXT: s_lshr_b32 s5, s6, 16
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s0, s0, s2
; GFX6-NEXT: s_and_b32 s2, s6, 0xffff
; GFX6-NEXT: s_lshl_b32 s3, s5, 16
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_and_b32 s3, s7, 0xffff
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v3i16_multi_use:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b64 s[0:1], -1
; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1]
; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX9-NEXT: s_lshr_b32 s2, s0, 16
; GFX9-NEXT: s_lshr_b32 s6, s4, 16
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
; GFX9-NEXT: s_lshl_b32 s2, s2, 16
; GFX9-NEXT: s_or_b32 s0, s0, s2
; GFX9-NEXT: s_and_b32 s2, s4, 0xffff
; GFX9-NEXT: s_lshl_b32 s3, s6, 16
; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
; GFX9-NEXT: s_or_b32 s2, s2, s3
; GFX9-NEXT: s_and_b32 s3, s5, 0xffff
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_andn2_v3i16_multi_use:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b64 s[0:1], -1
; GFX10PLUS-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1]
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX10PLUS-NEXT: s_lshr_b32 s3, s4, 16
; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 16
; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, 16
; GFX10PLUS-NEXT: s_or_b32 s0, s0, s2
; GFX10PLUS-NEXT: s_and_b32 s2, s4, 0xffff
; GFX10PLUS-NEXT: s_and_b32 s1, s1, 0xffff
; GFX10PLUS-NEXT: s_or_b32 s2, s2, s3
; GFX10PLUS-NEXT: s_and_b32 s3, s5, 0xffff
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -1>
  %and = and <3 x i16> %src0, %not.src1
  %cast.0 = bitcast <3 x i16> %and to i48
  %cast.1 = bitcast <3 x i16> %not.src1 to i48
  %insert.0 = insertvalue { i48, i48 } undef, i48 %cast.0, 0
  %insert.1 = insertvalue { i48, i48 } %insert.0, i48 %cast.1, 1
  ret { i48, i48 } %insert.1
}

define <3 x i16> @v_andn2_v3i16(<3 x i16> %src0, <3 x i16> %src1) {
; GFX6-LABEL: v_andn2_v3i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5
; GFX6-NEXT: v_xor_b32_e32 v3, -1, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_xor_b32_e32 v4, 0xfff5, v4
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX6-NEXT: v_and_b32_e32 v0, v0, v3
; GFX6-NEXT: v_and_b32_e32 v2, v1, v4
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_andn2_v3i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX9-NEXT: v_xor_b32_e32 v3, -11, v3
; GFX9-NEXT: v_and_b32_e32 v0, v0, v2
; GFX9-NEXT: v_and_b32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_andn2_v3i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -11, v3
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %not.src1 = xor <3 x i16> %src1, <i16 -1, i16 -1, i16 -11>
  %and = and <3 x i16> %src0, %not.src1
  ret <3 x i16> %and
}

define amdgpu_ps i64 @s_andn2_v4i16(<4 x i16> inreg %src0, <4 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v4i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s3, 16
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s5, 16
; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s7, 16
; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
; GFX6-NEXT: s_or_b32 s3, s3, s4
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, s4
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, -1
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_andn2_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b32 s0, -1
; GFX10PLUS-NEXT: s_mov_b32 s1, s0
; GFX10PLUS-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <4 x i16> %src0, %not.src1
  %cast = bitcast <4 x i16> %and to i64
  ret i64 %cast
}

define amdgpu_ps i64 @s_andn2_v4i16_commute(<4 x i16> inreg %src0, <4 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v4i16_commute:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s3, 16
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s5, 16
; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s7, 16
; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
; GFX6-NEXT: s_or_b32 s3, s3, s4
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, s4
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
; GFX6-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v4i16_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, -1
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
; GFX9-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_andn2_v4i16_commute:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b32 s0, -1
; GFX10PLUS-NEXT: s_mov_b32 s1, s0
; GFX10PLUS-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1]
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <4 x i16> %not.src1, %src0
  %cast = bitcast <4 x i16> %and to i64
  ret i64 %cast
}

define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_use(<4 x i16> inreg %src0, <4 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v4i16_multi_use:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s3, 16
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s5, 16
; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s7, 16
; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
; GFX6-NEXT: s_or_b32 s3, s3, s4
; GFX6-NEXT: s_mov_b32 s4, -1
; GFX6-NEXT: s_mov_b32 s5, s4
; GFX6-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v4i16_multi_use:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, -1
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1]
; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_andn2_v4i16_multi_use:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b32 s0, -1
; GFX10PLUS-NEXT: s_mov_b32 s1, s0
; GFX10PLUS-NEXT: s_xor_b64 s[4:5], s[4:5], s[0:1]
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <4 x i16> %src0, %not.src1

  %cast.0 = bitcast <4 x i16> %and to i64
  %cast.1 = bitcast <4 x i16> %not.src1 to i64
  %insert.0 = insertvalue { i64, i64 } undef, i64 %cast.0, 0
  %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %cast.1, 1
  ret { i64, i64 } %insert.1
}

define amdgpu_ps { i64, i64 } @s_andn2_v4i16_multi_foldable_use(<4 x i16> inreg %src0, <4 x i16> inreg %src1, <4 x i16> inreg %src2) {
; GFX6-LABEL: s_andn2_v4i16_multi_foldable_use:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s3, 16
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_lshl_b32 s1, s5, 16
; GFX6-NEXT: s_and_b32 s2, s4, 0xffff
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: s_lshl_b32 s2, s7, 16
; GFX6-NEXT: s_and_b32 s3, s6, 0xffff
; GFX6-NEXT: s_or_b32 s2, s2, s3
; GFX6-NEXT: s_lshl_b32 s3, s9, 16
; GFX6-NEXT: s_and_b32 s4, s8, 0xffff
; GFX6-NEXT: s_or_b32 s3, s3, s4
; GFX6-NEXT: s_lshl_b32 s4, s11, 16
; GFX6-NEXT: s_and_b32 s5, s10, 0xffff
; GFX6-NEXT: s_or_b32 s4, s4, s5
; GFX6-NEXT: s_lshl_b32 s5, s13, 16
; GFX6-NEXT: s_and_b32 s6, s12, 0xffff
; GFX6-NEXT: s_or_b32 s5, s5, s6
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_mov_b32 s7, s6
; GFX6-NEXT: s_xor_b64 s[4:5], s[4:5], s[6:7]
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5]
; GFX6-NEXT: s_and_b64 s[2:3], s[2:3], s[4:5]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_andn2_v4i16_multi_foldable_use:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_mov_b32 s0, -1
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
; GFX9-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
; GFX9-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_andn2_v4i16_multi_foldable_use:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b32 s0, -1
; GFX10PLUS-NEXT: s_mov_b32 s1, s0
; GFX10PLUS-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[6:7]
; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
; GFX10PLUS-NEXT: ; return to shader part epilog
  %not.src2 = xor <4 x i16> %src2, <i16 -1, i16 -1, i16 -1, i16 -1>
  %and0 = and <4 x i16> %src0, %not.src2
  %and1 = and <4 x i16> %src1, %not.src2

  %cast.0 = bitcast <4 x i16> %and0 to i64
  %cast.1 = bitcast <4 x i16> %and1 to i64
  %insert.0 = insertvalue { i64, i64 } undef, i64 %cast.0, 0
  %insert.1 = insertvalue { i64, i64 } %insert.0, i64 %cast.1, 1
  ret { i64, i64 } %insert.1
}

define <4 x i16> @v_andn2_v4i16(<4 x i16> %src0, <4 x i16> %src1) {
; GFX6-LABEL: v_andn2_v4i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v5
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v7
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX6-NEXT: v_xor_b32_e32 v3, -1, v3
; GFX6-NEXT: v_and_b32_e32 v0, v0, v2
; GFX6-NEXT: v_and_b32_e32 v2, v1, v3
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_andn2_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX9-NEXT: v_xor_b32_e32 v3, -1, v3
; GFX9-NEXT: v_and_b32_e32 v0, v0, v2
; GFX9-NEXT: v_and_b32_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_andn2_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_xor_b32_e32 v2, -1, v2
; GFX10PLUS-NEXT: v_xor_b32_e32 v3, -1, v3
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %not.src1 = xor <4 x i16> %src1, <i16 -1, i16 -1, i16 -1, i16 -1>
  %and = and <4 x i16> %src0, %not.src1
  ret <4 x i16> %and
}