; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O3 -disable-peephole -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avxvnniint16 | FileCheck %s
;
; Stack-folding tests for the AVX-VNNI-INT16 word dot-product intrinsics
; (vpdpw{su,us,uu}d and their saturating vpdpw{su,us,uu}ds forms, 128- and
; 256-bit). Each test issues an inline-asm "nop" that clobbers xmm3-xmm31,
; forcing the third intrinsic operand (%C, arriving in xmm2/ymm2) to be
; spilled; the CHECK lines then verify that the reload is folded directly
; into the vpdpw* instruction as a memory operand rather than reloaded
; into a register first (-disable-peephole keeps the fold honest).
; The *_commuted tests swap the last two intrinsic operands and expect the
; identical folded instruction; they exist only for the uud/uuds forms
; (NOTE(review): presumably because only the unsigned-by-unsigned products
; are commutable, unlike the mixed-sign sud/usd forms -- confirm).
; CHECK lines (including the # encoding byte sequences checked via
; --show-mc-encoding) are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.

; Intrinsic declarations under test.
declare <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
declare <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
declare <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
declare <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
declare <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
declare <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
declare <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)

define <4 x i32> @test_int_x86_avx2_vpdpwsud_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwsud_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwsud {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x72,0xd2,0x44,0x24,0xe8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwsud_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwsud_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwsud {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x76,0xd2,0x44,0x24,0xd8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwsuds_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwsuds_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwsuds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x72,0xd3,0x44,0x24,0xe8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwsuds_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwsuds_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwsuds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x76,0xd3,0x44,0x24,0xd8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwusd_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwusd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwusd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x71,0xd2,0x44,0x24,0xe8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwusd_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwusd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwusd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x75,0xd2,0x44,0x24,0xd8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwusds_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwusds_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwusds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x71,0xd3,0x44,0x24,0xe8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwusds_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwusds_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwusds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x75,0xd3,0x44,0x24,0xd8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwuud_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x70,0xd2,0x44,0x24,0xe8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

; Same as above but with the last two operands swapped; the expected
; assembly is identical, so the fold must commute the operands.
define <4 x i32> @test_int_x86_avx2_vpdpwuud_128_commuted(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_128_commuted:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x70,0xd2,0x44,0x24,0xe8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %A, <4 x i32> %C, <4 x i32> %B)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwuud_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x74,0xd2,0x44,0x24,0xd8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

; Commuted 256-bit variant: operands %B and %C swapped, same expected fold.
define <8 x i32> @test_int_x86_avx2_vpdpwuud_256_commuted(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuud_256_commuted:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwuud {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x74,0xd2,0x44,0x24,0xd8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %A, <8 x i32> %C, <8 x i32> %B)
  ret <8 x i32> %ret
}

define <4 x i32> @test_int_x86_avx2_vpdpwuuds_128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x70,0xd3,0x44,0x24,0xe8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C)
  ret <4 x i32> %ret
}

; Commuted saturating 128-bit variant: operands %B and %C swapped.
define <4 x i32> @test_int_x86_avx2_vpdpwuuds_128_commuted(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_128_commuted:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xf8,0x29,0x54,0x24,0xe8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x70,0xd3,0x44,0x24,0xe8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %A, <4 x i32> %C, <4 x i32> %B)
  ret <4 x i32> %ret
}

define <8 x i32> @test_int_x86_avx2_vpdpwuuds_256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x74,0xd3,0x44,0x24,0xd8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C)
  ret <8 x i32> %ret
}

; Commuted saturating 256-bit variant: operands %B and %C swapped.
define <8 x i32> @test_int_x86_avx2_vpdpwuuds_256_commuted(<8 x i32> %A, <8 x i32> %B, <8 x i32> %C) {
; CHECK-LABEL: test_int_x86_avx2_vpdpwuuds_256_commuted:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # encoding: [0xc5,0xfc,0x11,0x54,0x24,0xd8]
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop # encoding: [0x90]
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    vpdpwuuds {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT:    # encoding: [0xc4,0xe2,0x74,0xd3,0x44,0x24,0xd8]
; CHECK-NEXT:    retq # encoding: [0xc3]
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
  %ret = call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %A, <8 x i32> %C, <8 x i32> %B)
  ret <8 x i32> %ret
}