; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX1,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX512,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX512,X64-AVX512

; This test works just like the non-upgrade one except that it only checks
; forms which require auto-upgrading.

define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse41_blendpd:
; SSE:       ## %bb.0:
; SSE-NEXT:    blendps $12, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x0c]
; SSE-NEXT:    ## xmm0 = xmm0[0,1],xmm1[2,3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_blendpd:
; AVX:       ## %bb.0:
; AVX-NEXT:    vblendps $3, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x03]
; AVX-NEXT:    ## xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 6) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_blendps:
; SSE:       ## %bb.0:
; SSE-NEXT:    blendps $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0c,0xc1,0x07]
; SSE-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_blendps:
; AVX:       ## %bb.0:
; AVX-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; AVX-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse41_dppd:
; SSE:       ## %bb.0:
; SSE-NEXT:    dppd $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x41,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_dppd:
; AVX:       ## %bb.0:
; AVX-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x41,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_dpps:
; SSE:       ## %bb.0:
; SSE-NEXT:    dpps $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x40,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_dpps:
; AVX:       ## %bb.0:
; AVX-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x40,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_insertps:
; SSE:       ## %bb.0:
; SSE-NEXT:    insertps $17, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x21,0xc1,0x11]
; SSE-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_insertps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vinsertps $17, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
; AVX1-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_insertps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vinsertps $17, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
; AVX512-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 17) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone


define <2 x i64> @test_x86_sse41_movntdqa(ptr %a0) {
; X86-SSE-LABEL: test_x86_sse41_movntdqa:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movntdqa (%eax), %xmm0 ## encoding: [0x66,0x0f,0x38,0x2a,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse41_movntdqa:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovntdqa (%eax), %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2a,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse41_movntdqa:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovntdqa (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2a,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse41_movntdqa:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movntdqa (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x38,0x2a,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse41_movntdqa:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovntdqa (%rdi), %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2a,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse41_movntdqa:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovntdqa (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2a,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.movntdqa(ptr %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.movntdqa(ptr) nounwind readnone


define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse41_mpsadbw:
; SSE:       ## %bb.0:
; SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_mpsadbw:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone


define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse41_pblendw:
; SSE:       ## %bb.0:
; SSE-NEXT:    pblendw $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0e,0xc1,0x07]
; SSE-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_pblendw:
; AVX:       ## %bb.0:
; AVX-NEXT:    vpblendw $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x07]
; AVX-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxbd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxbd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x21,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxbd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x21,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxbd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxbd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxbq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxbq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x22,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxbq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxbq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxbq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxbq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxbw:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxbw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x20,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxbw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxbw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxbw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxdq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxdq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x25,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxdq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxdq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxdq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxwd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxwd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x23,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxwd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxwd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxwd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse41_pmovsxwq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovsxwq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x24,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovsxwq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovsxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovsxwq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovsxwq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxbd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxbd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x31,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxbd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxbd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxbd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxbd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxbq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxbq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x32,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxbq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxbq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxbq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxbq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxbw:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxbw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x30,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxbw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxbw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxbw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxbw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxdq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxdq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x35,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxdq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxdq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxdq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxdq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxwd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxwd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x33,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxwd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxwd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxwd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxwd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse41_pmovzxwq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmovzxwq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x34,0xc0]
; SSE-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmovzxwq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX1-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmovzxwq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmovzxwq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
; AVX512-NEXT:    ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone

define <16 x i8> @max_epi8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: max_epi8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epi8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epi8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @min_epi8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: min_epi8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x38,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epi8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x38,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epi8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x38,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @max_epu16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: max_epu16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epu16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epu16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @min_epu16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: min_epu16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epu16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epu16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @max_epi32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: max_epi32:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3d,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epi32:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epi32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @min_epi32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: min_epi32:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x39,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epi32:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x39,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epi32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x39,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @max_epu32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: max_epu32:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxud %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epu32:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epu32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @min_epu32(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: min_epu32:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminud %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3b,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epu32:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epu32:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_pmuldq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmuldq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x28,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmuldq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x28,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmuldq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone