1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX 3; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL32,AVX2 4; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=ALL32,AVX512 5; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=ALL32,AVX512,AVX512BW 6; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-64 7; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL64,AVX2-64 8; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=ALL64,AVX512F-64 9; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=ALL64,AVX512F-64,AVX512BW-64 10 11;===-----------------------------------------------------------------------------=== 12; This test checks the ability to recognize a cross element pattern of 13; constants and perform the load via broadcasting a smaller constant 14; vector. 15; For example: 16; <i32 0, i32 1, i32 0, i32 1> => broadcast of the constant vector <i32 0, i32 1> 17;===-----------------------------------------------------------------------------=== 18 19define <16 x i8> @f16xi8_i16(<16 x i8> %a) { 20; AVX-LABEL: f16xi8_i16: 21; AVX: # %bb.0: 22; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 23; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 24; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 25; AVX-NEXT: retl 26; 27; ALL32-LABEL: f16xi8_i16: 28; ALL32: # %bb.0: 29; ALL32-NEXT: vpbroadcastw {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 30; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0 31; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 32; ALL32-NEXT: retl 33; 34; AVX-64-LABEL: f16xi8_i16: 35; AVX-64: # %bb.0: 36; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 37; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 38; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 39; AVX-64-NEXT: retq 40; 41; ALL64-LABEL: f16xi8_i16: 42; ALL64: # %bb.0: 43; ALL64-NEXT: vpbroadcastw {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 44; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 45; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 46; ALL64-NEXT: retq 47 %res1 = add <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %a 48 %res2 = and <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %res1 49 ret <16 x i8> %res2 50} 51 52 53define <16 x i8> @f16xi8_i32(<16 x i8> %a) { 54; AVX-LABEL: f16xi8_i32: 55; AVX: # %bb.0: 56; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 57; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 58; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 59; AVX-NEXT: retl 60; 61; ALL32-LABEL: f16xi8_i32: 62; ALL32: # %bb.0: 63; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 64; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0 65; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 66; ALL32-NEXT: retl 67; 68; AVX-64-LABEL: f16xi8_i32: 69; AVX-64: # %bb.0: 70; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 71; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 72; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 73; AVX-64-NEXT: retq 74; 75; ALL64-LABEL: f16xi8_i32: 76; ALL64: # %bb.0: 77; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 78; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 79; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 80; ALL64-NEXT: retq 81 %res1 = add <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %a 82 %res2 = and <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %res1 83 ret <16 x i8> %res2 84} 85 86 87define <16 x i8> @f16xi8_i64(<16 x i8> %a) { 88; AVX-LABEL: f16xi8_i64: 89; AVX: # %bb.0: 90; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 91; AVX-NEXT: # xmm1 = mem[0,0] 92; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 93; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 94; AVX-NEXT: retl 95; 96; ALL32-LABEL: f16xi8_i64: 97; ALL32: # %bb.0: 98; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 99; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0 100; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 101; ALL32-NEXT: retl 102; 103; AVX-64-LABEL: f16xi8_i64: 104; AVX-64: # %bb.0: 105; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 106; AVX-64-NEXT: # xmm1 = mem[0,0] 107; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 108; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 109; AVX-64-NEXT: retq 110; 111; ALL64-LABEL: f16xi8_i64: 112; ALL64: # %bb.0: 113; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 114; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 115; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 116; ALL64-NEXT: retq 117 %res1 = add <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %a 118 %res2 = and <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %res1 119 ret <16 x i8> %res2 120} 121 122 123define <32 x i8> @f32xi8_i16(<32 x i8> %a) { 124; AVX-LABEL: f32xi8_i16: 125; AVX: # %bb.0: 126; AVX-NEXT: vbroadcastss {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 127; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 128; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm2 129; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 130; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 131; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0 132; AVX-NEXT: retl 133; 134; ALL32-LABEL: f32xi8_i16: 135; ALL32: # %bb.0: 136; ALL32-NEXT: vpbroadcastw {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 137; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 138; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 139; ALL32-NEXT: retl 140; 141; AVX-64-LABEL: f32xi8_i16: 142; AVX-64: # %bb.0: 143; AVX-64-NEXT: vbroadcastss {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 144; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 145; AVX-64-NEXT: vpaddb %xmm1, %xmm2, %xmm2 146; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 147; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 148; AVX-64-NEXT: vandps %ymm1, %ymm0, %ymm0 149; AVX-64-NEXT: retq 150; 151; ALL64-LABEL: f32xi8_i16: 152; ALL64: # %bb.0: 153; ALL64-NEXT: vpbroadcastw {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 154; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 155; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 156; ALL64-NEXT: retq 157 %res1 = add <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %a 158 %res2 = and <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %res1 159 ret <32 x i8> %res2 160} 161 162 163define <32 x i8> @f32xi8_i32(<32 x i8> %a) { 164; AVX-LABEL: f32xi8_i32: 165; AVX: # %bb.0: 166; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 167; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 168; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1 169; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0 170; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 171; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 172; AVX-NEXT: retl 173; 174; ALL32-LABEL: f32xi8_i32: 175; ALL32: # %bb.0: 176; ALL32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 177; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 178; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 179; ALL32-NEXT: retl 180; 181; AVX-64-LABEL: f32xi8_i32: 182; AVX-64: # %bb.0: 183; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 184; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 185; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 186; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 187; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 188; AVX-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 189; AVX-64-NEXT: retq 190; 191; ALL64-LABEL: f32xi8_i32: 192; ALL64: # %bb.0: 193; ALL64-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 194; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 195; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 196; ALL64-NEXT: retq 197 %res1 = add <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %a 198 %res2 = and <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %res1 199 ret <32 x i8> %res2 200} 201 202 203define <32 x i8> @f32xi8_i64(<32 x i8> %a) { 204; AVX-LABEL: f32xi8_i64: 205; AVX: # %bb.0: 206; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 207; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 208; AVX-NEXT: # xmm2 = mem[0,0] 209; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1 210; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0 211; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 212; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 213; AVX-NEXT: retl 214; 215; ALL32-LABEL: f32xi8_i64: 216; ALL32: # %bb.0: 217; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 218; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 219; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 220; ALL32-NEXT: retl 221; 222; AVX-64-LABEL: f32xi8_i64: 223; AVX-64: # %bb.0: 224; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 225; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 226; AVX-64-NEXT: # xmm2 = mem[0,0] 227; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 228; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 229; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 230; AVX-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 231; AVX-64-NEXT: retq 232; 233; ALL64-LABEL: f32xi8_i64: 234; ALL64: # %bb.0: 235; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 236; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 237; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 238; ALL64-NEXT: retq 239 %res1 = add <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %a 240 %res2 = and <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %res1 241 ret <32 x i8> %res2 242} 243 244 245define <32 x i8> @f32xi8_i128(<32 x i8> %a) { 246; AVX-LABEL: f32xi8_i128: 247; AVX: # %bb.0: 248; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 249; AVX-NEXT: # ymm1 = mem[0,1,0,1] 250; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 251; AVX-NEXT: vpaddb %xmm1, %xmm2, %xmm2 252; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 253; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 254; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0 255; AVX-NEXT: retl 256; 257; ALL32-LABEL: f32xi8_i128: 258; ALL32: # %bb.0: 259; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 260; ALL32-NEXT: # ymm1 = mem[0,1,0,1] 261; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 262; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 263; ALL32-NEXT: retl 264; 265; AVX-64-LABEL: f32xi8_i128: 266; AVX-64: # %bb.0: 267; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 268; AVX-64-NEXT: # ymm1 = mem[0,1,0,1] 269; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 270; AVX-64-NEXT: vpaddb %xmm1, %xmm2, %xmm2 271; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 272; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 273; AVX-64-NEXT: vandps %ymm1, %ymm0, %ymm0 274; AVX-64-NEXT: retq 275; 276; ALL64-LABEL: f32xi8_i128: 277; ALL64: # %bb.0: 278; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 279; ALL64-NEXT: # ymm1 = mem[0,1,0,1] 280; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 281; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 282; ALL64-NEXT: retq 283 %res1 = add <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %a 284 %res2 = and <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %res1 285 ret <32 x i8> %res2 286} 287 288 289define <64 x i8> @f64xi8_i16(<64 x i8> %a) { 290; AVX-LABEL: f64xi8_i16: 291; AVX: # %bb.0: 292; AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 293; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3 294; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3 295; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1 296; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 297; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3 298; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3 299; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0 300; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 301; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 302; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 303; AVX-NEXT: retl 304; 305; AVX2-LABEL: f64xi8_i16: 306; AVX2: # %bb.0: 307; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 308; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1 309; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 310; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 311; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 312; AVX2-NEXT: retl 313; 314; AVX512BW-LABEL: f64xi8_i16: 315; AVX512BW: # %bb.0: 316; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 317; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 318; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 319; AVX512BW-NEXT: retl 320; 321; AVX-64-LABEL: f64xi8_i16: 322; AVX-64: # %bb.0: 323; AVX-64-NEXT: vbroadcastss {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 324; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3 325; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3 326; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 327; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 328; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 329; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3 330; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 331; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 332; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 333; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 334; AVX-64-NEXT: retq 335; 336; AVX2-64-LABEL: f64xi8_i16: 337; AVX2-64: # %bb.0: 338; AVX2-64-NEXT: vpbroadcastw {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 339; AVX2-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 340; AVX2-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 341; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 342; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 343; AVX2-64-NEXT: retq 344; 345; AVX512BW-64-LABEL: f64xi8_i16: 346; AVX512BW-64: # %bb.0: 347; AVX512BW-64-NEXT: vpbroadcastw {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 348; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 349; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 350; AVX512BW-64-NEXT: retq 351 %res1 = add <64 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %a 352 %res2 = and <64 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %res1 353 ret <64 x i8> %res2 354} 355 356 357define <64 x i8> @f64i8_i32(<64 x i8> %a) { 358; AVX-LABEL: f64i8_i32: 359; AVX: # %bb.0: 360; AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 361; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3 362; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3 363; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1 364; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 365; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3 366; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3 367; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0 368; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 369; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 370; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 371; AVX-NEXT: retl 372; 373; AVX2-LABEL: f64i8_i32: 374; AVX2: # %bb.0: 375; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 376; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1 377; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 378; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 379; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 380; AVX2-NEXT: retl 381; 382; AVX512BW-LABEL: f64i8_i32: 383; AVX512BW: # %bb.0: 384; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 385; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 386; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 387; AVX512BW-NEXT: retl 388; 389; AVX-64-LABEL: f64i8_i32: 390; AVX-64: # %bb.0: 391; AVX-64-NEXT: vbroadcastss {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 392; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3 393; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3 394; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 395; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 396; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 397; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3 398; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 399; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 400; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 401; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 402; AVX-64-NEXT: retq 403; 404; AVX2-64-LABEL: f64i8_i32: 405; AVX2-64: # %bb.0: 406; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 407; AVX2-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 408; AVX2-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 409; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 410; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 411; AVX2-64-NEXT: retq 412; 413; AVX512BW-64-LABEL: f64i8_i32: 414; AVX512BW-64: # %bb.0: 415; AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 416; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 417; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 418; AVX512BW-64-NEXT: retq 419 %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %a 420 %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %res1 421 ret <64 x i8> %res2 422} 423 424 425define <64 x i8> @f64xi8_i64(<64 x i8> %a) { 426; AVX-LABEL: f64xi8_i64: 427; AVX: # %bb.0: 428; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 429; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3 430; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3 431; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1 432; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 433; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3 434; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3 435; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0 436; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 437; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 438; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 439; AVX-NEXT: retl 440; 441; AVX2-LABEL: f64xi8_i64: 442; AVX2: # %bb.0: 443; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 444; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1 445; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 446; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 447; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 448; AVX2-NEXT: retl 449; 450; AVX512BW-LABEL: f64xi8_i64: 451; AVX512BW: # %bb.0: 452; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 453; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 454; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 455; AVX512BW-NEXT: retl 456; 457; AVX-64-LABEL: f64xi8_i64: 458; AVX-64: # %bb.0: 459; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 460; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3 461; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3 462; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 463; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 464; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 465; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3 466; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 467; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 468; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 469; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 470; AVX-64-NEXT: retq 471; 472; AVX2-64-LABEL: f64xi8_i64: 473; AVX2-64: # %bb.0: 474; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 475; AVX2-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 476; AVX2-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 477; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 478; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 479; AVX2-64-NEXT: retq 480; 481; AVX512BW-64-LABEL: f64xi8_i64: 482; AVX512BW-64: # %bb.0: 483; AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 484; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 485; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 486; AVX512BW-64-NEXT: retq 487 %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %a 488 %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %res1 489 ret <64 x i8> %res2 490} 491 492 493define <64 x i8> @f64xi8_i128(<64 x i8> %a) { 494; AVX-LABEL: f64xi8_i128: 495; AVX: # %bb.0: 496; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 497; AVX-NEXT: # ymm2 = mem[0,1,0,1] 498; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3 499; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3 500; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1 501; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 502; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3 503; AVX-NEXT: vpaddb %xmm2, %xmm3, %xmm3 504; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0 505; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 506; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 507; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 508; AVX-NEXT: retl 509; 510; AVX2-LABEL: f64xi8_i128: 511; AVX2: # %bb.0: 512; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 513; AVX2-NEXT: # ymm2 = mem[0,1,0,1] 514; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1 515; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 516; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 517; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 518; AVX2-NEXT: retl 519; 520; AVX512BW-LABEL: f64xi8_i128: 521; AVX512BW: # %bb.0: 522; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 523; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 524; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 525; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 526; AVX512BW-NEXT: retl 527; 528; AVX-64-LABEL: f64xi8_i128: 529; AVX-64: # %bb.0: 530; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 531; AVX-64-NEXT: # ymm2 = mem[0,1,0,1] 532; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3 533; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3 534; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 535; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 536; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 537; AVX-64-NEXT: vpaddb %xmm2, %xmm3, %xmm3 538; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 539; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 540; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 541; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 542; AVX-64-NEXT: retq 543; 544; AVX2-64-LABEL: f64xi8_i128: 545; AVX2-64: # %bb.0: 546; AVX2-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 547; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1] 548; AVX2-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 549; AVX2-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 550; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 551; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 552; AVX2-64-NEXT: retq 553; 554; AVX512BW-64-LABEL: f64xi8_i128: 555; AVX512BW-64: # %bb.0: 556; AVX512BW-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 557; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 558; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 559; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 560; AVX512BW-64-NEXT: retq 561 %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %a 562 %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %res1 563 ret <64 x i8> %res2 564} 565 566 567define <64 x i8> @f64xi8_i256(<64 x i8> %a) { 568; AVX-LABEL: f64xi8_i256: 569; AVX: # %bb.0: 570; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 571; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 572; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2 573; AVX-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 574; AVX-NEXT: vpaddb %xmm4, %xmm1, %xmm1 575; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 576; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 577; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2 578; AVX-NEXT: vpaddb %xmm4, %xmm0, %xmm0 579; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 580; AVX-NEXT: vandps %ymm4, %ymm0, %ymm0 581; AVX-NEXT: vandps %ymm4, %ymm1, %ymm1 582; AVX-NEXT: retl 583; 584; AVX2-LABEL: f64xi8_i256: 585; AVX2: # %bb.0: 586; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 587; AVX2-NEXT: vpaddb %ymm2, %ymm1, %ymm1 588; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 589; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 590; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 591; AVX2-NEXT: retl 592; 593; AVX512BW-LABEL: f64xi8_i256: 594; AVX512BW: # %bb.0: 595; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 596; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 597; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 598; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 599; AVX512BW-NEXT: retl 600; 601; AVX-64-LABEL: f64xi8_i256: 602; AVX-64: # %bb.0: 603; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2 604; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 605; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2 606; AVX-64-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 607; AVX-64-NEXT: vpaddb %xmm4, %xmm1, %xmm1 608; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 609; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 610; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2 611; AVX-64-NEXT: vpaddb %xmm4, %xmm0, %xmm0 612; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 613; AVX-64-NEXT: vandps %ymm4, %ymm0, %ymm0 614; AVX-64-NEXT: vandps %ymm4, %ymm1, %ymm1 615; AVX-64-NEXT: retq 616; 617; AVX2-64-LABEL: f64xi8_i256: 618; AVX2-64: # %bb.0: 619; AVX2-64-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 620; AVX2-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 621; AVX2-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 622; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 623; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 624; AVX2-64-NEXT: retq 625; 626; AVX512BW-64-LABEL: f64xi8_i256: 627; AVX512BW-64: # %bb.0: 628; AVX512BW-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31] 629; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 630; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 631; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 632; AVX512BW-64-NEXT: retq 633 %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, %a 634 %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, %res1 635 ret <64 x i8> %res2 636} 637 638 639define <8 x i16> @f8xi16_i32(<8 x i16> %a) { 640; AVX-LABEL: f8xi16_i32: 641; AVX: # %bb.0: 642; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1] 643; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 644; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 645; AVX-NEXT: retl 646; 647; ALL32-LABEL: f8xi16_i32: 648; ALL32: # %bb.0: 649; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1] 650; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0 651; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 652; ALL32-NEXT: retl 653; 654; AVX-64-LABEL: f8xi16_i32: 655; AVX-64: # %bb.0: 656; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1] 657; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 658; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 659; AVX-64-NEXT: retq 660; 661; ALL64-LABEL: f8xi16_i32: 662; ALL64: # %bb.0: 663; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1] 664; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 665; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 666; ALL64-NEXT: retq 667 %res1 = add <8 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %a 668 %res2 = and <8 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %res1 669 ret <8 x i16> %res2 670} 671 672 673define <8 x i16> @f8xi16_i64(<8 x i16> %a) { 674; AVX-LABEL: f8xi16_i64: 675; AVX: # %bb.0: 676; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3] 677; AVX-NEXT: # xmm1 = mem[0,0] 678; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 679; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 680; AVX-NEXT: retl 681; 682; ALL32-LABEL: f8xi16_i64: 683; ALL32: # %bb.0: 684; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3] 685; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0 686; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 687; ALL32-NEXT: retl 688; 689; AVX-64-LABEL: f8xi16_i64: 690; AVX-64: # %bb.0: 691; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3] 692; AVX-64-NEXT: # xmm1 = mem[0,0] 693; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 694; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 695; AVX-64-NEXT: retq 696; 697; ALL64-LABEL: f8xi16_i64: 698; ALL64: # %bb.0: 699; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,2,3,0,1,2,3] 700; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 701; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 702; ALL64-NEXT: retq 703 %res1 = add <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %a 704 %res2 = and <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %res1 705 ret <8 x i16> %res2 706} 707 708 709define <16 x i16> @f16xi16_i32(<16 x i16> %a) { 710; AVX-LABEL: f16xi16_i32: 711; AVX: # %bb.0: 712; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 713; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1] 714; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 715; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0 716; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 717; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 718; AVX-NEXT: retl 719; 720; ALL32-LABEL: f16xi16_i32: 721; ALL32: # %bb.0: 722; ALL32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 723; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0 724; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 725; ALL32-NEXT: retl 726; 727; AVX-64-LABEL: f16xi16_i32: 728; AVX-64: # %bb.0: 729; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 730; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1] 731; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 732; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 733; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 734; AVX-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 735; AVX-64-NEXT: retq 736; 737; ALL64-LABEL: f16xi16_i32: 738; ALL64: # %bb.0: 739; ALL64-NEXT: vpbroadcastd {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 740; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 741; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 742; ALL64-NEXT: retq 743 %res1 = add <16 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %a 744 %res2 = and <16 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %res1 745 ret <16 x i16> %res2 746} 747 748 749define <16 x i16> @f16xi16_i64(<16 x i16> %a) { 750; AVX-LABEL: f16xi16_i64: 751; AVX: # %bb.0: 752; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 753; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,2,3,0,1,2,3] 754; AVX-NEXT: # xmm2 = mem[0,0] 755; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 756; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0 757; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 758; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 759; AVX-NEXT: retl 760; 761; ALL32-LABEL: f16xi16_i64: 762; ALL32: # %bb.0: 763; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 764; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0 765; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 766; ALL32-NEXT: retl 767; 768; AVX-64-LABEL: f16xi16_i64: 769; AVX-64: # %bb.0: 770; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 771; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,2,3,0,1,2,3] 772; AVX-64-NEXT: # xmm2 = mem[0,0] 773; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 774; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 775; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 776; AVX-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 777; AVX-64-NEXT: retq 778; 779; ALL64-LABEL: f16xi16_i64: 780; ALL64: # %bb.0: 781; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 782; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 783; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 784; ALL64-NEXT: retq 785 %res1 = add <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %a 786 %res2 = and <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %res1 787 ret <16 x i16> %res2 788} 789 790 791define <16 x i16> @f16xi16_i128(<16 x i16> %a) { 792; AVX-LABEL: f16xi16_i128: 793; AVX: # %bb.0: 794; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 795; AVX-NEXT: # ymm1 = mem[0,1,0,1] 796; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 797; AVX-NEXT: vpaddw %xmm1, %xmm2, %xmm2 798; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 799; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 800; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0 801; AVX-NEXT: retl 802; 803; ALL32-LABEL: f16xi16_i128: 804; ALL32: # %bb.0: 805; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 806; ALL32-NEXT: # ymm1 = mem[0,1,0,1] 807; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0 808; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 809; ALL32-NEXT: retl 810; 811; AVX-64-LABEL: f16xi16_i128: 812; AVX-64: # %bb.0: 813; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 814; AVX-64-NEXT: # ymm1 = mem[0,1,0,1] 815; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 816; AVX-64-NEXT: vpaddw %xmm1, %xmm2, %xmm2 817; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 818; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 819; AVX-64-NEXT: vandps %ymm1, %ymm0, %ymm0 820; AVX-64-NEXT: retq 821; 822; ALL64-LABEL: f16xi16_i128: 823; ALL64: # %bb.0: 824; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 825; ALL64-NEXT: # ymm1 = mem[0,1,0,1] 826; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 827; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 828; ALL64-NEXT: retq 829 %res1 = add <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %a 830 %res2 = and <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %res1 831 ret <16 x i16> %res2 832} 833 834 835define <32 x i16> @f32xi16_i32(<32 x i16> %a) { 836; AVX-LABEL: f32xi16_i32: 837; AVX: # %bb.0: 838; AVX-NEXT: vbroadcastss {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 839; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3 840; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3 841; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 842; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 843; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3 844; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3 845; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0 846; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 847; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 848; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 849; AVX-NEXT: retl 850; 851; AVX2-LABEL: f32xi16_i32: 852; AVX2: # %bb.0: 853; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 854; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1 855; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0 856; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 857; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 858; AVX2-NEXT: retl 859; 860; AVX512BW-LABEL: f32xi16_i32: 861; AVX512BW: # %bb.0: 862; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 863; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 864; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 865; AVX512BW-NEXT: retl 866; 867; AVX-64-LABEL: f32xi16_i32: 868; AVX-64: # %bb.0: 869; AVX-64-NEXT: vbroadcastss {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 870; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3 871; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3 872; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 873; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 874; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 875; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3 876; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 877; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 878; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 879; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 880; AVX-64-NEXT: retq 881; 882; AVX2-64-LABEL: f32xi16_i32: 883; AVX2-64: # %bb.0: 884; AVX2-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 885; AVX2-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1 886; AVX2-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 887; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 888; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 889; AVX2-64-NEXT: retq 890; 891; AVX512BW-64-LABEL: f32xi16_i32: 892; AVX512BW-64: # %bb.0: 893; AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 894; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 895; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 896; AVX512BW-64-NEXT: retq 897 %res1 = add <32 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %a 898 %res2 = and <32 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %res1 899 ret <32 x i16> %res2 900} 901 902 903define <32 x i16> @f32xi16_i64(<32 x i16> %a) { 904; AVX-LABEL: f32xi16_i64: 905; AVX: # %bb.0: 906; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 907; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3 908; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3 909; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 910; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 911; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3 912; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3 913; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0 914; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 915; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 916; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 917; AVX-NEXT: retl 918; 919; AVX2-LABEL: f32xi16_i64: 920; AVX2: # %bb.0: 921; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 922; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1 923; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0 924; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 925; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 926; AVX2-NEXT: retl 927; 928; AVX512BW-LABEL: f32xi16_i64: 929; AVX512BW: # %bb.0: 930; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 931; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 932; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 933; AVX512BW-NEXT: retl 934; 935; AVX-64-LABEL: f32xi16_i64: 936; AVX-64: # %bb.0: 937; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 938; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3 939; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3 940; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 941; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 942; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 943; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3 944; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 945; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 946; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 947; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 948; AVX-64-NEXT: retq 949; 950; AVX2-64-LABEL: f32xi16_i64: 951; AVX2-64: # %bb.0: 952; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 953; AVX2-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1 954; AVX2-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 955; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 956; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 957; AVX2-64-NEXT: retq 958; 959; AVX512BW-64-LABEL: f32xi16_i64: 960; AVX512BW-64: # %bb.0: 961; AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 962; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 963; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 964; AVX512BW-64-NEXT: retq 965 %res1 = add <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %a 966 %res2 = and <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %res1 967 ret <32 x i16> %res2 968} 969 970 971define <32 x i16> @f32xi16_i128(<32 x i16> %a) { 972; AVX-LABEL: f32xi16_i128: 973; AVX: # %bb.0: 974; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 975; AVX-NEXT: # ymm2 = mem[0,1,0,1] 976; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3 977; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3 978; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 979; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 980; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3 981; AVX-NEXT: vpaddw %xmm2, %xmm3, %xmm3 982; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0 983; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 984; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 985; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 986; AVX-NEXT: retl 987; 988; AVX2-LABEL: f32xi16_i128: 989; AVX2: # %bb.0: 990; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 991; AVX2-NEXT: # ymm2 = mem[0,1,0,1] 992; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1 993; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0 994; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 995; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 996; AVX2-NEXT: retl 997; 998; AVX512BW-LABEL: f32xi16_i128: 999; AVX512BW: # %bb.0: 1000; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1001; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1002; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 1003; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 1004; AVX512BW-NEXT: retl 1005; 1006; AVX-64-LABEL: f32xi16_i128: 1007; AVX-64: # %bb.0: 1008; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1009; AVX-64-NEXT: # ymm2 = mem[0,1,0,1] 1010; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3 1011; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3 1012; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1 1013; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1014; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 1015; AVX-64-NEXT: vpaddw %xmm2, %xmm3, %xmm3 1016; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 1017; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1018; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 1019; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 1020; AVX-64-NEXT: retq 1021; 1022; AVX2-64-LABEL: f32xi16_i128: 1023; AVX2-64: # %bb.0: 1024; AVX2-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1025; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1] 1026; AVX2-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1 1027; AVX2-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 1028; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 1029; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 1030; AVX2-64-NEXT: retq 1031; 1032; AVX512BW-64-LABEL: f32xi16_i128: 1033; AVX512BW-64: # %bb.0: 1034; AVX512BW-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1035; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1036; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 1037; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 1038; AVX512BW-64-NEXT: retq 1039 %res1 = add <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %a 1040 %res2 = and <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %res1 1041 ret <32 x i16> %res2 1042} 1043 1044 1045define <32 x i16> @f32xi16_i256(<32 x i16> %a) { 1046; AVX-LABEL: f32xi16_i256: 1047; AVX: # %bb.0: 1048; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 1049; AVX-NEXT: vpmovsxbw {{.*#+}} xmm3 = [8,9,10,11,12,13,14,15] 1050; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2 1051; AVX-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1052; AVX-NEXT: vpaddw %xmm4, %xmm1, %xmm1 1053; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1054; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 1055; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2 1056; AVX-NEXT: vpaddw %xmm4, %xmm0, %xmm0 1057; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1058; AVX-NEXT: vandps %ymm4, %ymm0, %ymm0 1059; AVX-NEXT: vandps %ymm4, %ymm1, %ymm1 1060; AVX-NEXT: retl 1061; 1062; AVX2-LABEL: f32xi16_i256: 1063; AVX2: # %bb.0: 1064; AVX2-NEXT: vpmovsxbw {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1065; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1 1066; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0 1067; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 1068; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 1069; AVX2-NEXT: retl 1070; 1071; AVX512BW-LABEL: f32xi16_i256: 1072; AVX512BW: # %bb.0: 1073; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1074; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1075; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 1076; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 1077; AVX512BW-NEXT: retl 1078; 1079; AVX-64-LABEL: f32xi16_i256: 1080; AVX-64: # %bb.0: 1081; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2 1082; AVX-64-NEXT: vpmovsxbw {{.*#+}} xmm3 = [8,9,10,11,12,13,14,15] 1083; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2 1084; AVX-64-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1085; AVX-64-NEXT: vpaddw %xmm4, %xmm1, %xmm1 1086; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1087; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 1088; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2 1089; AVX-64-NEXT: vpaddw %xmm4, %xmm0, %xmm0 1090; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1091; AVX-64-NEXT: vandps %ymm4, %ymm0, %ymm0 1092; AVX-64-NEXT: vandps %ymm4, %ymm1, %ymm1 1093; AVX-64-NEXT: retq 1094; 1095; AVX2-64-LABEL: f32xi16_i256: 1096; AVX2-64: # %bb.0: 1097; AVX2-64-NEXT: vpmovsxbw {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1098; AVX2-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1 1099; AVX2-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 1100; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 1101; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 1102; AVX2-64-NEXT: retq 1103; 1104; AVX512BW-64-LABEL: f32xi16_i256: 1105; AVX512BW-64: # %bb.0: 1106; AVX512BW-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 1107; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1108; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 1109; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 1110; AVX512BW-64-NEXT: retq 1111 %res1 = add <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, %a 1112 %res2 = and <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, %res1 1113 ret <32 x i16> %res2 1114} 1115 1116 1117define <4 x i32> @f4xi32_i64(<4 x i32> %a) { 1118; AVX-LABEL: f4xi32_i64: 1119; AVX: # %bb.0: 1120; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,0,1] 1121; AVX-NEXT: # xmm1 = mem[0,0] 1122; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 1123; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 1124; AVX-NEXT: retl 1125; 1126; ALL32-LABEL: f4xi32_i64: 1127; ALL32: # %bb.0: 1128; ALL32-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,0,1] 1129; ALL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0 1130; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 1131; ALL32-NEXT: retl 1132; 1133; AVX-64-LABEL: f4xi32_i64: 1134; AVX-64: # %bb.0: 1135; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [0,1,0,1] 1136; AVX-64-NEXT: # xmm1 = mem[0,0] 1137; AVX-64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 1138; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 1139; AVX-64-NEXT: retq 1140; 1141; ALL64-LABEL: f4xi32_i64: 1142; ALL64: # %bb.0: 1143; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [0,1,0,1] 1144; ALL64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 1145; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 1146; ALL64-NEXT: retq 1147 %res1 = add <4 x i32> <i32 0, i32 1, i32 0, i32 1>, %a 1148 %res2 = and <4 x i32> <i32 0, i32 1, i32 0, i32 1>, %res1 1149 ret <4 x i32> %res2 1150} 1151 1152 1153define <8 x i32> @f8xi32_i64(<8 x i32> %a) { 1154; AVX-LABEL: f8xi32_i64: 1155; AVX: # %bb.0: 1156; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 1157; AVX-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,0,1] 1158; AVX-NEXT: # xmm2 = mem[0,0] 1159; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1160; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 1161; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1162; AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 1163; AVX-NEXT: retl 1164; 1165; ALL32-LABEL: f8xi32_i64: 1166; ALL32: # %bb.0: 1167; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1] 1168; ALL32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 1169; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 1170; ALL32-NEXT: retl 1171; 1172; AVX-64-LABEL: f8xi32_i64: 1173; AVX-64: # %bb.0: 1174; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 1175; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = [0,1,0,1] 1176; AVX-64-NEXT: # xmm2 = mem[0,0] 1177; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1178; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 1179; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1180; AVX-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1181; AVX-64-NEXT: retq 1182; 1183; ALL64-LABEL: f8xi32_i64: 1184; ALL64: # %bb.0: 1185; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,1,0,1,0,1,0,1] 1186; ALL64-NEXT: vpaddd %ymm1, %ymm0, %ymm0 1187; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 1188; ALL64-NEXT: retq 1189 %res1 = add <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %a 1190 %res2 = and <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %res1 1191 ret <8 x i32> %res2 1192} 1193 1194 1195define <8 x i32> @f8xi32_i128(<8 x i32> %a) { 1196; AVX-LABEL: f8xi32_i128: 1197; AVX: # %bb.0: 1198; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3] 1199; AVX-NEXT: # ymm1 = mem[0,1,0,1] 1200; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 1201; AVX-NEXT: vpaddd %xmm1, %xmm2, %xmm2 1202; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 1203; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1204; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0 1205; AVX-NEXT: retl 1206; 1207; ALL32-LABEL: f8xi32_i128: 1208; ALL32: # %bb.0: 1209; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3] 1210; ALL32-NEXT: # ymm1 = mem[0,1,0,1] 1211; ALL32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 1212; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 1213; ALL32-NEXT: retl 1214; 1215; AVX-64-LABEL: f8xi32_i128: 1216; AVX-64: # %bb.0: 1217; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3] 1218; AVX-64-NEXT: # ymm1 = mem[0,1,0,1] 1219; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 1220; AVX-64-NEXT: vpaddd %xmm1, %xmm2, %xmm2 1221; AVX-64-NEXT: vpaddd %xmm1, %xmm0, %xmm0 1222; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1223; AVX-64-NEXT: vandps %ymm1, %ymm0, %ymm0 1224; AVX-64-NEXT: retq 1225; 1226; ALL64-LABEL: f8xi32_i128: 1227; ALL64: # %bb.0: 1228; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3] 1229; ALL64-NEXT: # ymm1 = mem[0,1,0,1] 1230; ALL64-NEXT: vpaddd %ymm1, %ymm0, %ymm0 1231; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 1232; ALL64-NEXT: retq 1233 %res1 = add <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a 1234 %res2 = and <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1 1235 ret <8 x i32> %res2 1236} 1237 1238 1239define <16 x i32> @f16xi32_i64(<16 x i32> %a) { 1240; AVX-LABEL: f16xi32_i64: 1241; AVX: # %bb.0: 1242; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1] 1243; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3 1244; AVX-NEXT: vpaddd %xmm2, %xmm3, %xmm3 1245; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1246; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1247; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3 1248; AVX-NEXT: vpaddd %xmm2, %xmm3, %xmm3 1249; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 1250; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1251; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 1252; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 1253; AVX-NEXT: retl 1254; 1255; AVX2-LABEL: f16xi32_i64: 1256; AVX2: # %bb.0: 1257; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1] 1258; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1 1259; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 1260; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 1261; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 1262; AVX2-NEXT: retl 1263; 1264; AVX512-LABEL: f16xi32_i64: 1265; AVX512: # %bb.0: 1266; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 1267; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 1268; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0 1269; AVX512-NEXT: retl 1270; 1271; AVX-64-LABEL: f16xi32_i64: 1272; AVX-64: # %bb.0: 1273; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1] 1274; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3 1275; AVX-64-NEXT: vpaddd %xmm2, %xmm3, %xmm3 1276; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1277; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1278; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 1279; AVX-64-NEXT: vpaddd %xmm2, %xmm3, %xmm3 1280; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 1281; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1282; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 1283; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 1284; AVX-64-NEXT: retq 1285; 1286; AVX2-64-LABEL: f16xi32_i64: 1287; AVX2-64: # %bb.0: 1288; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1] 1289; AVX2-64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 1290; AVX2-64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 1291; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 1292; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 1293; AVX2-64-NEXT: retq 1294; 1295; AVX512F-64-LABEL: f16xi32_i64: 1296; AVX512F-64: # %bb.0: 1297; AVX512F-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 1298; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0 1299; AVX512F-64-NEXT: vpandd %zmm1, %zmm0, %zmm0 1300; AVX512F-64-NEXT: retq 1301 %res1 = add <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %a 1302 %res2 = and <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %res1 1303 ret <16 x i32> %res2 1304} 1305 1306 1307define <16 x i32> @f16xi32_i128(<16 x i32> %a) { 1308; AVX-LABEL: f16xi32_i128: 1309; AVX: # %bb.0: 1310; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3] 1311; AVX-NEXT: # ymm2 = mem[0,1,0,1] 1312; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3 1313; AVX-NEXT: vpaddd %xmm2, %xmm3, %xmm3 1314; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1315; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1316; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3 1317; AVX-NEXT: vpaddd %xmm2, %xmm3, %xmm3 1318; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 1319; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1320; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 1321; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 1322; AVX-NEXT: retl 1323; 1324; AVX2-LABEL: f16xi32_i128: 1325; AVX2: # %bb.0: 1326; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3] 1327; AVX2-NEXT: # ymm2 = mem[0,1,0,1] 1328; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1 1329; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 1330; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 1331; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 1332; AVX2-NEXT: retl 1333; 1334; AVX512-LABEL: f16xi32_i128: 1335; AVX512: # %bb.0: 1336; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1337; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1338; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 1339; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0 1340; AVX512-NEXT: retl 1341; 1342; AVX-64-LABEL: f16xi32_i128: 1343; AVX-64: # %bb.0: 1344; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3] 1345; AVX-64-NEXT: # ymm2 = mem[0,1,0,1] 1346; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3 1347; AVX-64-NEXT: vpaddd %xmm2, %xmm3, %xmm3 1348; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1 1349; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1350; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 1351; AVX-64-NEXT: vpaddd %xmm2, %xmm3, %xmm3 1352; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0 1353; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1354; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 1355; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 1356; AVX-64-NEXT: retq 1357; 1358; AVX2-64-LABEL: f16xi32_i128: 1359; AVX2-64: # %bb.0: 1360; AVX2-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3] 1361; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1] 1362; AVX2-64-NEXT: vpaddd %ymm2, %ymm1, %ymm1 1363; AVX2-64-NEXT: vpaddd %ymm2, %ymm0, %ymm0 1364; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 1365; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 1366; AVX2-64-NEXT: retq 1367; 1368; AVX512F-64-LABEL: f16xi32_i128: 1369; AVX512F-64: # %bb.0: 1370; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1371; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1372; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0 1373; AVX512F-64-NEXT: vpandd %zmm1, %zmm0, %zmm0 1374; AVX512F-64-NEXT: retq 1375 %res1 = add <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a 1376 %res2 = and <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1 1377 ret <16 x i32> %res2 1378} 1379 1380 1381define <4 x i64> @f4xi64_i128(<4 x i64> %a) { 1382; AVX-LABEL: f4xi64_i128: 1383; AVX: # %bb.0: 1384; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0] 1385; AVX-NEXT: # ymm1 = mem[0,1,0,1] 1386; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 1387; AVX-NEXT: vpaddq %xmm1, %xmm2, %xmm2 1388; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 1389; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1390; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0 1391; AVX-NEXT: retl 1392; 1393; ALL32-LABEL: f4xi64_i128: 1394; ALL32: # %bb.0: 1395; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0] 1396; ALL32-NEXT: # ymm1 = mem[0,1,0,1] 1397; ALL32-NEXT: vpaddq %ymm1, %ymm0, %ymm0 1398; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 1399; ALL32-NEXT: retl 1400; 1401; AVX-64-LABEL: f4xi64_i128: 1402; AVX-64: # %bb.0: 1403; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [0,1,0,1] 1404; AVX-64-NEXT: # ymm1 = mem[0,1,0,1] 1405; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 1406; AVX-64-NEXT: vpaddq %xmm1, %xmm2, %xmm2 1407; AVX-64-NEXT: vpaddq %xmm1, %xmm0, %xmm0 1408; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1409; AVX-64-NEXT: vandps %ymm1, %ymm0, %ymm0 1410; AVX-64-NEXT: retq 1411; 1412; ALL64-LABEL: f4xi64_i128: 1413; ALL64: # %bb.0: 1414; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,0,1] 1415; ALL64-NEXT: # ymm1 = mem[0,1,0,1] 1416; ALL64-NEXT: vpaddq %ymm1, %ymm0, %ymm0 1417; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 1418; ALL64-NEXT: retq 1419 %res1 = add <4 x i64> <i64 0, i64 1, i64 0, i64 1>, %a 1420 %res2 = and <4 x i64> <i64 0, i64 1, i64 0, i64 1>, %res1 1421 ret <4 x i64> %res2 1422} 1423 1424 1425define <8 x i64> @f8xi64_i128(<8 x i64> %a) { 1426; AVX-LABEL: f8xi64_i128: 1427; AVX: # %bb.0: 1428; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,0,1,0,0,0,1,0] 1429; AVX-NEXT: # ymm2 = mem[0,1,0,1] 1430; AVX-NEXT: vextractf128 $1, %ymm1, %xmm3 1431; AVX-NEXT: vpaddq %xmm2, %xmm3, %xmm3 1432; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1 1433; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1434; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3 1435; AVX-NEXT: vpaddq %xmm2, %xmm3, %xmm3 1436; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 1437; AVX-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1438; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 1439; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 1440; AVX-NEXT: retl 1441; 1442; AVX2-LABEL: f8xi64_i128: 1443; AVX2: # %bb.0: 1444; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,0,1,0,0,0,1,0] 1445; AVX2-NEXT: # ymm2 = mem[0,1,0,1] 1446; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1 1447; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 1448; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 1449; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 1450; AVX2-NEXT: retl 1451; 1452; AVX512-LABEL: f8xi64_i128: 1453; AVX512: # %bb.0: 1454; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0] 1455; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1456; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0 1457; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 1458; AVX512-NEXT: retl 1459; 1460; AVX-64-LABEL: f8xi64_i128: 1461; AVX-64: # %bb.0: 1462; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,0,1] 1463; AVX-64-NEXT: # ymm2 = mem[0,1,0,1] 1464; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm3 1465; AVX-64-NEXT: vpaddq %xmm2, %xmm3, %xmm3 1466; AVX-64-NEXT: vpaddq %xmm2, %xmm1, %xmm1 1467; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 1468; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3 1469; AVX-64-NEXT: vpaddq %xmm2, %xmm3, %xmm3 1470; AVX-64-NEXT: vpaddq %xmm2, %xmm0, %xmm0 1471; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 1472; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 1473; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 1474; AVX-64-NEXT: retq 1475; 1476; AVX2-64-LABEL: f8xi64_i128: 1477; AVX2-64: # %bb.0: 1478; AVX2-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,0,1] 1479; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1] 1480; AVX2-64-NEXT: vpaddq %ymm2, %ymm1, %ymm1 1481; AVX2-64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 1482; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 1483; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 1484; AVX2-64-NEXT: retq 1485; 1486; AVX512F-64-LABEL: f8xi64_i128: 1487; AVX512F-64: # %bb.0: 1488; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1] 1489; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1490; AVX512F-64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 1491; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 1492; AVX512F-64-NEXT: retq 1493 %res1 = add <8 x i64> <i64 0, i64 1, i64 0, i64 1, i64 0, i64 1, i64 0, i64 1>, %a 1494 %res2 = and <8 x i64> <i64 0, i64 1, i64 0, i64 1, i64 0, i64 1, i64 0, i64 1>, %res1 1495 ret <8 x i64> %res2 1496} 1497 1498 1499define <8 x i64> @f8xi64_i256(<8 x i64> %a) { 1500; AVX-LABEL: f8xi64_i256: 1501; AVX: # %bb.0: 1502; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 1503; AVX-NEXT: vpmovsxbq {{.*#+}} xmm3 = [2,3] 1504; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2 1505; AVX-NEXT: vmovdqa {{.*#+}} ymm4 = [0,0,1,0,2,0,3,0] 1506; AVX-NEXT: vpaddq %xmm4, %xmm1, %xmm1 1507; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1508; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 1509; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2 1510; AVX-NEXT: vpaddq %xmm4, %xmm0, %xmm0 1511; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1512; AVX-NEXT: vandps %ymm4, %ymm0, %ymm0 1513; AVX-NEXT: vandps %ymm4, %ymm1, %ymm1 1514; AVX-NEXT: retl 1515; 1516; AVX2-LABEL: f8xi64_i256: 1517; AVX2: # %bb.0: 1518; AVX2-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,1,2,3] 1519; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1 1520; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 1521; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 1522; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 1523; AVX2-NEXT: retl 1524; 1525; AVX512-LABEL: f8xi64_i256: 1526; AVX512: # %bb.0: 1527; AVX512-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,0,0,1,0,2,0,3,0] 1528; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1529; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0 1530; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 1531; AVX512-NEXT: retl 1532; 1533; AVX-64-LABEL: f8xi64_i256: 1534; AVX-64: # %bb.0: 1535; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2 1536; AVX-64-NEXT: vpmovsxbq {{.*#+}} xmm3 = [2,3] 1537; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2 1538; AVX-64-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,2,3] 1539; AVX-64-NEXT: vpaddq %xmm4, %xmm1, %xmm1 1540; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1541; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 1542; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2 1543; AVX-64-NEXT: vpaddq %xmm4, %xmm0, %xmm0 1544; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1545; AVX-64-NEXT: vandps %ymm4, %ymm0, %ymm0 1546; AVX-64-NEXT: vandps %ymm4, %ymm1, %ymm1 1547; AVX-64-NEXT: retq 1548; 1549; AVX2-64-LABEL: f8xi64_i256: 1550; AVX2-64: # %bb.0: 1551; AVX2-64-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,1,2,3] 1552; AVX2-64-NEXT: vpaddq %ymm2, %ymm1, %ymm1 1553; AVX2-64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 1554; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 1555; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 1556; AVX2-64-NEXT: retq 1557; 1558; AVX512F-64-LABEL: f8xi64_i256: 1559; AVX512F-64: # %bb.0: 1560; AVX512F-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3] 1561; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1562; AVX512F-64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 1563; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 1564; AVX512F-64-NEXT: retq 1565 %res1 = add <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>, %a 1566 %res2 = and <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>, %res1 1567 ret <8 x i64> %res2 1568} 1569 1570 1571define <4 x float> @f4xf32_f64(<4 x float> %a) { 1572; AVX-LABEL: f4xf32_f64: 1573; AVX: # %bb.0: 1574; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1575; AVX-NEXT: # xmm1 = mem[0,0] 1576; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 1577; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0 1578; AVX-NEXT: retl 1579; 1580; ALL32-LABEL: f4xf32_f64: 1581; ALL32: # %bb.0: 1582; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1583; ALL32-NEXT: # xmm1 = mem[0,0] 1584; ALL32-NEXT: vaddps %xmm1, %xmm0, %xmm0 1585; ALL32-NEXT: vdivps %xmm0, %xmm1, %xmm0 1586; ALL32-NEXT: retl 1587; 1588; AVX-64-LABEL: f4xf32_f64: 1589; AVX-64: # %bb.0: 1590; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1591; AVX-64-NEXT: # xmm1 = mem[0,0] 1592; AVX-64-NEXT: vaddps %xmm1, %xmm0, %xmm0 1593; AVX-64-NEXT: vdivps %xmm0, %xmm1, %xmm0 1594; AVX-64-NEXT: retq 1595; 1596; ALL64-LABEL: f4xf32_f64: 1597; ALL64: # %bb.0: 1598; ALL64-NEXT: vmovddup {{.*#+}} xmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1599; ALL64-NEXT: # xmm1 = mem[0,0] 1600; ALL64-NEXT: vaddps %xmm1, %xmm0, %xmm0 1601; ALL64-NEXT: vdivps %xmm0, %xmm1, %xmm0 1602; ALL64-NEXT: retq 1603 %res1 = fadd <4 x float> <float 2.0, float 1.0, float 2.0, float 1.0>, %a 1604 %res2 = fdiv <4 x float> <float 2.0, float 1.0, float 2.0, float 1.0>, %res1 1605 ret <4 x float> %res2 1606} 1607 1608 1609define <8 x float> @f8xf32_f64(<8 x float> %a) { 1610; AVX-LABEL: f8xf32_f64: 1611; AVX: # %bb.0: 1612; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1613; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0 1614; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0 1615; AVX-NEXT: retl 1616; 1617; ALL32-LABEL: f8xf32_f64: 1618; ALL32: # %bb.0: 1619; ALL32-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1620; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0 1621; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0 1622; ALL32-NEXT: retl 1623; 1624; AVX-64-LABEL: f8xf32_f64: 1625; AVX-64: # %bb.0: 1626; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1627; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0 1628; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0 1629; AVX-64-NEXT: retq 1630; 1631; ALL64-LABEL: f8xf32_f64: 1632; ALL64: # %bb.0: 1633; ALL64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1634; ALL64-NEXT: vaddps %ymm1, %ymm0, %ymm0 1635; ALL64-NEXT: vdivps %ymm0, %ymm1, %ymm0 1636; ALL64-NEXT: retq 1637 %res1 = fadd <8 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %a 1638 %res2 = fdiv <8 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %res1 1639 ret <8 x float> %res2 1640} 1641 1642 1643define <8 x float> @f8xf32_f128(<8 x float> %a) { 1644; AVX-LABEL: f8xf32_f128: 1645; AVX: # %bb.0: 1646; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1647; AVX-NEXT: # ymm1 = mem[0,1,0,1] 1648; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0 1649; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0 1650; AVX-NEXT: retl 1651; 1652; ALL32-LABEL: f8xf32_f128: 1653; ALL32: # %bb.0: 1654; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1655; ALL32-NEXT: # ymm1 = mem[0,1,0,1] 1656; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0 1657; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0 1658; ALL32-NEXT: retl 1659; 1660; AVX-64-LABEL: f8xf32_f128: 1661; AVX-64: # %bb.0: 1662; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1663; AVX-64-NEXT: # ymm1 = mem[0,1,0,1] 1664; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0 1665; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0 1666; AVX-64-NEXT: retq 1667; 1668; ALL64-LABEL: f8xf32_f128: 1669; ALL64: # %bb.0: 1670; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1671; ALL64-NEXT: # ymm1 = mem[0,1,0,1] 1672; ALL64-NEXT: vaddps %ymm1, %ymm0, %ymm0 1673; ALL64-NEXT: vdivps %ymm0, %ymm1, %ymm0 1674; ALL64-NEXT: retq 1675 %res1 = fadd <8 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %a 1676 %res2 = fdiv <8 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %res1 1677 ret <8 x float> %res2 1678} 1679 1680 1681define <16 x float> @f16xf32_f64(<16 x float> %a) { 1682; AVX-LABEL: f16xf32_f64: 1683; AVX: # %bb.0: 1684; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1685; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1 1686; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0 1687; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0 1688; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1 1689; AVX-NEXT: retl 1690; 1691; AVX2-LABEL: f16xf32_f64: 1692; AVX2: # %bb.0: 1693; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1694; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1 1695; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0 1696; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0 1697; AVX2-NEXT: vdivps %ymm1, %ymm2, %ymm1 1698; AVX2-NEXT: retl 1699; 1700; AVX512-LABEL: f16xf32_f64: 1701; AVX512: # %bb.0: 1702; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1703; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0 1704; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0 1705; AVX512-NEXT: retl 1706; 1707; AVX-64-LABEL: f16xf32_f64: 1708; AVX-64: # %bb.0: 1709; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1710; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1711; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1712; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1713; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1714; AVX-64-NEXT: retq 1715; 1716; AVX2-64-LABEL: f16xf32_f64: 1717; AVX2-64: # %bb.0: 1718; AVX2-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1719; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1720; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1721; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1722; AVX2-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1723; AVX2-64-NEXT: retq 1724; 1725; AVX512F-64-LABEL: f16xf32_f64: 1726; AVX512F-64: # %bb.0: 1727; AVX512F-64-NEXT: vbroadcastsd {{.*#+}} zmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1728; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0 1729; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0 1730; AVX512F-64-NEXT: retq 1731 %res1 = fadd <16 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %a 1732 %res2 = fdiv <16 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %res1 1733 ret <16 x float> %res2 1734} 1735 1736 1737define <16 x float> @f16xf32_f128(<16 x float> %a) { 1738; AVX-LABEL: f16xf32_f128: 1739; AVX: # %bb.0: 1740; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1741; AVX-NEXT: # ymm2 = mem[0,1,0,1] 1742; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1 1743; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0 1744; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0 1745; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1 1746; AVX-NEXT: retl 1747; 1748; AVX2-LABEL: f16xf32_f128: 1749; AVX2: # %bb.0: 1750; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1751; AVX2-NEXT: # ymm2 = mem[0,1,0,1] 1752; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1 1753; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0 1754; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0 1755; AVX2-NEXT: vdivps %ymm1, %ymm2, %ymm1 1756; AVX2-NEXT: retl 1757; 1758; AVX512-LABEL: f16xf32_f128: 1759; AVX512: # %bb.0: 1760; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1761; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1762; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0 1763; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0 1764; AVX512-NEXT: retl 1765; 1766; AVX-64-LABEL: f16xf32_f128: 1767; AVX-64: # %bb.0: 1768; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1769; AVX-64-NEXT: # ymm2 = mem[0,1,0,1] 1770; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1771; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1772; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1773; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1774; AVX-64-NEXT: retq 1775; 1776; AVX2-64-LABEL: f16xf32_f128: 1777; AVX2-64: # %bb.0: 1778; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1779; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1] 1780; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1781; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1782; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1783; AVX2-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1784; AVX2-64-NEXT: retq 1785; 1786; AVX512F-64-LABEL: f16xf32_f128: 1787; AVX512F-64: # %bb.0: 1788; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1789; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1790; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0 1791; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0 1792; AVX512F-64-NEXT: retq 1793 %res1 = fadd <16 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %a 1794 %res2 = fdiv <16 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %res1 1795 ret <16 x float> %res2 1796} 1797 1798 1799define <16 x float> @f16xf32_f256(<16 x float> %a) { 1800; AVX-LABEL: f16xf32_f256: 1801; AVX: # %bb.0: 1802; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0] 1803; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1 1804; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0 1805; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0 1806; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1 1807; AVX-NEXT: retl 1808; 1809; AVX2-LABEL: f16xf32_f256: 1810; AVX2: # %bb.0: 1811; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0] 1812; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1 1813; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0 1814; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0 1815; AVX2-NEXT: vdivps %ymm1, %ymm2, %ymm1 1816; AVX2-NEXT: retl 1817; 1818; AVX512-LABEL: f16xf32_f256: 1819; AVX512: # %bb.0: 1820; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0] 1821; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1822; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0 1823; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0 1824; AVX512-NEXT: retl 1825; 1826; AVX-64-LABEL: f16xf32_f256: 1827; AVX-64: # %bb.0: 1828; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0] 1829; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1830; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1831; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1832; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1833; AVX-64-NEXT: retq 1834; 1835; AVX2-64-LABEL: f16xf32_f256: 1836; AVX2-64: # %bb.0: 1837; AVX2-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0] 1838; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1839; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1840; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1841; AVX2-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1842; AVX2-64-NEXT: retq 1843; 1844; AVX512F-64-LABEL: f16xf32_f256: 1845; AVX512F-64: # %bb.0: 1846; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0] 1847; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1848; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0 1849; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0 1850; AVX512F-64-NEXT: retq 1851 %res1 = fadd <16 x float> <float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, %a 1852 %res2 = fdiv <16 x float> <float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, %res1 1853 ret <16 x float> %res2 1854} 1855 1856 1857define <4 x double> @f4xf64_f128(<4 x double> %a) { 1858; AVX-LABEL: f4xf64_f128: 1859; AVX: # %bb.0: 1860; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1861; AVX-NEXT: # ymm1 = mem[0,1,0,1] 1862; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 1863; AVX-NEXT: vdivpd %ymm0, %ymm1, %ymm0 1864; AVX-NEXT: retl 1865; 1866; ALL32-LABEL: f4xf64_f128: 1867; ALL32: # %bb.0: 1868; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1869; ALL32-NEXT: # ymm1 = mem[0,1,0,1] 1870; ALL32-NEXT: vaddpd %ymm1, %ymm0, %ymm0 1871; ALL32-NEXT: vdivpd %ymm0, %ymm1, %ymm0 1872; ALL32-NEXT: retl 1873; 1874; AVX-64-LABEL: f4xf64_f128: 1875; AVX-64: # %bb.0: 1876; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1877; AVX-64-NEXT: # ymm1 = mem[0,1,0,1] 1878; AVX-64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 1879; AVX-64-NEXT: vdivpd %ymm0, %ymm1, %ymm0 1880; AVX-64-NEXT: retq 1881; 1882; ALL64-LABEL: f4xf64_f128: 1883; ALL64: # %bb.0: 1884; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1885; ALL64-NEXT: # ymm1 = mem[0,1,0,1] 1886; ALL64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 1887; ALL64-NEXT: vdivpd %ymm0, %ymm1, %ymm0 1888; ALL64-NEXT: retq 1889 %res1 = fadd <4 x double> <double 2.0, double 1.0, double 2.0, double 1.0>, %a 1890 %res2 = fdiv <4 x double> <double 2.0, double 1.0, double 2.0, double 1.0>, %res1 1891 ret <4 x double> %res2 1892} 1893 1894 1895define <8 x double> @f8xf64_f128(<8 x double> %a) { 1896; AVX-LABEL: f8xf64_f128: 1897; AVX: # %bb.0: 1898; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1899; AVX-NEXT: # ymm2 = mem[0,1,0,1] 1900; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1901; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1902; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1903; AVX-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1904; AVX-NEXT: retl 1905; 1906; AVX2-LABEL: f8xf64_f128: 1907; AVX2: # %bb.0: 1908; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1909; AVX2-NEXT: # ymm2 = mem[0,1,0,1] 1910; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1911; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1912; AVX2-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1913; AVX2-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1914; AVX2-NEXT: retl 1915; 1916; AVX512-LABEL: f8xf64_f128: 1917; AVX512: # %bb.0: 1918; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1919; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1920; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0 1921; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0 1922; AVX512-NEXT: retl 1923; 1924; AVX-64-LABEL: f8xf64_f128: 1925; AVX-64: # %bb.0: 1926; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1927; AVX-64-NEXT: # ymm2 = mem[0,1,0,1] 1928; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1929; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1930; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1931; AVX-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1932; AVX-64-NEXT: retq 1933; 1934; AVX2-64-LABEL: f8xf64_f128: 1935; AVX2-64: # %bb.0: 1936; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1937; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1] 1938; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1939; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1940; AVX2-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1941; AVX2-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1942; AVX2-64-NEXT: retq 1943; 1944; AVX512F-64-LABEL: f8xf64_f128: 1945; AVX512F-64: # %bb.0: 1946; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0,2.0E+0,1.0E+0] 1947; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1948; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0 1949; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0 1950; AVX512F-64-NEXT: retq 1951 %res1 = fadd <8 x double> <double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0>, %a 1952 %res2 = fdiv <8 x double> <double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0>, %res1 1953 ret <8 x double> %res2 1954} 1955 1956 1957define <8 x double> @f8xf64_f256(<8 x double> %a) { 1958; AVX-LABEL: f8xf64_f256: 1959; AVX: # %bb.0: 1960; AVX-NEXT: vmovapd {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1961; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1962; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1963; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1964; AVX-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1965; AVX-NEXT: retl 1966; 1967; AVX2-LABEL: f8xf64_f256: 1968; AVX2: # %bb.0: 1969; AVX2-NEXT: vmovapd {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1970; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1971; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1972; AVX2-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1973; AVX2-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1974; AVX2-NEXT: retl 1975; 1976; AVX512-LABEL: f8xf64_f256: 1977; AVX512: # %bb.0: 1978; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1979; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1980; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0 1981; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0 1982; AVX512-NEXT: retl 1983; 1984; AVX-64-LABEL: f8xf64_f256: 1985; AVX-64: # %bb.0: 1986; AVX-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1987; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1988; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1989; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1990; AVX-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1991; AVX-64-NEXT: retq 1992; 1993; AVX2-64-LABEL: f8xf64_f256: 1994; AVX2-64: # %bb.0: 1995; AVX2-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0] 1996; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1997; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1998; AVX2-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1999; AVX2-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1 2000; AVX2-64-NEXT: retq 2001; 2002; AVX512F-64-LABEL: f8xf64_f256: 2003; AVX512F-64: # %bb.0: 2004; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4.0E+0,1.0E+0,2.0E+0,3.0E+0,4.0E+0,1.0E+0,2.0E+0,3.0E+0] 2005; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 2006; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0 2007; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0 2008; AVX512F-64-NEXT: retq 2009 %res1 = fadd <8 x double> <double 4.0, double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0>, %a 2010 %res2 = fdiv <8 x double> <double 4.0, double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0>, %res1 2011 ret <8 x double> %res2 2012} 2013 2014 2015define <8 x i16> @f8xi16_i32_NaN(<8 x i16> %a) { 2016; AVX-LABEL: f8xi16_i32_NaN: 2017; AVX: # %bb.0: 2018; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,65466,0,65466,0,65466,0,65466] 2019; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 2020; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 2021; AVX-NEXT: retl 2022; 2023; ALL32-LABEL: f8xi16_i32_NaN: 2024; ALL32: # %bb.0: 2025; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,65466,0,65466,0,65466,0,65466] 2026; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0 2027; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 2028; ALL32-NEXT: retl 2029; 2030; AVX-64-LABEL: f8xi16_i32_NaN: 2031; AVX-64: # %bb.0: 2032; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [0,65466,0,65466,0,65466,0,65466] 2033; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 2034; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 2035; AVX-64-NEXT: retq 2036; 2037; ALL64-LABEL: f8xi16_i32_NaN: 2038; ALL64: # %bb.0: 2039; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [0,65466,0,65466,0,65466,0,65466] 2040; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 2041; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 2042; ALL64-NEXT: retq 2043 %res1 = add <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %a 2044 %res2 = and <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %res1 2045 ret <8 x i16> %res2 2046} 2047