; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s

define <8 x i32> @test_256_1(ptr %addr) {
; CHECK-LABEL: test_256_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <8 x i32>, ptr %addr, align 1
  ret <8 x i32>%res
}

define <8 x i32> @test_256_2(ptr %addr) {
; CHECK-LABEL: test_256_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <8 x i32>, ptr %addr, align 32
  ret <8 x i32>%res
}

define void @test_256_3(ptr %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x i64>%data, ptr %addr, align 32
  ret void
}

define void @test_256_4(ptr %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <8 x i32>%data, ptr %addr, align 1
  ret void
}

define void @test_256_5(ptr %addr, <8 x i32> %data) {
; CHECK-LABEL: test_256_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <8 x i32>%data, ptr %addr, align 32
  ret void
}

define <4 x i64> @test_256_6(ptr %addr) {
; CHECK-LABEL: test_256_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x i64>, ptr %addr, align 32
  ret <4 x i64>%res
}

define void @test_256_7(ptr %addr, <4 x i64> %data) {
; CHECK-LABEL: test_256_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x i64>%data, ptr %addr, align 1
  ret void
}

define <4 x i64> @test_256_8(ptr %addr) {
; CHECK-LABEL: test_256_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x i64>, ptr %addr, align 1
  ret <4 x i64>%res
}

define void @test_256_9(ptr %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x double>%data, ptr %addr, align 32
  ret void
}

define <4 x double> @test_256_10(ptr %addr) {
; CHECK-LABEL: test_256_10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x double>, ptr %addr, align 32
  ret <4 x double>%res
}

define void @test_256_11(ptr %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <8 x float>%data, ptr %addr, align 32
  ret void
}

define <8 x float> @test_256_12(ptr %addr) {
; CHECK-LABEL: test_256_12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <8 x float>, ptr %addr, align 32
  ret <8 x float>%res
}

define void @test_256_13(ptr %addr, <4 x double> %data) {
; CHECK-LABEL: test_256_13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x double>%data, ptr %addr, align 1
  ret void
}

define <4 x double> @test_256_14(ptr %addr) {
; CHECK-LABEL: test_256_14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x double>, ptr %addr, align 1
  ret <4 x double>%res
}

define void @test_256_15(ptr %addr, <8 x float> %data) {
; CHECK-LABEL: test_256_15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <8 x float>%data, ptr %addr, align 1
  ret void
}

define <8 x float> @test_256_16(ptr %addr) {
; CHECK-LABEL: test_256_16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <8 x float>, ptr %addr, align 1
  ret <8 x float>%res
}

define <8 x i32> @test_256_17(ptr %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %r = load <8 x i32>, ptr %addr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32>%res
}

define <8 x i32> @test_256_18(ptr %addr, <8 x i32> %old, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %r = load <8 x i32>, ptr %addr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
  ret <8 x i32>%res
}

define <8 x i32> @test_256_19(ptr %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %r = load <8 x i32>, ptr %addr, align 32
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32>%res
}

define <8 x i32> @test_256_20(ptr %addr, <8 x i32> %mask1) {
; CHECK-LABEL: test_256_20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %r = load <8 x i32>, ptr %addr, align 1
  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
  ret <8 x i32>%res
}

define <4 x i64> @test_256_21(ptr %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x i64>, ptr %addr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64>%res
}

define <4 x i64> @test_256_22(ptr %addr, <4 x i64> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x i64>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
  ret <4 x i64>%res
}

define <4 x i64> @test_256_23(ptr %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x i64>, ptr %addr, align 32
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64>%res
}

define <4 x i64> @test_256_24(ptr %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x i64>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
  ret <4 x i64>%res
}

define <8 x float> @test_256_25(ptr %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %r = load <8 x float>, ptr %addr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float>%res
}

define <8 x float> @test_256_26(ptr %addr, <8 x float> %old, <8 x float> %mask1) {
; CHECK-LABEL: test_256_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %r = load <8 x float>, ptr %addr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
  ret <8 x float>%res
}

define <8 x float> @test_256_27(ptr %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %r = load <8 x float>, ptr %addr, align 32
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float>%res
}

define <8 x float> @test_256_28(ptr %addr, <8 x float> %mask1) {
; CHECK-LABEL: test_256_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x float> %mask1, zeroinitializer
  %r = load <8 x float>, ptr %addr, align 1
  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
  ret <8 x float>%res
}

define <4 x double> @test_256_29(ptr %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x double>, ptr %addr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double>%res
}

define <4 x double> @test_256_30(ptr %addr, <4 x double> %old, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x double>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
  ret <4 x double>%res
}

define <4 x double> @test_256_31(ptr %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x double>, ptr %addr, align 32
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double>%res
}

define <4 x double> @test_256_32(ptr %addr, <4 x i64> %mask1) {
; CHECK-LABEL: test_256_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %r = load <4 x double>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
  ret <4 x double>%res
}

define <4 x i32> @test_128_1(ptr %addr) {
; CHECK-LABEL: test_128_1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x i32>, ptr %addr, align 1
  ret <4 x i32>%res
}

define <4 x i32> @test_128_2(ptr %addr) {
; CHECK-LABEL: test_128_2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x i32>, ptr %addr, align 16
  ret <4 x i32>%res
}

define void @test_128_3(ptr %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <2 x i64>%data, ptr %addr, align 16
  ret void
}

define void @test_128_4(ptr %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x i32>%data, ptr %addr, align 1
  ret void
}

define void @test_128_5(ptr %addr, <4 x i32> %data) {
; CHECK-LABEL: test_128_5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x i32>%data, ptr %addr, align 16
  ret void
}

define <2 x i64> @test_128_6(ptr %addr) {
; CHECK-LABEL: test_128_6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <2 x i64>, ptr %addr, align 16
  ret <2 x i64>%res
}

define void @test_128_7(ptr %addr, <2 x i64> %data) {
; CHECK-LABEL: test_128_7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <2 x i64>%data, ptr %addr, align 1
  ret void
}

define <2 x i64> @test_128_8(ptr %addr) {
; CHECK-LABEL: test_128_8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <2 x i64>, ptr %addr, align 1
  ret <2 x i64>%res
}

define void @test_128_9(ptr %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <2 x double>%data, ptr %addr, align 16
  ret void
}

define <2 x double> @test_128_10(ptr %addr) {
; CHECK-LABEL: test_128_10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <2 x double>, ptr %addr, align 16
  ret <2 x double>%res
}

define void @test_128_11(ptr %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_11:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x float>%data, ptr %addr, align 16
  ret void
}

define <4 x float> @test_128_12(ptr %addr) {
; CHECK-LABEL: test_128_12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x float>, ptr %addr, align 16
  ret <4 x float>%res
}

define void @test_128_13(ptr %addr, <2 x double> %data) {
; CHECK-LABEL: test_128_13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <2 x double>%data, ptr %addr, align 1
  ret void
}

define <2 x double> @test_128_14(ptr %addr) {
; CHECK-LABEL: test_128_14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <2 x double>, ptr %addr, align 1
  ret <2 x double>%res
}

define void @test_128_15(ptr %addr, <4 x float> %data) {
; CHECK-LABEL: test_128_15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store <4 x float>%data, ptr %addr, align 1
  ret void
}

define <4 x float> @test_128_16(ptr %addr) {
; CHECK-LABEL: test_128_16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = load <4 x float>, ptr %addr, align 1
  ret <4 x float>%res
}

define <4 x i32> @test_128_17(ptr %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_17:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x i32>, ptr %addr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32>%res
}

define <4 x i32> @test_128_18(ptr %addr, <4 x i32> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_18:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x i32>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
  ret <4 x i32>%res
}

define <4 x i32> @test_128_19(ptr %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_19:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x i32>, ptr %addr, align 16
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32>%res
}

define <4 x i32> @test_128_20(ptr %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_20:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x i32>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
  ret <4 x i32>%res
}

define <2 x i64> @test_128_21(ptr %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_21:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x i64>, ptr %addr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64>%res
}

define <2 x i64> @test_128_22(ptr %addr, <2 x i64> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_22:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x i64>, ptr %addr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
  ret <2 x i64>%res
}

define <2 x i64> @test_128_23(ptr %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x i64>, ptr %addr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64>%res
}

define <2 x i64> @test_128_24(ptr %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x i64>, ptr %addr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64>%res
}

define <4 x float> @test_128_25(ptr %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x float>, ptr %addr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}

define <4 x float> @test_128_26(ptr %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x float>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}

define <4 x float> @test_128_27(ptr %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x float>, ptr %addr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}

define <4 x float> @test_128_28(ptr %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %r = load <4 x float>, ptr %addr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}

define <2 x double> @test_128_29(ptr %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x double>, ptr %addr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}

define <2 x double> @test_128_30(ptr %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x double>, ptr %addr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}

define <2 x double> @test_128_31(ptr %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x double>, ptr %addr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}

define <2 x double> @test_128_32(ptr %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %r = load <2 x double>, ptr %addr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}