; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx -interleaved-access -S | FileCheck %s
; RUN: opt < %s -mtriple=x86_64-pc-linux -mattr=+avx -passes=interleaved-access -S | FileCheck %s

; This file tests the function `llvm::lowerInterleavedLoad/Store`.

; Factor-4 interleaved load of <16 x double>: the four stride-4 shuffles of the
; wide load are expected to become four <4 x double> loads followed by a
; shuffle-based 4x4 transpose.
define <4 x double> @load_factorf64_4(ptr %ptr) {
; CHECK-LABEL: @load_factorf64_4(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x double>, ptr [[PTR:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr [[TMP1]], align 16
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x double>, ptr [[TMP3]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 2
; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x double>, ptr [[TMP5]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 3
; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x double>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP6]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP8]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> [[TMP12]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> [[TMP12]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[ADD1:%.*]] = fadd <4 x double> [[TMP13]], [[TMP15]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd <4 x double> [[ADD1]], [[TMP14]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd <4 x double> [[ADD2]], [[TMP16]]
; CHECK-NEXT:    ret <4 x double> [[ADD3]]
;
  %wide.vec = load <16 x double>, ptr %ptr, align 16
  %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %strided.v1 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %strided.v2 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %add1 = fadd <4 x double> %strided.v0, %strided.v1
  %add2 = fadd <4 x double> %add1, %strided.v2
  %add3 = fadd <4 x double> %add2, %strided.v3
  ret <4 x double> %add3
}

; Same factor-4 interleaved load as above, with i64 elements and integer adds.
define <4 x i64> @load_factori64_4(ptr %ptr) {
; CHECK-LABEL: @load_factori64_4(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x i64>, ptr [[PTR:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 16
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x i64>, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <4 x i64>, ptr [[PTR]], i32 2
; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x i64>, ptr [[PTR]], i32 3
; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP6]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP8]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP12]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP12]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[ADD1:%.*]] = add <4 x i64> [[TMP13]], [[TMP15]]
; CHECK-NEXT:    [[ADD2:%.*]] = add <4 x i64> [[ADD1]], [[TMP14]]
; CHECK-NEXT:    [[ADD3:%.*]] = add <4 x i64> [[ADD2]], [[TMP16]]
; CHECK-NEXT:    ret <4 x i64> [[ADD3]]
;
  %wide.vec = load <16 x i64>, ptr %ptr, align 16
  %strided.v0 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %strided.v1 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %strided.v2 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %strided.v3 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %add1 = add <4 x i64> %strided.v0, %strided.v1
  %add2 = add <4 x i64> %add1, %strided.v2
  %add3 = add <4 x i64> %add2, %strided.v3
  ret <4 x i64> %add3
}

; Only two shuffles use the wide load, and both select the same stride-4 lane
; set (0, 4, 8, 12); note that %strided.v3 uses the same mask as %strided.v0.
; The expected output still performs the four loads and the full transpose,
; then multiplies the first transposed vector by itself.
define <4 x double> @load_factorf64_1(ptr %ptr) {
; CHECK-LABEL: @load_factorf64_1(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x double>, ptr [[PTR:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr [[TMP1]], align 16
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x double>, ptr [[TMP3]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 2
; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x double>, ptr [[TMP5]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 3
; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x double>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP6]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP8]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> [[TMP12]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> [[TMP12]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[MUL:%.*]] = fmul <4 x double> [[TMP13]], [[TMP13]]
; CHECK-NEXT:    ret <4 x double> [[MUL]]
;
  %wide.vec = load <16 x double>, ptr %ptr, align 16
  %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %mul = fmul <4 x double> %strided.v0, %strided.v3
  ret <4 x double> %mul
}

; Factor-4 interleaved store: four <4 x double> values are concatenated and
; interleaved with mask <0, 4, 8, 12, 1, 5, ...>. The expected output extracts
; the four sub-vectors, transposes them with shuffles, concatenates the result
; and emits a single <16 x double> store.
define void @store_factorf64_4(ptr %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
; CHECK-LABEL: @store_factorf64_4(
; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x double> [[TMP15]], ptr [[PTR:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x double> %interleaved.vec, ptr %ptr, align 16
  ret void
}

; Same factor-4 interleaved store as above, with i64 elements.
define void @store_factori64_4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) {
; CHECK-LABEL: @store_factori64_4(
; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x i64> [[TMP13]], <8 x i64> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i64> [[TMP15]], ptr [[PTR:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i64> %interleaved.vec, ptr %ptr, align 16
  ret void
}

; Factor-4 interleaved store whose mask starts each lane group from the last
; sub-vector (<12, 8, 4, 0, 13, 9, 5, 1, ...>). The expected output extracts the
; sub-vectors in that reversed order (12..15, 8..11, 4..7, 0..3) before the
; transpose.
define void @store_factorf64_4_revMask(ptr %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
; CHECK-LABEL: @store_factorf64_4_revMask(
; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x double> [[TMP15]], ptr [[PTR:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
  store <16 x double> %interleaved.vec, ptr %ptr, align 16
  ret void
}

; Factor-4 interleaved store built from <16 x double> inputs, where the mask
; picks four 4-element runs starting at indices 4, 32, 16 and 8 of the
; concatenated <32 x double> operands. The expected output extracts exactly
; those runs before the transpose.
define void @store_factorf64_4_arbitraryMask(ptr %ptr, <16 x double> %v0, <16 x double> %v1, <16 x double> %v2, <16 x double> %v3) {
; CHECK-LABEL: @store_factorf64_4_arbitraryMask(
; CHECK-NEXT:    [[S0:%.*]] = shufflevector <16 x double> [[V0:%.*]], <16 x double> [[V1:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <16 x double> [[V2:%.*]], <16 x double> [[V3:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x double> [[TMP15]], ptr [[PTR:%.*]], align 16
; CHECK-NEXT:    ret void
;
  %s0 = shufflevector <16 x double> %v0, <16 x double> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %s1 = shufflevector <16 x double> %v2, <16 x double> %v3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %interleaved.vec = shufflevector <32 x double> %s0, <32 x double> %s1, <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
  store <16 x double> %interleaved.vec, ptr %ptr, align 16
  ret void
}

; This verifies that the test passes without hitting any assertions.
; X86InterleavedAccess could handle this case and generate a transposed
; sequence by extending the current implementation to create dummy vectors
; of undef. However, it deliberately does not optimize cases where the
; load size is less than Factor * NumberOfElements, because CG can easily
; generate a better sequence.
@a = local_unnamed_addr global <4 x double> zeroinitializer, align 32
; Negative test: a single <4 x double> load from @a feeds a shuffle whose mask
; only defines lane 3. The expected output is the same load and the same two
; shuffles, i.e. no interleaved-access lowering is applied.
; Function Attrs: norecurse nounwind readonly uwtable
; NOTE(review): the define below carries no attribute group, so the attribute
; list in the comment above may be stale - confirm before relying on it.
define <4 x double> @test_unhandled(<4 x double> %b) {
; CHECK-LABEL: @test_unhandled(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr @a, align 32
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> undef, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 0, i32 0>
; CHECK-NEXT:    ret <4 x double> [[SHUFFLE]]
;
entry:
  %0 = load <4 x double>, ptr @a, align 32
  %1 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
  %shuffle = shufflevector <4 x double> %1, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
  ret <4 x double> %shuffle
}