1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 2 // RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \ 3 // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-LE %s 4 // RUN: %clang_cc1 -triple aarch64_be -target-feature +neon -target-feature +bf16 \ 5 // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck --check-prefix=CHECK-BE %s 6 7 // REQUIRES: aarch64-registered-target || arm-registered-target 8 9 #include <arm_neon.h> 10 11 // CHECK-LE-LABEL: @test_vcopy_lane_bf16_v1( 12 // CHECK-LE-NEXT: entry: 13 // CHECK-LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 3 14 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 1 15 // CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]] 16 // 17 // CHECK-BE-LABEL: @test_vcopy_lane_bf16_v1( 18 // CHECK-BE-NEXT: entry: 19 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 20 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 21 // CHECK-BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 3 22 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 1 23 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 24 // CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]] 25 // 26 bfloat16x4_t test_vcopy_lane_bf16_v1(bfloat16x4_t a, bfloat16x4_t b) { 27 return vcopy_lane_bf16(a, 1, b, 3); 28 } 29 30 // CHECK-LE-LABEL: @test_vcopy_lane_bf16_v2( 31 // CHECK-LE-NEXT: entry: 32 // CHECK-LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 0 33 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 2 34 // CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]] 35 // 36 // CHECK-BE-LABEL: @test_vcopy_lane_bf16_v2( 37 // CHECK-BE-NEXT: entry: 38 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 39 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 40 // CHECK-BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 0 41 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 2 42 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 43 // CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]] 44 // 45 bfloat16x4_t test_vcopy_lane_bf16_v2(bfloat16x4_t a, bfloat16x4_t b) { 46 return vcopy_lane_bf16(a, 2, b, 0); 47 } 48 49 // CHECK-LE-LABEL: @test_vcopyq_lane_bf16_v1( 50 // CHECK-LE-NEXT: entry: 51 // CHECK-LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 2 52 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 0 53 // CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]] 54 // 55 // CHECK-BE-LABEL: @test_vcopyq_lane_bf16_v1( 56 // CHECK-BE-NEXT: entry: 57 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 58 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 59 // CHECK-BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 2 60 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 0 61 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 62 // CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]] 63 // 64 bfloat16x8_t test_vcopyq_lane_bf16_v1(bfloat16x8_t a, bfloat16x4_t b) { 65 return vcopyq_lane_bf16(a, 0, b, 2); 66 } 67 68 // CHECK-LE-LABEL: @test_vcopyq_lane_bf16_v2( 69 // CHECK-LE-NEXT: entry: 70 // CHECK-LE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[B:%.*]], i32 0 71 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGET_LANE]], i32 6 72 // CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]] 73 // 74 // CHECK-BE-LABEL: @test_vcopyq_lane_bf16_v2( 75 // CHECK-BE-NEXT: entry: 76 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 77 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x bfloat> [[B:%.*]], <4 x bfloat> [[B]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 78 // CHECK-BE-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[SHUFFLE1]], i32 0 79 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGET_LANE]], i32 6 80 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 81 // CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]] 82 // 83 bfloat16x8_t test_vcopyq_lane_bf16_v2(bfloat16x8_t a, bfloat16x4_t b) { 84 return vcopyq_lane_bf16(a, 6, b, 0); 85 } 86 87 // CHECK-LE-LABEL: @test_vcopy_laneq_bf16_v1( 88 // CHECK-LE-NEXT: entry: 89 // CHECK-LE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 7 90 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 0 91 // CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]] 92 // 93 // CHECK-BE-LABEL: @test_vcopy_laneq_bf16_v1( 94 // CHECK-BE-NEXT: entry: 95 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 96 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 97 // CHECK-BE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 7 98 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 0 99 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 100 // CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]] 101 // 102 bfloat16x4_t test_vcopy_laneq_bf16_v1(bfloat16x4_t a, bfloat16x8_t b) { 103 return vcopy_laneq_bf16(a, 0, b, 7); 104 } 105 106 // CHECK-LE-LABEL: @test_vcopy_laneq_bf16_v2( 107 // CHECK-LE-NEXT: entry: 108 // CHECK-LE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 4 109 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 3 110 // CHECK-LE-NEXT: ret <4 x bfloat> [[VSET_LANE]] 111 // 112 // CHECK-BE-LABEL: @test_vcopy_laneq_bf16_v2( 113 // CHECK-BE-NEXT: entry: 114 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x bfloat> [[A:%.*]], <4 x bfloat> [[A]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 115 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 116 // CHECK-BE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 4 117 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 3 118 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <4 x bfloat> [[VSET_LANE]], <4 x bfloat> [[VSET_LANE]], <4 x i32> <i32 3, i32 2, i32 1, i32 0> 119 // CHECK-BE-NEXT: ret <4 x bfloat> [[SHUFFLE5]] 120 // 121 bfloat16x4_t test_vcopy_laneq_bf16_v2(bfloat16x4_t a, bfloat16x8_t b) { 122 return vcopy_laneq_bf16(a, 3, b, 4); 123 } 124 125 // CHECK-LE-LABEL: @test_vcopyq_laneq_bf16_v1( 126 // CHECK-LE-NEXT: entry: 127 // CHECK-LE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 7 128 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 3 129 // CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]] 130 // 131 // CHECK-BE-LABEL: @test_vcopyq_laneq_bf16_v1( 132 // CHECK-BE-NEXT: entry: 133 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 134 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 135 // CHECK-BE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 7 136 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 3 137 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 138 // CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]] 139 // 140 bfloat16x8_t test_vcopyq_laneq_bf16_v1(bfloat16x8_t a, bfloat16x8_t b) { 141 return vcopyq_laneq_bf16(a, 3, b, 7); 142 143 } 144 145 // CHECK-LE-LABEL: @test_vcopyq_laneq_bf16_v2( 146 // CHECK-LE-NEXT: entry: 147 // CHECK-LE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[B:%.*]], i32 2 148 // CHECK-LE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[A:%.*]], bfloat [[VGETQ_LANE]], i32 6 149 // CHECK-LE-NEXT: ret <8 x bfloat> [[VSET_LANE]] 150 // 151 // CHECK-BE-LABEL: @test_vcopyq_laneq_bf16_v2( 152 // CHECK-BE-NEXT: entry: 153 // CHECK-BE-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 154 // CHECK-BE-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x bfloat> [[B:%.*]], <8 x bfloat> [[B]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 155 // CHECK-BE-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[SHUFFLE1]], i32 2 156 // CHECK-BE-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[SHUFFLE]], bfloat [[VGETQ_LANE]], i32 6 157 // CHECK-BE-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x bfloat> [[VSET_LANE]], <8 x bfloat> [[VSET_LANE]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> 158 // CHECK-BE-NEXT: ret <8 x bfloat> [[SHUFFLE5]] 159 // 160 bfloat16x8_t test_vcopyq_laneq_bf16_v2(bfloat16x8_t a, bfloat16x8_t b) { 161 return vcopyq_laneq_bf16(a, 6, b, 2); 162 } 163 164