1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 2 // RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \ 3 // RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s 4 5 // REQUIRES: aarch64-registered-target || arm-registered-target 6 7 #include <arm_neon.h> 8 9 // CHECK-LABEL: @test_vcreate_bf16( 10 // CHECK-NEXT: entry: 11 // CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[A:%.*]] to <4 x bfloat> 12 // CHECK-NEXT: ret <4 x bfloat> [[TMP0]] 13 // 14 bfloat16x4_t test_vcreate_bf16(uint64_t a) { 15 return vcreate_bf16(a); 16 } 17 18 // CHECK-LABEL: @test_vdup_n_bf16( 19 // CHECK-NEXT: entry: 20 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x bfloat> poison, bfloat [[V:%.*]], i32 0 21 // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1 22 // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2 23 // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3 24 // CHECK-NEXT: ret <4 x bfloat> [[VECINIT3_I]] 25 // 26 bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) { 27 return vdup_n_bf16(v); 28 } 29 30 // CHECK-LABEL: @test_vdupq_n_bf16( 31 // CHECK-NEXT: entry: 32 // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x bfloat> poison, bfloat [[V:%.*]], i32 0 33 // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x bfloat> [[VECINIT_I]], bfloat [[V]], i32 1 34 // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x bfloat> [[VECINIT1_I]], bfloat [[V]], i32 2 35 // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x bfloat> [[VECINIT2_I]], bfloat [[V]], i32 3 36 // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x bfloat> [[VECINIT3_I]], bfloat [[V]], i32 4 37 // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x bfloat> [[VECINIT4_I]], bfloat [[V]], i32 5 38 // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x bfloat> [[VECINIT5_I]], bfloat [[V]], i32 6 39 // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x bfloat> [[VECINIT6_I]], bfloat [[V]], i32 7 40 // CHECK-NEXT: ret <8 x bfloat> [[VECINIT7_I]] 41 // 42 bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) { 43 return vdupq_n_bf16(v); 44 } 45 46 // CHECK-LABEL: @test_vdup_lane_bf16( 47 // CHECK-NEXT: entry: 48 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8> 49 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat> 50 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 51 // CHECK-NEXT: ret <4 x bfloat> [[LANE]] 52 // 53 bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) { 54 return vdup_lane_bf16(v, 1); 55 } 56 57 // CHECK-LABEL: @test_vdupq_lane_bf16( 58 // CHECK-NEXT: entry: 59 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x bfloat> [[V:%.*]] to <8 x i8> 60 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x bfloat> 61 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> [[TMP1]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 62 // CHECK-NEXT: ret <8 x bfloat> [[LANE]] 63 // 64 bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) { 65 return vdupq_lane_bf16(v, 1); 66 } 67 68 // CHECK-LABEL: @test_vdup_laneq_bf16( 69 // CHECK-NEXT: entry: 70 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8> 71 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat> 72 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 73 // CHECK-NEXT: ret <4 x bfloat> [[LANE]] 74 // 75 bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) { 76 return vdup_laneq_bf16(v, 7); 77 } 78 79 // CHECK-LABEL: @test_vdupq_laneq_bf16( 80 // CHECK-NEXT: entry: 81 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat> [[V:%.*]] to <16 x i8> 82 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x bfloat> 83 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 84 // CHECK-NEXT: ret <8 x bfloat> [[LANE]] 85 // 86 bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) { 87 return vdupq_laneq_bf16(v, 7); 88 } 89 90 // CHECK-LABEL: @test_vcombine_bf16( 91 // CHECK-NEXT: entry: 92 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> [[LOW:%.*]], <4 x bfloat> [[HIGH:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 93 // CHECK-NEXT: ret <8 x bfloat> [[SHUFFLE_I]] 94 // 95 bfloat16x8_t test_vcombine_bf16(bfloat16x4_t low, bfloat16x4_t high) { 96 return vcombine_bf16(low, high); 97 } 98 99 // CHECK-LABEL: @test_vget_high_bf16( 100 // CHECK-NEXT: entry: 101 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 102 // CHECK-NEXT: ret <4 x bfloat> [[SHUFFLE_I]] 103 // 104 bfloat16x4_t test_vget_high_bf16(bfloat16x8_t a) { 105 return vget_high_bf16(a); 106 } 107 108 // CHECK-LABEL: @test_vget_low_bf16( 109 // CHECK-NEXT: entry: 110 // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 111 // CHECK-NEXT: ret <4 x bfloat> [[SHUFFLE_I]] 112 // 113 bfloat16x4_t test_vget_low_bf16(bfloat16x8_t a) { 114 return vget_low_bf16(a); 115 } 116 117 // CHECK-LABEL: @test_vget_lane_bf16( 118 // CHECK-NEXT: entry: 119 // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1 120 // CHECK-NEXT: ret bfloat [[VGET_LANE]] 121 // 122 bfloat16_t test_vget_lane_bf16(bfloat16x4_t v) { 123 return vget_lane_bf16(v, 1); 124 } 125 126 // CHECK-LABEL: @test_vgetq_lane_bf16( 127 // CHECK-NEXT: entry: 128 // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7 129 // CHECK-NEXT: ret bfloat [[VGETQ_LANE]] 130 // 131 bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) { 132 return vgetq_lane_bf16(v, 7); 133 } 134 135 // CHECK-LABEL: @test_vset_lane_bf16( 136 // CHECK-NEXT: entry: 137 // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <4 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 1 138 // CHECK-NEXT: ret <4 x bfloat> [[VSET_LANE]] 139 // 140 bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) { 141 return vset_lane_bf16(a, v, 1); 142 } 143 144 // CHECK-LABEL: @test_vsetq_lane_bf16( 145 // CHECK-NEXT: entry: 146 // CHECK-NEXT: [[VSET_LANE:%.*]] = insertelement <8 x bfloat> [[V:%.*]], bfloat [[A:%.*]], i32 7 147 // CHECK-NEXT: ret <8 x bfloat> [[VSET_LANE]] 148 // 149 bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) { 150 return vsetq_lane_bf16(a, v, 7); 151 } 152 153 // CHECK-LABEL: @test_vduph_lane_bf16( 154 // CHECK-NEXT: entry: 155 // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x bfloat> [[V:%.*]], i32 1 156 // CHECK-NEXT: ret bfloat [[VGET_LANE]] 157 // 158 bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { 159 return vduph_lane_bf16(v, 1); 160 } 161 162 // CHECK-LABEL: @test_vduph_laneq_bf16( 163 // CHECK-NEXT: entry: 164 // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x bfloat> [[V:%.*]], i32 7 165 // CHECK-NEXT: ret bfloat [[VGETQ_LANE]] 166 // 167 bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { 168 return vduph_laneq_bf16(v, 7); 169 } 170