; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVFHMIN-ZVFBFMIN
; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN-ZVFBFMIN


; Constant strides: the first operand of each fsub walks p[0,4,8,...,28]
; (element stride +4 -> byte stride 16) and the second walks p[30,26,...,2]
; (element stride -4 -> byte stride -16); both sides should become
; vp.strided.load calls feeding one <8 x float> fsub.
define void @test(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}

; Runtime stride: the first load sequence is p[0, %str, 2*%str, ..., 7*%str],
; so its byte stride is the runtime value %str * 4; the reverse side keeps the
; constant -16 byte stride as in @test.
define void @test1(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[STR]], 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %str = zext i32 %stride to i64
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %st1 = mul i64 %str, 2
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %st2 = mul i64 %str, 3
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %st3 = mul i64 %str, 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %st4 = mul i64 %str, 5
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %st5 = mul i64 %str, 6
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %st6 = mul i64 %str, 7
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}

; Reversed runtime stride: the second load sequence is p[7*%str, 6*%str, ..., %str, 0],
; so its byte stride is the negated runtime value (%str * -4); the forward side
; keeps a constant +16 byte stride starting at p[2].
define void @test2(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
; CHECK-NEXT:    [[ST6:%.*]] = mul i64 [[STR]], 7
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[STR]], -4
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %str = zext i32 %stride to i64
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i = load float, ptr %arrayidx, align 4
  %st6 = mul i64 %str, 7
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i2 = load float, ptr %arrayidx4, align 4
  %st5 = mul i64 %str, 6
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i4 = load float, ptr %arrayidx11, align 4
  %st4 = mul i64 %str, 5
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i6 = load float, ptr %arrayidx18, align 4
  %st3 = mul i64 %str, 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %st2 = mul i64 %str, 3
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i10 = load float, ptr %arrayidx32, align 4
  %st1 = mul i64 %str, 2
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}

; Unit reverse stride: second operand walks p[30,29,...,23] (element stride -1
; -> byte stride -4) while the first operand keeps the +16 byte stride.
define void @test3(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -4, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 29
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 27
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 25
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 23
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}


; Same pattern as @test but on bfloat: vectorized into <8 x bfloat> strided
; loads only when zvfbfmin is available; otherwise it stays scalar.
define void @test_bf16(ptr %p, ptr noalias %s) {
; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16(
; NO-ZVFHMIN-ZVFBFMIN-NEXT:  entry:
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I1:%.*]] = load bfloat, ptr [[ARRAYIDX1]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD:%.*]] = fsub fast bfloat [[I1]], [[I]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD]], ptr [[ARRAYIDX2]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I2:%.*]] = load bfloat, ptr [[ARRAYIDX4]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 26
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I3:%.*]] = load bfloat, ptr [[ARRAYIDX6]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD7:%.*]] = fsub fast bfloat [[I3]], [[I2]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 1
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD7]], ptr [[ARRAYIDX9]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 8
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I4:%.*]] = load bfloat, ptr [[ARRAYIDX11]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 22
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I5:%.*]] = load bfloat, ptr [[ARRAYIDX13]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD14:%.*]] = fsub fast bfloat [[I5]], [[I4]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD14]], ptr [[ARRAYIDX16]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 12
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I6:%.*]] = load bfloat, ptr [[ARRAYIDX18]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 18
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I7:%.*]] = load bfloat, ptr [[ARRAYIDX20]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD21:%.*]] = fsub fast bfloat [[I7]], [[I6]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 3
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD21]], ptr [[ARRAYIDX23]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 16
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I8:%.*]] = load bfloat, ptr [[ARRAYIDX25]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 14
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I9:%.*]] = load bfloat, ptr [[ARRAYIDX27]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD28:%.*]] = fsub fast bfloat [[I9]], [[I8]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD28]], ptr [[ARRAYIDX30]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 20
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I10:%.*]] = load bfloat, ptr [[ARRAYIDX32]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 10
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I11:%.*]] = load bfloat, ptr [[ARRAYIDX34]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD35:%.*]] = fsub fast bfloat [[I11]], [[I10]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 5
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD35]], ptr [[ARRAYIDX37]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 24
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I12:%.*]] = load bfloat, ptr [[ARRAYIDX39]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 6
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I13:%.*]] = load bfloat, ptr [[ARRAYIDX41]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD42:%.*]] = fsub fast bfloat [[I13]], [[I12]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 6
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD42]], ptr [[ARRAYIDX44]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 28
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I14:%.*]] = load bfloat, ptr [[ARRAYIDX46]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I15:%.*]] = load bfloat, ptr [[ARRAYIDX48]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD49:%.*]] = fsub fast bfloat [[I15]], [[I14]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 7
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD49]], ptr [[ARRAYIDX51]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    ret void
;
; ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16(
; ZVFHMIN-ZVFBFMIN-NEXT:  entry:
; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0
; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30
; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0
; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP15:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8)
; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP7:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8)
; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP16:%.*]] = fsub fast <8 x bfloat> [[TMP7]], [[TMP15]]
; ZVFHMIN-ZVFBFMIN-NEXT:    store <8 x bfloat> [[TMP16]], ptr [[ARRAYIDX2]], align 4
; ZVFHMIN-ZVFBFMIN-NEXT:    ret void
;
entry:
  %arrayidx = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 0
  %i = load bfloat, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 30
  %i1 = load bfloat, ptr %arrayidx1, align 4
  %add = fsub fast bfloat %i1, %i
  %arrayidx2 = getelementptr inbounds bfloat, ptr %s, i64 0
  store bfloat %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 4
  %i2 = load bfloat, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 26
  %i3 = load bfloat, ptr %arrayidx6, align 4
  %add7 = fsub fast bfloat %i3, %i2
  %arrayidx9 = getelementptr inbounds bfloat, ptr %s, i64 1
  store bfloat %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 8
  %i4 = load bfloat, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 22
  %i5 = load bfloat, ptr %arrayidx13, align 4
  %add14 = fsub fast bfloat %i5, %i4
  %arrayidx16 = getelementptr inbounds bfloat, ptr %s, i64 2
  store bfloat %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 12
  %i6 = load bfloat, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 18
  %i7 = load bfloat, ptr %arrayidx20, align 4
  %add21 = fsub fast bfloat %i7, %i6
  %arrayidx23 = getelementptr inbounds bfloat, ptr %s, i64 3
  store bfloat %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 16
  %i8 = load bfloat, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 14
  %i9 = load bfloat, ptr %arrayidx27, align 4
  %add28 = fsub fast bfloat %i9, %i8
  %arrayidx30 = getelementptr inbounds bfloat, ptr %s, i64 4
  store bfloat %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 20
  %i10 = load bfloat, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 10
  %i11 = load bfloat, ptr %arrayidx34, align 4
  %add35 = fsub fast bfloat %i11, %i10
  %arrayidx37 = getelementptr inbounds bfloat, ptr %s, i64 5
  store bfloat %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 24
  %i12 = load bfloat, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 6
  %i13 = load bfloat, ptr %arrayidx41, align 4
  %add42 = fsub fast bfloat %i13, %i12
  %arrayidx44 = getelementptr inbounds bfloat, ptr %s, i64 6
  store bfloat %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 28
  %i14 = load bfloat, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 2
  %i15 = load bfloat, ptr %arrayidx48, align 4
  %add49 = fsub fast bfloat %i15, %i14
  %arrayidx51 = getelementptr inbounds bfloat, ptr %s, i64 7
  store bfloat %add49, ptr %arrayidx51, align 4
  ret void
}

446define void @test_f16(ptr %p, ptr noalias %s) { 447; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_f16( 448; NO-ZVFHMIN-ZVFBFMIN-NEXT: entry: 449; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0 450; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load half, ptr [[ARRAYIDX]], align 4 451; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30 452; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load half, ptr [[ARRAYIDX1]], align 4 453; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD:%.*]] = fsub fast half [[I1]], [[I]] 454; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0 455; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD]], ptr [[ARRAYIDX2]], align 4 456; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 
4 457; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load half, ptr [[ARRAYIDX4]], align 4 458; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 26 459; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load half, ptr [[ARRAYIDX6]], align 4 460; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD7:%.*]] = fsub fast half [[I3]], [[I2]] 461; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 1 462; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD7]], ptr [[ARRAYIDX9]], align 4 463; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 8 464; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load half, ptr [[ARRAYIDX11]], align 4 465; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 22 466; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load half, ptr [[ARRAYIDX13]], align 4 467; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD14:%.*]] = fsub fast half [[I5]], [[I4]] 468; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds half, ptr [[S]], i64 2 469; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD14]], ptr [[ARRAYIDX16]], align 4 470; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 12 471; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load half, ptr [[ARRAYIDX18]], align 4 472; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 18 473; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load half, ptr [[ARRAYIDX20]], align 4 474; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD21:%.*]] = fsub fast half [[I7]], [[I6]] 475; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds half, ptr [[S]], i64 3 476; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD21]], ptr [[ARRAYIDX23]], align 4 477; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 16 478; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load half, ptr 
[[ARRAYIDX25]], align 4 479; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 14 480; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load half, ptr [[ARRAYIDX27]], align 4 481; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD28:%.*]] = fsub fast half [[I9]], [[I8]] 482; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds half, ptr [[S]], i64 4 483; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD28]], ptr [[ARRAYIDX30]], align 4 484; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 20 485; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load half, ptr [[ARRAYIDX32]], align 4 486; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 10 487; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load half, ptr [[ARRAYIDX34]], align 4 488; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD35:%.*]] = fsub fast half [[I11]], [[I10]] 489; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds half, ptr [[S]], i64 5 490; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD35]], ptr [[ARRAYIDX37]], align 4 491; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 24 492; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load half, ptr [[ARRAYIDX39]], align 4 493; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 6 494; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load half, ptr [[ARRAYIDX41]], align 4 495; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD42:%.*]] = fsub fast half [[I13]], [[I12]] 496; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds half, ptr [[S]], i64 6 497; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD42]], ptr [[ARRAYIDX44]], align 4 498; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 28 499; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = load half, ptr [[ARRAYIDX46]], align 4 500; 
NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 2 501; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load half, ptr [[ARRAYIDX48]], align 4 502; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD49:%.*]] = fsub fast half [[I15]], [[I14]] 503; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds half, ptr [[S]], i64 7 504; NO-ZVFHMIN-ZVFBFMIN-NEXT: store half [[ADD49]], ptr [[ARRAYIDX51]], align 4 505; NO-ZVFHMIN-ZVFBFMIN-NEXT: ret void 506; 507; ZVFHMIN-ZVFBFMIN-LABEL: @test_f16( 508; ZVFHMIN-ZVFBFMIN-NEXT: entry: 509; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0 510; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30 511; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0 512; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8) 513; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8) 514; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x half> [[TMP7]], [[TMP15]] 515; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x half> [[TMP16]], ptr [[ARRAYIDX2]], align 4 516; ZVFHMIN-ZVFBFMIN-NEXT: ret void 517; 518entry: 519 %arrayidx = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 0 520 %i = load half, ptr %arrayidx, align 4 521 %arrayidx1 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 30 522 %i1 = load half, ptr %arrayidx1, align 4 523 %add = fsub fast half %i1, %i 524 %arrayidx2 = getelementptr inbounds half, ptr %s, i64 0 525 store half %add, ptr %arrayidx2, align 4 526 %arrayidx4 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 4 527 %i2 = load half, ptr %arrayidx4, align 4 528 %arrayidx6 = getelementptr inbounds [48 x 
half], ptr %p, i64 0, i64 26 529 %i3 = load half, ptr %arrayidx6, align 4 530 %add7 = fsub fast half %i3, %i2 531 %arrayidx9 = getelementptr inbounds half, ptr %s, i64 1 532 store half %add7, ptr %arrayidx9, align 4 533 %arrayidx11 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 8 534 %i4 = load half, ptr %arrayidx11, align 4 535 %arrayidx13 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 22 536 %i5 = load half, ptr %arrayidx13, align 4 537 %add14 = fsub fast half %i5, %i4 538 %arrayidx16 = getelementptr inbounds half, ptr %s, i64 2 539 store half %add14, ptr %arrayidx16, align 4 540 %arrayidx18 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 12 541 %i6 = load half, ptr %arrayidx18, align 4 542 %arrayidx20 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 18 543 %i7 = load half, ptr %arrayidx20, align 4 544 %add21 = fsub fast half %i7, %i6 545 %arrayidx23 = getelementptr inbounds half, ptr %s, i64 3 546 store half %add21, ptr %arrayidx23, align 4 547 %arrayidx25 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 16 548 %i8 = load half, ptr %arrayidx25, align 4 549 %arrayidx27 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 14 550 %i9 = load half, ptr %arrayidx27, align 4 551 %add28 = fsub fast half %i9, %i8 552 %arrayidx30 = getelementptr inbounds half, ptr %s, i64 4 553 store half %add28, ptr %arrayidx30, align 4 554 %arrayidx32 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 20 555 %i10 = load half, ptr %arrayidx32, align 4 556 %arrayidx34 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 10 557 %i11 = load half, ptr %arrayidx34, align 4 558 %add35 = fsub fast half %i11, %i10 559 %arrayidx37 = getelementptr inbounds half, ptr %s, i64 5 560 store half %add35, ptr %arrayidx37, align 4 561 %arrayidx39 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 24 562 %i12 = load half, ptr %arrayidx39, align 4 563 %arrayidx41 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 6 564 %i13 = load half, 
ptr %arrayidx41, align 4 565 %add42 = fsub fast half %i13, %i12 566 %arrayidx44 = getelementptr inbounds half, ptr %s, i64 6 567 store half %add42, ptr %arrayidx44, align 4 568 %arrayidx46 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 28 569 %i14 = load half, ptr %arrayidx46, align 4 570 %arrayidx48 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 2 571 %i15 = load half, ptr %arrayidx48, align 4 572 %add49 = fsub fast half %i15, %i14 573 %arrayidx51 = getelementptr inbounds half, ptr %s, i64 7 574 store half %add49, ptr %arrayidx51, align 4 575 ret void 576} 577