; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
; RUN: | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN: | FileCheck %s --check-prefix=DEFAULT

; Floating-point SLP vectorization tests for riscv64 with the V and F
; extensions enabled.
;
; The file is run through the slp-vectorizer pass twice. The first invocation
; overrides -riscv-v-vector-bits-min and -riscv-v-slp-max-vf and is verified
; with the CHECK assertion set; the second leaves both options at their
; defaults and is verified with the DEFAULT assertion set. At present the two
; sets expect identical output for every function below.
;
; Every function follows the same shape: four scalar operations on elements
; 0..3 of the input pointer(s), with the four results stored to elements 0..3
; of %dst. The assertions expect each group of four to be replaced by a single
; 4-lane vector operation.

; Lane-wise fadd of four floats from %p with four floats from %q.
define void @fp_add(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_add
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_add
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  %a0 = fadd float %e0, %f0
  %a1 = fadd float %e1, %f1
  %a2 = fadd float %e2, %f2
  %a3 = fadd float %e3, %f3

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

; Lane-wise fsub of the constant 3.0 from four floats loaded from %p; the
; expected vector form uses a splat of that constant as the second operand.
define void @fp_sub(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_sub
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], splat (float 3.000000e+00)
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_sub
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], splat (float 3.000000e+00)
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %a0 = fsub float %e0, 3.0
  %a1 = fsub float %e1, 3.0
  %a2 = fsub float %e2, 3.0
  %a3 = fsub float %e3, 3.0

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

; Lane-wise fmul of four floats from %p with four floats from %q.
define void @fp_mul(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_mul
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_mul
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  %a0 = fmul float %e0, %f0
  %a1 = fmul float %e1, %f1
  %a2 = fmul float %e2, %f2
  %a3 = fmul float %e3, %f3

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

; Lane-wise fdiv of four floats loaded from %p by the constant 10.5; the
; expected vector form uses a splat of that constant as the divisor.
define void @fp_div(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_div
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], splat (float 1.050000e+01)
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_div
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], splat (float 1.050000e+01)
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %a0 = fdiv float %e0, 10.5
  %a1 = fdiv float %e1, 10.5
  %a2 = fdiv float %e2, 10.5
  %a3 = fdiv float %e3, 10.5

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

declare float @llvm.maxnum.f32(float, float)

; Four scalar llvm.maxnum.f32 calls on matching elements of %p and %q; the
; assertions expect a single call to the <4 x float> overload of the intrinsic.
define void @fp_max(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_max
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_max
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  %a0 = tail call float @llvm.maxnum.f32(float %e0, float %f0)
  %a1 = tail call float @llvm.maxnum.f32(float %e1, float %f1)
  %a2 = tail call float @llvm.maxnum.f32(float %e2, float %f2)
  %a3 = tail call float @llvm.maxnum.f32(float %e3, float %f3)

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

declare float @llvm.minnum.f32(float, float)

; Four scalar llvm.minnum.f32 calls, each pairing an element of %p with the
; constant 1.25; the assertions expect one <4 x float> call whose second
; operand is a splat of that constant.
define void @fp_min(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_min
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> splat (float 1.250000e+00))
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_min
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> splat (float 1.250000e+00))
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %a0 = tail call float @llvm.minnum.f32(float %e0, float 1.25)
  %a1 = tail call float @llvm.minnum.f32(float %e1, float 1.25)
  %a2 = tail call float @llvm.minnum.f32(float %e2, float 1.25)
  %a3 = tail call float @llvm.minnum.f32(float %e3, float 1.25)

  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

declare i32 @llvm.fptosi.sat.i32.f32(float)

; Four scalar llvm.fptosi.sat conversions from float to i32. Unlike the other
; functions in this file the results are i32, so %dst is indexed and stored as
; i32; the assertions expect one <4 x float> -> <4 x i32> intrinsic call and a
; <4 x i32> store.
define void @fp_convert(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_convert
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_convert
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  %a0 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e0)
  %a1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e1)
  %a2 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e2)
  %a3 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e3)

  store i32 %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %a3, ptr %pa3, align 4

  ret void
}