; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; Test vector add reduction intrinsic
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s

; 1 vector length
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
declare i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %a)
; 2 vector lengths
declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a)
declare i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
; TODO
; 4 vector lengths
declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %a)
declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %a)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a)
declare i128 @llvm.vector.reduce.add.v4i128(<4 x i128> %a)
; Subvector lengths
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)

define i8 @f1_1(<16 x i8> %a) {
; CHECK-LABEL: f1_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgbm %v0, 0
; CHECK-NEXT:    vsumb %v1, %v24, %v0
; CHECK-NEXT:    vsumqf %v0, %v1, %v0
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
  ret i8 %redadd
}

define i16 @f1_2(<8 x i16> %a) {
; CHECK-LABEL: f1_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgbm %v0, 0
; CHECK-NEXT:    vsumh %v1, %v24, %v0
; CHECK-NEXT:    vsumqf %v0, %v1, %v0
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
  ret i16 %redadd
}

define i32 @f1_3(<4 x i32> %a) {
; CHECK-LABEL: f1_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgbm %v0, 0
; CHECK-NEXT:    vsumqf %v0, %v24, %v0
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14

  %redadd = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
  ret i32 %redadd
}

define i64 @f1_4(<2 x i64> %a) {
; CHECK-LABEL: f1_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepg %v0, %v24, 1
; CHECK-NEXT:    vag %v0, %v24, %v0
; CHECK-NEXT:    vlgvg %r2, %v0, 0
; CHECK-NEXT:    br %r14

  %redadd = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
  ret i64 %redadd
}

define i128 @f1_5(<1 x i128> %a) {
; CHECK-LABEL: f1_5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vst %v24, 0(%r2), 3
; CHECK-NEXT:    br %r14
  %redadd = call i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %a)
  ret i128 %redadd
}

define i8 @f2_1(<32 x i8> %a) {
; CHECK-LABEL: f2_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vab %v0, %v24, %v26
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumb %v0, %v0, %v1
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a)
  ret i8 %redadd
}

define i16 @f2_2(<16 x i16> %a) {
; CHECK-LABEL: f2_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vah %v0, %v24, %v26
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumh %v0, %v0, %v1
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a)
  ret i16 %redadd
}

define i32 @f2_3(<8 x i32> %a) {
; CHECK-LABEL: f2_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaf %v0, %v24, %v26
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14

  %redadd = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
  ret i32 %redadd
}

define i64 @f2_4(<4 x i64> %a) {
; CHECK-LABEL: f2_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vag %v0, %v24, %v26
; CHECK-NEXT:    vrepg %v1, %v0, 1
; CHECK-NEXT:    vag %v0, %v0, %v1
; CHECK-NEXT:    vlgvg %r2, %v0, 0
; CHECK-NEXT:    br %r14

  %redadd = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a)
  ret i64 %redadd
}

define i128 @f2_5(<2 x i128> %a) {
; CHECK-LABEL: f2_5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl %v0, 16(%r3), 3
; CHECK-NEXT:    vl %v1, 0(%r3), 3
; CHECK-NEXT:    vaq %v0, %v1, %v0
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
  %redadd = call i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
  ret i128 %redadd
}

define i8 @f3_1(<64 x i8> %a) {
; CHECK-LABEL: f3_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vab %v0, %v26, %v30
; CHECK-NEXT:    vab %v1, %v24, %v28
; CHECK-NEXT:    vab %v0, %v1, %v0
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumb %v0, %v0, %v1
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %a)
  ret i8 %redadd
}

define i16 @f3_2(<32 x i16> %a) {
; CHECK-LABEL: f3_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vah %v0, %v26, %v30
; CHECK-NEXT:    vah %v1, %v24, %v28
; CHECK-NEXT:    vah %v0, %v1, %v0
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumh %v0, %v0, %v1
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %a)
  ret i16 %redadd
}

define i32 @f3_3(<16 x i32> %a) {
; CHECK-LABEL: f3_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaf %v0, %v26, %v30
; CHECK-NEXT:    vaf %v1, %v24, %v28
; CHECK-NEXT:    vaf %v0, %v1, %v0
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14

  %redadd = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
  ret i32 %redadd
}

define i64 @f3_4(<8 x i64> %a) {
; CHECK-LABEL: f3_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vag %v0, %v26, %v30
; CHECK-NEXT:    vag %v1, %v24, %v28
; CHECK-NEXT:    vag %v0, %v1, %v0
; CHECK-NEXT:    vrepg %v1, %v0, 1
; CHECK-NEXT:    vag %v0, %v0, %v1
; CHECK-NEXT:    vlgvg %r2, %v0, 0
; CHECK-NEXT:    br %r14

  %redadd = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a)
  ret i64 %redadd
}

define i128 @f3_5(<4 x i128> %a) {
; CHECK-LABEL: f3_5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl %v0, 32(%r3), 3
; CHECK-NEXT:    vl %v1, 0(%r3), 3
; CHECK-NEXT:    vl %v2, 48(%r3), 3
; CHECK-NEXT:    vl %v3, 16(%r3), 3
; CHECK-NEXT:    vaq %v2, %v3, %v2
; CHECK-NEXT:    vaq %v0, %v1, %v0
; CHECK-NEXT:    vaq %v0, %v0, %v2
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
  %redadd = call i128 @llvm.vector.reduce.add.v4i128(<4 x i128> %a)
  ret i128 %redadd
}


define i8 @f4_1(<8 x i8> %a) {
; CHECK-LABEL: f4_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpkg %v0, %v24, %v24
; CHECK-NEXT:    vab %v0, %v24, %v0
; CHECK-NEXT:    vpkf %v1, %v0, %v0
; CHECK-NEXT:    vab %v0, %v0, %v1
; CHECK-NEXT:    vrepb %v1, %v0, 1
; CHECK-NEXT:    vab %v0, %v0, %v1
; CHECK-NEXT:    vlgvb %r2, %v0, 0
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
  ret i8 %redadd
}

define i16 @f4_2(<4 x i16> %a) {
; CHECK-LABEL: f4_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpkg %v0, %v24, %v24
; CHECK-NEXT:    vah %v0, %v24, %v0
; CHECK-NEXT:    vreph %v1, %v0, 1
; CHECK-NEXT:    vah %v0, %v0, %v1
; CHECK-NEXT:    vlgvh %r2, %v0, 0
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
  ret i16 %redadd
}

define i32 @f4_3(<2 x i32> %a) {
; CHECK-LABEL: f4_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepf %v0, %v24, 1
; CHECK-NEXT:    vaf %v0, %v24, %v0
; CHECK-NEXT:    vlgvf %r2, %v0, 0
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14

  %redadd = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
  ret i32 %redadd
}

define i64 @f4_4(<1 x i64> %a) {
; CHECK-LABEL: f4_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlgvg %r2, %v24, 0
; CHECK-NEXT:    br %r14

  %redadd = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
  ret i64 %redadd
}