; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs < %s | FileCheck %s

target triple = "aarch64-linux"

; Codegen tests for `sitofp` from i8 vectors performed inside a loop on
; AArch64. For <8 x i8> -> <8 x float> and <16 x i8> -> <16 x float> the
; expected output widens the input with `tbl` shuffles driven by constant-pool
; masks and converts with the fixed-point form of `scvtf`. The f16 and f64
; cases are expected to keep the `sshll`-based lowering.
;
; REVIEW: every loop below branches back to %loop when `%iv.next == 1000` and
; to %exit otherwise, which is what the `b.eq` in the expected assembly
; reflects. With %iv starting at 0 the body therefore runs once; this looks
; inverted relative to a usual counted loop. Changing the IR requires
; regenerating the assertions with utils/update_llc_test_checks.py.

; Shuffle masks expected in the constant pool for @sitofp_v8i8_to_v8f32.
; Each 4-byte group is {255, 255, 255, N}: source byte N is placed in the last
; byte of the group and the other three bytes use index 255, which is out of
; range for a single-register `tbl` and so yields zero. On this little-endian
; target that puts the i8 value in the top byte of each 32-bit lane.
; .LCPI0_0 selects source bytes 4-7, .LCPI0_1 selects source bytes 0-3.
; CHECK-LABEL: .LCPI0_0:
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .LCPI0_1:
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 3

; Positive test: <8 x i8> -> <8 x float>.
; Expected code loads both masks ahead of the loop, then per iteration issues
; two `tbl` shuffles of the 8-byte input followed by `scvtf ..., #24`, and
; stores the two 4 x float halves with a single `stp`.
define void @sitofp_v8i8_to_v8f32(ptr %src, ptr %dst) {
; CHECK-LABEL: sitofp_v8i8_to_v8f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: adrp x9, .LCPI0_1
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI0_1]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB0_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d2, [x0, x8, lsl #3]
; CHECK-NEXT: add x9, x1, x8, lsl #5
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: tbl v3.16b, { v2.16b }, v0.16b
; CHECK-NEXT: tbl v2.16b, { v2.16b }, v1.16b
; CHECK-NEXT: scvtf v3.4s, v3.4s, #24
; CHECK-NEXT: scvtf v2.4s, v2.4s, #24
; CHECK-NEXT: stp q2, q3, [x9]
; CHECK-NEXT: b.eq .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
  br label %loop

loop:
  ; %iv indexes whole vectors: element %iv of %src (<8 x i8>) is converted and
  ; written to element %iv of %dst (<8 x float>).
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.src = getelementptr inbounds <8 x i8>, ptr %src, i64 %iv
  %l = load <8 x i8>, ptr %gep.src
  %conv = sitofp <8 x i8> %l to <8 x float>
  %gep.dst = getelementptr inbounds <8 x float>, ptr %dst, i64 %iv
  store <8 x float> %conv, ptr %gep.dst
  %iv.next = add i64 %iv, 1
  ; The back edge is taken only when %iv.next equals 1000.
  %ec = icmp eq i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret void
}

; Shuffle masks expected in the constant pool for @sitofp_v16i8_to_v16f32.
; Same {255, 255, 255, N} layout as above, one mask per group of four source
; bytes: .LCPI1_0 selects bytes 12-15, .LCPI1_1 bytes 8-11, .LCPI1_2 bytes 4-7
; and .LCPI1_3 bytes 0-3.
; CHECK-LABEL: .LCPI1_0:
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 12
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .LCPI1_1:
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .LCPI1_2:
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .LCPI1_3:
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 1
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 255
; CHECK-NEXT: .byte 3

; Positive test: <16 x i8> -> <16 x float>.
; Expected code loads four masks ahead of the loop, then per iteration issues
; four `tbl` shuffles of the 16-byte input, four `scvtf ..., #24`, and two
; `stp` stores covering the 64-byte result.
define void @sitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
; CHECK-LABEL: sitofp_v16i8_to_v16f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: adrp x9, .LCPI1_1
; CHECK-NEXT: adrp x10, .LCPI1_2
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: adrp x8, .LCPI1_3
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI1_1]
; CHECK-NEXT: ldr q2, [x10, :lo12:.LCPI1_2]
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI1_3]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB1_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q4, [x0, x8, lsl #4]
; CHECK-NEXT: add x9, x1, x8, lsl #6
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: tbl v5.16b, { v4.16b }, v0.16b
; CHECK-NEXT: tbl v6.16b, { v4.16b }, v1.16b
; CHECK-NEXT: tbl v7.16b, { v4.16b }, v2.16b
; CHECK-NEXT: tbl v4.16b, { v4.16b }, v3.16b
; CHECK-NEXT: scvtf v5.4s, v5.4s, #24
; CHECK-NEXT: scvtf v6.4s, v6.4s, #24
; CHECK-NEXT: scvtf v7.4s, v7.4s, #24
; CHECK-NEXT: scvtf v4.4s, v4.4s, #24
; CHECK-NEXT: stp q6, q5, [x9, #32]
; CHECK-NEXT: stp q4, q7, [x9]
; CHECK-NEXT: b.eq .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
  br label %loop

loop:
  ; Same loop shape as @sitofp_v8i8_to_v8f32, with 16-element vectors.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.src = getelementptr inbounds <16 x i8>, ptr %src, i64 %iv
  %l = load <16 x i8>, ptr %gep.src
  %conv = sitofp <16 x i8> %l to <16 x float>
  %gep.dst = getelementptr inbounds <16 x float>, ptr %dst, i64 %iv
  store <16 x float> %conv, ptr %gep.dst
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret void
}


; Negative test: conversions to f16/bf16 are currently not lowered via `tbl`.
; Only an f16 case follows here. The expected output sign-extends with
; `sshll`/`sshll2`, converts to f32 with plain `scvtf`, and narrows to f16
; with `fcvtn`/`fcvtn2`; no constant-pool mask is expected.
define void @sitofp_v8i8_to_v8f16(ptr %src, ptr %dst) {
; CHECK-LABEL: sitofp_v8i8_to_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB2_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d0, [x0, x8, lsl #3]
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll v1.4s, v0.4h, #0
; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT: scvtf v1.4s, v1.4s
; CHECK-NEXT: scvtf v0.4s, v0.4s
; CHECK-NEXT: fcvtn v1.4h, v1.4s
; CHECK-NEXT: fcvtn2 v1.8h, v0.4s
; CHECK-NEXT: str q1, [x1, x8, lsl #4]
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: b.eq .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
  br label %loop

loop:
  ; Same loop shape as the f32 tests, converting <8 x i8> to <8 x half>.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.src = getelementptr inbounds <8 x i8>, ptr %src, i64 %iv
  %l = load <8 x i8>, ptr %gep.src
  %conv = sitofp <8 x i8> %l to <8 x half>
  %gep.dst = getelementptr inbounds <8 x half>, ptr %dst, i64 %iv
  store <8 x half> %conv, ptr %gep.dst
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret void
}


; Negative test: conversion to double with the help of `tbl` is not
; implemented yet (TODO). The expected output loads the two source bytes with
; scalar `ldrsb`, inserts them into a vector, widens with `sshll`, and converts
; with plain `scvtf`.
define void @sitofp_v2i8_to_v2f64(ptr %src, ptr %dst) {
; CHECK-LABEL: sitofp_v2i8_to_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: .LBB3_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add x9, x0, x8, lsl #1
; CHECK-NEXT: ldrsb w10, [x9]
; CHECK-NEXT: ldrsb w9, [x9, #1]
; CHECK-NEXT: fmov s0, w10
; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-NEXT: scvtf v0.2d, v0.2d
; CHECK-NEXT: str q0, [x1, x8, lsl #4]
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: b.eq .LBB3_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
entry:
  br label %loop

loop:
  ; Same loop shape as the f32 tests, converting <2 x i8> to <2 x double>.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.src = getelementptr inbounds <2 x i8>, ptr %src, i64 %iv
  %l = load <2 x i8>, ptr %gep.src
  %conv = sitofp <2 x i8> %l to <2 x double>
  %gep.dst = getelementptr inbounds <2 x double>, ptr %dst, i64 %iv
  store <2 x double> %conv, ptr %gep.dst
  %iv.next = add i64 %iv, 1
  %ec = icmp eq i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret void
}