; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test that SIMD shifts can be lowered correctly even when shift
; values are exported from outside blocks.

target triple = "wasm32-unknown-unknown"

define void @shl_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_loop:
; CHECK:         .functype shl_loop (i32, i32, i32) -> ()
; CHECK-NEXT:    .local i32
; CHECK-NEXT:  # %bb.0: # %entry
; CHECK-NEXT:  .LBB0_1: # %body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    loop # label0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.tee 3
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0:p2align=0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shl
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    local.set 0
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    br_if 0 # 0: up to label0
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    end_loop
; CHECK-NEXT:    # fallthrough-return
entry:
  ; The splat of %shift is built here, outside the loop, so the vector shift
  ; amount used in %body is defined in a different basic block.
  %t1 = insertelement <16 x i8> undef, i8 %shift, i32 0
  %vshift = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
  br label %body
body:
  ; %out advances by 16 bytes per iteration; %i counts iterations from 0.
  %out = phi ptr [%a, %entry], [%b, %body]
  %i = phi i32 [0, %entry], [%next, %body]
  %v = load <16 x i8>, ptr %out, align 1
  %r = shl <16 x i8> %v, %vshift
  ; The result is stored at %out + 16 (%b), not back to %out.
  %b = getelementptr inbounds i8, ptr %out, i32 16
  store <16 x i8> %r, ptr %b
  %next = add i32 %i, 1
  ; As written, the back edge to %body is taken while %next == %count.
  %i.cmp = icmp eq i32 %next, %count
  br i1 %i.cmp, label %body, label %exit
exit:
  ret void
}

; Test that SIMD shifts can be lowered correctly when shift value
; is a phi inside loop body.

; In @shl_phi_loop the scalar shift amount %t1 is loop-carried: it starts as
; %shift and becomes (%t1 & 1) on each back edge, so the splat is rebuilt
; inside %body on every iteration.
define void @shl_phi_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_phi_loop:
; CHECK:         .functype shl_phi_loop (i32, i32, i32) -> ()
; CHECK-NEXT:    .local i32
; CHECK-NEXT:  # %bb.0: # %entry
; CHECK-NEXT:  .LBB1_1: # %body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    loop # label1:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.tee 3
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0:p2align=0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shl
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    local.set 1
; CHECK-NEXT:    local.get 3
; CHECK-NEXT:    local.set 0
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    br_if 0 # 0: up to label1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    end_loop
; CHECK-NEXT:    # fallthrough-return
entry:
  br label %body
body:
  ; %out advances by 16 bytes per iteration; %i counts iterations from 0.
  %out = phi ptr [%a, %entry], [%b, %body]
  %i = phi i32 [0, %entry], [%next, %body]
  ; Scalar shift amount as a phi: %shift on entry, %sand on the back edge.
  %t1 = phi i8 [%shift, %entry], [%sand, %body]
  ; Splat %t1 across all 16 lanes to form the vector shift amount.
  %t2 = insertelement <16 x i8> undef, i8 %t1, i32 0
  %vshift = shufflevector <16 x i8> %t2, <16 x i8> undef, <16 x i32> zeroinitializer
  %v = load <16 x i8>, ptr %out, align 1
  %r = shl <16 x i8> %v, %vshift
  ; The result is stored at %out + 16 (%b), not back to %out.
  %b = getelementptr inbounds i8, ptr %out, i32 16
  store <16 x i8> %r, ptr %b
  ; Next iteration's shift amount: the low bit of the current one.
  %sand = and i8 %t1, 1
  %next = add i32 %i, 1
  ; As written, the back edge to %body is taken while %next == %count.
  %i.cmp = icmp eq i32 %next, %count
  br i1 %i.cmp, label %body, label %exit
exit:
  ret void
}