; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test wide load+promote patterns, which after combines and legalization are
; represented differently than 128-bit load+promote patterns.

target triple = "wasm32-unknown-unknown"

define <4 x double> @load_promote_v2f64(ptr %p) {
; CHECK-LABEL: load_promote_v2f64:
; CHECK:         .functype load_promote_v2f64 (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %e = load <4 x float>, ptr %p
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}

define <4 x double> @load_promote_v2f64_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_folded_offset:
; CHECK:         .functype load_promote_v2f64_with_folded_offset (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}

define <4 x double> @load_promote_v2f64_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_promote_v2f64_with_folded_gep_offset (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, ptr %p, i32 1
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}

define <4 x double> @load_promote_v2f64_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_negative_offset (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.tee 1
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, ptr %p, i32 -1
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}

define <4 x double> @load_promote_v2f64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_offset (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}

define <4 x double> @load_promote_v2f64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_offset (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, ptr %p, i32 1
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}

define <4 x double> @load_promote_v2f64_from_numeric_address() {
; CHECK-LABEL: load_promote_v2f64_from_numeric_address:
; CHECK:         .functype load_promote_v2f64_from_numeric_address (i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 40
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 32
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}

@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x double> @load_promote_v2f64_from_global_address() {
; CHECK-LABEL: load_promote_v2f64_from_global_address:
; CHECK:         .functype load_promote_v2f64_from_global_address (i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const gv_v4f32
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const gv_v4f32
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %e = load <4 x float>, ptr @gv_v4f32
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}
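; For contrast with the wide patterns tested above, below is a minimal sketch
; of the 128-bit load+promote pattern that the note at the top refers to. The
; function name @load_promote_v2f64_128 is hypothetical and no assertions are
; generated for it here; based on the instruction pairs checked above, a
; <2 x float> load widened to <2 x double> fits in a single v128 result and
; would be expected to lower to a single v128.load64_zero followed by
; f64x2.promote_low_f32x4, rather than the split load/store sequences above.
; If it were actually added to this test, its CHECK lines would need to be
; regenerated with utils/update_llc_test_checks.py.
define <2 x double> @load_promote_v2f64_128(ptr %p) {
  %e = load <2 x float>, ptr %p
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}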