; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s

;; Codegen test for the `insertelement` instruction on 256-bit vector types
;; when compiling for loongarch64 with the `+lasx` feature enabled.
;;
;; Every function follows the same shape: load a vector from %src, insert the
;; scalar %in at some lane, store the result to %dst.
;;   * The first group uses a constant lane index.
;;   * The `_upper` variants use a constant index that is the first lane of the
;;     second half of the vector (16 of 32, 8 of 16).
;;   * The `_idx` variants take the lane index as a run-time argument (%idx).
;; The vector load is volatile in every function; presumably this keeps the
;; load from being folded or removed -- TODO confirm the original intent.
;;
;; The assertion lines are generated by the script named on the first line;
;; regenerate them with that script instead of editing them by hand.

;; <32 x i8>, constant index 1.
;; Expected assembly: a single vinsgr2vr.b on $vr0 with lane 1 between the
;; xvld and the xvst.
define void @insert_32xi8(ptr %src, ptr %dst, i8 %in) nounwind {
; CHECK-LABEL: insert_32xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
  %v = load volatile <32 x i8>, ptr %src
  %v_new = insertelement <32 x i8> %v, i8 %in, i32 1
  store <32 x i8> %v_new, ptr %dst
  ret void
}

;; <32 x i8>, constant index 16 (first lane of the second 16-lane half).
;; Expected assembly: copy $xr0 to $xr1 (xvori.b with immediate 0), xvpermi.q,
;; vinsgr2vr.b on $vr1 with lane 0, then a second xvpermi.q back into $xr0.
;; The xvpermi.q pair presumably moves the upper 128 bits down and back --
;; TODO confirm against the LASX manual.
define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind {
; CHECK-LABEL: insert_32xi8_upper:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 0
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
  %v = load volatile <32 x i8>, ptr %src
  %v_new = insertelement <32 x i8> %v, i8 %in, i32 16
  store <32 x i8> %v_new, ptr %dst
  ret void
}

;; <16 x i16>, constant index 1.
;; Expected assembly: a single vinsgr2vr.h on $vr0 with lane 1.
define void @insert_16xi16(ptr %src, ptr %dst, i16 %in) nounwind {
; CHECK-LABEL: insert_16xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
  %v = load volatile <16 x i16>, ptr %src
  %v_new = insertelement <16 x i16> %v, i16 %in, i32 1
  store <16 x i16> %v_new, ptr %dst
  ret void
}

;; <16 x i16>, constant index 8 (first lane of the second 8-lane half).
;; Expected assembly has the same shape as insert_32xi8_upper, with
;; vinsgr2vr.h (lane 0) as the insert instruction.
define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind {
; CHECK-LABEL: insert_16xi16_upper:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvori.b $xr1, $xr0, 0
; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 0
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
  %v = load volatile <16 x i16>, ptr %src
  %v_new = insertelement <16 x i16> %v, i16 %in, i32 8
  store <16 x i16> %v_new, ptr %dst
  ret void
}

;; <8 x i32>, constant index 1.
;; Expected assembly: a single xvinsgr2vr.w on $xr0 with lane 1.
define void @insert_8xi32(ptr %src, ptr %dst, i32 %in) nounwind {
; CHECK-LABEL: insert_8xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
  %v = load volatile <8 x i32>, ptr %src
  %v_new = insertelement <8 x i32> %v, i32 %in, i32 1
  store <8 x i32> %v_new, ptr %dst
  ret void
}

;; <4 x i64>, constant index 1.
;; Expected assembly: a single xvinsgr2vr.d on $xr0 with lane 1.
define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind {
; CHECK-LABEL: insert_4xi64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
  %v = load volatile <4 x i64>, ptr %src
  %v_new = insertelement <4 x i64> %v, i64 %in, i32 1
  store <4 x i64> %v_new, ptr %dst
  ret void
}

;; <8 x float>, constant index 1.
;; Expected assembly: movfr2gr.s from $fa0 into $a0, then xvinsgr2vr.w with
;; lane 1, i.e. the same insert instruction as the i32 case. The vector is
;; held in $xr1 here rather than $xr0.
define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind {
; CHECK-LABEL: insert_8xfloat:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvinsgr2vr.w $xr1, $a0, 1
; CHECK-NEXT: xvst $xr1, $a1, 0
; CHECK-NEXT: ret
  %v = load volatile <8 x float>, ptr %src
  %v_new = insertelement <8 x float> %v, float %in, i32 1
  store <8 x float> %v_new, ptr %dst
  ret void
}

;; <4 x double>, constant index 1.
;; Expected assembly: movfr2gr.d from $fa0 into $a0, then xvinsgr2vr.d with
;; lane 1, i.e. the same insert instruction as the i64 case.
define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind {
; CHECK-LABEL: insert_4xdouble:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: movfr2gr.d $a0, $fa0
; CHECK-NEXT: xvinsgr2vr.d $xr1, $a0, 1
; CHECK-NEXT: xvst $xr1, $a1, 0
; CHECK-NEXT: ret
  %v = load volatile <4 x double>, ptr %src
  %v_new = insertelement <4 x double> %v, double %in, i32 1
  store <4 x double> %v_new, ptr %dst
  ret void
}

;; <32 x i8>, run-time index %idx.
;; Expected assembly goes through memory: set up a 64-byte frame with $ra/$fp
;; saved, store the loaded vector at $sp, form an address from $sp and the
;; index register ($a3) with bstrins.d, store the scalar with st.b, reload the
;; vector from $sp and store it to the destination.
;; `bstrins.d $sp, $zero, 4, 0` presumably clears the low five bits of $sp
;; (32-byte alignment for the vector slot) -- TODO confirm.
define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_32xi8_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0
; CHECK-NEXT: st.b $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
  %v = load volatile <32 x i8>, ptr %src
  %v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx
  store <32 x i8> %v_new, ptr %dst
  ret void
}

;; <16 x i16>, run-time index %idx.
;; Same stack round-trip as insert_32xi8_idx; the address-forming bstrins.d
;; uses bit range 4..1 and the scalar is stored with st.h.
define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_16xi16_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1
; CHECK-NEXT: st.h $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
  %v = load volatile <16 x i16>, ptr %src
  %v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx
  store <16 x i16> %v_new, ptr %dst
  ret void
}

;; <8 x i32>, run-time index %idx.
;; Same stack round-trip as insert_32xi8_idx; the address-forming bstrins.d
;; uses bit range 4..2 and the scalar is stored with st.w.
define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_8xi32_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2
; CHECK-NEXT: st.w $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
  %v = load volatile <8 x i32>, ptr %src
  %v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx
  store <8 x i32> %v_new, ptr %dst
  ret void
}

;; <4 x i64>, run-time index %idx.
;; Same stack round-trip as insert_32xi8_idx; the address-forming bstrins.d
;; uses bit range 4..3 and the scalar is stored with st.d.
define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xi64_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3
; CHECK-NEXT: st.d $a2, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
  %v = load volatile <4 x i64>, ptr %src
  %v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx
  store <4 x i64> %v_new, ptr %dst
  ret void
}

;; <8 x float>, run-time index %idx.
;; Same stack round-trip as the integer variants, with two differences visible
;; in the expected assembly: the index register is $a2 (not $a3), and the
;; scalar is stored straight from $fa0 with fst.s. The vector is first loaded
;; into $xr1 and reloaded into $xr0.
define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_8xfloat_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: xvst $xr1, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
; CHECK-NEXT: fst.s $fa0, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
  %v = load volatile <8 x float>, ptr %src
  %v_new = insertelement <8 x float> %v, float %in, i32 %idx
  store <8 x float> %v_new, ptr %dst
  ret void
}

;; <4 x double>, run-time index %idx.
;; Same shape as insert_8xfloat_idx; the address-forming bstrins.d uses bit
;; range 4..3 and the scalar is stored from $fa0 with fst.d.
define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind {
; CHECK-LABEL: insert_4xdouble_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -64
; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
; CHECK-NEXT: xvld $xr1, $a0, 0
; CHECK-NEXT: xvst $xr1, $sp, 0
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
; CHECK-NEXT: fst.d $fa0, $a0, 0
; CHECK-NEXT: xvld $xr0, $sp, 0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 64
; CHECK-NEXT: ret
  %v = load volatile <4 x double>, ptr %src
  %v_new = insertelement <4 x double> %v, double %in, i32 %idx
  store <4 x double> %v_new, ptr %dst
  ret void
}