; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64 -mattr=+v8.9a -mattr=+rcpc3 < %s | FileCheck --check-prefixes=BOTH,RCPC3 %s
; RUN: llc -mtriple=aarch64 -mattr=+v8.9a < %s | FileCheck --check-prefixes=BOTH,NO-RCPC3 %s

; Load-acquire tests. Each function performs an atomic acquire load of a
; 64-bit scalar and inserts it into a vector lane. The expected output with
; +rcpc3 is a single LDAP1 lane load; without it, LDAPR followed by a move
; into the vector register.

; Acquire-load an i64 into lane 0 of a <2 x i64>.
define hidden <2 x i64> @test_ldap1_2xi64_lane0(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    mov v0.d[0], x8
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic i64, ptr %a acquire, align 8
  %result = insertelement <2 x i64> %b, i64 %loaded, i64 0
  ret <2 x i64> %result
}

; Acquire-load an i64 into lane 1 of a <2 x i64>.
define hidden <2 x i64> @test_ldap1_2xi64_lane1(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xi64_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xi64_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    mov v0.d[1], x8
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic i64, ptr %a acquire, align 8
  %result = insertelement <2 x i64> %b, i64 %loaded, i64 1
  ret <2 x i64> %result
}

; Acquire-load a double into lane 0 of a <2 x double>.
define hidden nofpclass(nan inf) <2 x double> @test_ldap1_2xdouble_lane0(ptr nocapture noundef readonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d1, x8
; NO-RCPC3-NEXT:    mov v0.d[0], v1.d[0]
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic double, ptr %a acquire, align 8
  %result = insertelement <2 x double> %b, double %loaded, i64 0
  ret <2 x double> %result
}

; Acquire-load a double into lane 1 of a <2 x double>.
define hidden nofpclass(nan inf) <2 x double> @test_ldap1_2xdouble_lane1(ptr nocapture noundef readonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_2xdouble_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_2xdouble_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d1, x8
; NO-RCPC3-NEXT:    mov v0.d[1], v1.d[0]
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic double, ptr %a acquire, align 8
  %result = insertelement <2 x double> %b, double %loaded, i64 1
  ret <2 x double> %result
}

; Acquire-load an i64 into the only lane of a <1 x i64>. The insert is made
; into poison, so the incoming %b is not used.
define hidden <1 x i64> @test_ldap1_1xi64_lane0(ptr nocapture noundef readonly %a, <1 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_1xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_1xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d0, x8
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic i64, ptr %a acquire, align 8
  %result = insertelement <1 x i64> poison, i64 %loaded, i64 0
  ret <1 x i64> %result
}

; Acquire-load a double into the only lane of a <1 x double>. The insert is
; made into poison, so the incoming %b is not used.
define hidden nofpclass(nan inf) <1 x double> @test_ldap1_1xdouble_lane0(ptr nocapture noundef readonly %a, <1 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_ldap1_1xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_ldap1_1xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    ldapr x8, [x0]
; NO-RCPC3-NEXT:    fmov d0, x8
; NO-RCPC3-NEXT:    ret
  %loaded = load atomic double, ptr %a acquire, align 8
  %result = insertelement <1 x double> poison, double %loaded, i64 0
  ret <1 x double> %result
}

; Store-release tests. Each function extracts a 64-bit lane from a vector and
; performs an atomic release store of it. The expected output with +rcpc3 is
; a single STL1 lane store; without it, a move to a general register followed
; by STLR.

; Release-store lane 0 of a <2 x i64>.
define hidden void @test_stl1_2xi64_lane0(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %lane = extractelement <2 x i64> %b, i64 0
  store atomic i64 %lane, ptr %a release, align 8
  ret void
}

; Release-store lane 1 of a <2 x i64>.
define hidden void @test_stl1_2xi64_lane1(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xi64_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xi64_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    mov x8, v0.d[1]
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %lane = extractelement <2 x i64> %b, i64 1
  store atomic i64 %lane, ptr %a release, align 8
  ret void
}

; Release-store lane 0 of a <2 x double>.
define hidden void @test_stl1_2xdouble_lane0(ptr nocapture noundef writeonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %lane = extractelement <2 x double> %b, i64 0
  store atomic double %lane, ptr %a release, align 8
  ret void
}

; Release-store lane 1 of a <2 x double>.
define hidden void @test_stl1_2xdouble_lane1(ptr nocapture noundef writeonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_2xdouble_lane1:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    stl1 { v0.d }[1], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_2xdouble_lane1:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    mov d0, v0.d[1]
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %lane = extractelement <2 x double> %b, i64 1
  store atomic double %lane, ptr %a release, align 8
  ret void
}

; Release-store the only lane of a <1 x i64>.
define hidden void @test_stl1_1xi64_lane0(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_1xi64_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_1xi64_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %lane = extractelement <1 x i64> %b, i64 0
  store atomic i64 %lane, ptr %a release, align 8
  ret void
}

; Release-store the only lane of a <1 x double>.
define hidden void @test_stl1_1xdouble_lane0(ptr nocapture noundef writeonly %a, <1 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr {
;
; RCPC3-LABEL: test_stl1_1xdouble_lane0:
; RCPC3:       // %bb.0:
; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
; RCPC3-NEXT:    ret
;
; NO-RCPC3-LABEL: test_stl1_1xdouble_lane0:
; NO-RCPC3:       // %bb.0:
; NO-RCPC3-NEXT:    fmov x8, d0
; NO-RCPC3-NEXT:    stlr x8, [x0]
; NO-RCPC3-NEXT:    ret
  %lane = extractelement <1 x double> %b, i64 0
  store atomic double %lane, ptr %a release, align 8
  ret void
}

; The tests from here on expect identical code with and without +rcpc3, so
; they are checked under the shared prefix only.

; Acquire-load a scalar double (no vector involved).
define hidden double @test_double_load(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_double_load:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldapr x8, [x0]
; BOTH-NEXT:    fmov d0, x8
; BOTH-NEXT:    ret
  %loaded = load atomic double, ptr %a acquire, align 8
  ret double %loaded
}

; store-release a plain non-vector double value
define hidden void @test_double_store(ptr nocapture noundef writeonly %a, double noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_double_store:
; BOTH:       // %bb.0:
; BOTH-NEXT:    fmov x8, d0
; BOTH-NEXT:    stlr x8, [x0]
; BOTH-NEXT:    ret
  store atomic double %b, ptr %a release, align 8
  ret void
}

; load-acquire an i64, followed by a bitcast to a 64-bit vector
define hidden <2 x i32> @test_load_i64_bitcast_2xi32(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_load_i64_bitcast_2xi32:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldapr x8, [x0]
; BOTH-NEXT:    fmov d0, x8
; BOTH-NEXT:    ret
  %1 = load atomic i64, ptr %a acquire, align 8
  %2 = bitcast i64 %1 to <2 x i32>
  ret <2 x i32> %2
}

; bitcast from a 64-bit vector, followed by a store-release of the i64.
; %a is only written through, so it carries writeonly like the other store
; tests in this file.
define hidden void @test_bitcast_2xi32_store_i64(ptr nocapture noundef writeonly %a, <2 x i32> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_bitcast_2xi32_store_i64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    fmov x8, d0
; BOTH-NEXT:    stlr x8, [x0]
; BOTH-NEXT:    ret
  %1 = bitcast <2 x i32> %b to i64
  store atomic i64 %1, ptr %a release, align 8
  ret void
}

; (non-atomic) load a <2 x i32> 64-bit vector
define hidden <2 x i32> @test_load_2xi32(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_load_2xi32:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldr d0, [x0]
; BOTH-NEXT:    ret
  %1 = load <2 x i32>, ptr %a, align 8
  ret <2 x i32> %1
}

; (non-atomic) store a <2 x i32> 64-bit vector
define hidden void @test_store_2xi32(ptr nocapture noundef writeonly %a, <2 x i32> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_store_2xi32:
; BOTH:       // %bb.0:
; BOTH-NEXT:    str d0, [x0]
; BOTH-NEXT:    ret
  store <2 x i32> %b, ptr %a, align 8
  ret void
}

; (non-atomic) load a <1 x i64> 64-bit vector
define hidden <1 x i64> @test_load_1xi64(ptr nocapture noundef readonly %a) local_unnamed_addr {
; BOTH-LABEL: test_load_1xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ldr d0, [x0]
; BOTH-NEXT:    ret
  %1 = load <1 x i64>, ptr %a, align 8
  ret <1 x i64> %1
}

; (non-atomic) store a <1 x i64> 64-bit vector
define hidden void @test_store_1xi64(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_store_1xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    str d0, [x0]
; BOTH-NEXT:    ret
  store <1 x i64> %b, ptr %a, align 8
  ret void
}

; (non-atomic) load a 64-bit value and insert into lane 0 of a vector;
; the plain LD1 lane load is expected in both configurations
define hidden <2 x i64> @test_load_insert_2xi64(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_load_insert_2xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    ld1 { v0.d }[0], [x0]
; BOTH-NEXT:    ret
  %1 = load i64, ptr %a, align 8
  %2 = insertelement <2 x i64> %b, i64 %1, i64 0
  ret <2 x i64> %2
}

; extract lane 1 from a vector and (non-atomic) store the 64-bit value;
; the plain ST1 lane store is expected in both configurations
define hidden void @test_extract_store_2xi64(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr {
; BOTH-LABEL: test_extract_store_2xi64:
; BOTH:       // %bb.0:
; BOTH-NEXT:    st1 { v0.d }[1], [x0]
; BOTH-NEXT:    ret
  %1 = extractelement <2 x i64> %b, i64 1
  store i64 %1, ptr %a, align 8
  ret void
}