; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Every function below follows one shape: build a lane mask from (load %m) != 0,
; perform a masked vector load from %x plus a constant byte offset, optionally
; extend the result, store it to %y and return the unmodified %x.
; Function names encode <load kind>_<offset>; an "m" before the number marks a
; negative offset. Both run lines (thumb and thumbeb triples) share one prefix,
; so the expected assembly is identical for the two.

; --- Masked <4 x i32> loads (expected predicated load: vldrwt.u32) ---
; In this group the expected output folds offsets 4, 508 and -508 into the load
; and forms the address separately for 3, 2, 512 and -512 (presumably the
; immediate must be a multiple of 4 within +/-508; confirm against the MVE
; VLDRW encoding).

; Offset +4: expected to fold into the predicated load as [r0, #4].
define ptr @ldrwu32_4(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrwu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrwt.u32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %x
}

; Offset +3: expected to need a separate adds into r3, then a load from [r3].
define ptr @ldrwu32_3(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrwu32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    adds r3, r0, #3
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrwt.u32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %x
}

; Offset +2: expected to need a separate adds into r3, then a load from [r3].
define ptr @ldrwu32_2(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrwu32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    adds r3, r0, #2
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrwt.u32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %x
}

; Offset +508: expected to fold into the predicated load as [r0, #508].
define ptr @ldrwu32_508(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrwu32_508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrwt.u32 q0, [r0, #508]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 508
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %x
}

; Offset +512: expected to need a separate add.w into r3, then a load from [r3].
define ptr @ldrwu32_512(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrwu32_512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    add.w r3, r0, #512
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrwt.u32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 512
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %x
}

; Offset -508: expected to fold into the predicated load as [r0, #-508].
define ptr @ldrwu32_m508(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrwu32_m508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrwt.u32 q0, [r0, #-508]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -508
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %x
}

; Offset -512: expected to need a separate sub.w into r3, then a load from [r3].
define ptr @ldrwu32_m512(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrwu32_m512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    sub.w r3, r0, #512
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrwt.u32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -512
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
  store <4 x i32> %0, ptr %y, align 4
  ret ptr %x
}

; --- Masked <4 x i16> loads zero-extended to <4 x i32> (expected: vldrht.u32) ---
; In this group the expected output folds offsets 4, 2, 254 and -254 and forms
; the address separately for 3, 256 and -256 (presumably the immediate must be
; a multiple of 2 within +/-254; confirm against the MVE VLDRH encoding).

; Offset +4: expected to fold into the predicated load as [r0, #4].
define ptr @ldrhu32_4(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.u32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +3: expected to need a separate adds into r3, then a load from [r3].
define ptr @ldrhu32_3(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    adds r3, r0, #3
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.u32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +2: expected to fold into the predicated load as [r0, #2].
define ptr @ldrhu32_2(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.u32 q0, [r0, #2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +254: expected to fold into the predicated load as [r0, #254].
define ptr @ldrhu32_254(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.u32 q0, [r0, #254]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 254
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +256: expected to need a separate add.w into r3, then a load from [r3].
define ptr @ldrhu32_256(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    add.w r3, r0, #256
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.u32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 256
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset -254: expected to fold into the predicated load as [r0, #-254].
define ptr @ldrhu32_m254(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.u32 q0, [r0, #-254]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -254
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset -256: expected to need a separate sub.w into r3, then a load from [r3].
define ptr @ldrhu32_m256(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    sub.w r3, r0, #256
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.u32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -256
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; --- Masked <4 x i16> loads sign-extended to <4 x i32> (expected: vldrht.s32) ---
; Same offsets and same fold / no-fold split as the zero-extending variants
; above; only the extension (sext) and the .s32 suffix differ.

; Offset +4: expected to fold into the predicated load as [r0, #4].
define ptr @ldrhs32_4(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhs32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.s32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +3: expected to need a separate adds into r3, then a load from [r3].
define ptr @ldrhs32_3(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhs32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    adds r3, r0, #3
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.s32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +2: expected to fold into the predicated load as [r0, #2].
define ptr @ldrhs32_2(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhs32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.s32 q0, [r0, #2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +254: expected to fold into the predicated load as [r0, #254].
define ptr @ldrhs32_254(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhs32_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.s32 q0, [r0, #254]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 254
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +256: expected to need a separate add.w into r3, then a load from [r3].
define ptr @ldrhs32_256(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhs32_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    add.w r3, r0, #256
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.s32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 256
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset -254: expected to fold into the predicated load as [r0, #-254].
define ptr @ldrhs32_m254(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhs32_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.s32 q0, [r0, #-254]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -254
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset -256: expected to need a separate sub.w into r3, then a load from [r3].
define ptr @ldrhs32_m256(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhs32_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    sub.w r3, r0, #256
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrht.s32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -256
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %z, i32 2, <4 x i1> %c, <4 x i16> undef)
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; --- Masked <8 x i16> loads (expected predicated load: vldrht.u16) ---
; The mask here is built from an <8 x i16> load of %m (vldrh.u16 / vpt.i16).

; Offset +4: expected to fold into the predicated load as [r0, #4].
define ptr @ldrhu16_4(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrht.u16 q0, [r0, #4]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %x
}

; Masked <8 x i16> load at byte offset +3: expected to need a separate adds
; into r3, then a predicated vldrht.u16 from [r3].
define ptr @ldrhu16_3(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    adds r3, r0, #3
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrht.u16 q0, [r3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %x
}

; Masked <8 x i16> load at byte offset +2: expected to fold into the
; predicated load as [r0, #2].
define ptr @ldrhu16_2(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrht.u16 q0, [r0, #2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %x
}

; Masked <8 x i16> load at byte offset +254: expected to fold into the
; predicated load as [r0, #254].
define ptr @ldrhu16_254(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu16_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrht.u16 q0, [r0, #254]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 254
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %x
}

; Masked <8 x i16> load at byte offset +256: expected to need a separate add.w
; into r3, then a predicated vldrht.u16 from [r3].
define ptr @ldrhu16_256(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu16_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    add.w r3, r0, #256
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrht.u16 q0, [r3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 256
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %x
}

; Masked <8 x i16> load at byte offset -254: expected to fold into the
; predicated load as [r0, #-254].
define ptr @ldrhu16_m254(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu16_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrht.u16 q0, [r0, #-254]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -254
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %x
}

; Masked <8 x i16> load at byte offset -256: expected to need a separate sub.w
; into r3, then a predicated vldrht.u16 from [r3].
define ptr @ldrhu16_m256(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrhu16_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    sub.w r3, r0, #256
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrht.u16 q0, [r3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -256
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x i16> undef)
  store <8 x i16> %0, ptr %y, align 2
  ret ptr %x
}

; --- Masked <4 x i8> loads zero-extended to <4 x i32> (expected: vldrbt.u32) ---
; In this group the expected output folds offsets 4, 3, 2, 127 and -127 and
; forms the address separately for 128 and -128 (presumably the byte-load
; immediate covers +/-127; confirm against the MVE VLDRB encoding).

; Offset +4: expected to fold into the predicated load as [r0, #4].
define ptr @ldrbu32_4(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.u32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +3: expected to fold into the predicated load as [r0, #3].
define ptr @ldrbu32_3(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.u32 q0, [r0, #3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +2: expected to fold into the predicated load as [r0, #2].
define ptr @ldrbu32_2(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.u32 q0, [r0, #2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +127: expected to fold into the predicated load as [r0, #127].
define ptr @ldrbu32_127(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.u32 q0, [r0, #127]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 127
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +128: expected to need a separate add.w into r3, then a load from [r3].
define ptr @ldrbu32_128(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    add.w r3, r0, #128
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.u32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 128
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset -127: expected to fold into the predicated load as [r0, #-127].
define ptr @ldrbu32_m127(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.u32 q0, [r0, #-127]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -127
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset -128: expected to need a separate sub.w into r3, then a load from [r3].
define ptr @ldrbu32_m128(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    sub.w r3, r0, #128
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.u32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -128
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; --- Masked <4 x i8> loads sign-extended to <4 x i32> (expected: vldrbt.s32) ---

; Offset +4: expected to fold into the predicated load as [r0, #4].
define ptr @ldrbs32_4(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.s32 q0, [r0, #4]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +3: expected to fold into the predicated load as [r0, #3].
define ptr @ldrbs32_3(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.s32 q0, [r0, #3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Offset +2: expected to fold into the predicated load as [r0, #2].
define ptr @ldrbs32_2(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.s32 q0, [r0, #2]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Sign-extending <4 x i8> masked load at byte offset +127: expected to fold
; into the predicated load as [r0, #127].
define ptr @ldrbs32_127(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.s32 q0, [r0, #127]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 127
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Sign-extending <4 x i8> masked load at byte offset +128: expected to need a
; separate add.w into r3, then a load from [r3].
define ptr @ldrbs32_128(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    add.w r3, r0, #128
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.s32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 128
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Sign-extending <4 x i8> masked load at byte offset -127: expected to fold
; into the predicated load as [r0, #-127].
define ptr @ldrbs32_m127(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.s32 q0, [r0, #-127]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -127
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; Sign-extending <4 x i8> masked load at byte offset -128: expected to need a
; separate sub.w into r3, then a load from [r3].
define ptr @ldrbs32_m128(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    sub.w r3, r0, #128
; CHECK-NEXT:    vpt.i32 ne, q0, zr
; CHECK-NEXT:    vldrbt.s32 q0, [r3]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -128
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %z, i32 1, <4 x i1> %c, <4 x i8> undef)
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, ptr %y, align 4
  ret ptr %x
}

; --- Masked <8 x i8> loads zero-extended to <8 x i16> (expected: vldrbt.u16) ---
; The mask here is built from an <8 x i16> load of %m (vldrh.u16 / vpt.i16).

; Offset +4: expected to fold into the predicated load as [r0, #4].
define ptr @ldrbu16_4(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.u16 q0, [r0, #4]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Offset +3: expected to fold into the predicated load as [r0, #3].
define ptr @ldrbu16_3(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.u16 q0, [r0, #3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Zero-extending <8 x i8> masked load at byte offset +2: expected to fold into
; the predicated load as [r0, #2].
define ptr @ldrbu16_2(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.u16 q0, [r0, #2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Zero-extending <8 x i8> masked load at byte offset +127: expected to fold
; into the predicated load as [r0, #127].
define ptr @ldrbu16_127(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.u16 q0, [r0, #127]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 127
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Zero-extending <8 x i8> masked load at byte offset +128: expected to need a
; separate add.w into r3, then a load from [r3].
define ptr @ldrbu16_128(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    add.w r3, r0, #128
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.u16 q0, [r3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 128
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Zero-extending <8 x i8> masked load at byte offset -127: expected to fold
; into the predicated load as [r0, #-127].
define ptr @ldrbu16_m127(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.u16 q0, [r0, #-127]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -127
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Zero-extending <8 x i8> masked load at byte offset -128: expected to need a
; separate sub.w into r3, then a load from [r3].
define ptr @ldrbu16_m128(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbu16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    sub.w r3, r0, #128
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.u16 q0, [r3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -128
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = zext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; --- Masked <8 x i8> loads sign-extended to <8 x i16> (expected: vldrbt.s16) ---

; Offset +4: expected to fold into the predicated load as [r0, #4].
define ptr @ldrbs16_4(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.s16 q0, [r0, #4]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Offset +3: expected to fold into the predicated load as [r0, #3].
define ptr @ldrbs16_3(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.s16 q0, [r0, #3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 3
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Offset +2: expected to fold into the predicated load as [r0, #2].
define ptr @ldrbs16_2(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.s16 q0, [r0, #2]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 2
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Offset +127: expected to fold into the predicated load as [r0, #127].
define ptr @ldrbs16_127(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.s16 q0, [r0, #127]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 127
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Offset +128: expected to need a separate add.w into r3, then a load from [r3].
define ptr @ldrbs16_128(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    add.w r3, r0, #128
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.s16 q0, [r3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 128
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

; Offset -127: expected to fold into the predicated load as [r0, #-127].
define ptr @ldrbs16_m127(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.s16 q0, [r0, #-127]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -127
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x i1> %c, <8 x i8> undef)
  %1 = sext <8 x i8> %0 to <8 x i16>
  store <8 x i16> %1, ptr %y, align 2
  ret ptr %x
}

define ptr @ldrbs16_m128(ptr %x, ptr %y, ptr %m) {
; CHECK-LABEL: ldrbs16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r2]
; CHECK-NEXT:    sub.w r3, r0, #128
; CHECK-NEXT:    vpt.i16 ne, q0, zr
; CHECK-NEXT:    vldrbt.s16 q0, [r3]
; CHECK-NEXT:    vstrh.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 -128
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %z, i32 1, <8 x
i1> %c, <8 x i8> undef) 1015 %1 = sext <8 x i8> %0 to <8 x i16> 1016 store <8 x i16> %1, ptr %y, align 2 1017 ret ptr %x 1018} 1019 1020define ptr @ldrbu8_4(ptr %x, ptr %y, ptr %m) { 1021; CHECK-LABEL: ldrbu8_4: 1022; CHECK: @ %bb.0: @ %entry 1023; CHECK-NEXT: vldrb.u8 q0, [r2] 1024; CHECK-NEXT: vpt.i8 ne, q0, zr 1025; CHECK-NEXT: vldrbt.u8 q0, [r0, #4] 1026; CHECK-NEXT: vstrb.8 q0, [r1] 1027; CHECK-NEXT: bx lr 1028entry: 1029 %z = getelementptr inbounds i8, ptr %x, i32 4 1030 %mask = load <16 x i8>, ptr %m, align 1 1031 %c = icmp ne <16 x i8> %mask, zeroinitializer 1032 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef) 1033 store <16 x i8> %0, ptr %y, align 1 1034 ret ptr %x 1035} 1036 1037define ptr @ldrbu8_3(ptr %x, ptr %y, ptr %m) { 1038; CHECK-LABEL: ldrbu8_3: 1039; CHECK: @ %bb.0: @ %entry 1040; CHECK-NEXT: vldrb.u8 q0, [r2] 1041; CHECK-NEXT: vpt.i8 ne, q0, zr 1042; CHECK-NEXT: vldrbt.u8 q0, [r0, #3] 1043; CHECK-NEXT: vstrb.8 q0, [r1] 1044; CHECK-NEXT: bx lr 1045entry: 1046 %z = getelementptr inbounds i8, ptr %x, i32 3 1047 %mask = load <16 x i8>, ptr %m, align 1 1048 %c = icmp ne <16 x i8> %mask, zeroinitializer 1049 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef) 1050 store <16 x i8> %0, ptr %y, align 1 1051 ret ptr %x 1052} 1053 1054define ptr @ldrbu8_2(ptr %x, ptr %y, ptr %m) { 1055; CHECK-LABEL: ldrbu8_2: 1056; CHECK: @ %bb.0: @ %entry 1057; CHECK-NEXT: vldrb.u8 q0, [r2] 1058; CHECK-NEXT: vpt.i8 ne, q0, zr 1059; CHECK-NEXT: vldrbt.u8 q0, [r0, #2] 1060; CHECK-NEXT: vstrb.8 q0, [r1] 1061; CHECK-NEXT: bx lr 1062entry: 1063 %z = getelementptr inbounds i8, ptr %x, i32 2 1064 %mask = load <16 x i8>, ptr %m, align 1 1065 %c = icmp ne <16 x i8> %mask, zeroinitializer 1066 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef) 1067 store <16 x i8> %0, ptr %y, align 1 1068 ret ptr %x 1069} 1070 1071define ptr @ldrbu8_127(ptr %x, ptr %y, ptr 
%m) { 1072; CHECK-LABEL: ldrbu8_127: 1073; CHECK: @ %bb.0: @ %entry 1074; CHECK-NEXT: vldrb.u8 q0, [r2] 1075; CHECK-NEXT: vpt.i8 ne, q0, zr 1076; CHECK-NEXT: vldrbt.u8 q0, [r0, #127] 1077; CHECK-NEXT: vstrb.8 q0, [r1] 1078; CHECK-NEXT: bx lr 1079entry: 1080 %z = getelementptr inbounds i8, ptr %x, i32 127 1081 %mask = load <16 x i8>, ptr %m, align 1 1082 %c = icmp ne <16 x i8> %mask, zeroinitializer 1083 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef) 1084 store <16 x i8> %0, ptr %y, align 1 1085 ret ptr %x 1086} 1087 1088define ptr @ldrbu8_128(ptr %x, ptr %y, ptr %m) { 1089; CHECK-LABEL: ldrbu8_128: 1090; CHECK: @ %bb.0: @ %entry 1091; CHECK-NEXT: vldrb.u8 q0, [r2] 1092; CHECK-NEXT: add.w r3, r0, #128 1093; CHECK-NEXT: vpt.i8 ne, q0, zr 1094; CHECK-NEXT: vldrbt.u8 q0, [r3] 1095; CHECK-NEXT: vstrb.8 q0, [r1] 1096; CHECK-NEXT: bx lr 1097entry: 1098 %z = getelementptr inbounds i8, ptr %x, i32 128 1099 %mask = load <16 x i8>, ptr %m, align 1 1100 %c = icmp ne <16 x i8> %mask, zeroinitializer 1101 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef) 1102 store <16 x i8> %0, ptr %y, align 1 1103 ret ptr %x 1104} 1105 1106define ptr @ldrbu8_m127(ptr %x, ptr %y, ptr %m) { 1107; CHECK-LABEL: ldrbu8_m127: 1108; CHECK: @ %bb.0: @ %entry 1109; CHECK-NEXT: vldrb.u8 q0, [r2] 1110; CHECK-NEXT: vpt.i8 ne, q0, zr 1111; CHECK-NEXT: vldrbt.u8 q0, [r0, #-127] 1112; CHECK-NEXT: vstrb.8 q0, [r1] 1113; CHECK-NEXT: bx lr 1114entry: 1115 %z = getelementptr inbounds i8, ptr %x, i32 -127 1116 %mask = load <16 x i8>, ptr %m, align 1 1117 %c = icmp ne <16 x i8> %mask, zeroinitializer 1118 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef) 1119 store <16 x i8> %0, ptr %y, align 1 1120 ret ptr %x 1121} 1122 1123define ptr @ldrbu8_m128(ptr %x, ptr %y, ptr %m) { 1124; CHECK-LABEL: ldrbu8_m128: 1125; CHECK: @ %bb.0: @ %entry 1126; CHECK-NEXT: vldrb.u8 q0, [r2] 1127; 
CHECK-NEXT: sub.w r3, r0, #128 1128; CHECK-NEXT: vpt.i8 ne, q0, zr 1129; CHECK-NEXT: vldrbt.u8 q0, [r3] 1130; CHECK-NEXT: vstrb.8 q0, [r1] 1131; CHECK-NEXT: bx lr 1132entry: 1133 %z = getelementptr inbounds i8, ptr %x, i32 -128 1134 %mask = load <16 x i8>, ptr %m, align 1 1135 %c = icmp ne <16 x i8> %mask, zeroinitializer 1136 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 1, <16 x i1> %c, <16 x i8> undef) 1137 store <16 x i8> %0, ptr %y, align 1 1138 ret ptr %x 1139} 1140 1141define ptr @ldrwf32_4(ptr %x, ptr %y, ptr %m) { 1142; CHECK-LABEL: ldrwf32_4: 1143; CHECK: @ %bb.0: @ %entry 1144; CHECK-NEXT: vldrw.u32 q0, [r2] 1145; CHECK-NEXT: vpt.i32 ne, q0, zr 1146; CHECK-NEXT: vldrwt.u32 q0, [r0, #4] 1147; CHECK-NEXT: vstrw.32 q0, [r1] 1148; CHECK-NEXT: bx lr 1149entry: 1150 %z = getelementptr inbounds i8, ptr %x, i32 4 1151 %mask = load <4 x i32>, ptr %m, align 4 1152 %c = icmp ne <4 x i32> %mask, zeroinitializer 1153 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef) 1154 store <4 x float> %0, ptr %y, align 4 1155 ret ptr %x 1156} 1157 1158define ptr @ldrwf32_3(ptr %x, ptr %y, ptr %m) { 1159; CHECK-LABEL: ldrwf32_3: 1160; CHECK: @ %bb.0: @ %entry 1161; CHECK-NEXT: vldrw.u32 q0, [r2] 1162; CHECK-NEXT: adds r3, r0, #3 1163; CHECK-NEXT: vpt.i32 ne, q0, zr 1164; CHECK-NEXT: vldrwt.u32 q0, [r3] 1165; CHECK-NEXT: vstrw.32 q0, [r1] 1166; CHECK-NEXT: bx lr 1167entry: 1168 %z = getelementptr inbounds i8, ptr %x, i32 3 1169 %mask = load <4 x i32>, ptr %m, align 4 1170 %c = icmp ne <4 x i32> %mask, zeroinitializer 1171 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef) 1172 store <4 x float> %0, ptr %y, align 4 1173 ret ptr %x 1174} 1175 1176define ptr @ldrwf32_2(ptr %x, ptr %y, ptr %m) { 1177; CHECK-LABEL: ldrwf32_2: 1178; CHECK: @ %bb.0: @ %entry 1179; CHECK-NEXT: vldrw.u32 q0, [r2] 1180; CHECK-NEXT: adds r3, r0, #2 1181; CHECK-NEXT: vpt.i32 ne, q0, zr 1182; CHECK-NEXT: 
vldrwt.u32 q0, [r3] 1183; CHECK-NEXT: vstrw.32 q0, [r1] 1184; CHECK-NEXT: bx lr 1185entry: 1186 %z = getelementptr inbounds i8, ptr %x, i32 2 1187 %mask = load <4 x i32>, ptr %m, align 4 1188 %c = icmp ne <4 x i32> %mask, zeroinitializer 1189 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef) 1190 store <4 x float> %0, ptr %y, align 4 1191 ret ptr %x 1192} 1193 1194define ptr @ldrwf32_508(ptr %x, ptr %y, ptr %m) { 1195; CHECK-LABEL: ldrwf32_508: 1196; CHECK: @ %bb.0: @ %entry 1197; CHECK-NEXT: vldrw.u32 q0, [r2] 1198; CHECK-NEXT: vpt.i32 ne, q0, zr 1199; CHECK-NEXT: vldrwt.u32 q0, [r0, #508] 1200; CHECK-NEXT: vstrw.32 q0, [r1] 1201; CHECK-NEXT: bx lr 1202entry: 1203 %z = getelementptr inbounds i8, ptr %x, i32 508 1204 %mask = load <4 x i32>, ptr %m, align 4 1205 %c = icmp ne <4 x i32> %mask, zeroinitializer 1206 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef) 1207 store <4 x float> %0, ptr %y, align 4 1208 ret ptr %x 1209} 1210 1211define ptr @ldrwf32_512(ptr %x, ptr %y, ptr %m) { 1212; CHECK-LABEL: ldrwf32_512: 1213; CHECK: @ %bb.0: @ %entry 1214; CHECK-NEXT: vldrw.u32 q0, [r2] 1215; CHECK-NEXT: add.w r3, r0, #512 1216; CHECK-NEXT: vpt.i32 ne, q0, zr 1217; CHECK-NEXT: vldrwt.u32 q0, [r3] 1218; CHECK-NEXT: vstrw.32 q0, [r1] 1219; CHECK-NEXT: bx lr 1220entry: 1221 %z = getelementptr inbounds i8, ptr %x, i32 512 1222 %mask = load <4 x i32>, ptr %m, align 4 1223 %c = icmp ne <4 x i32> %mask, zeroinitializer 1224 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef) 1225 store <4 x float> %0, ptr %y, align 4 1226 ret ptr %x 1227} 1228 1229define ptr @ldrwf32_m508(ptr %x, ptr %y, ptr %m) { 1230; CHECK-LABEL: ldrwf32_m508: 1231; CHECK: @ %bb.0: @ %entry 1232; CHECK-NEXT: vldrw.u32 q0, [r2] 1233; CHECK-NEXT: vpt.i32 ne, q0, zr 1234; CHECK-NEXT: vldrwt.u32 q0, [r0, #-508] 1235; CHECK-NEXT: vstrw.32 q0, [r1] 1236; CHECK-NEXT: bx lr 
1237entry: 1238 %z = getelementptr inbounds i8, ptr %x, i32 -508 1239 %mask = load <4 x i32>, ptr %m, align 4 1240 %c = icmp ne <4 x i32> %mask, zeroinitializer 1241 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef) 1242 store <4 x float> %0, ptr %y, align 4 1243 ret ptr %x 1244} 1245 1246define ptr @ldrwf32_m512(ptr %x, ptr %y, ptr %m) { 1247; CHECK-LABEL: ldrwf32_m512: 1248; CHECK: @ %bb.0: @ %entry 1249; CHECK-NEXT: vldrw.u32 q0, [r2] 1250; CHECK-NEXT: sub.w r3, r0, #512 1251; CHECK-NEXT: vpt.i32 ne, q0, zr 1252; CHECK-NEXT: vldrwt.u32 q0, [r3] 1253; CHECK-NEXT: vstrw.32 q0, [r1] 1254; CHECK-NEXT: bx lr 1255entry: 1256 %z = getelementptr inbounds i8, ptr %x, i32 -512 1257 %mask = load <4 x i32>, ptr %m, align 4 1258 %c = icmp ne <4 x i32> %mask, zeroinitializer 1259 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef) 1260 store <4 x float> %0, ptr %y, align 4 1261 ret ptr %x 1262} 1263 1264define ptr @ldrhf16_4(ptr %x, ptr %y, ptr %m) { 1265; CHECK-LABEL: ldrhf16_4: 1266; CHECK: @ %bb.0: @ %entry 1267; CHECK-NEXT: vldrh.u16 q0, [r2] 1268; CHECK-NEXT: vpt.i16 ne, q0, zr 1269; CHECK-NEXT: vldrht.u16 q0, [r0, #4] 1270; CHECK-NEXT: vstrh.16 q0, [r1] 1271; CHECK-NEXT: bx lr 1272entry: 1273 %z = getelementptr inbounds i8, ptr %x, i32 4 1274 %mask = load <8 x i16>, ptr %m, align 2 1275 %c = icmp ne <8 x i16> %mask, zeroinitializer 1276 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef) 1277 store <8 x half> %0, ptr %y, align 2 1278 ret ptr %x 1279} 1280 1281define ptr @ldrhf16_3(ptr %x, ptr %y, ptr %m) { 1282; CHECK-LABEL: ldrhf16_3: 1283; CHECK: @ %bb.0: @ %entry 1284; CHECK-NEXT: vldrh.u16 q0, [r2] 1285; CHECK-NEXT: adds r3, r0, #3 1286; CHECK-NEXT: vpt.i16 ne, q0, zr 1287; CHECK-NEXT: vldrht.u16 q0, [r3] 1288; CHECK-NEXT: vstrh.16 q0, [r1] 1289; CHECK-NEXT: bx lr 1290entry: 1291 %z = getelementptr inbounds i8, ptr %x, i32 3 1292 %mask 
= load <8 x i16>, ptr %m, align 2 1293 %c = icmp ne <8 x i16> %mask, zeroinitializer 1294 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef) 1295 store <8 x half> %0, ptr %y, align 2 1296 ret ptr %x 1297} 1298 1299define ptr @ldrhf16_2(ptr %x, ptr %y, ptr %m) { 1300; CHECK-LABEL: ldrhf16_2: 1301; CHECK: @ %bb.0: @ %entry 1302; CHECK-NEXT: vldrh.u16 q0, [r2] 1303; CHECK-NEXT: vpt.i16 ne, q0, zr 1304; CHECK-NEXT: vldrht.u16 q0, [r0, #2] 1305; CHECK-NEXT: vstrh.16 q0, [r1] 1306; CHECK-NEXT: bx lr 1307entry: 1308 %z = getelementptr inbounds i8, ptr %x, i32 2 1309 %mask = load <8 x i16>, ptr %m, align 2 1310 %c = icmp ne <8 x i16> %mask, zeroinitializer 1311 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef) 1312 store <8 x half> %0, ptr %y, align 2 1313 ret ptr %x 1314} 1315 1316define ptr @ldrhf16_254(ptr %x, ptr %y, ptr %m) { 1317; CHECK-LABEL: ldrhf16_254: 1318; CHECK: @ %bb.0: @ %entry 1319; CHECK-NEXT: vldrh.u16 q0, [r2] 1320; CHECK-NEXT: vpt.i16 ne, q0, zr 1321; CHECK-NEXT: vldrht.u16 q0, [r0, #254] 1322; CHECK-NEXT: vstrh.16 q0, [r1] 1323; CHECK-NEXT: bx lr 1324entry: 1325 %z = getelementptr inbounds i8, ptr %x, i32 254 1326 %mask = load <8 x i16>, ptr %m, align 2 1327 %c = icmp ne <8 x i16> %mask, zeroinitializer 1328 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef) 1329 store <8 x half> %0, ptr %y, align 2 1330 ret ptr %x 1331} 1332 1333define ptr @ldrhf16_256(ptr %x, ptr %y, ptr %m) { 1334; CHECK-LABEL: ldrhf16_256: 1335; CHECK: @ %bb.0: @ %entry 1336; CHECK-NEXT: vldrh.u16 q0, [r2] 1337; CHECK-NEXT: add.w r3, r0, #256 1338; CHECK-NEXT: vpt.i16 ne, q0, zr 1339; CHECK-NEXT: vldrht.u16 q0, [r3] 1340; CHECK-NEXT: vstrh.16 q0, [r1] 1341; CHECK-NEXT: bx lr 1342entry: 1343 %z = getelementptr inbounds i8, ptr %x, i32 256 1344 %mask = load <8 x i16>, ptr %m, align 2 1345 %c = icmp ne <8 x i16> %mask, zeroinitializer 1346 %0 = call <8 x 
half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef) 1347 store <8 x half> %0, ptr %y, align 2 1348 ret ptr %x 1349} 1350 1351define ptr @ldrhf16_m254(ptr %x, ptr %y, ptr %m) { 1352; CHECK-LABEL: ldrhf16_m254: 1353; CHECK: @ %bb.0: @ %entry 1354; CHECK-NEXT: vldrh.u16 q0, [r2] 1355; CHECK-NEXT: vpt.i16 ne, q0, zr 1356; CHECK-NEXT: vldrht.u16 q0, [r0, #-254] 1357; CHECK-NEXT: vstrh.16 q0, [r1] 1358; CHECK-NEXT: bx lr 1359entry: 1360 %z = getelementptr inbounds i8, ptr %x, i32 -254 1361 %mask = load <8 x i16>, ptr %m, align 2 1362 %c = icmp ne <8 x i16> %mask, zeroinitializer 1363 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef) 1364 store <8 x half> %0, ptr %y, align 2 1365 ret ptr %x 1366} 1367 1368define ptr @ldrhf16_m256(ptr %x, ptr %y, ptr %m) { 1369; CHECK-LABEL: ldrhf16_m256: 1370; CHECK: @ %bb.0: @ %entry 1371; CHECK-NEXT: vldrh.u16 q0, [r2] 1372; CHECK-NEXT: sub.w r3, r0, #256 1373; CHECK-NEXT: vpt.i16 ne, q0, zr 1374; CHECK-NEXT: vldrht.u16 q0, [r3] 1375; CHECK-NEXT: vstrh.16 q0, [r1] 1376; CHECK-NEXT: bx lr 1377entry: 1378 %z = getelementptr inbounds i8, ptr %x, i32 -256 1379 %mask = load <8 x i16>, ptr %m, align 2 1380 %c = icmp ne <8 x i16> %mask, zeroinitializer 1381 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 2, <8 x i1> %c, <8 x half> undef) 1382 store <8 x half> %0, ptr %y, align 2 1383 ret ptr %x 1384} 1385 1386 1387 1388 1389define ptr @strw32_4(ptr %y, ptr %x, ptr %m) { 1390; CHECK-LABEL: strw32_4: 1391; CHECK: @ %bb.0: @ %entry 1392; CHECK-NEXT: vldrw.u32 q1, [r2] 1393; CHECK-NEXT: vldrw.u32 q0, [r1] 1394; CHECK-NEXT: vpt.i32 ne, q1, zr 1395; CHECK-NEXT: vstrwt.32 q0, [r0, #4] 1396; CHECK-NEXT: bx lr 1397entry: 1398 %z = getelementptr inbounds i8, ptr %y, i32 4 1399 %mask = load <4 x i32>, ptr %m, align 4 1400 %c = icmp ne <4 x i32> %mask, zeroinitializer 1401 %0 = load <4 x i32>, ptr %x, align 4 1402 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, 
i32 4, <4 x i1> %c) 1403 ret ptr %y 1404} 1405 1406define ptr @strw32_3(ptr %y, ptr %x, ptr %m) { 1407; CHECK-LABEL: strw32_3: 1408; CHECK: @ %bb.0: @ %entry 1409; CHECK-NEXT: vldrw.u32 q1, [r2] 1410; CHECK-NEXT: vldrw.u32 q0, [r1] 1411; CHECK-NEXT: adds r1, r0, #3 1412; CHECK-NEXT: vpt.i32 ne, q1, zr 1413; CHECK-NEXT: vstrwt.32 q0, [r1] 1414; CHECK-NEXT: bx lr 1415entry: 1416 %z = getelementptr inbounds i8, ptr %y, i32 3 1417 %mask = load <4 x i32>, ptr %m, align 4 1418 %c = icmp ne <4 x i32> %mask, zeroinitializer 1419 %0 = load <4 x i32>, ptr %x, align 4 1420 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c) 1421 ret ptr %y 1422} 1423 1424define ptr @strw32_2(ptr %y, ptr %x, ptr %m) { 1425; CHECK-LABEL: strw32_2: 1426; CHECK: @ %bb.0: @ %entry 1427; CHECK-NEXT: vldrw.u32 q1, [r2] 1428; CHECK-NEXT: vldrw.u32 q0, [r1] 1429; CHECK-NEXT: adds r1, r0, #2 1430; CHECK-NEXT: vpt.i32 ne, q1, zr 1431; CHECK-NEXT: vstrwt.32 q0, [r1] 1432; CHECK-NEXT: bx lr 1433entry: 1434 %z = getelementptr inbounds i8, ptr %y, i32 2 1435 %mask = load <4 x i32>, ptr %m, align 4 1436 %c = icmp ne <4 x i32> %mask, zeroinitializer 1437 %0 = load <4 x i32>, ptr %x, align 4 1438 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c) 1439 ret ptr %y 1440} 1441 1442define ptr @strw32_508(ptr %y, ptr %x, ptr %m) { 1443; CHECK-LABEL: strw32_508: 1444; CHECK: @ %bb.0: @ %entry 1445; CHECK-NEXT: vldrw.u32 q1, [r2] 1446; CHECK-NEXT: vldrw.u32 q0, [r1] 1447; CHECK-NEXT: vpt.i32 ne, q1, zr 1448; CHECK-NEXT: vstrwt.32 q0, [r0, #508] 1449; CHECK-NEXT: bx lr 1450entry: 1451 %z = getelementptr inbounds i8, ptr %y, i32 508 1452 %mask = load <4 x i32>, ptr %m, align 4 1453 %c = icmp ne <4 x i32> %mask, zeroinitializer 1454 %0 = load <4 x i32>, ptr %x, align 4 1455 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c) 1456 ret ptr %y 1457} 1458 1459define ptr @strw32_512(ptr %y, ptr %x, ptr %m) { 1460; CHECK-LABEL: strw32_512: 
1461; CHECK: @ %bb.0: @ %entry 1462; CHECK-NEXT: vldrw.u32 q1, [r2] 1463; CHECK-NEXT: vldrw.u32 q0, [r1] 1464; CHECK-NEXT: add.w r1, r0, #512 1465; CHECK-NEXT: vpt.i32 ne, q1, zr 1466; CHECK-NEXT: vstrwt.32 q0, [r1] 1467; CHECK-NEXT: bx lr 1468entry: 1469 %z = getelementptr inbounds i8, ptr %y, i32 512 1470 %mask = load <4 x i32>, ptr %m, align 4 1471 %c = icmp ne <4 x i32> %mask, zeroinitializer 1472 %0 = load <4 x i32>, ptr %x, align 4 1473 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c) 1474 ret ptr %y 1475} 1476 1477define ptr @strw32_m508(ptr %y, ptr %x, ptr %m) { 1478; CHECK-LABEL: strw32_m508: 1479; CHECK: @ %bb.0: @ %entry 1480; CHECK-NEXT: vldrw.u32 q1, [r2] 1481; CHECK-NEXT: vldrw.u32 q0, [r1] 1482; CHECK-NEXT: vpt.i32 ne, q1, zr 1483; CHECK-NEXT: vstrwt.32 q0, [r0, #-508] 1484; CHECK-NEXT: bx lr 1485entry: 1486 %z = getelementptr inbounds i8, ptr %y, i32 -508 1487 %mask = load <4 x i32>, ptr %m, align 4 1488 %c = icmp ne <4 x i32> %mask, zeroinitializer 1489 %0 = load <4 x i32>, ptr %x, align 4 1490 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c) 1491 ret ptr %y 1492} 1493 1494define ptr @strw32_m512(ptr %y, ptr %x, ptr %m) { 1495; CHECK-LABEL: strw32_m512: 1496; CHECK: @ %bb.0: @ %entry 1497; CHECK-NEXT: vldrw.u32 q1, [r2] 1498; CHECK-NEXT: vldrw.u32 q0, [r1] 1499; CHECK-NEXT: sub.w r1, r0, #512 1500; CHECK-NEXT: vpt.i32 ne, q1, zr 1501; CHECK-NEXT: vstrwt.32 q0, [r1] 1502; CHECK-NEXT: bx lr 1503entry: 1504 %z = getelementptr inbounds i8, ptr %y, i32 -512 1505 %mask = load <4 x i32>, ptr %m, align 4 1506 %c = icmp ne <4 x i32> %mask, zeroinitializer 1507 %0 = load <4 x i32>, ptr %x, align 4 1508 call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c) 1509 ret ptr %y 1510} 1511 1512define ptr @strh32_4(ptr %y, ptr %x, ptr %m) { 1513; CHECK-LABEL: strh32_4: 1514; CHECK: @ %bb.0: @ %entry 1515; CHECK-NEXT: vldrw.u32 q1, [r2] 1516; CHECK-NEXT: vldrh.u32 q0, [r1] 1517; 
CHECK-NEXT: vpt.i32 ne, q1, zr 1518; CHECK-NEXT: vstrht.32 q0, [r0, #4] 1519; CHECK-NEXT: bx lr 1520entry: 1521 %z = getelementptr inbounds i8, ptr %y, i32 4 1522 %mask = load <4 x i32>, ptr %m, align 4 1523 %c = icmp ne <4 x i32> %mask, zeroinitializer 1524 %0 = load <4 x i16>, ptr %x, align 2 1525 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c) 1526 ret ptr %y 1527} 1528 1529define ptr @strh32_3(ptr %y, ptr %x, ptr %m) { 1530; CHECK-LABEL: strh32_3: 1531; CHECK: @ %bb.0: @ %entry 1532; CHECK-NEXT: vldrw.u32 q1, [r2] 1533; CHECK-NEXT: vldrh.u32 q0, [r1] 1534; CHECK-NEXT: adds r1, r0, #3 1535; CHECK-NEXT: vpt.i32 ne, q1, zr 1536; CHECK-NEXT: vstrht.32 q0, [r1] 1537; CHECK-NEXT: bx lr 1538entry: 1539 %z = getelementptr inbounds i8, ptr %y, i32 3 1540 %mask = load <4 x i32>, ptr %m, align 4 1541 %c = icmp ne <4 x i32> %mask, zeroinitializer 1542 %0 = load <4 x i16>, ptr %x, align 2 1543 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c) 1544 ret ptr %y 1545} 1546 1547define ptr @strh32_2(ptr %y, ptr %x, ptr %m) { 1548; CHECK-LABEL: strh32_2: 1549; CHECK: @ %bb.0: @ %entry 1550; CHECK-NEXT: vldrw.u32 q1, [r2] 1551; CHECK-NEXT: vldrh.u32 q0, [r1] 1552; CHECK-NEXT: vpt.i32 ne, q1, zr 1553; CHECK-NEXT: vstrht.32 q0, [r0, #2] 1554; CHECK-NEXT: bx lr 1555entry: 1556 %z = getelementptr inbounds i8, ptr %y, i32 2 1557 %mask = load <4 x i32>, ptr %m, align 4 1558 %c = icmp ne <4 x i32> %mask, zeroinitializer 1559 %0 = load <4 x i16>, ptr %x, align 2 1560 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c) 1561 ret ptr %y 1562} 1563 1564define ptr @strh32_254(ptr %y, ptr %x, ptr %m) { 1565; CHECK-LABEL: strh32_254: 1566; CHECK: @ %bb.0: @ %entry 1567; CHECK-NEXT: vldrw.u32 q1, [r2] 1568; CHECK-NEXT: vldrh.u32 q0, [r1] 1569; CHECK-NEXT: vpt.i32 ne, q1, zr 1570; CHECK-NEXT: vstrht.32 q0, [r0, #254] 1571; CHECK-NEXT: bx lr 1572entry: 1573 %z = getelementptr inbounds i8, ptr %y, i32 254 1574 
%mask = load <4 x i32>, ptr %m, align 4 1575 %c = icmp ne <4 x i32> %mask, zeroinitializer 1576 %0 = load <4 x i16>, ptr %x, align 2 1577 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c) 1578 ret ptr %y 1579} 1580 1581define ptr @strh32_256(ptr %y, ptr %x, ptr %m) { 1582; CHECK-LABEL: strh32_256: 1583; CHECK: @ %bb.0: @ %entry 1584; CHECK-NEXT: vldrw.u32 q1, [r2] 1585; CHECK-NEXT: vldrh.u32 q0, [r1] 1586; CHECK-NEXT: add.w r1, r0, #256 1587; CHECK-NEXT: vpt.i32 ne, q1, zr 1588; CHECK-NEXT: vstrht.32 q0, [r1] 1589; CHECK-NEXT: bx lr 1590entry: 1591 %z = getelementptr inbounds i8, ptr %y, i32 256 1592 %mask = load <4 x i32>, ptr %m, align 4 1593 %c = icmp ne <4 x i32> %mask, zeroinitializer 1594 %0 = load <4 x i16>, ptr %x, align 2 1595 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c) 1596 ret ptr %y 1597} 1598 1599define ptr @strh32_m254(ptr %y, ptr %x, ptr %m) { 1600; CHECK-LABEL: strh32_m254: 1601; CHECK: @ %bb.0: @ %entry 1602; CHECK-NEXT: vldrw.u32 q1, [r2] 1603; CHECK-NEXT: vldrh.u32 q0, [r1] 1604; CHECK-NEXT: vpt.i32 ne, q1, zr 1605; CHECK-NEXT: vstrht.32 q0, [r0, #-254] 1606; CHECK-NEXT: bx lr 1607entry: 1608 %z = getelementptr inbounds i8, ptr %y, i32 -254 1609 %mask = load <4 x i32>, ptr %m, align 4 1610 %c = icmp ne <4 x i32> %mask, zeroinitializer 1611 %0 = load <4 x i16>, ptr %x, align 2 1612 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c) 1613 ret ptr %y 1614} 1615 1616define ptr @strh32_m256(ptr %y, ptr %x, ptr %m) { 1617; CHECK-LABEL: strh32_m256: 1618; CHECK: @ %bb.0: @ %entry 1619; CHECK-NEXT: vldrw.u32 q1, [r2] 1620; CHECK-NEXT: vldrh.u32 q0, [r1] 1621; CHECK-NEXT: sub.w r1, r0, #256 1622; CHECK-NEXT: vpt.i32 ne, q1, zr 1623; CHECK-NEXT: vstrht.32 q0, [r1] 1624; CHECK-NEXT: bx lr 1625entry: 1626 %z = getelementptr inbounds i8, ptr %y, i32 -256 1627 %mask = load <4 x i32>, ptr %m, align 4 1628 %c = icmp ne <4 x i32> %mask, zeroinitializer 1629 %0 = load <4 
x i16>, ptr %x, align 2 1630 call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %z, i32 2, <4 x i1> %c) 1631 ret ptr %y 1632} 1633 1634define ptr @strh16_4(ptr %y, ptr %x, ptr %m) { 1635; CHECK-LABEL: strh16_4: 1636; CHECK: @ %bb.0: @ %entry 1637; CHECK-NEXT: vldrh.u16 q1, [r2] 1638; CHECK-NEXT: vldrh.u16 q0, [r1] 1639; CHECK-NEXT: vpt.i16 ne, q1, zr 1640; CHECK-NEXT: vstrht.16 q0, [r0, #4] 1641; CHECK-NEXT: bx lr 1642entry: 1643 %z = getelementptr inbounds i8, ptr %y, i32 4 1644 %mask = load <8 x i16>, ptr %m, align 2 1645 %c = icmp ne <8 x i16> %mask, zeroinitializer 1646 %0 = load <8 x i16>, ptr %x, align 2 1647 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c) 1648 ret ptr %y 1649} 1650 1651define ptr @strh16_3(ptr %y, ptr %x, ptr %m) { 1652; CHECK-LABEL: strh16_3: 1653; CHECK: @ %bb.0: @ %entry 1654; CHECK-NEXT: vldrh.u16 q1, [r2] 1655; CHECK-NEXT: vldrh.u16 q0, [r1] 1656; CHECK-NEXT: adds r1, r0, #3 1657; CHECK-NEXT: vpt.i16 ne, q1, zr 1658; CHECK-NEXT: vstrht.16 q0, [r1] 1659; CHECK-NEXT: bx lr 1660entry: 1661 %z = getelementptr inbounds i8, ptr %y, i32 3 1662 %mask = load <8 x i16>, ptr %m, align 2 1663 %c = icmp ne <8 x i16> %mask, zeroinitializer 1664 %0 = load <8 x i16>, ptr %x, align 2 1665 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c) 1666 ret ptr %y 1667} 1668 1669define ptr @strh16_2(ptr %y, ptr %x, ptr %m) { 1670; CHECK-LABEL: strh16_2: 1671; CHECK: @ %bb.0: @ %entry 1672; CHECK-NEXT: vldrh.u16 q1, [r2] 1673; CHECK-NEXT: vldrh.u16 q0, [r1] 1674; CHECK-NEXT: vpt.i16 ne, q1, zr 1675; CHECK-NEXT: vstrht.16 q0, [r0, #2] 1676; CHECK-NEXT: bx lr 1677entry: 1678 %z = getelementptr inbounds i8, ptr %y, i32 2 1679 %mask = load <8 x i16>, ptr %m, align 2 1680 %c = icmp ne <8 x i16> %mask, zeroinitializer 1681 %0 = load <8 x i16>, ptr %x, align 2 1682 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c) 1683 ret ptr %y 1684} 1685 1686define ptr @strh16_254(ptr %y, 
ptr %x, ptr %m) { 1687; CHECK-LABEL: strh16_254: 1688; CHECK: @ %bb.0: @ %entry 1689; CHECK-NEXT: vldrh.u16 q1, [r2] 1690; CHECK-NEXT: vldrh.u16 q0, [r1] 1691; CHECK-NEXT: vpt.i16 ne, q1, zr 1692; CHECK-NEXT: vstrht.16 q0, [r0, #254] 1693; CHECK-NEXT: bx lr 1694entry: 1695 %z = getelementptr inbounds i8, ptr %y, i32 254 1696 %mask = load <8 x i16>, ptr %m, align 2 1697 %c = icmp ne <8 x i16> %mask, zeroinitializer 1698 %0 = load <8 x i16>, ptr %x, align 2 1699 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c) 1700 ret ptr %y 1701} 1702 1703define ptr @strh16_256(ptr %y, ptr %x, ptr %m) { 1704; CHECK-LABEL: strh16_256: 1705; CHECK: @ %bb.0: @ %entry 1706; CHECK-NEXT: vldrh.u16 q1, [r2] 1707; CHECK-NEXT: vldrh.u16 q0, [r1] 1708; CHECK-NEXT: add.w r1, r0, #256 1709; CHECK-NEXT: vpt.i16 ne, q1, zr 1710; CHECK-NEXT: vstrht.16 q0, [r1] 1711; CHECK-NEXT: bx lr 1712entry: 1713 %z = getelementptr inbounds i8, ptr %y, i32 256 1714 %mask = load <8 x i16>, ptr %m, align 2 1715 %c = icmp ne <8 x i16> %mask, zeroinitializer 1716 %0 = load <8 x i16>, ptr %x, align 2 1717 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c) 1718 ret ptr %y 1719} 1720 1721define ptr @strh16_m254(ptr %y, ptr %x, ptr %m) { 1722; CHECK-LABEL: strh16_m254: 1723; CHECK: @ %bb.0: @ %entry 1724; CHECK-NEXT: vldrh.u16 q1, [r2] 1725; CHECK-NEXT: vldrh.u16 q0, [r1] 1726; CHECK-NEXT: vpt.i16 ne, q1, zr 1727; CHECK-NEXT: vstrht.16 q0, [r0, #-254] 1728; CHECK-NEXT: bx lr 1729entry: 1730 %z = getelementptr inbounds i8, ptr %y, i32 -254 1731 %mask = load <8 x i16>, ptr %m, align 2 1732 %c = icmp ne <8 x i16> %mask, zeroinitializer 1733 %0 = load <8 x i16>, ptr %x, align 2 1734 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c) 1735 ret ptr %y 1736} 1737 1738define ptr @strh16_m256(ptr %y, ptr %x, ptr %m) { 1739; CHECK-LABEL: strh16_m256: 1740; CHECK: @ %bb.0: @ %entry 1741; CHECK-NEXT: vldrh.u16 q1, [r2] 1742; CHECK-NEXT: 
vldrh.u16 q0, [r1] 1743; CHECK-NEXT: sub.w r1, r0, #256 1744; CHECK-NEXT: vpt.i16 ne, q1, zr 1745; CHECK-NEXT: vstrht.16 q0, [r1] 1746; CHECK-NEXT: bx lr 1747entry: 1748 %z = getelementptr inbounds i8, ptr %y, i32 -256 1749 %mask = load <8 x i16>, ptr %m, align 2 1750 %c = icmp ne <8 x i16> %mask, zeroinitializer 1751 %0 = load <8 x i16>, ptr %x, align 2 1752 call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c) 1753 ret ptr %y 1754} 1755 1756define ptr @strb32_4(ptr %y, ptr %x, ptr %m) { 1757; CHECK-LABEL: strb32_4: 1758; CHECK: @ %bb.0: @ %entry 1759; CHECK-NEXT: vldrw.u32 q1, [r2] 1760; CHECK-NEXT: vldrb.u32 q0, [r1] 1761; CHECK-NEXT: vpt.i32 ne, q1, zr 1762; CHECK-NEXT: vstrbt.32 q0, [r0, #4] 1763; CHECK-NEXT: bx lr 1764entry: 1765 %z = getelementptr inbounds i8, ptr %y, i32 4 1766 %mask = load <4 x i32>, ptr %m, align 4 1767 %c = icmp ne <4 x i32> %mask, zeroinitializer 1768 %0 = load <4 x i8>, ptr %x, align 1 1769 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c) 1770 ret ptr %y 1771} 1772 1773define ptr @strb32_3(ptr %y, ptr %x, ptr %m) { 1774; CHECK-LABEL: strb32_3: 1775; CHECK: @ %bb.0: @ %entry 1776; CHECK-NEXT: vldrw.u32 q1, [r2] 1777; CHECK-NEXT: vldrb.u32 q0, [r1] 1778; CHECK-NEXT: vpt.i32 ne, q1, zr 1779; CHECK-NEXT: vstrbt.32 q0, [r0, #3] 1780; CHECK-NEXT: bx lr 1781entry: 1782 %z = getelementptr inbounds i8, ptr %y, i32 3 1783 %mask = load <4 x i32>, ptr %m, align 4 1784 %c = icmp ne <4 x i32> %mask, zeroinitializer 1785 %0 = load <4 x i8>, ptr %x, align 1 1786 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c) 1787 ret ptr %y 1788} 1789 1790define ptr @strb32_2(ptr %y, ptr %x, ptr %m) { 1791; CHECK-LABEL: strb32_2: 1792; CHECK: @ %bb.0: @ %entry 1793; CHECK-NEXT: vldrw.u32 q1, [r2] 1794; CHECK-NEXT: vldrb.u32 q0, [r1] 1795; CHECK-NEXT: vpt.i32 ne, q1, zr 1796; CHECK-NEXT: vstrbt.32 q0, [r0, #2] 1797; CHECK-NEXT: bx lr 1798entry: 1799 %z = getelementptr inbounds i8, ptr 
%y, i32 2
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
  ret ptr %y
}

; NOTE(doc): the explanatory comments below deliberately never spell out the
; FileCheck prefix followed by a colon, so they cannot be picked up as
; directives. Each test stores to %y plus a constant byte offset under a mask
; computed as (%m != 0) and returns the unmodified base pointer %y.

; <4 x i8> masked store at offset +127: the expected assembly uses the offset
; directly as the vstrbt.32 immediate.
define ptr @strb32_127(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb32_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrbt.32 q0, [r0, #127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 127
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
  ret ptr %y
}

; <4 x i8> masked store at offset +128: the expected assembly forms the
; address with a separate add.w and stores with no immediate.
define ptr @strb32_128(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb32_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #128
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrbt.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 128
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
  ret ptr %y
}

; <4 x i8> masked store at offset -127: the expected assembly uses the
; negative offset directly as the vstrbt.32 immediate.
define ptr @strb32_m127(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb32_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrbt.32 q0, [r0, #-127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -127
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
  ret ptr %y
}

; <4 x i8> masked store at offset -128: the expected assembly forms the
; address with a separate sub.w and stores with no immediate.
define ptr @strb32_m128(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb32_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #128
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrbt.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -128
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %z, i32 1, <4 x i1> %c)
  ret ptr %y
}

; <8 x i8> masked store at offset +4: offset expected as the vstrbt.16
; immediate.
define ptr @strb16_4(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrbt.16 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
  ret ptr %y
}

; <8 x i8> masked store at offset +3: offset expected as the vstrbt.16
; immediate.
define ptr @strb16_3(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrbt.16 q0, [r0, #3]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
  ret ptr %y
}

; <8 x i8> masked store at offset +2: offset expected as the vstrbt.16
; immediate.
define ptr @strb16_2(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrbt.16 q0, [r0, #2]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 2
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
  ret ptr %y
}

; <8 x i8> masked store at offset +127: offset expected as the vstrbt.16
; immediate.
define ptr @strb16_127(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb16_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrbt.16 q0, [r0, #127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 127
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
  ret ptr %y
}

; <8 x i8> masked store at offset +128: address expected to be formed with a
; separate add.w.
define ptr @strb16_128(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb16_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #128
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrbt.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 128
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
  ret ptr %y
}

; <8 x i8> masked store at offset -127: negative offset expected as the
; vstrbt.16 immediate.
define ptr @strb16_m127(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb16_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrbt.16 q0, [r0, #-127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -127
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
  ret ptr %y
}

; <8 x i8> masked store at offset -128: address expected to be formed with a
; separate sub.w.
define ptr @strb16_m128(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb16_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #128
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrbt.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -128
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %z, i32 1, <8 x i1> %c)
  ret ptr %y
}

; <16 x i8> masked store at offset +4: offset expected as the vstrbt.8
; immediate.
define ptr @strb8_4(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb8_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r2]
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vpt.i8 ne, q1, zr
; CHECK-NEXT:    vstrbt.8 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %mask = load <16 x i8>, ptr %m, align 1
  %c = icmp ne <16 x i8> %mask, zeroinitializer
  %0 = load <16 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
  ret ptr %y
}

; <16 x i8> masked store at offset +3: offset expected as the vstrbt.8
; immediate.
define ptr @strb8_3(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb8_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r2]
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vpt.i8 ne, q1, zr
; CHECK-NEXT:    vstrbt.8 q0, [r0, #3]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %mask = load <16 x i8>, ptr %m, align 1
  %c = icmp ne <16 x i8> %mask, zeroinitializer
  %0 = load <16 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
  ret ptr %y
}

; <16 x i8> masked store at offset +2: offset expected as the vstrbt.8
; immediate.
define ptr @strb8_2(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb8_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r2]
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vpt.i8 ne, q1, zr
; CHECK-NEXT:    vstrbt.8 q0, [r0, #2]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 2
  %mask = load <16 x i8>, ptr %m, align 1
  %c = icmp ne <16 x i8> %mask, zeroinitializer
  %0 = load <16 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
  ret ptr %y
}

; <16 x i8> masked store at offset +127: offset expected as the vstrbt.8
; immediate.
define ptr @strb8_127(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb8_127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r2]
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vpt.i8 ne, q1, zr
; CHECK-NEXT:    vstrbt.8 q0, [r0, #127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 127
  %mask = load <16 x i8>, ptr %m, align 1
  %c = icmp ne <16 x i8> %mask, zeroinitializer
  %0 = load <16 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
  ret ptr %y
}

; <16 x i8> masked store at offset +128: address expected to be formed with a
; separate add.w.
define ptr @strb8_128(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb8_128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r2]
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #128
; CHECK-NEXT:    vpt.i8 ne, q1, zr
; CHECK-NEXT:    vstrbt.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 128
  %mask = load <16 x i8>, ptr %m, align 1
  %c = icmp ne <16 x i8> %mask, zeroinitializer
  %0 = load <16 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
  ret ptr %y
}

; <16 x i8> masked store at offset -127: negative offset expected as the
; vstrbt.8 immediate.
define ptr @strb8_m127(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb8_m127:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r2]
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    vpt.i8 ne, q1, zr
; CHECK-NEXT:    vstrbt.8 q0, [r0, #-127]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -127
  %mask = load <16 x i8>, ptr %m, align 1
  %c = icmp ne <16 x i8> %mask, zeroinitializer
  %0 = load <16 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
  ret ptr %y
}

; <16 x i8> masked store at offset -128: address expected to be formed with a
; separate sub.w.
define ptr @strb8_m128(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strb8_m128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r2]
; CHECK-NEXT:    vldrb.u8 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #128
; CHECK-NEXT:    vpt.i8 ne, q1, zr
; CHECK-NEXT:    vstrbt.8 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -128
  %mask = load <16 x i8>, ptr %m, align 1
  %c = icmp ne <16 x i8> %mask, zeroinitializer
  %0 = load <16 x i8>, ptr %x, align 1
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
  ret ptr %y
}

; <4 x float> masked store at offset +4: offset expected as the vstrwt.32
; immediate.
define ptr @strwf32_4(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strwf32_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrwt.32 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x float>, ptr %x, align 4
  call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
  ret ptr %y
}

; <4 x float> masked store at offset +3: address expected to be formed with a
; separate adds (presumably because 3 is not a multiple of the 4-byte element
; size -- confirm against the MVE addressing-mode rules).
define ptr @strwf32_3(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strwf32_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    adds r1, r0, #3
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrwt.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x float>, ptr %x, align 4
  call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
  ret ptr %y
}

; <4 x float> masked store at offset +2: address expected to be formed with a
; separate adds (presumably because 2 is not a multiple of the 4-byte element
; size -- confirm against the MVE addressing-mode rules).
define ptr @strwf32_2(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strwf32_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    adds r1, r0, #2
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrwt.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 2
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x float>, ptr %x, align 4
  call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
  ret ptr %y
}

; <4 x float> masked store at offset +508: offset expected as the vstrwt.32
; immediate.
define ptr @strwf32_508(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strwf32_508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrwt.32 q0, [r0, #508]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 508
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x float>, ptr %x, align 4
  call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
  ret ptr %y
}

; <4 x float> masked store at offset +512: address expected to be formed with
; a separate add.w.
define ptr @strwf32_512(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strwf32_512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #512
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrwt.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 512
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x float>, ptr %x, align 4
  call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
  ret ptr %y
}

; <4 x float> masked store at offset -508: negative offset expected as the
; vstrwt.32 immediate.
define ptr @strwf32_m508(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strwf32_m508:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrwt.32 q0, [r0, #-508]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -508
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x float>, ptr %x, align 4
  call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
  ret ptr %y
}

; <4 x float> masked store at offset -512: address expected to be formed with
; a separate sub.w.
define ptr @strwf32_m512(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strwf32_m512:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r2]
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #512
; CHECK-NEXT:    vpt.i32 ne, q1, zr
; CHECK-NEXT:    vstrwt.32 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -512
  %mask = load <4 x i32>, ptr %m, align 4
  %c = icmp ne <4 x i32> %mask, zeroinitializer
  %0 = load <4 x float>, ptr %x, align 4
  call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
  ret ptr %y
}

; <8 x half> masked store at offset +4: offset expected as the vstrht.16
; immediate.
define ptr @strhf16_4(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strhf16_4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrht.16 q0, [r0, #4]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x half>, ptr %x, align 2
  call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
  ret ptr %y
}

; <8 x half> masked store at offset +3: address expected to be formed with a
; separate adds (presumably because 3 is not a multiple of the 2-byte element
; size -- confirm against the MVE addressing-mode rules).
define ptr @strhf16_3(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strhf16_3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    adds r1, r0, #3
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrht.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 3
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x half>, ptr %x, align 2
  call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
  ret ptr %y
}

; <8 x half> masked store at offset +2: offset expected as the vstrht.16
; immediate.
define ptr @strhf16_2(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strhf16_2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrht.16 q0, [r0, #2]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 2
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x half>, ptr %x, align 2
  call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
  ret ptr %y
}

; <8 x half> masked store at offset +254: offset expected as the vstrht.16
; immediate.
define ptr @strhf16_254(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strhf16_254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrht.16 q0, [r0, #254]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 254
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x half>, ptr %x, align 2
  call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
  ret ptr %y
}

; <8 x half> masked store at offset +256: address expected to be formed with
; a separate add.w.
define ptr @strhf16_256(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strhf16_256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    add.w r1, r0, #256
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrht.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 256
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x half>, ptr %x, align 2
  call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
  ret ptr %y
}

; <8 x half> masked store at offset -254: negative offset expected as the
; vstrht.16 immediate.
define ptr @strhf16_m254(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strhf16_m254:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrht.16 q0, [r0, #-254]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -254
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x half>, ptr %x, align 2
  call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
  ret ptr %y
}

; <8 x half> masked store at offset -256: address expected to be formed with
; a separate sub.w.
define ptr @strhf16_m256(ptr %y, ptr %x, ptr %m) {
; CHECK-LABEL: strhf16_m256:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r2]
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    sub.w r1, r0, #256
; CHECK-NEXT:    vpt.i16 ne, q1, zr
; CHECK-NEXT:    vstrht.16 q0, [r1]
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 -256
  %mask = load <8 x i16>, ptr %m, align 2
  %c = icmp ne <8 x i16> %mask, zeroinitializer
  %0 = load <8 x half>, ptr %x, align 2
  call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
  ret ptr %y
}

; Masked-load intrinsic declarations referenced by the tests in this file.
declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)
declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>)
declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32, <4 x i1>, <4 x i8>)
declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>)
declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32, <8 x i1>, <8 x half>)

; Masked-store intrinsic declarations referenced by the tests in this file.
declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>)
declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v4i8.p0(<4 x i8>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v8f16.p0(<8 x half>, ptr, i32, <8 x i1>)