; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-unknown | FileCheck %s

; ptr p; // p is 1 byte aligned
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8_unaligned(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_unaligned:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w0, [x0]
; CHECK-NEXT:    ret
  %tmp2 = load i8, ptr %arg, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; ptr p; // p is 4 byte aligned
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8_aligned(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_aligned:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w0, [x0]
; CHECK-NEXT:    ret
  %tmp2 = load i8, ptr %arg, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; ptr p; // p is 4 byte aligned
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    rev w0, w8
; CHECK-NEXT:    ret
  %tmp1 = load i8, ptr %arg, align 4
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; ptr p; // p is 8 byte aligned
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load i8, ptr %arg, align 8
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
  %tmp9 = load i8, ptr %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
  %tmp14 = load i8, ptr %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
  %tmp19 = load i8, ptr %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
  %tmp24 = load i8, ptr %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
  %tmp29 = load i8, ptr %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
  %tmp34 = load i8, ptr %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; ptr p; // p is 8 byte aligned
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    rev x0, x8
; CHECK-NEXT:    ret
  %tmp1 = load i8, ptr %arg, align 8
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
  %tmp20 = load i8, ptr %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
  %tmp25 = load i8, ptr %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
  %tmp30 = load i8, ptr %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
  %tmp35 = load i8, ptr %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; ptr p; // p[1] is 4 byte aligned
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w0, [x0, #1]
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; ptr p; // p[-4] is 4 byte aligned
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w0, [x0, #-4]
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
  %tmp2 = load i8, ptr %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; ptr p; // p[1] is 4 byte aligned
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w8, [x0, #1]
; CHECK-NEXT:    rev w0, w8
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp15 = load i8, ptr %tmp14, align 4
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; ptr p; // p[-4] is 4 byte aligned
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur w8, [x0, #-4]
; CHECK-NEXT:    rev w0, w8
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
  %tmp15 = load i8, ptr %tmp14, align 4
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

declare i16 @llvm.bswap.i16(i16)

; ptr p; // p is 4 byte aligned
; (i32) bswap(p[1]) | (i32) bswap(p[0] << 16)
define i32 @load_i32_by_bswap_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    rev w0, w8
; CHECK-NEXT:    ret

  %tmp1 = load i16, ptr %arg, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
  %tmp4 = load i16, ptr %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; ptr p; // p is 4 byte aligned
; (i32) p[0] | (sext(p[1] << 16) to i32)
define i32 @load_i32_by_sext_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load i16, ptr %arg, align 4
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
  %tmp4 = load i16, ptr %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; ptr arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, w1, uxtw
; CHECK-NEXT:    ldr w0, [x8, #12]
; CHECK-NEXT:    ret
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
  %tmp7 = load i8, ptr %tmp6, align 4
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
  %tmp11 = load i8, ptr %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
  %tmp17 = load i8, ptr %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
  %tmp23 = load i8, ptr %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; ptr arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, w1, uxtw
; CHECK-NEXT:    ldur w0, [x8, #13]
; CHECK-NEXT:    ret
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
  %tmp29 = load i8, ptr %tmp28, align 4
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
  %tmp33 = load i8, ptr %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
  %tmp39 = load i8, ptr %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
  %tmp45 = load i8, ptr %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; ptr p; // p is 2 byte aligned
; (i32) p[0] | ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w0, [x0]
; CHECK-NEXT:    ret

  %tmp2 = load i8, ptr %arg, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; ptr p; // p is 2 byte aligned
; ((i32) p[0] << 8) | ((i32) p[1] << 16)
define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0]
; CHECK-NEXT:    ldrb w9, [x0, #1]
; CHECK-NEXT:    lsl w8, w8, #8
; CHECK-NEXT:    orr w0, w8, w9, lsl #16
; CHECK-NEXT:    ret

  %tmp2 = load i8, ptr %arg, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; ptr p; // p is 2 byte aligned
; ((i32) p[0] << 16) | ((i32) p[1] << 24)
define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0]
; CHECK-NEXT:    ldrb w9, [x0, #1]
; CHECK-NEXT:    lsl w8, w8, #16
; CHECK-NEXT:    orr w0, w8, w9, lsl #24
; CHECK-NEXT:    ret

  %tmp2 = load i8, ptr %arg, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; ptr p; // p is 2 byte aligned
; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    rev16 w0, w8
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp5 = load i8, ptr %arg, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; ptr p; // p is 2 byte aligned
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #1]
; CHECK-NEXT:    ldrb w9, [x0]
; CHECK-NEXT:    lsl w8, w8, #8
; CHECK-NEXT:    orr w0, w8, w9, lsl #16
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp5 = load i8, ptr %arg, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; ptr p; // p is 2 byte aligned
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0, #1]
; CHECK-NEXT:    ldrb w9, [x0]
; CHECK-NEXT:    lsl w8, w8, #16
; CHECK-NEXT:    orr w0, w8, w9, lsl #24
; CHECK-NEXT:    ret

  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp5 = load i8, ptr %arg, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; x1 = x0
define void @short_vector_to_i32(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    str w8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e1 = extractelement <4 x i8> %ld, i32 0
  %e2 = extractelement <4 x i8> %ld, i32 1
  %e3 = extractelement <4 x i8> %ld, i32 2
  %e4 = extractelement <4 x i8> %ld, i32 3

  %z0 = zext i8 %e1 to i32
  %z1 = zext i8 %e2 to i32
  %z2 = zext i8 %e3 to i32
  %z3 = zext i8 %e4 to i32

  %s1 = shl nuw nsw i32 %z1, 8
  %s2 = shl nuw nsw i32 %z2, 16
  %s3 = shl nuw i32 %z3, 24

  %i1 = or i32 %s1, %z0
  %i2 = or i32 %i1, %s2
  %i3 = or i32 %i2, %s3

  store i32 %i3, ptr %out
  ret void
}

define void @short_vector_to_i32_unused_low_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i32_unused_low_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    umov w9, v0.h[1]
; CHECK-NEXT:    umov w10, v0.h[3]
; CHECK-NEXT:    lsl w8, w8, #16
; CHECK-NEXT:    bfi w8, w9, #8, #8
; CHECK-NEXT:    orr w8, w8, w10, lsl #24
; CHECK-NEXT:    str w8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e2 = extractelement <4 x i8> %ld, i32 1
  %e3 = extractelement <4 x i8> %ld, i32 2
  %e4 = extractelement <4 x i8> %ld, i32 3

  %z1 = zext i8 %e2 to i32
  %z2 = zext i8 %e3 to i32
  %z3 = zext i8 %e4 to i32

  %s1 = shl nuw nsw i32 %z1, 8
  %s2 = shl nuw nsw i32 %z2, 16
  %s3 = shl nuw i32 %z3, 24

  %i2 = or i32 %s1, %s2
  %i3 = or i32 %i2, %s3

  store i32 %i3, ptr %out
  ret void
}

define void @short_vector_to_i32_unused_high_i8(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i32_unused_high_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    orr w8, w9, w8, lsl #16
; CHECK-NEXT:    str w8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e1 = extractelement <4 x i8> %ld, i32 0
  %e2 = extractelement <4 x i8> %ld, i32 1
  %e3 = extractelement <4 x i8> %ld, i32 2

  %z0 = zext i8 %e1 to i32
  %z1 = zext i8 %e2 to i32
  %z2 = zext i8 %e3 to i32

  %s1 = shl nuw nsw i32 %z1, 8
  %s2 = shl nuw nsw i32 %z2, 16

  %i1 = or i32 %s1, %z0
  %i2 = or i32 %i1, %s2

  store i32 %i2, ptr %out
  ret void
}

define void @short_vector_to_i32_unused_low_i16(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i32_unused_low_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    umov w9, v0.h[2]
; CHECK-NEXT:    lsl w8, w8, #24
; CHECK-NEXT:    orr w8, w8, w9, lsl #16
; CHECK-NEXT:    str w8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e3 = extractelement <4 x i8> %ld, i32 2
  %e4 = extractelement <4 x i8> %ld, i32 3

  %z2 = zext i8 %e3 to i32
  %z3 = zext i8 %e4 to i32

  %s2 = shl nuw nsw i32 %z2, 16
  %s3 = shl nuw i32 %z3, 24

  %i3 = or i32 %s2, %s3

  store i32 %i3, ptr %out
  ret void
}

; x1 = x0[0:1]
define void @short_vector_to_i32_unused_high_i16(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i32_unused_high_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    str w8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e1 = extractelement <4 x i8> %ld, i32 0
  %e2 = extractelement <4 x i8> %ld, i32 1

  %z0 = zext i8 %e1 to i32
  %z1 = zext i8 %e2 to i32

  %s1 = shl nuw nsw i32 %z1, 8

  %i1 = or i32 %s1, %z0

  store i32 %i1, ptr %out
  ret void
}

; x1 = x0
define void @short_vector_to_i64(ptr %in, ptr %out, ptr %p) {
; CHECK-LABEL: short_vector_to_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    str x8, [x1]
; CHECK-NEXT:    ret
  %ld = load <4 x i8>, ptr %in, align 4

  %e1 = extractelement <4 x i8> %ld, i32 0
  %e2 = extractelement <4 x i8> %ld, i32 1
  %e3 = extractelement <4 x i8> %ld, i32 2
  %e4 = extractelement <4 x i8> %ld, i32 3

  %z0 = zext i8 %e1 to i64
  %z1 = zext i8 %e2 to i64
  %z2 = zext i8 %e3 to i64
  %z3 = zext i8 %e4 to i64

  %s1 = shl nuw nsw i64 %z1, 8
  %s2 = shl nuw nsw i64 %z2, 16
  %s3 = shl nuw i64 %z3, 24

  %i1 = or i64 %s1, %z0
  %i2 = or i64 %i1, %s2
  %i3 = or i64 %i2, %s3

  store i64 %i3, ptr %out
  ret void
}