; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=BSWAP
; RUN: llc < %s -mtriple=i686-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=MOVBE
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64 --check-prefix=BSWAP64
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+movbe | FileCheck %s --check-prefix=CHECK64 --check-prefix=MOVBE64

; Each test below spells out a wide load as several narrow loads merged with
; zext/shl/or. The pseudo-code comment in front of every function describes the
; value being computed; the assertions record the code llc emits for it.

; Four byte loads assembled in little-endian order; the assertions expect a
; single 32-bit load.
; ptr p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp1 = load i8, ptr %arg, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 8
  %tmp7 = or i32 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp9 = load i8, ptr %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 16
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp14 = load i8, ptr %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 24
  %tmp17 = or i32 %tmp12, %tmp16
  ret i32 %tmp17
}

; Four byte loads assembled in big-endian order; the assertions expect a 32-bit
; load followed by bswap, or a single movbe load when +movbe is enabled.
; ptr p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(ptr %arg) {
; BSWAP-LABEL: load_i32_by_i8_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp1 = load i8, ptr %arg, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; Two i16 loads assembled in little-endian order; the assertions expect a single
; 32-bit load. Here p is indexed in i16 units (the GEP below uses element type i16).
; ptr p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp1 = load i16, ptr %arg, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
  %tmp4 = load i16, ptr %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; One i16 load for the low half plus two byte loads for the high half; the
; assertions expect a single 32-bit load.
; ptr p_16;
; ptr p_8 = (ptr) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
define i32 @load_i32_by_i16_i8(ptr %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp2 = load i16, ptr %arg, align 1
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp9 = load i8, ptr %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i32
  %tmp11 = shl nuw nsw i32 %tmp10, 24
  %tmp12 = or i32 %tmp7, %tmp11
  %tmp13 = or i32 %tmp12, %tmp3
  ret i32 %tmp13
}


; Two i16 halves, each assembled from two bytes in little-endian order, then
; merged into an i32; the assertions expect a single 32-bit load.
; ptr p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(ptr %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp1 = load i8, ptr %arg, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp5, 8
  %tmp7 = or i16 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp9 = load i8, ptr %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp12 = load i8, ptr %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp13, 8
  %tmp15 = or i16 %tmp14, %tmp10
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp17, 16
  %tmp19 = or i32 %tmp18, %tmp16
  ret i32 %tmp19
}

; ptr p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
; Big-endian variant of the two-level pattern: each i16 half is built from two
; bytes (offsets 0..3 overall) with the first byte in the high position.
define i32 @load_i32_by_i16_by_i8_bswap(ptr %arg) {
; BSWAP-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i16_by_i8_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i16_by_i8_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp1 = load i8, ptr %arg, align 1
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp9 = load i8, ptr %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp12 = load i8, ptr %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; Eight byte loads assembled in little-endian order; the assertions expect one
; 64-bit load on x86-64 and two 32-bit loads on i686.
; ptr p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(ptr %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx), %eax
; CHECK-NEXT: movl 4(%ecx), %edx
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: retq
  %tmp1 = load i8, ptr %arg, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, ptr %arg, i64 1
  %tmp4 = load i8, ptr %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, ptr %arg, i64 2
  %tmp9 = load i8, ptr %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, ptr %arg, i64 3
  %tmp14 = load i8, ptr %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, ptr %arg, i64 4
  %tmp19 = load i8, ptr %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, ptr %arg, i64 5
  %tmp24 = load i8, ptr %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, ptr %arg, i64 6
  %tmp29 = load i8, ptr %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, ptr %arg, i64 7
  %tmp34 = load i8, ptr %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; Eight byte loads assembled in big-endian order; the assertions expect a wide
; load plus byte swap (or movbe), split into two 32-bit halves on i686.
; ptr p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(ptr %arg) {
; BSWAP-LABEL: load_i64_by_i8_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %edx
; BSWAP-NEXT: movl 4(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: bswapl %edx
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i64_by_i8_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT: movbel 4(%ecx), %eax
; MOVBE-NEXT: movbel (%ecx), %edx
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i64_by_i8_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movq (%rdi), %rax
; BSWAP64-NEXT: bswapq %rax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i64_by_i8_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbeq (%rdi), %rax
; MOVBE64-NEXT: retq
  %tmp1 = load i8, ptr %arg, align 1
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i64 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i64 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, ptr %arg, i64 4
  %tmp20 = load i8, ptr %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 5
  %tmp25 = load i8, ptr %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, ptr %arg, i64 6
  %tmp30 = load i8, ptr %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, ptr %arg, i64 7
  %tmp35 = load i8, ptr %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; Part of the load by bytes pattern is used outside of the pattern
; (the assertions show the four byte loads being kept separate).
; ptr p;
; i32 x = (i32) p[1]
; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; x | res
define i32 @load_i32_by_i8_bswap_uses(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %ecx, %esi
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl %ecx, %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %eax, %edx
; CHECK64-NEXT: movzbl 2(%rdi), %esi
; CHECK64-NEXT: shll $8, %esi
; CHECK64-NEXT: orl %edx, %esi
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %esi, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp1 = load i8, ptr %arg, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ; Use individual part of the pattern outside of the pattern
  %tmp18 = or i32 %tmp6, %tmp17
  ret i32 %tmp18
}

; One of the loads is volatile
; (the assertions show the four byte loads being kept separate).
; ptr p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp1 = load volatile i8, ptr %arg, align 1
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; There is a store in between individual loads
; ptr p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(ptr %arg, ptr %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %esi
; CHECK-NEXT: movl $0, (%ecx)
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp2 = load i8, ptr %arg, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 24
  %tmp5 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp6 = load i8, ptr %tmp5, align 1
  ; This store will prevent folding of the pattern
  store i32 0, ptr %arg1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 16
  %tmp9 = or i32 %tmp8, %tmp4
  %tmp10 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp11 = load i8, ptr %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp9, %tmp13
  %tmp15 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp16 = load i8, ptr %tmp15, align 1
  %tmp17 = zext i8 %tmp16 to i32
  %tmp18 = or i32 %tmp14, %tmp17
  ret i32 %tmp18
}

; One of the loads is from an unrelated location
; ptr p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(ptr %arg, ptr %arg1) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rsi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
  %tmp3 = load i8, ptr %arg, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  ; Load from an unrelated address
  %tmp6 = getelementptr inbounds i8, ptr %arg1, i32 1
  %tmp7 = load i8, ptr %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = shl nuw nsw i32 %tmp8, 16
  %tmp10 = or i32 %tmp9, %tmp5
  %tmp11 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp12 = load i8, ptr %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i32
  %tmp14 = shl nuw nsw i32 %tmp13, 8
  %tmp15 = or i32 %tmp10, %tmp14
  %tmp16 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp17 = load i8, ptr %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = or i32 %tmp15, %tmp18
  ret i32 %tmp19
}

; Little-endian pattern that starts at a non-zero constant offset.
; ptr p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl 1(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl 1(%rdi), %eax
; CHECK64-NEXT: retq
  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 4
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Little-endian pattern addressed with negative constant offsets.
; ptr p;
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(ptr %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl -4(%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl -4(%rdi), %eax
; CHECK64-NEXT: retq
  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -4
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -3
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -1
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Big-endian pattern that starts at a non-zero constant offset.
; ptr p;
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(ptr %arg) {
; BSWAP-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl 1(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel 1(%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movl 1(%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbel 1(%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 4
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 3
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 2
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Big-endian pattern addressed with negative constant offsets.
; ptr p;
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(ptr %arg) {
; BSWAP-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl -4(%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel -4(%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_neg_offset_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movl -4(%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_neg_offset_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbel -4(%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 -1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 -2
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, ptr %arg, i32 -3
  %tmp10 = load i8, ptr %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, ptr %arg, i32 -4
  %tmp15 = load i8, ptr %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; Big-endian pattern addressed as base + variable index + constant.
; ptr p; i32 i;
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
define i32 @load_i32_by_i8_bswap_base_index_offset(ptr %arg, i32 %arg1) {
; BSWAP-LABEL: load_i32_by_i8_bswap_base_index_offset:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %ecx
; BSWAP-NEXT: movl (%ecx,%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_i8_bswap_base_index_offset:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; MOVBE-NEXT: movbel (%ecx,%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movslq %esi, %rax
; BSWAP64-NEXT: movl (%rdi,%rax), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movslq %esi, %rax
; MOVBE64-NEXT: movbel (%rdi,%rax), %eax
; MOVBE64-NEXT: retq
  %tmp2 = getelementptr inbounds i8, ptr %arg, i32 %arg1
  %tmp3 = load i8, ptr %tmp2, align 1
  %tmp4 = zext i8 %tmp3 to i32
  %tmp5 = shl nuw nsw i32 %tmp4, 24
  %tmp6 = add nuw nsw i32 %arg1, 1
  %tmp7 = getelementptr inbounds i8, ptr %arg, i32 %tmp6
  %tmp8 = load i8, ptr %tmp7, align 1
  %tmp9 = zext i8 %tmp8 to i32
  %tmp10 = shl nuw nsw i32 %tmp9, 16
  %tmp11 = or i32 %tmp10, %tmp5
  %tmp12 = add nuw nsw i32 %arg1, 2
  %tmp13 = getelementptr inbounds i8, ptr %arg, i32 %tmp12
  %tmp14 = load i8, ptr %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i32
  %tmp16 = shl nuw nsw i32 %tmp15, 8
  %tmp17 = or i32 %tmp11, %tmp16
  %tmp18 = add nuw nsw i32 %arg1, 3
  %tmp19 = getelementptr inbounds i8, ptr %arg, i32 %tmp18
  %tmp20 = load i8, ptr %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i32
  %tmp22 = or i32 %tmp17, %tmp21
  ret i32 %tmp22
}

; Verify that we don't crash handling shl i32 %conv57, 32
; (both shift amounts below are >= the i32 bit width on purpose).
define void @shift_i32_by_32(ptr %src1, ptr %src2, ptr %dst) {
; CHECK-LABEL: shift_i32_by_32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $-1, 4(%eax)
; CHECK-NEXT: movl $-1, (%eax)
; CHECK-NEXT: retl
;
; CHECK64-LABEL: shift_i32_by_32:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: movq $-1, (%rdx)
; CHECK64-NEXT: retq
entry:
  %load1 = load i8, ptr %src1, align 1
  %conv46 = zext i8 %load1 to i32
  %shl47 = shl i32 %conv46, 56
  %or55 = or i32 %shl47, 0
  %load2 = load i8, ptr %src2, align 1
  %conv57 = zext i8 %load2 to i32
  %shl58 = shl i32 %conv57, 32
  %or59 = or i32 %or55, %shl58
  %or74 = or i32 %or59, 0
  %conv75 = sext i32 %or74 to i64
  store i64 %conv75, ptr %dst, align 8
  ret void
}

declare i16 @llvm.bswap.i16(i16)

; Two i16 loads, each byte-swapped, with the first one placed in the high half.
; Here p is indexed in i16 units.
; ptr p;
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
define i32 @load_i32_by_bswap_i16(ptr %arg) {
; BSWAP-LABEL: load_i32_by_bswap_i16:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movl (%eax), %eax
; BSWAP-NEXT: bswapl %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: load_i32_by_bswap_i16:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbel (%eax), %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: load_i32_by_bswap_i16:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movl (%rdi), %eax
; BSWAP64-NEXT: bswapl %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: load_i32_by_bswap_i16:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbel (%rdi), %eax
; MOVBE64-NEXT: retq
  %tmp1 = load i16, ptr %arg, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
  %tmp4 = load i16, ptr %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; The high half is sign-extended (not zero-extended) before being shifted into
; place. Here p is indexed in i16 units.
; ptr p;
; (i32) p[0] | ((sext p[1] to i32) << 16)
define i32 @load_i32_by_sext_i16(ptr %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_sext_i16:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp1 = load i16, ptr %arg, align 1
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, ptr %arg, i32 1
  %tmp4 = load i16, ptr %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; Little-endian pattern addressed as (base + constant) + zero-extended index.
; ptr arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp5
  %tmp7 = load i8, ptr %tmp6, align 1
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp9
  %tmp11 = load i8, ptr %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp15
  %tmp17 = load i8, ptr %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp21
  %tmp23 = load i8, ptr %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; Same addressing shape as the previous test, but the lowest byte is at index
; i + 1, so every access carries an added constant.
; ptr arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(ptr %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 13(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 13(%rax,%rdi), %eax
; CHECK64-NEXT: retq
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp27
  %tmp29 = load i8, ptr %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp31
  %tmp33 = load i8, ptr %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp37
  %tmp39 = load i8, ptr %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, ptr %tmp4, i64 %tmp43
  %tmp45 = load i8, ptr %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; ptr arg; i32 i;
;
; p0 = arg;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
;
; This test exercises zero and any extend loads as a part of load combine pattern.
; In order to fold the pattern above we need to reassociate the address computation
; first. By the time the address computation is reassociated loads are combined
; to zext and aext loads.
; In the IR below, byte 0 is addressed as (arg + 12) + i, while bytes 1-3 are
; addressed as (arg + (i + k)) + 12, so the constant 12 is applied in a
; different position. The expected assembly is nevertheless a single 32-bit
; load from 12(base,index) on both i686 and x86_64.
define i32 @load_i32_by_i8_zaext_loads(ptr %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zaext_loads:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zaext_loads:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  ; Indices i + 3, i + 2 and i + 1 are computed in i32 and zero-extended to i64.
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  ; %tmp24 = arg + i + 3, %tmp30 = arg + i + 2, %tmp31 = arg + i + 1
  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, ptr %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, ptr %arg, i64 %tmp6
  ; byte 0 (bits 0-7): (arg + 12) + i
  %tmp32 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, ptr %tmp32, i64 %tmp33
  %tmp35 = load i8, ptr %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  ; byte 1 (bits 8-15): (arg + i + 1) + 12
  %tmp37 = getelementptr inbounds i8, ptr %tmp31, i64 12
  %tmp38 = load i8, ptr %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  ; byte 2 (bits 16-23): (arg + i + 2) + 12
  %tmp42 = getelementptr inbounds i8, ptr %tmp30, i64 12
  %tmp43 = load i8, ptr %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  ; byte 3 (bits 24-31): (arg + i + 3) + 12
  %tmp47 = getelementptr inbounds i8, ptr %tmp24, i64 12
  %tmp48 = load i8, ptr %tmp47, align 1
  %tmp49 = zext i8 %tmp48 to i32
  %tmp50 = shl nuw i32 %tmp49, 24
  %tmp51 = or i32 %tmp46, %tmp50
  ret i32 %tmp51
}

; The same as load_i32_by_i8_zaext_loads but the last load is combined to
; a sext load.
;
; ptr arg; i32 i;
;
; p0 = arg + i;
; p1 = arg + i + 1;
; p2 = arg + i + 2;
; p3 = arg + i + 3;
;
; (i32) p0[12] | ((i32) p1[12] << 8) | ((i32) p2[12] << 16) | ((i32) p3[12] << 24)
;
; The expected assembly is a single 32-bit load from 12(base,index) on both
; i686 and x86_64.
define i32 @load_i32_by_i8_zsext_loads(ptr %arg, i32 %arg1) {
; CHECK-LABEL: load_i32_by_i8_zsext_loads:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl 12(%eax,%ecx), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_zsext_loads:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl %esi, %eax
; CHECK64-NEXT: movl 12(%rdi,%rax), %eax
; CHECK64-NEXT: retq
  ; Indices i + 3, i + 2 and i + 1 are computed in i32 and zero-extended to i64.
  %tmp = add nuw nsw i32 %arg1, 3
  %tmp2 = add nuw nsw i32 %arg1, 2
  %tmp3 = add nuw nsw i32 %arg1, 1
  %tmp4 = zext i32 %tmp to i64
  %tmp5 = zext i32 %tmp2 to i64
  %tmp6 = zext i32 %tmp3 to i64
  ; %tmp24 = arg + i + 3, %tmp30 = arg + i + 2, %tmp31 = arg + i + 1
  %tmp24 = getelementptr inbounds i8, ptr %arg, i64 %tmp4
  %tmp30 = getelementptr inbounds i8, ptr %arg, i64 %tmp5
  %tmp31 = getelementptr inbounds i8, ptr %arg, i64 %tmp6
  ; byte 0 (bits 0-7): (arg + 12) + i
  %tmp32 = getelementptr inbounds i8, ptr %arg, i64 12
  %tmp33 = zext i32 %arg1 to i64
  %tmp34 = getelementptr inbounds i8, ptr %tmp32, i64 %tmp33
  %tmp35 = load i8, ptr %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i32
  ; byte 1 (bits 8-15): (arg + i + 1) + 12
  %tmp37 = getelementptr inbounds i8, ptr %tmp31, i64 12
  %tmp38 = load i8, ptr %tmp37, align 1
  %tmp39 = zext i8 %tmp38 to i32
  %tmp40 = shl nuw nsw i32 %tmp39, 8
  %tmp41 = or i32 %tmp40, %tmp36
  ; byte 2 (bits 16-23): (arg + i + 2) + 12
  %tmp42 = getelementptr inbounds i8, ptr %tmp30, i64 12
  %tmp43 = load i8, ptr %tmp42, align 1
  %tmp44 = zext i8 %tmp43 to i32
  %tmp45 = shl nuw nsw i32 %tmp44, 16
  %tmp46 = or i32 %tmp41, %tmp45
  ; byte 3 (bits 24-31): (arg + i + 3) + 12. Unlike the other bytes, this one
  ; is sign-extended to i16 first and only then zero-extended to i32.
  %tmp47 = getelementptr inbounds i8, ptr %tmp24, i64 12
  %tmp48 = load i8, ptr %tmp47, align 1
  %tmp49 = sext i8 %tmp48 to i16
  %tmp50 = zext i16 %tmp49 to i32
  ; NOTE(review): for a negative byte %tmp50 has bits 8-15 set, so this
  ; 'shl nuw' shifts out non-zero bits and yields poison per the LangRef.
  ; Confirm whether the nuw flag is intended here.
  %tmp51 = shl nuw i32 %tmp50, 24
  %tmp52 = or i32 %tmp46, %tmp51
  ret i32 %tmp52
}

; ptr p;
; (i32) p[0] | ((i32) p[1] << 8)
;
; Only the two low bytes of the i32 result are assembled. The expected assembly
; is a single zero-extending 16-bit load (movzwl).
define i32 @zext_load_i32_by_i8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzwl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzwl (%rdi), %eax
; CHECK64-NEXT: retq
  %tmp2 = load i8, ptr %arg, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; ptr p;
; ((i32) p[0] << 8) | ((i32) p[1] << 16)
;
; The same two bytes as zext_load_i32_by_i8, placed in bits 8-23. The expected
; assembly keeps two separate byte loads, two shifts and an or; no combined
; load is expected.
define i32 @zext_load_i32_by_i8_shl_8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl (%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $16, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp2 = load i8, ptr %arg, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; ptr p;
; ((i32) p[0] << 16) | ((i32) p[1] << 24)
;
; The same two bytes as zext_load_i32_by_i8, placed in bits 16-31. The expected
; assembly keeps two separate byte loads, two shifts and an or; no combined
; load is expected.
define i32 @zext_load_i32_by_i8_shl_16(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl (%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: movzbl 1(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp2 = load i8, ptr %arg, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp5 = load i8, ptr %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; ptr p;
; (i32) p[1] | ((i32) p[0] << 8)
;
; Two bytes assembled in big-endian order into the low half of the i32. The
; expected assembly is a 16-bit load followed by a byte swap (rolw $8) and a
; zero-extension; with +movbe the load and the swap become one movbew.
define i32 @zext_load_i32_by_i8_bswap(ptr %arg) {
; BSWAP-LABEL: zext_load_i32_by_i8_bswap:
; BSWAP: # %bb.0:
; BSWAP-NEXT: movl {{[0-9]+}}(%esp), %eax
; BSWAP-NEXT: movzwl (%eax), %eax
; BSWAP-NEXT: rolw $8, %ax
; BSWAP-NEXT: movzwl %ax, %eax
; BSWAP-NEXT: retl
;
; MOVBE-LABEL: zext_load_i32_by_i8_bswap:
; MOVBE: # %bb.0:
; MOVBE-NEXT: movl {{[0-9]+}}(%esp), %eax
; MOVBE-NEXT: movbew (%eax), %ax
; MOVBE-NEXT: movzwl %ax, %eax
; MOVBE-NEXT: retl
;
; BSWAP64-LABEL: zext_load_i32_by_i8_bswap:
; BSWAP64: # %bb.0:
; BSWAP64-NEXT: movzwl (%rdi), %eax
; BSWAP64-NEXT: rolw $8, %ax
; BSWAP64-NEXT: movzwl %ax, %eax
; BSWAP64-NEXT: retq
;
; MOVBE64-LABEL: zext_load_i32_by_i8_bswap:
; MOVBE64: # %bb.0:
; MOVBE64-NEXT: movbew (%rdi), %ax
; MOVBE64-NEXT: movzwl %ax, %eax
; MOVBE64-NEXT: retq
  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp5 = load i8, ptr %arg, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; ptr p;
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
;
; The byte-swapped pair placed in bits 8-23. The expected assembly keeps two
; separate byte loads, two shifts and an or; no combined load is expected.
define i32 @zext_load_i32_by_i8_bswap_shl_8(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $16, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp5 = load i8, ptr %arg, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; ptr p;
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
;
; The byte-swapped pair placed in bits 16-31. The expected assembly keeps two
; separate byte loads, two shifts and an or; no combined load is expected.
define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl 1(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: movzbl (%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
  %tmp1 = getelementptr inbounds i8, ptr %arg, i32 1
  %tmp2 = load i8, ptr %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp5 = load i8, ptr %arg, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; Regression test; the function name refers to PR80911.
; A <4 x i8> vector load has several uses: it is stored back to %ptr after an
; intervening store to %clobber, and its elements 0 and 1 are extracted and
; combined into the low 16 bits of the result. In the expected assembly both
; the 32-bit load and the 16-bit zero-extending load of (%ptr) are issued
; before the store to %clobber.
; NOTE(review): presumably this ordering is the point of the test, since
; %clobber may alias %ptr - confirm against the PR.
define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind {
; CHECK-LABEL: pr80911_vector_load_multiuse:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl (%edx), %esi
; CHECK-NEXT: movzwl (%edx), %eax
; CHECK-NEXT: movl $0, (%ecx)
; CHECK-NEXT: movl %esi, (%edx)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: pr80911_vector_load_multiuse:
; CHECK64: # %bb.0:
; CHECK64-NEXT: movl (%rdi), %ecx
; CHECK64-NEXT: movzwl (%rdi), %eax
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: movl %ecx, (%rdi)
; CHECK64-NEXT: retq
  %load = load <4 x i8>, ptr %ptr, align 16
  store i32 0, ptr %clobber
  store <4 x i8> %load, ptr %ptr, align 16
  %e1 = extractelement <4 x i8> %load, i64 1
  %e1.ext = zext i8 %e1 to i32
  %e1.ext.shift = shl nuw nsw i32 %e1.ext, 8
  %e0 = extractelement <4 x i8> %load, i64 0
  %e0.ext = zext i8 %e0 to i32
  %res = or i32 %e1.ext.shift, %e0.ext
  ret i32 %res
}