xref: /llvm-project/llvm/test/CodeGen/X86/optimize-max-0.ll (revision e6bf48d11047e970cb24554a01b65b566d6b5d22)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s | FileCheck %s
3
; Loop Strength Reduction (LSR) should be able to eliminate the smax/umax
; computations by making the loops use slt/ult comparisons instead of ne
; comparisons.
6
; 32-bit x86 Darwin target with 32-bit pointers (p:32:32:32). The llc
; invocation at the top of this file passes no triple of its own, so the
; expected assembly below is tied to the triple declared here.
7target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
8target triple = "i386-apple-darwin9"
9
; foo: signed variant of the test.
;   %r     - byte source (only loaded from / used as memcpy source)
;   %j     - byte destination (stored to; memcpy and memset destination)
;   %s     - multiplied with the row index to form source offsets
;   %w, %x - inner / outer loop bounds; (%x * %w) / 2 is the memset length
;   %d     - mode: 1 branches straight to %bb29, 3 reaches %bb22 through the
;            switch in %bb20, any other value returns after the byte loops
; The inner trip counts are written as smax(1, n) (icmp sgt + select) and the
; loops exit on `icmp ne` against that max.  The guards in front of each loop
; already ensure n >= 1, so the expected assembly below contains no max
; computation and the byte-copy loop back-edges use signed compares (jl).
10define void @foo(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind {
11; CHECK-LABEL: foo:
12; CHECK:       ## %bb.0: ## %entry
13; CHECK-NEXT:    pushl %ebp
14; CHECK-NEXT:    pushl %ebx
15; CHECK-NEXT:    pushl %edi
16; CHECK-NEXT:    pushl %esi
17; CHECK-NEXT:    subl $28, %esp
18; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
19; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
20; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
21; CHECK-NEXT:    movl %edi, %ecx
22; CHECK-NEXT:    imull %ebp, %ecx
23; CHECK-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
24; CHECK-NEXT:    movl %ecx, (%esp) ## 4-byte Spill
25; CHECK-NEXT:    je LBB0_19
26; CHECK-NEXT:  ## %bb.1: ## %bb10.preheader
27; CHECK-NEXT:    movl %ecx, %eax
28; CHECK-NEXT:    sarl $31, %eax
29; CHECK-NEXT:    shrl $30, %eax
30; CHECK-NEXT:    addl %ecx, %eax
31; CHECK-NEXT:    sarl $2, %eax
32; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
33; CHECK-NEXT:    testl %edi, %edi
34; CHECK-NEXT:    jle LBB0_12
35; CHECK-NEXT:  ## %bb.2: ## %bb.nph9
36; CHECK-NEXT:    testl %ebp, %ebp
37; CHECK-NEXT:    jle LBB0_12
38; CHECK-NEXT:  ## %bb.3: ## %bb.nph9.split
39; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
40; CHECK-NEXT:    incl %eax
41; CHECK-NEXT:    xorl %ecx, %ecx
42; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
43; CHECK-NEXT:    xorl %esi, %esi
44; CHECK-NEXT:    .p2align 4
45; CHECK-NEXT:  LBB0_4: ## %bb6
46; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
47; CHECK-NEXT:    movzbl (%eax,%esi,2), %ebx
48; CHECK-NEXT:    movb %bl, (%edx,%esi)
49; CHECK-NEXT:    incl %esi
50; CHECK-NEXT:    cmpl %ebp, %esi
51; CHECK-NEXT:    jl LBB0_4
52; CHECK-NEXT:  ## %bb.5: ## %bb9
53; CHECK-NEXT:    ## in Loop: Header=BB0_4 Depth=1
54; CHECK-NEXT:    incl %ecx
55; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %eax
56; CHECK-NEXT:    addl %ebp, %edx
57; CHECK-NEXT:    cmpl %edi, %ecx
58; CHECK-NEXT:    je LBB0_12
59; CHECK-NEXT:  ## %bb.6: ## %bb7.preheader
60; CHECK-NEXT:    ## in Loop: Header=BB0_4 Depth=1
61; CHECK-NEXT:    xorl %esi, %esi
62; CHECK-NEXT:    jmp LBB0_4
63; CHECK-NEXT:  LBB0_12: ## %bb18.loopexit
64; CHECK-NEXT:    movl (%esp), %eax ## 4-byte Reload
65; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
66; CHECK-NEXT:    addl %ecx, %eax
67; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
68; CHECK-NEXT:    cmpl $1, %edi
69; CHECK-NEXT:    jle LBB0_13
70; CHECK-NEXT:  ## %bb.7: ## %bb.nph5
71; CHECK-NEXT:    cmpl $2, %ebp
72; CHECK-NEXT:    jl LBB0_13
73; CHECK-NEXT:  ## %bb.8: ## %bb.nph5.split
74; CHECK-NEXT:    movl %ebp, %edx
75; CHECK-NEXT:    shrl $31, %edx
76; CHECK-NEXT:    addl %ebp, %edx
77; CHECK-NEXT:    sarl %edx
78; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
79; CHECK-NEXT:    movl %eax, %ecx
80; CHECK-NEXT:    shrl $31, %ecx
81; CHECK-NEXT:    addl %eax, %ecx
82; CHECK-NEXT:    sarl %ecx
83; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
84; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
85; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
86; CHECK-NEXT:    addl %ecx, %eax
87; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
88; CHECK-NEXT:    addl $2, %esi
89; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
90; CHECK-NEXT:    movl (%esp), %esi ## 4-byte Reload
91; CHECK-NEXT:    addl %esi, %ecx
92; CHECK-NEXT:    xorl %esi, %esi
93; CHECK-NEXT:    xorl %edi, %edi
94; CHECK-NEXT:    .p2align 4
95; CHECK-NEXT:  LBB0_9: ## %bb13
96; CHECK-NEXT:    ## =>This Loop Header: Depth=1
97; CHECK-NEXT:    ## Child Loop BB0_10 Depth 2
98; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
99; CHECK-NEXT:    andl $1, %edi
100; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
101; CHECK-NEXT:    addl %esi, %edi
102; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %edi
103; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
104; CHECK-NEXT:    xorl %esi, %esi
105; CHECK-NEXT:    .p2align 4
106; CHECK-NEXT:  LBB0_10: ## %bb14
107; CHECK-NEXT:    ## Parent Loop BB0_9 Depth=1
108; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
109; CHECK-NEXT:    movzbl -2(%edi,%esi,4), %ebx
110; CHECK-NEXT:    movb %bl, (%ecx,%esi)
111; CHECK-NEXT:    movzbl (%edi,%esi,4), %ebx
112; CHECK-NEXT:    movb %bl, (%eax,%esi)
113; CHECK-NEXT:    incl %esi
114; CHECK-NEXT:    cmpl %edx, %esi
115; CHECK-NEXT:    jl LBB0_10
116; CHECK-NEXT:  ## %bb.11: ## %bb17
117; CHECK-NEXT:    ## in Loop: Header=BB0_9 Depth=1
118; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload
119; CHECK-NEXT:    incl %edi
120; CHECK-NEXT:    addl %edx, %eax
121; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload
122; CHECK-NEXT:    addl $2, %esi
123; CHECK-NEXT:    addl %edx, %ecx
124; CHECK-NEXT:    cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
125; CHECK-NEXT:    jl LBB0_9
126; CHECK-NEXT:  LBB0_13: ## %bb20
127; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
128; CHECK-NEXT:    cmpl $1, %eax
129; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
130; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
131; CHECK-NEXT:    je LBB0_19
132; CHECK-NEXT:  ## %bb.14: ## %bb20
133; CHECK-NEXT:    cmpl $3, %eax
134; CHECK-NEXT:    jne LBB0_24
135; CHECK-NEXT:  ## %bb.15: ## %bb22
136; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
137; CHECK-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
138; CHECK-NEXT:    testl %edi, %edi
139; CHECK-NEXT:    jle LBB0_18
140; CHECK-NEXT:  ## %bb.16: ## %bb.nph
141; CHECK-NEXT:    leal 15(%edi), %eax
142; CHECK-NEXT:    andl $-16, %eax
143; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %eax
144; CHECK-NEXT:    addl %ebx, %ebx
145; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
146; CHECK-NEXT:    movl (%esp), %esi ## 4-byte Reload
147; CHECK-NEXT:    addl %esi, %ecx
148; CHECK-NEXT:    addl %ecx, %ebx
149; CHECK-NEXT:    addl %eax, %edx
150; CHECK-NEXT:    leal 15(%ebp), %eax
151; CHECK-NEXT:    andl $-16, %eax
152; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
153; CHECK-NEXT:    .p2align 4
154; CHECK-NEXT:  LBB0_17: ## %bb23
155; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
156; CHECK-NEXT:    subl $4, %esp
157; CHECK-NEXT:    pushl %ebp
158; CHECK-NEXT:    pushl %edx
159; CHECK-NEXT:    pushl %ebx
160; CHECK-NEXT:    movl %ebx, %esi
161; CHECK-NEXT:    movl %edx, %ebx
162; CHECK-NEXT:    calll _memcpy
163; CHECK-NEXT:    movl %ebx, %edx
164; CHECK-NEXT:    movl %esi, %ebx
165; CHECK-NEXT:    addl $16, %esp
166; CHECK-NEXT:    addl %ebp, %ebx
167; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
168; CHECK-NEXT:    decl %edi
169; CHECK-NEXT:    jne LBB0_17
170; CHECK-NEXT:  LBB0_18: ## %bb26
171; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
172; CHECK-NEXT:    movl (%esp), %edx ## 4-byte Reload
173; CHECK-NEXT:    addl %edx, %eax
174; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
175; CHECK-NEXT:    addl %eax, %ecx
176; CHECK-NEXT:    jmp LBB0_23
177; CHECK-NEXT:  LBB0_19: ## %bb29
178; CHECK-NEXT:    testl %edi, %edi
179; CHECK-NEXT:    jle LBB0_22
180; CHECK-NEXT:  ## %bb.20: ## %bb.nph11
181; CHECK-NEXT:    movl %edi, %esi
182; CHECK-NEXT:    leal 15(%ebp), %eax
183; CHECK-NEXT:    andl $-16, %eax
184; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
185; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
186; CHECK-NEXT:    .p2align 4
187; CHECK-NEXT:  LBB0_21: ## %bb30
188; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
189; CHECK-NEXT:    subl $4, %esp
190; CHECK-NEXT:    pushl %ebp
191; CHECK-NEXT:    pushl %edx
192; CHECK-NEXT:    pushl %edi
193; CHECK-NEXT:    movl %edx, %ebx
194; CHECK-NEXT:    calll _memcpy
195; CHECK-NEXT:    movl %ebx, %edx
196; CHECK-NEXT:    addl $16, %esp
197; CHECK-NEXT:    addl %ebp, %edi
198; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Folded Reload
199; CHECK-NEXT:    decl %esi
200; CHECK-NEXT:    jne LBB0_21
201; CHECK-NEXT:  LBB0_22: ## %bb33
202; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
203; CHECK-NEXT:    movl (%esp), %edx ## 4-byte Reload
204; CHECK-NEXT:    addl %edx, %ecx
205; CHECK-NEXT:  LBB0_23: ## %bb33
206; CHECK-NEXT:    movl %edx, %eax
207; CHECK-NEXT:    shrl $31, %eax
208; CHECK-NEXT:    addl %edx, %eax
209; CHECK-NEXT:    sarl %eax
210; CHECK-NEXT:    subl $4, %esp
211; CHECK-NEXT:    pushl %eax
212; CHECK-NEXT:    pushl $128
213; CHECK-NEXT:    pushl %ecx
214; CHECK-NEXT:    calll _memset
215; CHECK-NEXT:    addl $44, %esp
216; CHECK-NEXT:  LBB0_25: ## %return
217; CHECK-NEXT:    popl %esi
218; CHECK-NEXT:    popl %edi
219; CHECK-NEXT:    popl %ebx
220; CHECK-NEXT:    popl %ebp
221; CHECK-NEXT:    retl
222; CHECK-NEXT:  LBB0_24: ## %return
223; CHECK-NEXT:    addl $28, %esp
224; CHECK-NEXT:    jmp LBB0_25
225entry:
226  %0 = mul i32 %x, %w
227  %1 = mul i32 %x, %w
228  %2 = sdiv i32 %1, 4
229  %.sum2 = add i32 %2, %0
; %d == 1 skips the byte-copy loop nests and goes straight to the memcpy path.
230  %cond = icmp eq i32 %d, 1
231  br i1 %cond, label %bb29, label %bb10.preheader
232
233bb10.preheader:                                   ; preds = %entry
234  %3 = icmp sgt i32 %x, 0
235  br i1 %3, label %bb.nph9, label %bb18.loopexit
236
237bb.nph7:                                          ; preds = %bb7.preheader
238  %4 = mul i32 %y.08, %w
239  %5 = mul i32 %y.08, %s
240  %6 = add i32 %5, 1
; smax(1, %w): redundant on this path, since %bb.nph9 only enters the loop
; nest when %w > 0.
241  %tmp8 = icmp sgt i32 1, %w
242  %smax9 = select i1 %tmp8, i32 1, i32 %w
243  br label %bb6
244
245bb6:                                              ; preds = %bb7, %bb.nph7
246  %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
247  %7 = add i32 %x.06, %4
248  %8 = shl i32 %x.06, 1
249  %9 = add i32 %6, %8
250  %10 = getelementptr i8, ptr %r, i32 %9
251  %11 = load i8, ptr %10, align 1
252  %12 = getelementptr i8, ptr %j, i32 %7
253  store i8 %11, ptr %12, align 1
254  br label %bb7
255
256bb7:                                              ; preds = %bb6
257  %indvar.next7 = add i32 %x.06, 1
; `ne` exit test against the smax; the expected assembly for this loop uses a
; signed less-than back-edge instead.
258  %exitcond10 = icmp ne i32 %indvar.next7, %smax9
259  br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
260
261bb7.bb9_crit_edge:                                ; preds = %bb7
262  br label %bb9
263
264bb9:                                              ; preds = %bb7.preheader, %bb7.bb9_crit_edge
265  br label %bb10
266
267bb10:                                             ; preds = %bb9
268  %indvar.next11 = add i32 %y.08, 1
269  %exitcond12 = icmp ne i32 %indvar.next11, %x
270  br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
271
272bb10.bb18.loopexit_crit_edge:                     ; preds = %bb10
273  br label %bb10.bb18.loopexit_crit_edge.split
274
275bb10.bb18.loopexit_crit_edge.split:               ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
276  br label %bb18.loopexit
277
278bb.nph9:                                          ; preds = %bb10.preheader
279  %13 = icmp sgt i32 %w, 0
280  br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
281
282bb.nph9.split:                                    ; preds = %bb.nph9
283  br label %bb7.preheader
284
285bb7.preheader:                                    ; preds = %bb.nph9.split, %bb10
286  %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
; Constant-true condition: the edge to %bb9 is never taken from here.
287  br i1 true, label %bb.nph7, label %bb9
288
289bb.nph5:                                          ; preds = %bb18.loopexit
290  %14 = sdiv i32 %w, 2
291  %15 = icmp slt i32 %w, 2
292  %16 = sdiv i32 %x, 2
293  br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
294
295bb.nph5.split:                                    ; preds = %bb.nph5
; smax(1, %x / 2): %bb18.loopexit only reaches this path when %x >= 2.
296  %tmp2 = icmp sgt i32 1, %16
297  %smax3 = select i1 %tmp2, i32 1, i32 %16
298  br label %bb13
299
300bb13:                                             ; preds = %bb18, %bb.nph5.split
301  %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
302  %17 = mul i32 %14, %y.14
303  %18 = shl i32 %y.14, 1
304  %19 = srem i32 %y.14, 2
305  %20 = add i32 %19, %18
306  %21 = mul i32 %20, %s
307  br i1 true, label %bb.nph3, label %bb17
308
309bb.nph3:                                          ; preds = %bb13
310  %22 = add i32 %17, %0
311  %23 = add i32 %17, %.sum2
; smax(1, %w / 2): %bb.nph5 only continues to this loop when %w >= 2.
312  %24 = sdiv i32 %w, 2
313  %tmp = icmp sgt i32 1, %24
314  %smax = select i1 %tmp, i32 1, i32 %24
315  br label %bb14
316
317bb14:                                             ; preds = %bb15, %bb.nph3
318  %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
319  %25 = shl i32 %x.12, 2
320  %26 = add i32 %25, %21
321  %27 = getelementptr i8, ptr %r, i32 %26
322  %28 = load i8, ptr %27, align 1
323  %.sum = add i32 %22, %x.12
324  %29 = getelementptr i8, ptr %j, i32 %.sum
325  store i8 %28, ptr %29, align 1
326  %30 = shl i32 %x.12, 2
327  %31 = or disjoint i32 %30, 2
328  %32 = add i32 %31, %21
329  %33 = getelementptr i8, ptr %r, i32 %32
330  %34 = load i8, ptr %33, align 1
331  %.sum6 = add i32 %23, %x.12
332  %35 = getelementptr i8, ptr %j, i32 %.sum6
333  store i8 %34, ptr %35, align 1
334  br label %bb15
335
336bb15:                                             ; preds = %bb14
337  %indvar.next = add i32 %x.12, 1
338  %exitcond = icmp ne i32 %indvar.next, %smax
339  br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
340
341bb15.bb17_crit_edge:                              ; preds = %bb15
342  br label %bb17
343
344bb17:                                             ; preds = %bb15.bb17_crit_edge, %bb13
345  br label %bb18
346
347bb18.loopexit:                                    ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
348  %36 = icmp slt i32 %x, 2
349  br i1 %36, label %bb20, label %bb.nph5
350
351bb18:                                             ; preds = %bb17
352  %indvar.next1 = add i32 %y.14, 1
353  %exitcond4 = icmp ne i32 %indvar.next1, %smax3
354  br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
355
356bb18.bb20_crit_edge:                              ; preds = %bb18
357  br label %bb18.bb20_crit_edge.split
358
359bb18.bb20_crit_edge.split:                        ; preds = %bb18.bb20_crit_edge, %bb.nph5
360  br label %bb20
361
362bb20:                                             ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
; Dispatch on %d: 3 -> %bb22, 1 -> %bb29, anything else -> %return.
363  switch i32 %d, label %return [
364    i32 3, label %bb22
365    i32 1, label %bb29
366  ]
367
368bb22:                                             ; preds = %bb20
369  %37 = mul i32 %x, %w
370  %38 = sdiv i32 %37, 4
371  %.sum3 = add i32 %38, %.sum2
372  %39 = add i32 %x, 15
373  %40 = and i32 %39, -16
374  %41 = add i32 %w, 15
375  %42 = and i32 %41, -16
376  %43 = mul i32 %40, %s
377  %44 = icmp sgt i32 %x, 0
378  br i1 %44, label %bb.nph, label %bb26
379
380bb.nph:                                           ; preds = %bb22
381  br label %bb23
382
383bb23:                                             ; preds = %bb24, %bb.nph
384  %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
385  %45 = mul i32 %y.21, %42
386  %.sum1 = add i32 %45, %43
387  %46 = getelementptr i8, ptr %r, i32 %.sum1
388  %47 = mul i32 %y.21, %w
389  %.sum5 = add i32 %47, %.sum3
390  %48 = getelementptr i8, ptr %j, i32 %.sum5
; Copy %w bytes per iteration from %r (offset %.sum1) to %j (offset %.sum5).
391  tail call void @llvm.memcpy.p0.p0.i32(ptr %48, ptr %46, i32 %w, i1 false)
392  br label %bb24
393
394bb24:                                             ; preds = %bb23
395  %indvar.next5 = add i32 %y.21, 1
396  %exitcond6 = icmp ne i32 %indvar.next5, %x
397  br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
398
399bb24.bb26_crit_edge:                              ; preds = %bb24
400  br label %bb26
401
402bb26:                                             ; preds = %bb24.bb26_crit_edge, %bb22
403  %49 = mul i32 %x, %w
404  %.sum4 = add i32 %.sum3, %49
405  %50 = getelementptr i8, ptr %j, i32 %.sum4
406  %51 = mul i32 %x, %w
407  %52 = sdiv i32 %51, 2
; Fill (%x * %w) / 2 bytes at %j + %.sum4 with the byte value -128.
408  tail call void @llvm.memset.p0.i32(ptr %50, i8 -128, i32 %52, i1 false)
409  ret void
410
411bb29:                                             ; preds = %bb20, %entry
412  %53 = add i32 %w, 15
413  %54 = and i32 %53, -16
414  %55 = icmp sgt i32 %x, 0
415  br i1 %55, label %bb.nph11, label %bb33
416
417bb.nph11:                                         ; preds = %bb29
418  br label %bb30
419
420bb30:                                             ; preds = %bb31, %bb.nph11
421  %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
422  %56 = mul i32 %y.310, %54
423  %57 = getelementptr i8, ptr %r, i32 %56
424  %58 = mul i32 %y.310, %w
425  %59 = getelementptr i8, ptr %j, i32 %58
; Copy %w bytes per iteration; the source advances by %w rounded up to a
; multiple of 16 (%54), the destination by %w.
426  tail call void @llvm.memcpy.p0.p0.i32(ptr %59, ptr %57, i32 %w, i1 false)
427  br label %bb31
428
429bb31:                                             ; preds = %bb30
430  %indvar.next13 = add i32 %y.310, 1
431  %exitcond14 = icmp ne i32 %indvar.next13, %x
432  br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
433
434bb31.bb33_crit_edge:                              ; preds = %bb31
435  br label %bb33
436
437bb33:                                             ; preds = %bb31.bb33_crit_edge, %bb29
438  %60 = mul i32 %x, %w
439  %61 = getelementptr i8, ptr %j, i32 %60
440  %62 = mul i32 %x, %w
441  %63 = sdiv i32 %62, 2
; Fill (%x * %w) / 2 bytes at %j + %x * %w with the byte value -128.
442  tail call void @llvm.memset.p0.i32(ptr %61, i8 -128, i32 %63, i1 false)
443  ret void
444
445return:                                           ; preds = %bb20
446  ret void
447}
448
; bar: unsigned variant of the test, with the same block structure and
; parameter roles as @foo (%r source, %j destination, %s source row factor,
; %w / %x loop bounds, %d mode selector).
; Divisions, remainders and comparisons are unsigned here (udiv, urem,
; icmp ugt/ult).  The values named %smax* are therefore unsigned maxima
; (icmp ugt + select) despite their names.  The guards in front of each loop
; already ensure the bound is >= 1, so the expected assembly below contains
; no max computation and the byte-copy loop back-edges use unsigned compares
; (jb).
449define void @bar(ptr %r, i32 %s, i32 %w, i32 %x, ptr %j, i32 %d) nounwind {
450; CHECK-LABEL: bar:
451; CHECK:       ## %bb.0: ## %entry
452; CHECK-NEXT:    pushl %ebp
453; CHECK-NEXT:    pushl %ebx
454; CHECK-NEXT:    pushl %edi
455; CHECK-NEXT:    pushl %esi
456; CHECK-NEXT:    subl $28, %esp
457; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
458; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
459; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
460; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
461; CHECK-NEXT:    movl %ebp, %edx
462; CHECK-NEXT:    imull %eax, %edx
463; CHECK-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
464; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
465; CHECK-NEXT:    je LBB1_19
466; CHECK-NEXT:  ## %bb.1: ## %bb10.preheader
467; CHECK-NEXT:    movl %edx, %ecx
468; CHECK-NEXT:    shrl $2, %ecx
469; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
470; CHECK-NEXT:    testl %ebp, %ebp
471; CHECK-NEXT:    movl %eax, %edi
472; CHECK-NEXT:    je LBB1_12
473; CHECK-NEXT:  ## %bb.2: ## %bb.nph9
474; CHECK-NEXT:    testl %eax, %eax
475; CHECK-NEXT:    je LBB1_12
476; CHECK-NEXT:  ## %bb.3: ## %bb.nph9.split
477; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
478; CHECK-NEXT:    incl %eax
479; CHECK-NEXT:    xorl %ecx, %ecx
480; CHECK-NEXT:    movl %esi, %edx
481; CHECK-NEXT:    xorl %esi, %esi
482; CHECK-NEXT:    .p2align 4
483; CHECK-NEXT:  LBB1_4: ## %bb6
484; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
485; CHECK-NEXT:    movzbl (%eax,%esi,2), %ebx
486; CHECK-NEXT:    movb %bl, (%edx,%esi)
487; CHECK-NEXT:    incl %esi
488; CHECK-NEXT:    cmpl %edi, %esi
489; CHECK-NEXT:    jb LBB1_4
490; CHECK-NEXT:  ## %bb.5: ## %bb9
491; CHECK-NEXT:    ## in Loop: Header=BB1_4 Depth=1
492; CHECK-NEXT:    incl %ecx
493; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %eax
494; CHECK-NEXT:    addl %edi, %edx
495; CHECK-NEXT:    cmpl %ebp, %ecx
496; CHECK-NEXT:    je LBB1_12
497; CHECK-NEXT:  ## %bb.6: ## %bb7.preheader
498; CHECK-NEXT:    ## in Loop: Header=BB1_4 Depth=1
499; CHECK-NEXT:    xorl %esi, %esi
500; CHECK-NEXT:    jmp LBB1_4
501; CHECK-NEXT:  LBB1_12: ## %bb18.loopexit
502; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
503; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
504; CHECK-NEXT:    addl %ecx, %eax
505; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
506; CHECK-NEXT:    cmpl $1, %ebp
507; CHECK-NEXT:    jbe LBB1_13
508; CHECK-NEXT:  ## %bb.7: ## %bb.nph5
509; CHECK-NEXT:    cmpl $2, %edi
510; CHECK-NEXT:    jb LBB1_13
511; CHECK-NEXT:  ## %bb.8: ## %bb.nph5.split
512; CHECK-NEXT:    movl %edi, %ebp
513; CHECK-NEXT:    shrl %ebp
514; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
515; CHECK-NEXT:    shrl %eax
516; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
517; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
518; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
519; CHECK-NEXT:    addl %eax, %ecx
520; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
521; CHECK-NEXT:    addl $2, %edx
522; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
523; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
524; CHECK-NEXT:    addl %edx, %eax
525; CHECK-NEXT:    xorl %edx, %edx
526; CHECK-NEXT:    xorl %ebx, %ebx
527; CHECK-NEXT:    .p2align 4
528; CHECK-NEXT:  LBB1_9: ## %bb13
529; CHECK-NEXT:    ## =>This Loop Header: Depth=1
530; CHECK-NEXT:    ## Child Loop BB1_10 Depth 2
531; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
532; CHECK-NEXT:    andl $1, %ebx
533; CHECK-NEXT:    movl %edx, (%esp) ## 4-byte Spill
534; CHECK-NEXT:    addl %edx, %ebx
535; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %ebx
536; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
537; CHECK-NEXT:    xorl %esi, %esi
538; CHECK-NEXT:    .p2align 4
539; CHECK-NEXT:  LBB1_10: ## %bb14
540; CHECK-NEXT:    ## Parent Loop BB1_9 Depth=1
541; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
542; CHECK-NEXT:    movzbl -2(%ebx,%esi,4), %edx
543; CHECK-NEXT:    movb %dl, (%eax,%esi)
544; CHECK-NEXT:    movzbl (%ebx,%esi,4), %edx
545; CHECK-NEXT:    movb %dl, (%ecx,%esi)
546; CHECK-NEXT:    incl %esi
547; CHECK-NEXT:    cmpl %ebp, %esi
548; CHECK-NEXT:    jb LBB1_10
549; CHECK-NEXT:  ## %bb.11: ## %bb17
550; CHECK-NEXT:    ## in Loop: Header=BB1_9 Depth=1
551; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
552; CHECK-NEXT:    incl %ebx
553; CHECK-NEXT:    addl %ebp, %ecx
554; CHECK-NEXT:    movl (%esp), %edx ## 4-byte Reload
555; CHECK-NEXT:    addl $2, %edx
556; CHECK-NEXT:    addl %ebp, %eax
557; CHECK-NEXT:    cmpl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
558; CHECK-NEXT:    jb LBB1_9
559; CHECK-NEXT:  LBB1_13: ## %bb20
560; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
561; CHECK-NEXT:    cmpl $1, %esi
562; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
563; CHECK-NEXT:    movl %edi, %eax
564; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
565; CHECK-NEXT:    je LBB1_19
566; CHECK-NEXT:  ## %bb.14: ## %bb20
567; CHECK-NEXT:    cmpl $3, %esi
568; CHECK-NEXT:    jne LBB1_24
569; CHECK-NEXT:  ## %bb.15: ## %bb22
570; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
571; CHECK-NEXT:    addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
572; CHECK-NEXT:    testl %ebp, %ebp
573; CHECK-NEXT:    je LBB1_18
574; CHECK-NEXT:  ## %bb.16: ## %bb.nph
575; CHECK-NEXT:    movl %ebp, %esi
576; CHECK-NEXT:    leal 15(%ebp), %eax
577; CHECK-NEXT:    andl $-16, %eax
578; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %eax
579; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
580; CHECK-NEXT:    addl $15, %edx
581; CHECK-NEXT:    andl $-16, %edx
582; CHECK-NEXT:    movl %edx, (%esp) ## 4-byte Spill
583; CHECK-NEXT:    addl %eax, %ecx
584; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
585; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
586; CHECK-NEXT:    leal (%edx,%eax), %ebp
587; CHECK-NEXT:    .p2align 4
588; CHECK-NEXT:  LBB1_17: ## %bb23
589; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
590; CHECK-NEXT:    subl $4, %esp
591; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
592; CHECK-NEXT:    pushl %ebx
593; CHECK-NEXT:    pushl %ecx
594; CHECK-NEXT:    pushl %ebp
595; CHECK-NEXT:    movl %ecx, %edi
596; CHECK-NEXT:    calll _memcpy
597; CHECK-NEXT:    movl %edi, %ecx
598; CHECK-NEXT:    addl $16, %esp
599; CHECK-NEXT:    addl %ebx, %ebp
600; CHECK-NEXT:    addl (%esp), %ecx ## 4-byte Folded Reload
601; CHECK-NEXT:    decl %esi
602; CHECK-NEXT:    jne LBB1_17
603; CHECK-NEXT:  LBB1_18: ## %bb26
604; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
605; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
606; CHECK-NEXT:    addl %ecx, %eax
607; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
608; CHECK-NEXT:    addl %eax, %edx
609; CHECK-NEXT:    shrl %ecx
610; CHECK-NEXT:    subl $4, %esp
611; CHECK-NEXT:    pushl %ecx
612; CHECK-NEXT:    pushl $128
613; CHECK-NEXT:    pushl %edx
614; CHECK-NEXT:    jmp LBB1_23
615; CHECK-NEXT:  LBB1_19: ## %bb29
616; CHECK-NEXT:    testl %ebp, %ebp
617; CHECK-NEXT:    je LBB1_22
618; CHECK-NEXT:  ## %bb.20: ## %bb.nph11
619; CHECK-NEXT:    movl %ebp, %esi
620; CHECK-NEXT:    movl %eax, %edi
621; CHECK-NEXT:    addl $15, %eax
622; CHECK-NEXT:    andl $-16, %eax
623; CHECK-NEXT:    movl %eax, (%esp) ## 4-byte Spill
624; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
625; CHECK-NEXT:    .p2align 4
626; CHECK-NEXT:  LBB1_21: ## %bb30
627; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
628; CHECK-NEXT:    subl $4, %esp
629; CHECK-NEXT:    pushl %edi
630; CHECK-NEXT:    pushl %ecx
631; CHECK-NEXT:    pushl %ebp
632; CHECK-NEXT:    movl %ecx, %ebx
633; CHECK-NEXT:    calll _memcpy
634; CHECK-NEXT:    movl %ebx, %ecx
635; CHECK-NEXT:    addl $16, %esp
636; CHECK-NEXT:    addl %edi, %ebp
637; CHECK-NEXT:    addl (%esp), %ecx ## 4-byte Folded Reload
638; CHECK-NEXT:    decl %esi
639; CHECK-NEXT:    jne LBB1_21
640; CHECK-NEXT:  LBB1_22: ## %bb33
641; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
642; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
643; CHECK-NEXT:    addl %eax, %ecx
644; CHECK-NEXT:    shrl %eax
645; CHECK-NEXT:    subl $4, %esp
646; CHECK-NEXT:    pushl %eax
647; CHECK-NEXT:    pushl $128
648; CHECK-NEXT:    pushl %ecx
649; CHECK-NEXT:  LBB1_23: ## %bb33
650; CHECK-NEXT:    calll _memset
651; CHECK-NEXT:    addl $44, %esp
652; CHECK-NEXT:  LBB1_25: ## %return
653; CHECK-NEXT:    popl %esi
654; CHECK-NEXT:    popl %edi
655; CHECK-NEXT:    popl %ebx
656; CHECK-NEXT:    popl %ebp
657; CHECK-NEXT:    retl
658; CHECK-NEXT:  LBB1_24: ## %return
659; CHECK-NEXT:    addl $28, %esp
660; CHECK-NEXT:    jmp LBB1_25
661entry:
662  %0 = mul i32 %x, %w
663  %1 = mul i32 %x, %w
664  %2 = udiv i32 %1, 4
665  %.sum2 = add i32 %2, %0
; %d == 1 skips the byte-copy loop nests and goes straight to the memcpy path.
666  %cond = icmp eq i32 %d, 1
667  br i1 %cond, label %bb29, label %bb10.preheader
668
669bb10.preheader:                                   ; preds = %entry
670  %3 = icmp ne i32 %x, 0
671  br i1 %3, label %bb.nph9, label %bb18.loopexit
672
673bb.nph7:                                          ; preds = %bb7.preheader
674  %4 = mul i32 %y.08, %w
675  %5 = mul i32 %y.08, %s
676  %6 = add i32 %5, 1
; Unsigned max(1, %w) despite the smax name: redundant on this path, since
; %bb.nph9 only enters the loop nest when %w is non-zero.
677  %tmp8 = icmp ugt i32 1, %w
678  %smax9 = select i1 %tmp8, i32 1, i32 %w
679  br label %bb6
680
681bb6:                                              ; preds = %bb7, %bb.nph7
682  %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
683  %7 = add i32 %x.06, %4
684  %8 = shl i32 %x.06, 1
685  %9 = add i32 %6, %8
686  %10 = getelementptr i8, ptr %r, i32 %9
687  %11 = load i8, ptr %10, align 1
688  %12 = getelementptr i8, ptr %j, i32 %7
689  store i8 %11, ptr %12, align 1
690  br label %bb7
691
692bb7:                                              ; preds = %bb6
693  %indvar.next7 = add i32 %x.06, 1
; `ne` exit test against the max; the expected assembly for this loop uses an
; unsigned less-than back-edge instead.
694  %exitcond10 = icmp ne i32 %indvar.next7, %smax9
695  br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
696
697bb7.bb9_crit_edge:                                ; preds = %bb7
698  br label %bb9
699
700bb9:                                              ; preds = %bb7.preheader, %bb7.bb9_crit_edge
701  br label %bb10
702
703bb10:                                             ; preds = %bb9
704  %indvar.next11 = add i32 %y.08, 1
705  %exitcond12 = icmp ne i32 %indvar.next11, %x
706  br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
707
708bb10.bb18.loopexit_crit_edge:                     ; preds = %bb10
709  br label %bb10.bb18.loopexit_crit_edge.split
710
711bb10.bb18.loopexit_crit_edge.split:               ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
712  br label %bb18.loopexit
713
714bb.nph9:                                          ; preds = %bb10.preheader
715  %13 = icmp ugt i32 %w, 0
716  br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
717
718bb.nph9.split:                                    ; preds = %bb.nph9
719  br label %bb7.preheader
720
721bb7.preheader:                                    ; preds = %bb.nph9.split, %bb10
722  %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
; Constant-true condition: the edge to %bb9 is never taken from here.
723  br i1 true, label %bb.nph7, label %bb9
724
725bb.nph5:                                          ; preds = %bb18.loopexit
726  %14 = udiv i32 %w, 2
727  %15 = icmp ult i32 %w, 2
728  %16 = udiv i32 %x, 2
729  br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
730
731bb.nph5.split:                                    ; preds = %bb.nph5
; Unsigned max(1, %x / 2): %bb18.loopexit only reaches this path when
; %x >= 2 (unsigned).
732  %tmp2 = icmp ugt i32 1, %16
733  %smax3 = select i1 %tmp2, i32 1, i32 %16
734  br label %bb13
735
736bb13:                                             ; preds = %bb18, %bb.nph5.split
737  %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
738  %17 = mul i32 %14, %y.14
739  %18 = shl i32 %y.14, 1
740  %19 = urem i32 %y.14, 2
741  %20 = add i32 %19, %18
742  %21 = mul i32 %20, %s
743  br i1 true, label %bb.nph3, label %bb17
744
745bb.nph3:                                          ; preds = %bb13
746  %22 = add i32 %17, %0
747  %23 = add i32 %17, %.sum2
; Unsigned max(1, %w / 2): %bb.nph5 only continues to this loop when
; %w >= 2 (unsigned).
748  %24 = udiv i32 %w, 2
749  %tmp = icmp ugt i32 1, %24
750  %smax = select i1 %tmp, i32 1, i32 %24
751  br label %bb14
752
753bb14:                                             ; preds = %bb15, %bb.nph3
754  %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
755  %25 = shl i32 %x.12, 2
756  %26 = add i32 %25, %21
757  %27 = getelementptr i8, ptr %r, i32 %26
758  %28 = load i8, ptr %27, align 1
759  %.sum = add i32 %22, %x.12
760  %29 = getelementptr i8, ptr %j, i32 %.sum
761  store i8 %28, ptr %29, align 1
762  %30 = shl i32 %x.12, 2
763  %31 = or disjoint i32 %30, 2
764  %32 = add i32 %31, %21
765  %33 = getelementptr i8, ptr %r, i32 %32
766  %34 = load i8, ptr %33, align 1
767  %.sum6 = add i32 %23, %x.12
768  %35 = getelementptr i8, ptr %j, i32 %.sum6
769  store i8 %34, ptr %35, align 1
770  br label %bb15
771
772bb15:                                             ; preds = %bb14
773  %indvar.next = add i32 %x.12, 1
774  %exitcond = icmp ne i32 %indvar.next, %smax
775  br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
776
777bb15.bb17_crit_edge:                              ; preds = %bb15
778  br label %bb17
779
780bb17:                                             ; preds = %bb15.bb17_crit_edge, %bb13
781  br label %bb18
782
783bb18.loopexit:                                    ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
784  %36 = icmp ult i32 %x, 2
785  br i1 %36, label %bb20, label %bb.nph5
786
787bb18:                                             ; preds = %bb17
788  %indvar.next1 = add i32 %y.14, 1
789  %exitcond4 = icmp ne i32 %indvar.next1, %smax3
790  br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
791
792bb18.bb20_crit_edge:                              ; preds = %bb18
793  br label %bb18.bb20_crit_edge.split
794
795bb18.bb20_crit_edge.split:                        ; preds = %bb18.bb20_crit_edge, %bb.nph5
796  br label %bb20
797
798bb20:                                             ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
; Dispatch on %d: 3 -> %bb22, 1 -> %bb29, anything else -> %return.
799  switch i32 %d, label %return [
800    i32 3, label %bb22
801    i32 1, label %bb29
802  ]
803
804bb22:                                             ; preds = %bb20
805  %37 = mul i32 %x, %w
806  %38 = udiv i32 %37, 4
807  %.sum3 = add i32 %38, %.sum2
808  %39 = add i32 %x, 15
809  %40 = and i32 %39, -16
810  %41 = add i32 %w, 15
811  %42 = and i32 %41, -16
812  %43 = mul i32 %40, %s
813  %44 = icmp ugt i32 %x, 0
814  br i1 %44, label %bb.nph, label %bb26
815
816bb.nph:                                           ; preds = %bb22
817  br label %bb23
818
819bb23:                                             ; preds = %bb24, %bb.nph
820  %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
821  %45 = mul i32 %y.21, %42
822  %.sum1 = add i32 %45, %43
823  %46 = getelementptr i8, ptr %r, i32 %.sum1
824  %47 = mul i32 %y.21, %w
825  %.sum5 = add i32 %47, %.sum3
826  %48 = getelementptr i8, ptr %j, i32 %.sum5
; Copy %w bytes per iteration from %r (offset %.sum1) to %j (offset %.sum5).
827  tail call void @llvm.memcpy.p0.p0.i32(ptr %48, ptr %46, i32 %w, i1 false)
828  br label %bb24
829
830bb24:                                             ; preds = %bb23
831  %indvar.next5 = add i32 %y.21, 1
832  %exitcond6 = icmp ne i32 %indvar.next5, %x
833  br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
834
835bb24.bb26_crit_edge:                              ; preds = %bb24
836  br label %bb26
837
838bb26:                                             ; preds = %bb24.bb26_crit_edge, %bb22
839  %49 = mul i32 %x, %w
840  %.sum4 = add i32 %.sum3, %49
841  %50 = getelementptr i8, ptr %j, i32 %.sum4
842  %51 = mul i32 %x, %w
843  %52 = udiv i32 %51, 2
; Fill (%x * %w) / 2 bytes at %j + %.sum4 with the byte value -128.
844  tail call void @llvm.memset.p0.i32(ptr %50, i8 -128, i32 %52, i1 false)
845  ret void
846
847bb29:                                             ; preds = %bb20, %entry
848  %53 = add i32 %w, 15
849  %54 = and i32 %53, -16
850  %55 = icmp ugt i32 %x, 0
851  br i1 %55, label %bb.nph11, label %bb33
852
853bb.nph11:                                         ; preds = %bb29
854  br label %bb30
855
856bb30:                                             ; preds = %bb31, %bb.nph11
857  %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
858  %56 = mul i32 %y.310, %54
859  %57 = getelementptr i8, ptr %r, i32 %56
860  %58 = mul i32 %y.310, %w
861  %59 = getelementptr i8, ptr %j, i32 %58
; Copy %w bytes per iteration; the source advances by %w rounded up to a
; multiple of 16 (%54), the destination by %w.
862  tail call void @llvm.memcpy.p0.p0.i32(ptr %59, ptr %57, i32 %w, i1 false)
863  br label %bb31
864
865bb31:                                             ; preds = %bb30
866  %indvar.next13 = add i32 %y.310, 1
867  %exitcond14 = icmp ne i32 %indvar.next13, %x
868  br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
869
870bb31.bb33_crit_edge:                              ; preds = %bb31
871  br label %bb33
872
873bb33:                                             ; preds = %bb31.bb33_crit_edge, %bb29
874  %60 = mul i32 %x, %w
875  %61 = getelementptr i8, ptr %j, i32 %60
876  %62 = mul i32 %x, %w
877  %63 = udiv i32 %62, 2
; Fill (%x * %w) / 2 bytes at %j + %x * %w with the byte value -128.
878  tail call void @llvm.memset.p0.i32(ptr %61, i8 -128, i32 %63, i1 false)
879  ret void
880
881return:                                           ; preds = %bb20
882  ret void
883}
884
; Intrinsics used by @foo and @bar: memcpy for the per-row copies in %bb23 and
; %bb30, memset for the final fill in %bb26 and %bb33. Both take an i32 length
; and a trailing i1 flag that every call site in this file passes as false.
885declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
886
887declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind
888