; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes=load-store-vectorizer -S -o - %s | FileCheck %s

; Check that the load/store vectorizer is willing to move loads/stores across
; intervening instructions only if it's safe.
;
;  - Loads can be moved across instructions that don't write or throw.
;  - Stores can only be moved across instructions which don't read, write, or
;    throw.
;
; Every test has the same shape: two adjacent i32 accesses, at %p and at
; %p + 4 bytes, separated by exactly one call.  The first access is always
; 8-byte aligned and the second 4-byte aligned (the most %p + 4 can be when %p
; is 8-byte aligned), so that an under-aligned first access can never be the
; reason a pair was left unmerged; only the attributes on the call differ.

declare void @fn()
declare void @fn_nounwind() #0
declare void @fn_nounwind_writeonly() #1
declare void @fn_nounwind_readonly() #2
declare void @fn_writeonly() #3
declare void @fn_readonly() #4
declare void @fn_readnone() #5

; @fn has no attributes: it may write memory and may unwind, so the two loads
; must stay on their own sides of the call.
; CHECK-LABEL: @load_fn
; CHECK: load
; CHECK: call void @fn()
; CHECK: load
define void @load_fn(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  %v0 = load i32, ptr %p, align 8
  call void @fn()
  %v1 = load i32, ptr %p.1, align 4
  ret void
}

; The call cannot unwind but may still write memory: loads stay separate.
; CHECK-LABEL: @load_fn_nounwind
; CHECK: load
; CHECK: call void @fn_nounwind()
; CHECK: load
define void @load_fn_nounwind(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  %v0 = load i32, ptr %p, align 8
  call void @fn_nounwind() #0
  %v1 = load i32, ptr %p.1, align 4
  ret void
}

; The call cannot unwind but writes memory: loads stay separate.
; CHECK-LABEL: @load_fn_nounwind_writeonly
; CHECK: load
; CHECK: call void @fn_nounwind_writeonly()
; CHECK: load
define void @load_fn_nounwind_writeonly(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  %v0 = load i32, ptr %p, align 8
  call void @fn_nounwind_writeonly() #1
  %v1 = load i32, ptr %p.1, align 4
  ret void
}

; The call neither writes nor unwinds: the loads are merged into one
; <2 x i32> load.  The -DAG patterns accept the vector load on either side of
; the call.
; CHECK-LABEL: @load_fn_nounwind_readonly
; CHECK-DAG: load <2 x i32>
; CHECK-DAG: call void @fn_nounwind_readonly()
define void @load_fn_nounwind_readonly(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  %v0 = load i32, ptr %p, align 8
  call void @fn_nounwind_readonly() #2
  %v1 = load i32, ptr %p.1, align 4
  ret void
}

; The call does not write but may unwind: loads stay separate.
; CHECK-LABEL: @load_fn_readonly
; CHECK: load
; CHECK: call void @fn_readonly()
; CHECK: load
define void @load_fn_readonly(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  %v0 = load i32, ptr %p, align 8
  call void @fn_readonly() #4
  %v1 = load i32, ptr %p.1, align 4
  ret void
}

; The call writes memory and may unwind: loads stay separate.
; CHECK-LABEL: @load_fn_writeonly
; CHECK: load
; CHECK: call void @fn_writeonly()
; CHECK: load
define void @load_fn_writeonly(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  %v0 = load i32, ptr %p, align 8
  call void @fn_writeonly() #3
  %v1 = load i32, ptr %p.1, align 4
  ret void
}

; The call touches no memory and cannot unwind: the loads are merged.
; CHECK-LABEL: @load_fn_readnone
; CHECK-DAG: load <2 x i32>
; CHECK-DAG: call void @fn_readnone()
define void @load_fn_readnone(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  %v0 = load i32, ptr %p, align 8
  call void @fn_readnone() #5
  %v1 = load i32, ptr %p.1, align 4
  ret void
}

; ------------------------------------------------
; Same tests, but now for stores instead of loads.
; ------------------------------------------------

; @fn may read, write and unwind: stores stay separate.
; CHECK-LABEL: @store_fn
; CHECK: store
; CHECK: call void @fn()
; CHECK: store
define void @store_fn(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  store i32 0, ptr %p, align 8
  call void @fn()
  store i32 0, ptr %p.1, align 4
  ret void
}

; The call cannot unwind but may read or write memory: stores stay separate.
; CHECK-LABEL: @store_fn_nounwind
; CHECK: store
; CHECK: call void @fn_nounwind()
; CHECK: store
define void @store_fn_nounwind(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  store i32 0, ptr %p, align 8
  call void @fn_nounwind() #0
  store i32 0, ptr %p.1, align 4
  ret void
}

; The call cannot unwind but writes memory: stores stay separate.
; CHECK-LABEL: @store_fn_nounwind_writeonly
; CHECK: store
; CHECK: call void @fn_nounwind_writeonly()
; CHECK: store
define void @store_fn_nounwind_writeonly(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  store i32 0, ptr %p, align 8
  call void @fn_nounwind_writeonly() #1
  store i32 0, ptr %p.1, align 4
  ret void
}

; Unlike the load case, a call that only reads memory still blocks the merge:
; stores stay separate.
; CHECK-LABEL: @store_fn_nounwind_readonly
; CHECK: store
; CHECK: call void @fn_nounwind_readonly()
; CHECK: store
define void @store_fn_nounwind_readonly(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  store i32 0, ptr %p, align 8
  call void @fn_nounwind_readonly() #2
  store i32 0, ptr %p.1, align 4
  ret void
}

; The call reads memory and may unwind: stores stay separate.
; CHECK-LABEL: @store_fn_readonly
; CHECK: store
; CHECK: call void @fn_readonly()
; CHECK: store
define void @store_fn_readonly(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  store i32 0, ptr %p, align 8
  call void @fn_readonly() #4
  store i32 0, ptr %p.1, align 4
  ret void
}

; The call writes memory and may unwind: stores stay separate.
; CHECK-LABEL: @store_fn_writeonly
; CHECK: store
; CHECK: call void @fn_writeonly()
; CHECK: store
define void @store_fn_writeonly(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  store i32 0, ptr %p, align 8
  call void @fn_writeonly() #3
  store i32 0, ptr %p.1, align 4
  ret void
}

; This is the only store idiom we can vectorize.
; CHECK-LABEL: @store_fn_readnone
; CHECK-DAG: store <2 x i32>
; CHECK-DAG: call void @fn_readnone()
define void @store_fn_readnone(ptr %p) #0 {
  %p.1 = getelementptr i32, ptr %p, i32 1

  store i32 0, ptr %p, align 8
  call void @fn_readnone() #5
  ; %p.1 is %p + 4 bytes, so with %p 8-byte aligned it is only 4-byte aligned.
  store i32 0, ptr %p.1, align 4
  ret void
}


attributes #0 = { nounwind willreturn }
attributes #1 = { nounwind willreturn writeonly }
attributes #2 = { nounwind readonly willreturn }
attributes #3 = { writeonly }
attributes #4 = { readonly }
; readnone implies nounwind, so no need to test separately
attributes #5 = { nounwind willreturn readnone }