// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK,OMP50
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s

// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK,OMP45
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s

// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

// CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
// CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
// CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void static_not_chunked(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
  #pragma omp for schedule(static) ordered
// CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
//
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (int i = 32000000; i > 33; i += -7) {
// CHECK: [[LOOP1_BODY]]
// Start of body: calculate i from IV:
// CHECK: [[IV1_1:%.+]] = load i32, i32* [[OMP_IV]]
// CHECK-NEXT: [[CALC_I_1:%.+]] = mul nsw i32 [[IV1_1]], 7
// CHECK-NEXT: [[CALC_I_2:%.+]] = sub nsw i32 32000000, [[CALC_I_1]]
// CHECK-NEXT: store i32 [[CALC_I_2]], i32* [[LC_I:.+]]

// ... start of ordered region ...
// CHECK-NEXT: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.access.group
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
    #pragma omp ordered
    a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]
// CHECK-NEXT: call void @__kmpc_dispatch_fini_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK-NEXT: br label %{{.+}}
  }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
// CHECK: ret void
}

// CHECK-LABEL: define {{.*void}} @{{.*}}dynamic1{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void dynamic1(float *a, float *b, float *c, float *d) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
  #pragma omp for schedule(dynamic) ordered
// OMP45: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
// OMP50: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1)
//
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
// CHECK-NEXT: [[BOUND:%.+]] = add i64 [[UB]], 1
// CHECK-NEXT: [[CMP:%.+]] = icmp ult i64 [[IV]], [[BOUND]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (unsigned long long i = 131071; i < 2147483647; i += 127) {
// CHECK: [[LOOP1_BODY]]
// Start of body: calculate i from IV:
// CHECK: [[IV1_1:%.+]] = load i64, i64* [[OMP_IV]]
// CHECK-NEXT: [[CALC_I_1:%.+]] = mul i64 [[IV1_1]], 127
// CHECK-NEXT: [[CALC_I_2:%.+]] = add i64 131071, [[CALC_I_1]]
// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]

// ... start of ordered region ...
// CHECK-NEXT: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.access.group
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
    #pragma omp ordered threads
    a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]

// ... end iteration for ordered loop ...
// CHECK-NEXT: call void @__kmpc_dispatch_fini_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK-NEXT: br label %{{.+}}
  }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
// CHECK: ret void
}

// CHECK-LABEL: define {{.*void}} @{{.*}}test_auto{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void test_auto(float *a, float *b, float *c, float *d) {
  unsigned int x = 0;
  unsigned int y = 0;
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
  #pragma omp for schedule(auto) collapse(2) ordered
// OMP45: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 70, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1)
// OMP50: call void @__kmpc_dispatch_init_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 1073741894, i64 0, i64 [[LAST_ITER:%[^,]+]], i64 1, i64 1)
//
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i64, i64* [[OMP_LB]]
// CHECK-NEXT: store i64 [[LB]], i64* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i64, i64* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i64, i64* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i64 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
// FIXME: When the iteration count of some nested loop is not a known constant,
// we should pre-calculate it, like we do for the total number of iterations!
  for (char i = static_cast<char>(y); i <= '9'; ++i)
    for (x = 11; x > 0; --x) {
// CHECK: [[LOOP1_BODY]]
// Start of body: indices are calculated from IV:
// CHECK: store i8 {{%[^,]+}}, i8* {{%[^,]+}}
// CHECK: store i32 {{%[^,]+}}, i32* {{%[^,]+}}

// ... start of ordered region ...
// CHECK: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.access.group
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
      #pragma omp ordered
      a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i64 [[IV1_2]], 1
// CHECK-NEXT: store i64 [[ADD1_2]], i64* [[OMP_IV]]

// ... end iteration for ordered loop ...
// CHECK-NEXT: call void @__kmpc_dispatch_fini_8([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK-NEXT: br label %{{.+}}
    }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
// CHECK: ret void
}

// CHECK-LABEL: define {{.*void}} @{{.*}}runtime{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
void runtime(float *a, float *b, float *c, float *d) {
  int x = 0;
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
  #pragma omp for collapse(2) schedule(runtime) ordered
// OMP45: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 69, i32 0, i32 199, i32 1, i32 1)
// OMP50: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 1073741893, i32 0, i32 199, i32 1, i32 1)
//
// CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
// CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
// CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]

// Loop header
// CHECK: [[O_LOOP1_BODY]]
// CHECK: [[LB:%.+]] = load i32, i32* [[OMP_LB]]
// CHECK-NEXT: store i32 [[LB]], i32* [[OMP_IV:[^,]+]]
// CHECK: [[IV:%.+]] = load i32, i32* [[OMP_IV]]

// CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
// CHECK-NEXT: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
// CHECK-NEXT: br i1 [[CMP]], label %[[LOOP1_BODY:[^,]+]], label %[[LOOP1_END:[^,]+]]
  for (unsigned char i = '0' ; i <= '9'; ++i)
    for (x = -10; x < 10; ++x) {
// CHECK: [[LOOP1_BODY]]
// Start of body: indices are calculated from IV:
// CHECK: store i8 {{%[^,]+}}, i8* {{%[^,]+}}
// CHECK: store i32 {{%[^,]+}}, i32* {{%[^,]+}}

// ... start of ordered region ...
// CHECK: call void @__kmpc_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
// CHECK-NOT: !llvm.access.group
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
      #pragma omp ordered threads
      a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
// CHECK-NEXT: store i32 [[ADD1_2]], i32* [[OMP_IV]]

// ... end iteration for ordered loop ...
// CHECK-NEXT: call void @__kmpc_dispatch_fini_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK-NEXT: br label %{{.+}}
    }
// CHECK: [[LOOP1_END]]
// CHECK: [[O_LOOP1_END]]
// CHECK: call {{.+}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
// CHECK: ret void
}

float f[10];
// CHECK-LABEL: foo_simd
void foo_simd(int low, int up) {
// CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}, !llvm.access.group !
// CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}}), !llvm.access.group !
  #pragma omp simd
  for (int i = low; i < up; ++i) {
    f[i] = 0.0;
    #pragma omp ordered simd
    f[i] = 1.0;
  }
// CHECK: store float 0.000000e+00, float* %{{.+}}, align {{[0-9]+}}
// CHECK-NEXT: call void [[CAP_FUNC:@.+]](i32* %{{.+}})
  #pragma omp for simd ordered
  for (int i = low; i < up; ++i) {
    f[i] = 0.0;
    #pragma omp ordered simd
    f[i] = 1.0;
  }
}

// CHECK: define internal void [[CAP_FUNC]](i32* nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %{{.+}}) #
// CHECK: store float 1.000000e+00, float* %{{.+}}, align
// CHECK-NEXT: ret void

#endif // HEADER