Lines Matching defs:chunk
54 // Loop with static scheduling and a specified chunk size
63 * lower bound of first chunk
65 * upper bound of first chunk
69 * @param[in] chunk size
72 // helper function for the static chunked schedule
73 static void ForStaticChunk(int &last, T &lb, T &ub, ST &stride, ST chunk,
78 stride = numberOfEntities * chunk;
79 lb = lb + entityId * chunk;
81 ub = lb + chunk - 1; // Clang uses i <= ub
82 // Say ub' is the beginning of the last chunk. Then whoever has a
85 T beginingLastChunk = inputUb - (inputUb % chunk);
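
Pieced together, the ForStaticChunk fragments above implement the classic cyclic mapping of chunks onto entities (threads or warps). A minimal sketch, assuming 0-based entity ids and an inclusive upper bound; the last-owner test reconstructs the comment on line 82 from lines that did not match, so treat it as an assumption:

    // Sketch of the static-chunked mapping; the real helper is
    // templated over the index types T/ST.
    static void ForStaticChunk(int &last, int &lb, int &ub, int &stride,
                               int chunk, int entityId,
                               int numberOfEntities) {
      int inputUb = ub;
      stride = numberOfEntities * chunk; // distance between my chunks
      lb = lb + entityId * chunk;        // my first chunk starts here
      ub = lb + chunk - 1;               // Clang uses i <= ub
      // Whoever's lower bound plus a multiple of the stride hits the
      // beginning of the last chunk owns the last iteration (assumed
      // reconstruction of the unmatched lines after line 82).
      int beginingLastChunk = inputUb - (inputUb % chunk);
      last = ((beginingLastChunk - lb) % stride) == 0;
    }

For 100 iterations (lb 0, ub 99), chunk 16, and four threads, thread 2 gets [32,47] and [96,99]; the last chunk begins at 96 = 99 - 99 % 16, and (96 - 32) % 64 == 0 marks thread 2 as last.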
90 // Loop with static scheduling without a specified chunk size
92 // helper function for the static non-chunked schedule
93 static void ForStaticNoChunk(int &last, T &lb, T &ub, ST &stride, ST &chunk,
95 // No chunk size specified. Each thread or warp gets at most one
96 // chunk; all chunks are of nearly equal size
99 chunk = loopSize / numberOfEntities;
100 T leftOver = loopSize - chunk * numberOfEntities;
103 chunk++;
104 lb = lb + entityId * chunk;
106 lb = lb + entityId * chunk + leftOver;
110 ub = lb + chunk - 1; // Clang uses i <= ub
112 stride = loopSize; // make sure we only do 1 chunk per warp
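
The no-chunk variant instead hands each entity exactly one chunk, sized so the pieces differ by at most one iteration: the first leftOver entities absorb the remainder. A sketch under the same assumptions; the `last` computation sits on unmatched lines and is assumed here:

    static void ForStaticNoChunk(int &last, int &lb, int &ub, int &stride,
                                 int &chunk, int entityId,
                                 int numberOfEntities) {
      int loopSize = ub - lb + 1;
      chunk = loopSize / numberOfEntities;
      int leftOver = loopSize - chunk * numberOfEntities;
      if (entityId < leftOver) {
        chunk++;                  // first leftOver entities get one extra
        lb = lb + entityId * chunk;
      } else {
        lb = lb + entityId * chunk + leftOver;
      }
      int inputUb = ub;
      ub = lb + chunk - 1;        // Clang uses i <= ub
      last = lb <= inputUb && inputUb <= ub; // assumed: owner of the tail
      stride = loopSize;          // make sure we only do 1 chunk per warp
    }

Ten iterations over four threads split as [0,2], [3,5], [6,7], [8,9]: chunk starts at 2, leftOver is 2, threads 0 and 1 grow to three iterations, and thread 3 is last.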
119 T *plower, T *pupper, ST *pstride, ST chunk,
138 if (chunk > 0) {
139 ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
144 } // note: if chunk <=0, use nochunk
146 if (chunk > 0) {
147 // round up to make sure the chunk is enough to cover all iterations
151 // perform chunk adjustment
152 chunk = (span + chunk - 1) & ~(chunk - 1);
156 ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
163 } // note: if chunk <=0, use nochunk
165 ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
170 if (chunk > 0) {
171 ForStaticChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
176 } // note: if chunk <=0, use nochunk
178 ForStaticNoChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
183 ForStaticChunk(lastiter, lb, ub, stride, chunk,
190 ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
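
The chunk adjustment on line 152 is a mask trick: (span + chunk - 1) & ~(chunk - 1) rounds span up to the next multiple of chunk, which is only a true round-up when chunk is a power of two. The value of span comes from lines that did not match; presumably it is the per-thread share of the trip count. A worked check:

    #include <cassert>

    // Mask-based round-up from line 152; `span` is assumed to be
    // ceil(tripCount / numThreads), computed on unmatched lines.
    static int adjustChunk(int span, int chunk) {
      return (span + chunk - 1) & ~(chunk - 1);
    }

    int main() {
      assert(adjustChunk(100, 16) == 112); // next multiple of 16
      assert(adjustChunk(100, 32) == 128); // next multiple of 32
      // Caveat: for a non-power-of-two chunk the mask form is not a
      // round-up: adjustChunk(100, 12) == 100, not 108.
      return 0;
    }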
211 kmp_sched_t schedule, T lb, T ub, ST st, ST chunk,
234 chunk = tripCount; // one thread gets the whole loop
240 chunk = ChunkInt;
243 if (chunk > 0)
251 chunk = 1;
262 chunk = 1;
266 // "unknown schedule %d & chunk %lld\n", (int)schedule,
267 // (long long)chunk);
272 ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
277 // compute static chunk
280 ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
282 DST->Chunk = chunk;
286 ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
291 // compute static chunk
294 // round up to make sure the chunk is enough to cover all iterations
296 // perform chunk adjustment
297 chunk = (span + chunk - 1) & ~(chunk - 1);
300 ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
305 DST->Chunk = chunk;
309 ASSERT0(LT_FUSSY, chunk == 0, "bad chunk value");
314 // compute static chunk
317 ForStaticNoChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
319 DST->Chunk = chunk;
325 if (chunk < 1)
326 chunk = 1;
327 DST->Chunk = chunk;
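
Between lines 234 and 327 the init path normalizes the chunk before storing it in the dynamic-schedule tracker. A hypothetical condensation of that policy, with boolean parameters standing in for the schedule checks the matched lines only hint at:

    // Condensed, assumed view of the defaulting visible above; the
    // real code also rewrites `schedule` itself.
    static long pickChunk(bool singleThread, bool staticChunked,
                          bool staticNoChunk, long chunk, long tripCount) {
      if (singleThread)
        return tripCount;           // one thread gets the whole loop (234)
      if (staticChunked)
        return chunk;               // asserted > 0 (272)
      if (staticNoChunk)
        return 0;                   // asserted == 0 (309)
      return chunk < 1 ? 1 : chunk; // dynamic/guided floor of 1 (325-326)
    }

Whatever survives this normalization is recorded as DST->Chunk for the dispatch_next calls that follow.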
364 // b. lb < loopUpperBound and ub >= loopUpperBound: last chunk -->
366 // c. lb and ub >= loopUpperBound: empty chunk --> FINISHED
403 ST chunk = DST->Chunk;
405 T myUb = myLb + chunk - 1; // Clang uses i <= ub
426 // not finished (either not finished or last chunk)
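
Each dispatch_next call carves out [myLb, myLb + chunk - 1] (line 405) and classifies it against the loop's upper bound per the comment at lines 364-366. A sketch; only FINISHED appears verbatim in the matches, so the other outcome names are assumptions:

    enum NextResult { NOT_FINISHED, LAST_CHUNK, FINISHED };

    static NextResult classify(long myLb, long myUb, long loopUpperBound) {
      if (myUb < loopUpperBound)
        return NOT_FINISHED; // a. chunk lies fully inside the bounds
      if (myLb < loopUpperBound)
        return LAST_CHUNK;   // b. chunk straddles the upper bound
      return FINISHED;       // c. empty chunk, entirely past the end
    }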
525 int32_t lb, int32_t ub, int32_t st, int32_t chunk) {
528 loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
533 int32_t chunk) {
536 loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
540 int64_t lb, int64_t ub, int64_t st, int64_t chunk) {
543 loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
548 int64_t chunk) {
551 loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
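
These four entry points (signed and unsigned, 32- and 64-bit induction variables) are what compiler-generated code calls. A sketch of the usual driver for '#pragma omp for schedule(dynamic, 4)'; the declarations are abbreviated (loc is really an IdentTy *), and the schedule value 35 is kmp_sch_dynamic_chunked in the host runtime's kmp.h, assumed to carry over here:

    #include <cstdint>

    extern "C" {
    void __kmpc_dispatch_init_4(void *loc, int32_t gtid, int32_t sched,
                                int32_t lb, int32_t ub, int32_t st,
                                int32_t chunk);
    int __kmpc_dispatch_next_4(void *loc, int32_t gtid, int32_t *p_last,
                               int32_t *p_lb, int32_t *p_ub, int32_t *p_st);
    }

    void dynamicFor(void *loc, int32_t gtid, int32_t n,
                    void (*body)(int32_t)) {
      __kmpc_dispatch_init_4(loc, gtid, /*kmp_sch_dynamic_chunked=*/35,
                             /*lb=*/0, /*ub=*/n - 1, /*st=*/1, /*chunk=*/4);
      int32_t last, lb, ub, st;
      while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st))
        for (int32_t i = lb; i <= ub; ++i) // Clang uses i <= ub
          body(i);
    }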
610 int32_t *pstride, int32_t incr, int32_t chunk) {
612 global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
619 int32_t *pstride, int32_t incr, int32_t chunk) {
621 global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
628 int64_t *pstride, int64_t incr, int64_t chunk) {
630 global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
637 int64_t *pstride, int64_t incr, int64_t chunk) {
639 global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
647 int32_t chunk) {
649 global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
657 int32_t chunk) {
659 global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
667 int64_t chunk) {
669 global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
677 int64_t chunk) {
679 global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
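
The static counterparts fill lower bound, upper bound, stride, and last-iteration flag in place, so the caller runs a single bounded loop. The same caveats apply (abbreviated declarations; 34 is kmp_sch_static in the host kmp.h and assumed to match):

    #include <cstdint>

    extern "C" {
    void __kmpc_for_static_init_4(void *loc, int32_t gtid, int32_t sched,
                                  int32_t *plastiter, int32_t *plower,
                                  int32_t *pupper, int32_t *pstride,
                                  int32_t incr, int32_t chunk);
    void __kmpc_for_static_fini(void *loc, int32_t gtid);
    }

    void staticFor(void *loc, int32_t gtid, int32_t n,
                   void (*body)(int32_t)) {
      int32_t last = 0, lb = 0, ub = n - 1, stride = 1;
      __kmpc_for_static_init_4(loc, gtid, /*kmp_sch_static=*/34, &last,
                               &lb, &ub, &stride, /*incr=*/1, /*chunk=*/1);
      for (int32_t i = lb; i <= ub; ++i) // Clang uses i <= ub
        body(i);
      __kmpc_for_static_fini(loc, gtid);
    }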
695 /// absence of user-specified chunk sizes. This implicitly picks a block chunk
696 /// size equal to the number of threads in the block and a thread chunk size
717 // Every thread has executed one block chunk and one thread chunk now.
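
With a block chunk equal to the block size and a thread chunk of one, the whole schedule collapses into the familiar grid-stride loop, which is why a single loop suffices here. An illustrative rendering (names are not the runtime's):

    #include <cstdint>

    template <typename BodyFn>
    void staticLoopNoChunk(uint64_t numIters, uint64_t numBlocks,
                           uint64_t blockSize, uint64_t blockId,
                           uint64_t threadId, BodyFn body) {
      for (uint64_t i = blockId * blockSize + threadId; i < numIters;
           i += numBlocks * blockSize)
        body(i); // every thread has executed one block and thread chunk
    }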
728 /// presence of user-specified chunk sizes (for at least one of them).
788 // If the thread chunk is not specified, we pick a default now.
830 // If the block chunk is not specified, we pick a default now.
877 // If the block chunk is not specified, we pick a default now.
881 // If the thread chunk is not specified, we pick a default now.
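
The matched lines show only the comments, but a natural reading, and an assumption here, is that each missing value defaults to the implicit one from the unchunked case described at lines 695-696:

    #include <cstdint>

    // Assumed defaulting, mirroring the implicit values of the
    // unchunked version; the matched lines show only the comments.
    static void pickDefaultChunks(uint64_t &threadChunk,
                                  uint64_t &blockChunk,
                                  uint64_t blockSize) {
      if (threadChunk == 0)
        threadChunk = 1;        // one iteration per thread
      if (blockChunk == 0)
        blockChunk = blockSize; // number of threads in the block
    }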