1 /* Copyright (C) 2021-2023 Free Software Foundation, Inc.
2 Contributed by Oracle.
3
4 This file is part of GNU Binutils.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
20
21 /*
22 * -----------------------------------------------------------------------------
23 * This program implements the multiplication of an m by n matrix with a vector
* of length n. The POSIX Threads parallel programming model is used to
25 * parallelize the core matrix-vector multiplication algorithm.
26 * -----------------------------------------------------------------------------
27 */
28
#include "mydefs.h"

#include <errno.h>
#include <inttypes.h>
#include <string.h>
30
main(int argc,char ** argv)31 int main (int argc, char **argv)
32 {
33 bool verbose = false;
34
35 thread_data *thread_data_arguments;
36 pthread_t *pthread_ids;
37
38 int64_t remainder_rows;
39 int64_t rows_per_thread;
40 int64_t active_threads;
41
42 int64_t number_of_rows;
43 int64_t number_of_columns;
44 int64_t number_of_threads;
45 int64_t repeat_count;
46
47 double **A;
48 double *b;
49 double *c;
50 double *ref;
51
52 int64_t errors;
53
54 /*
55 * -----------------------------------------------------------------------------
56 * Start the ball rolling - Get the user options and parse them.
57 * -----------------------------------------------------------------------------
58 */
59 (void) get_user_options (
60 argc,
61 argv,
62 &number_of_rows,
63 &number_of_columns,
64 &repeat_count,
65 &number_of_threads,
66 &verbose);
67
68 if (verbose) printf ("Verbose mode enabled\n");
69
70 /*
71 * -----------------------------------------------------------------------------
72 * Allocate storage for all data structures.
73 * -----------------------------------------------------------------------------
74 */
75 (void) allocate_data (
76 number_of_threads, number_of_rows,
77 number_of_columns, &A, &b, &c, &ref,
78 &thread_data_arguments, &pthread_ids);
79
80 if (verbose) printf ("Allocated data structures\n");
81
82 /*
83 * -----------------------------------------------------------------------------
84 * Initialize the data.
85 * -----------------------------------------------------------------------------
86 */
87 (void) init_data (number_of_rows, number_of_columns, A, b, c, ref);
88
89 if (verbose) printf ("Initialized matrix and vectors\n");
90
91 /*
92 * -----------------------------------------------------------------------------
93 * Determine the main workload settings.
94 * -----------------------------------------------------------------------------
95 */
96 (void) get_workload_stats (
97 number_of_threads, number_of_rows,
98 number_of_columns, &rows_per_thread,
99 &remainder_rows, &active_threads);
100
101 if (verbose) printf ("Defined workload distribution\n");
102
103 for (int64_t TID=active_threads; TID<number_of_threads; TID++)
104 {
105 thread_data_arguments[TID].do_work = false;
106 }
107 for (int64_t TID=0; TID<active_threads; TID++)
108 {
109 thread_data_arguments[TID].thread_id = TID;
110 thread_data_arguments[TID].verbose = verbose;
111 thread_data_arguments[TID].do_work = true;
112 thread_data_arguments[TID].repeat_count = repeat_count;
113
114 (void) determine_work_per_thread (
115 TID, rows_per_thread, remainder_rows,
116 &thread_data_arguments[TID].row_index_start,
117 &thread_data_arguments[TID].row_index_end);
118
119 thread_data_arguments[TID].m = number_of_rows;
120 thread_data_arguments[TID].n = number_of_columns;
121 thread_data_arguments[TID].b = b;
122 thread_data_arguments[TID].c = c;
123 thread_data_arguments[TID].A = A;
124 }
125
126 if (verbose) printf ("Assigned work to threads\n");
127
128 /*
129 * -----------------------------------------------------------------------------
130 * Create and execute the threads. Note that this means that there will be
131 * <t+1> threads, with <t> the number of threads specified on the commandline,
132 * or the default if the -t option was not used.
133 *
134 * Per the pthread_create () call, the threads start executing right away.
135 * -----------------------------------------------------------------------------
136 */
137 for (int TID=0; TID<active_threads; TID++)
138 {
139 if (pthread_create (&pthread_ids[TID], NULL, driver_mxv,
140 (void *) &thread_data_arguments[TID]) != 0)
141 {
142 printf ("Error creating thread %d\n", TID);
143 perror ("pthread_create"); exit (-1);
144 }
145 else
146 {
147 if (verbose) printf ("Thread %d has been created\n", TID);
148 }
149 }
150 /*
151 * -----------------------------------------------------------------------------
152 * Wait for all threads to finish.
153 * -----------------------------------------------------------------------------
154 */
155 for (int TID=0; TID<active_threads; TID++)
156 {
157 pthread_join (pthread_ids[TID], NULL);
158 }
159
160 if (verbose)
161 {
162 printf ("Matrix vector multiplication has completed\n");
163 printf ("Verify correctness of result\n");
164 }
165
166 /*
167 * -----------------------------------------------------------------------------
168 * Check the numerical results.
169 * -----------------------------------------------------------------------------
170 */
171 if ((errors = check_results (number_of_rows, number_of_columns,
172 c, ref)) == 0)
173 {
174 if (verbose) printf ("Error check passed\n");
175 }
176 else
177 {
178 printf ("Error: %ld differences in the results detected\n", errors);
179 }
180
181 /*
182 * -----------------------------------------------------------------------------
183 * Print a summary of the execution.
184 * -----------------------------------------------------------------------------
185 */
186 print_all_results (number_of_rows, number_of_columns, number_of_threads,
187 errors);
188
189 /*
190 * -----------------------------------------------------------------------------
191 * Release the allocated memory and end execution.
192 * -----------------------------------------------------------------------------
193 */
194 free (A);
195 free (b);
196 free (c);
197 free (ref);
198 free (pthread_ids);
199
200 return (0);
201 }
202
/*
 * -----------------------------------------------------------------------------
 * Convert the argument of option -<option> into an int64_t.
 *
 * option - the option letter, only used in the error message.
 * text   - the option argument as delivered by getopt ().
 * value  - receives the converted number on success; untouched on failure.
 *
 * Returns true on success.  If the text is empty, has trailing characters or
 * does not fit, an error message is printed and false is returned.
 * -----------------------------------------------------------------------------
 */
static bool parse_option_value (int option, const char *text, int64_t *value)
{
  char *end = NULL;
  long long parsed;

  errno = 0;
  parsed = strtoll (text, &end, 10);

  if (end == text || *end != '\0' || errno == ERANGE)
    {
      printf ("Error: '%s' is not a valid integer value for option -%c\n",
              text, option);
      return false;
    }

  *value = (int64_t) parsed;
  return true;
}

/*
 * -----------------------------------------------------------------------------
 * Parse user options and set variables accordingly.  In case of an error, print
 * a message, but do not bail out yet.  In this way we can catch multiple input
 * errors.
 *
 * argc, argv        - the command line as received by main ().
 * number_of_rows    - set from -m, default 2000.
 * number_of_columns - set from -n, default 3000.
 * repeat_count      - set from -r, default 200.
 * number_of_threads - set from -t, default 1.
 * verbose           - set to true by -v, default false.
 *
 * An option argument that is not a valid integer is reported and counted as
 * an input error; the variable then keeps its previous value.  All four
 * numbers must be strictly positive.
 *
 * Returns 0.  The function does not return if there is an input error: after
 * all errors have been reported the program ends with exit (-1).  The -h
 * option prints the usage overview and ends with exit (0); an unknown option
 * prints the same overview and ends with exit (-1).
 * -----------------------------------------------------------------------------
 */
int get_user_options (int argc, char *argv[],
                      int64_t *number_of_rows,
                      int64_t *number_of_columns,
                      int64_t *repeat_count,
                      int64_t *number_of_threads,
                      bool *verbose)
{
  int opt;
  int errors = 0;
  const int64_t default_number_of_threads = 1;
  const int64_t default_rows = 2000;
  const int64_t default_columns = 3000;
  const int64_t default_repeat_count = 200;
  const bool default_verbose = false;

  *number_of_rows = default_rows;
  *number_of_columns = default_columns;
  *number_of_threads = default_number_of_threads;
  *repeat_count = default_repeat_count;
  *verbose = default_verbose;

  while ((opt = getopt (argc, argv, "m:n:r:t:vh")) != -1)
    {
      switch (opt)
        {
        case 'm':
          if (!parse_option_value (opt, optarg, number_of_rows))
            errors++;
          break;
        case 'n':
          if (!parse_option_value (opt, optarg, number_of_columns))
            errors++;
          break;
        case 'r':
          if (!parse_option_value (opt, optarg, repeat_count))
            errors++;
          break;
        case 't':
          if (!parse_option_value (opt, optarg, number_of_threads))
            errors++;
          break;
        case 'v':
          *verbose = true;
          break;
        case 'h':
        default:
          printf ("Usage: %s "
                  "[-m <number of rows>] "
                  "[-n <number of columns>] [-r <repeat count>] "
                  "[-t <number of threads>] [-v] [-h]\n", argv[0]);
          printf ("\t-m - number of rows, default = %" PRId64 "\n",
                  default_rows);
          printf ("\t-n - number of columns, default = %" PRId64 "\n",
                  default_columns);
          printf ("\t-r - the number of times the algorithm is "
                  "repeatedly executed, default = %" PRId64 "\n",
                  default_repeat_count);
          printf ("\t-t - the number of threads used, default = %" PRId64 "\n",
                  default_number_of_threads);
          printf ("\t-v - enable verbose mode, %s by default\n",
                  (default_verbose) ? "on" : "off");
          printf ("\t-h - print this usage overview and exit\n");

          /* Only an explicit request for help is a successful run.  */
          exit ((opt == 'h') ? 0 : -1);
          break;
        }
    }

/*
 * -----------------------------------------------------------------------------
 * Check for errors and bail out in case of problems.
 * -----------------------------------------------------------------------------
 */
  if (*number_of_rows <= 0)
    {
      errors++;
      printf ("Error: The number of rows is %" PRId64 " but should be strictly "
              "positive\n", *number_of_rows);
    }
  if (*number_of_columns <= 0)
    {
      errors++;
      printf ("Error: The number of columns is %" PRId64 " but should be strictly "
              "positive\n", *number_of_columns);
    }
  if (*repeat_count <= 0)
    {
      errors++;
      printf ("Error: The repeat count is %" PRId64 " but should be strictly "
              "positive\n", *repeat_count);
    }
  if (*number_of_threads <= 0)
    {
      errors++;
      printf ("Error: The number of threads is %" PRId64 " but should be strictly "
              "positive\n", *number_of_threads);
    }
  if (errors != 0)
    {
      printf ("There are %d input error (s)\n", errors);
      exit (-1);
    }

  return (errors);
}
310
/*
 * -----------------------------------------------------------------------------
 * Print a summary of the execution status.
 *
 * number_of_rows, number_of_columns, number_of_threads - echoed in the summary.
 * errors - the number of differences found; 0 is reported as "passed", any
 *          other value as "failed".
 *
 * The summary is a single line written to stdout.
 * -----------------------------------------------------------------------------
 */
void print_all_results (int64_t number_of_rows,
                        int64_t number_of_columns,
                        int64_t number_of_threads,
                        int64_t errors)
{
  const char *verdict = (errors == 0) ? "passed" : "failed";

  /* PRId64 keeps the conversion correct whatever type int64_t maps to.  */
  printf ("mxv: error check %s - rows = %" PRId64 " columns = %" PRId64
          " threads = %" PRId64 "\n",
          verdict, number_of_rows, number_of_columns, number_of_threads);
}
325
/*
 * -----------------------------------------------------------------------------
 * Decide whether one computed element differs from its reference value.
 *
 * The relative error is used if the magnitude of the reference value exceeds
 * 100 * DBL_MIN; otherwise the absolute error is used.  The element differs
 * if that error is not at most 100 * DBL_EPSILON.  The test is written as a
 * negated "<=" so that a NaN error is treated as a difference.
 * -----------------------------------------------------------------------------
 */
static bool result_differs (double computed, double expected)
{
  const double tolerance = 100.0 * DBL_EPSILON;
  const double small = 100.0 * DBL_MIN;
  double error;

  if (fabs (expected) > small)
    error = fabs ((computed - expected) / expected);
  else
    error = fabs (computed - expected);

  return !(error <= tolerance);
}

/*
 * -----------------------------------------------------------------------------
 * Check whether the computations produced the correct results.
 *
 * m   - the number of elements in c and ref.
 * n   - the number of columns; only used in the diagnostic message.
 * c   - the computed result vector.
 * ref - the reference result vector.
 *
 * Returns the number of elements of c that differ from ref.  If there is at
 * least one difference, all m elements are printed and the differing ones are
 * marked with a '*'.
 *
 * No scratch storage is needed: the marker of an element is recomputed while
 * printing.  A vector of length zero therefore simply yields 0.
 * -----------------------------------------------------------------------------
 */
int64_t check_results (int64_t m, int64_t n, double *c, double *ref)
{
  int64_t errors = 0;

  for (int64_t i = 0; i < m; i++)
    {
      if (result_differs (c[i], ref[i]))
        errors++;
    }

  if (errors > 0)
    {
      printf ("Found %" PRId64 " differences in results for m = %" PRId64
              " n = %" PRId64 ":\n", errors, m, n);
      for (int64_t i = 0; i < m; i++)
        {
          char marker = result_differs (c[i], ref[i]) ? '*' : ' ';

          printf (" %c c[%" PRId64 "] = %f ref[%" PRId64 "] = %f\n",
                  marker, i, c[i], i, ref[i]);
        }
    }

  return (errors);
}
375