/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/* We lean on the fact that POLL{IN,OUT,ERR,HUP} correspond with their
 * EPOLL* counterparts.  We use the POLL* variants in this file because that
 * is what libuv uses elsewhere.
 */

#include "uv.h"
#include "internal.h"

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>

#include <net/if.h>
#include <sys/epoll.h>
#include <sys/param.h>
#include <sys/prctl.h>
#include <sys/sysinfo.h>
#include <unistd.h>
#include <fcntl.h>
#include <time.h>

#define HAVE_IFADDRS_H 1

#ifdef __UCLIBC__
# if __UCLIBC_MAJOR__ < 0 && __UCLIBC_MINOR__ < 9 && __UCLIBC_SUBLEVEL__ < 32
#  undef HAVE_IFADDRS_H
# endif
#endif

#ifdef HAVE_IFADDRS_H
# if defined(__ANDROID__)
#  include "uv/android-ifaddrs.h"
# else
#  include <ifaddrs.h>
# endif
# include <sys/socket.h>
# include <net/ethernet.h>
# include <netpacket/packet.h>
#endif /* HAVE_IFADDRS_H */

/* Available from 2.6.32 onwards. */
#ifndef CLOCK_MONOTONIC_COARSE
# define CLOCK_MONOTONIC_COARSE 6
#endif

/* This is rather annoying: CLOCK_BOOTTIME lives in <linux/time.h> but we can't
 * include that file because it conflicts with <time.h>. We'll just have to
 * define it ourselves.
 */
#ifndef CLOCK_BOOTTIME
# define CLOCK_BOOTTIME 7
#endif

static int read_models(unsigned int numcpus, uv_cpu_info_t* ci);
static int read_times(FILE* statfile_fp,
                      unsigned int numcpus,
                      uv_cpu_info_t* ci);
static void read_speeds(unsigned int numcpus, uv_cpu_info_t* ci);
static uint64_t read_cpufreq(unsigned int cpunum);


int uv__platform_loop_init(uv_loop_t* loop) {
  int fd;

  /* It was reported that EPOLL_CLOEXEC is not defined on Android API < 21,
   * a.k.a. Lollipop. Since EPOLL_CLOEXEC is an alias for O_CLOEXEC on all
   * architectures, we just use that instead.
   */
#if defined(__ANDROID_API__) && __ANDROID_API__ < 21
  fd = -1;
  errno = ENOSYS;
#else
  fd = epoll_create1(O_CLOEXEC);
#endif

  /* epoll_create1() can fail either because it's not implemented (old kernel)
   * or because it doesn't understand the O_CLOEXEC flag.
   */
  if (fd == -1 && (errno == ENOSYS || errno == EINVAL)) {
    fd = epoll_create(256);

    if (fd != -1)
      uv__cloexec(fd, 1);
  }

  loop->backend_fd = fd;
  loop->inotify_fd = -1;
  loop->inotify_watchers = NULL;

  if (fd == -1)
    return UV__ERR(errno);

  return 0;
}


int uv__io_fork(uv_loop_t* loop) {
  int err;
  void* old_watchers;

  old_watchers = loop->inotify_watchers;

  uv__close(loop->backend_fd);
  loop->backend_fd = -1;
  uv__platform_loop_delete(loop);

  err = uv__platform_loop_init(loop);
  if (err)
    return err;

  return uv__inotify_fork(loop, old_watchers);
}


void uv__platform_loop_delete(uv_loop_t* loop) {
  if (loop->inotify_fd == -1) return;
  uv__io_stop(loop, &loop->inotify_read_watcher, POLLIN);
  uv__close(loop->inotify_fd);
  loop->inotify_fd = -1;
}


void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
  struct epoll_event* events;
  struct epoll_event dummy;
  uintptr_t i;
  uintptr_t nfds;

  assert(loop->watchers != NULL);
  assert(fd >= 0);

  events = (struct epoll_event*) loop->watchers[loop->nwatchers];
  nfds = (uintptr_t) loop->watchers[loop->nwatchers + 1];
  if (events != NULL)
    /* Invalidate events with same file descriptor */
    for (i = 0; i < nfds; i++)
      if (events[i].data.fd == fd)
        events[i].data.fd = -1;

  /* Remove the file descriptor from the epoll.
   * This avoids a problem where the same file description remains open
   * in another process, causing repeated junk epoll events.
   *
   * We pass in a dummy epoll_event, to work around a bug in old kernels.
   */
  if (loop->backend_fd >= 0) {
    /* Work around a bug in kernels 3.10 to 3.19 where passing a struct that
     * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings.
     */
    memset(&dummy, 0, sizeof(dummy));
    epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy);
  }
}


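/* Check whether |fd| can be watched with epoll by tentatively adding it to
 * the backend fd and removing it again.  epoll_ctl() rejects descriptors it
 * cannot poll (regular files, for example) with EPERM.
 */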
int uv__io_check_fd(uv_loop_t* loop, int fd) {
  struct epoll_event e;
  int rc;

  memset(&e, 0, sizeof(e));
  e.events = POLLIN;
  e.data.fd = -1;

  rc = 0;
  if (epoll_ctl(loop->backend_fd, EPOLL_CTL_ADD, fd, &e))
    if (errno != EEXIST)
      rc = UV__ERR(errno);

  if (rc == 0)
    if (epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &e))
      abort();

  return rc;
}


void uv__io_poll(uv_loop_t* loop, int timeout) {
  /* A bug in kernels < 2.6.37 makes timeouts larger than ~30 minutes
   * effectively infinite on 32 bits architectures.  To avoid blocking
   * indefinitely, we cap the timeout and poll again if necessary.
   *
   * Note that "30 minutes" is a simplification because it depends on
   * the value of CONFIG_HZ.  The magic constant assumes CONFIG_HZ=1200,
   * that being the largest value I have seen in the wild (and only once.)
   */
  static const int max_safe_timeout = 1789569;
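  /* (Note: 1789569 == INT32_MAX / 1200, i.e. the largest millisecond value
   * whose conversion to jiffies presumably still fits in a 32-bit int on a
   * CONFIG_HZ=1200 kernel.)
   */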
  static int no_epoll_pwait;
  static int no_epoll_wait;
  struct epoll_event events[1024];
  struct epoll_event* pe;
  struct epoll_event e;
  int real_timeout;
  QUEUE* q;
  uv__io_t* w;
  sigset_t sigset;
  uint64_t sigmask;
  uint64_t base;
  int have_signals;
  int nevents;
  int count;
  int nfds;
  int fd;
  int op;
  int i;

  if (loop->nfds == 0) {
    assert(QUEUE_EMPTY(&loop->watcher_queue));
    return;
  }

  memset(&e, 0, sizeof(e));

  while (!QUEUE_EMPTY(&loop->watcher_queue)) {
    q = QUEUE_HEAD(&loop->watcher_queue);
    QUEUE_REMOVE(q);
    QUEUE_INIT(q);

    w = QUEUE_DATA(q, uv__io_t, watcher_queue);
    assert(w->pevents != 0);
    assert(w->fd >= 0);
    assert(w->fd < (int) loop->nwatchers);

    e.events = w->pevents;
    e.data.fd = w->fd;

    if (w->events == 0)
      op = EPOLL_CTL_ADD;
    else
      op = EPOLL_CTL_MOD;

    /* XXX Future optimization: do EPOLL_CTL_MOD lazily if we stop watching
     * events, skip the syscall and squelch the events after epoll_wait().
     */
    if (epoll_ctl(loop->backend_fd, op, w->fd, &e)) {
      if (errno != EEXIST)
        abort();

      assert(op == EPOLL_CTL_ADD);

      /* We've reactivated a file descriptor that's been watched before. */
      if (epoll_ctl(loop->backend_fd, EPOLL_CTL_MOD, w->fd, &e))
        abort();
    }

    w->events = w->pevents;
  }

  sigmask = 0;
  if (loop->flags & UV_LOOP_BLOCK_SIGPROF) {
    sigemptyset(&sigset);
    sigaddset(&sigset, SIGPROF);
    sigmask |= 1 << (SIGPROF - 1);
  }

  assert(timeout >= -1);
  base = loop->time;
  count = 48; /* Benchmarks suggest this gives the best throughput. */
  real_timeout = timeout;

  for (;;) {
    /* See the comment for max_safe_timeout for an explanation of why
     * this is necessary.  Executive summary: kernel bug workaround.
     */
    if (sizeof(int32_t) == sizeof(long) && timeout >= max_safe_timeout)
      timeout = max_safe_timeout;

    if (sigmask != 0 && no_epoll_pwait != 0)
      if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
        abort();

    if (no_epoll_wait != 0 || (sigmask != 0 && no_epoll_pwait == 0)) {
#if defined(__ANDROID_API__) && __ANDROID_API__ < 21
      nfds = -1;
      errno = ENOSYS;
#else
      nfds = epoll_pwait(loop->backend_fd,
                         events,
                         ARRAY_SIZE(events),
                         timeout,
                         &sigset);
#endif
      if (nfds == -1 && errno == ENOSYS)
        no_epoll_pwait = 1;
    } else {
      nfds = epoll_wait(loop->backend_fd,
                        events,
                        ARRAY_SIZE(events),
                        timeout);
      if (nfds == -1 && errno == ENOSYS)
        no_epoll_wait = 1;
    }

    if (sigmask != 0 && no_epoll_pwait != 0)
      if (pthread_sigmask(SIG_UNBLOCK, &sigset, NULL))
        abort();

    /* Update loop->time unconditionally. It's tempting to skip the update when
     * timeout == 0 (i.e. non-blocking poll) but there is no guarantee that the
     * operating system didn't reschedule our process while in the syscall.
     */
    SAVE_ERRNO(uv__update_time(loop));

    if (nfds == 0) {
      assert(timeout != -1);

      if (timeout == 0)
        return;

      /* We may have been inside the system call for longer than |timeout|
       * milliseconds so we need to update the timestamp to avoid drift.
       */
      goto update_timeout;
    }

    if (nfds == -1) {
      if (errno == ENOSYS) {
        /* epoll_wait() or epoll_pwait() failed, try the other system call. */
        assert(no_epoll_wait == 0 || no_epoll_pwait == 0);
        continue;
      }

      if (errno != EINTR)
        abort();

      if (timeout == -1)
        continue;

      if (timeout == 0)
        return;

      /* Interrupted by a signal. Update timeout and poll again. */
      goto update_timeout;
    }

    have_signals = 0;
    nevents = 0;

    {
      /* Squelch a -Waddress-of-packed-member warning with gcc >= 9. */
      union {
        struct epoll_event* events;
        uv__io_t* watchers;
      } x;

      x.events = events;
      assert(loop->watchers != NULL);
      loop->watchers[loop->nwatchers] = x.watchers;
      loop->watchers[loop->nwatchers + 1] = (void*) (uintptr_t) nfds;
    }

    for (i = 0; i < nfds; i++) {
      pe = events + i;
      fd = pe->data.fd;

      /* Skip invalidated events, see uv__platform_invalidate_fd */
      if (fd == -1)
        continue;

      assert(fd >= 0);
      assert((unsigned) fd < loop->nwatchers);

      w = loop->watchers[fd];

      if (w == NULL) {
        /* File descriptor that we've stopped watching, disarm it.
         *
         * Ignore all errors because we may be racing with another thread
         * when the file descriptor is closed.
         */
        epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, pe);
        continue;
      }

      /* Give users only events they're interested in.  This prevents spurious
       * callbacks when a previous callback invocation in this loop has
       * stopped the current watcher, and it filters out events that the user
       * has not asked us to watch.
       */
      pe->events &= w->pevents | POLLERR | POLLHUP;

      /* Work around an epoll quirk where it sometimes reports just the
       * EPOLLERR or EPOLLHUP event.  In order to force the event loop to
       * move forward, we merge in the read/write events that the watcher
       * is interested in; uv__read() and uv__write() will then deal with
       * the error or hangup in the usual fashion.
       *
       * Note to self: happens when epoll reports EPOLLIN|EPOLLHUP, the user
       * reads the available data, calls uv_read_stop(), then sometime later
       * calls uv_read_start() again.  By then, libuv has forgotten about the
       * hangup and the kernel won't report EPOLLIN again because there's
       * nothing left to read.  If anything, libuv is to blame here.  The
       * current hack is just a quick bandaid; to properly fix it, libuv
       * needs to remember the error/hangup event.  We should get that for
       * free when we switch over to edge-triggered I/O.
       */
      if (pe->events == POLLERR || pe->events == POLLHUP)
        pe->events |=
          w->pevents & (POLLIN | POLLOUT | UV__POLLRDHUP | UV__POLLPRI);

      if (pe->events != 0) {
        /* Run signal watchers last.  This also affects child process watchers
         * because those are implemented in terms of signal watchers.
         */
        if (w == &loop->signal_io_watcher)
          have_signals = 1;
        else
          w->cb(loop, w, pe->events);

        nevents++;
      }
    }

    if (have_signals != 0)
      loop->signal_io_watcher.cb(loop, &loop->signal_io_watcher, POLLIN);

    loop->watchers[loop->nwatchers] = NULL;
    loop->watchers[loop->nwatchers + 1] = NULL;

    if (have_signals != 0)
      return;  /* Event loop should cycle now so don't poll again. */

    if (nevents != 0) {
      if (nfds == ARRAY_SIZE(events) && --count != 0) {
        /* Poll for more events but don't block this time. */
        timeout = 0;
        continue;
      }
      return;
    }

    if (timeout == 0)
      return;

    if (timeout == -1)
      continue;

update_timeout:
    assert(timeout > 0);

    real_timeout -= (loop->time - base);
    if (real_timeout <= 0)
      return;

    timeout = real_timeout;
  }
}


uint64_t uv__hrtime(uv_clocktype_t type) {
  static clock_t fast_clock_id = -1;
  struct timespec t;
  clock_t clock_id;

  /* Prefer CLOCK_MONOTONIC_COARSE if available but only when it has
   * millisecond granularity or better.  CLOCK_MONOTONIC_COARSE is
   * serviced entirely from the vDSO, whereas CLOCK_MONOTONIC may
   * decide to make a costly system call.
   */
  /* TODO(bnoordhuis) Use CLOCK_MONOTONIC_COARSE for UV_CLOCK_PRECISE
   * when it has microsecond granularity or better (unlikely).
   */
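  /* CLOCK_MONOTONIC_COARSE's resolution is normally one kernel tick
   * (1/CONFIG_HZ seconds), so the 1 ms check below typically only passes on
   * kernels built with CONFIG_HZ >= 1000.
   */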
  if (type == UV_CLOCK_FAST && fast_clock_id == -1) {
    if (clock_getres(CLOCK_MONOTONIC_COARSE, &t) == 0 &&
        t.tv_nsec <= 1 * 1000 * 1000) {
      fast_clock_id = CLOCK_MONOTONIC_COARSE;
    } else {
      fast_clock_id = CLOCK_MONOTONIC;
    }
  }

  clock_id = CLOCK_MONOTONIC;
  if (type == UV_CLOCK_FAST)
    clock_id = fast_clock_id;

  if (clock_gettime(clock_id, &t))
    return 0;  /* Not really possible. */

  return t.tv_sec * (uint64_t) 1e9 + t.tv_nsec;
}


int uv_resident_set_memory(size_t* rss) {
  char buf[1024];
  const char* s;
  ssize_t n;
  long val;
  int fd;
  int i;

  do
    fd = open("/proc/self/stat", O_RDONLY);
  while (fd == -1 && errno == EINTR);

  if (fd == -1)
    return UV__ERR(errno);

  do
    n = read(fd, buf, sizeof(buf) - 1);
  while (n == -1 && errno == EINTR);

  uv__close(fd);
  if (n == -1)
    return UV__ERR(errno);
  buf[n] = '\0';

  s = strchr(buf, ' ');
  if (s == NULL)
    goto err;

  s += 1;
  if (*s != '(')
    goto err;

  s = strchr(s, ')');
  if (s == NULL)
    goto err;

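  /* Advance past 22 field separators so that s ends up at field 24 of
   * /proc/self/stat, "rss", which follows the ")" that closes the comm
   * field (see proc(5)).
   */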
  for (i = 1; i <= 22; i++) {
    s = strchr(s + 1, ' ');
    if (s == NULL)
      goto err;
  }

  errno = 0;
  val = strtol(s, NULL, 10);
  if (errno != 0)
    goto err;
  if (val < 0)
    goto err;

  *rss = val * getpagesize();
  return 0;

err:
  return UV_EINVAL;
}


int uv_uptime(double* uptime) {
  static volatile int no_clock_boottime;
  struct timespec now;
  int r;

  /* Try CLOCK_BOOTTIME first, fall back to CLOCK_MONOTONIC if not available
   * (pre-2.6.39 kernels). CLOCK_MONOTONIC doesn't increase when the system
   * is suspended.
   */
  if (no_clock_boottime) {
    retry: r = clock_gettime(CLOCK_MONOTONIC, &now);
  }
  else if ((r = clock_gettime(CLOCK_BOOTTIME, &now)) && errno == EINVAL) {
    no_clock_boottime = 1;
    goto retry;
  }

  if (r)
    return UV__ERR(errno);

  *uptime = now.tv_sec;
  return 0;
}


static int uv__cpu_num(FILE* statfile_fp, unsigned int* numcpus) {
  unsigned int num;
  char buf[1024];

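  /* /proc/stat starts with an aggregate "cpu  ..." line, followed by one
   * "cpuN ..." line per online CPU.  Read past the aggregate line, then
   * count the per-CPU lines.
   */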
593     return UV_EIO;
594 
595   num = 0;
596   while (fgets(buf, sizeof(buf), statfile_fp)) {
597     if (strncmp(buf, "cpu", 3))
598       break;
599     num++;
600   }
601 
602   if (num == 0)
603     return UV_EIO;
604 
605   *numcpus = num;
606   return 0;
607 }
608 
609 
610 int uv_cpu_info(uv_cpu_info_t** cpu_infos, int* count) {
611   unsigned int numcpus;
612   uv_cpu_info_t* ci;
613   int err;
614   FILE* statfile_fp;
615 
616   *cpu_infos = NULL;
617   *count = 0;
618 
619   statfile_fp = uv__open_file("/proc/stat");
620   if (statfile_fp == NULL)
621     return UV__ERR(errno);
622 
623   err = uv__cpu_num(statfile_fp, &numcpus);
624   if (err < 0)
625     goto out;
626 
627   err = UV_ENOMEM;
628   ci = uv__calloc(numcpus, sizeof(*ci));
629   if (ci == NULL)
630     goto out;
631 
632   err = read_models(numcpus, ci);
633   if (err == 0)
634     err = read_times(statfile_fp, numcpus, ci);
635 
636   if (err) {
637     uv_free_cpu_info(ci, numcpus);
638     goto out;
639   }
640 
641   /* read_models() on x86 also reads the CPU speed from /proc/cpuinfo.
642    * We don't check for errors here. Worst case, the field is left zero.
643    */
644   if (ci[0].speed == 0)
645     read_speeds(numcpus, ci);
646 
647   *cpu_infos = ci;
648   *count = numcpus;
649   err = 0;
650 
651 out:
652 
653   if (fclose(statfile_fp))
654     if (errno != EINTR && errno != EINPROGRESS)
655       abort();
656 
657   return err;
658 }
659 
660 
661 static void read_speeds(unsigned int numcpus, uv_cpu_info_t* ci) {
662   unsigned int num;
663 
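  /* scaling_cur_freq is reported in kHz; uv_cpu_info_t.speed is in MHz,
   * hence the division by 1000.
   */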
  for (num = 0; num < numcpus; num++)
    ci[num].speed = read_cpufreq(num) / 1000;
}


/* Also reads the CPU frequency on x86. The other architectures only have
 * a BogoMIPS field, which may not be very accurate.
 *
 * Note: Simply returns on error, uv_cpu_info() takes care of the cleanup.
 */
static int read_models(unsigned int numcpus, uv_cpu_info_t* ci) {
  static const char model_marker[] = "model name\t: ";
  static const char speed_marker[] = "cpu MHz\t\t: ";
  const char* inferred_model;
  unsigned int model_idx;
  unsigned int speed_idx;
  char buf[1024];
  char* model;
  FILE* fp;

  /* Most are unused on non-ARM, non-MIPS and non-x86 architectures. */
  (void) &model_marker;
  (void) &speed_marker;
  (void) &speed_idx;
  (void) &model;
  (void) &buf;
  (void) &fp;

  model_idx = 0;
  speed_idx = 0;

#if defined(__arm__) || \
    defined(__i386__) || \
    defined(__mips__) || \
    defined(__x86_64__)
  fp = uv__open_file("/proc/cpuinfo");
  if (fp == NULL)
    return UV__ERR(errno);

  while (fgets(buf, sizeof(buf), fp)) {
    if (model_idx < numcpus) {
      if (strncmp(buf, model_marker, sizeof(model_marker) - 1) == 0) {
        model = buf + sizeof(model_marker) - 1;
        model = uv__strndup(model, strlen(model) - 1);  /* Strip newline. */
        if (model == NULL) {
          fclose(fp);
          return UV_ENOMEM;
        }
        ci[model_idx++].model = model;
        continue;
      }
    }
#if defined(__arm__) || defined(__mips__)
    if (model_idx < numcpus) {
#if defined(__arm__)
      /* Fallback for pre-3.8 kernels. */
      static const char model_marker[] = "Processor\t: ";
#else  /* defined(__mips__) */
      static const char model_marker[] = "cpu model\t\t: ";
#endif
      if (strncmp(buf, model_marker, sizeof(model_marker) - 1) == 0) {
        model = buf + sizeof(model_marker) - 1;
        model = uv__strndup(model, strlen(model) - 1);  /* Strip newline. */
        if (model == NULL) {
          fclose(fp);
          return UV_ENOMEM;
        }
        ci[model_idx++].model = model;
        continue;
      }
    }
#else  /* !__arm__ && !__mips__ */
    if (speed_idx < numcpus) {
      if (strncmp(buf, speed_marker, sizeof(speed_marker) - 1) == 0) {
        ci[speed_idx++].speed = atoi(buf + sizeof(speed_marker) - 1);
        continue;
      }
    }
#endif  /* __arm__ || __mips__ */
  }

  fclose(fp);
#endif  /* __arm__ || __i386__ || __mips__ || __x86_64__ */

  /* Now we want to make sure that all the models contain *something* because
   * it's not safe to leave them as null. Copy the last entry unless there
   * isn't one, in that case we simply put "unknown" into everything.
   */
  inferred_model = "unknown";
  if (model_idx > 0)
    inferred_model = ci[model_idx - 1].model;

  while (model_idx < numcpus) {
    model = uv__strndup(inferred_model, strlen(inferred_model));
    if (model == NULL)
      return UV_ENOMEM;
    ci[model_idx++].model = model;
  }

  return 0;
}


static int read_times(FILE* statfile_fp,
                      unsigned int numcpus,
                      uv_cpu_info_t* ci) {
  struct uv_cpu_times_s ts;
  unsigned int ticks;
  unsigned int multiplier;
  uint64_t user;
  uint64_t nice;
  uint64_t sys;
  uint64_t idle;
  uint64_t dummy;
  uint64_t irq;
  uint64_t num;
  uint64_t len;
  char buf[1024];

  ticks = (unsigned int)sysconf(_SC_CLK_TCK);
  assert(ticks != (unsigned int) -1);
  assert(ticks != 0);
  multiplier = ((uint64_t)1000L / ticks);

  rewind(statfile_fp);

  if (!fgets(buf, sizeof(buf), statfile_fp))
    abort();

  num = 0;

  while (fgets(buf, sizeof(buf), statfile_fp)) {
    if (num >= numcpus)
      break;

    if (strncmp(buf, "cpu", 3))
      break;

    /* skip "cpu<num> " marker */
    {
      unsigned int n;
      int r = sscanf(buf, "cpu%u ", &n);
      assert(r == 1);
      (void) r;  /* silence build warning */
      for (len = sizeof("cpu0"); n /= 10; len++);
    }
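    /* len is now the length of the "cpu<num> " prefix: sizeof("cpu0") covers
     * "cpu" plus one digit plus the trailing space (courtesy of the NUL),
     * and the loop adds one for every additional digit in the CPU number.
     */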

    /* Line contains user, nice, system, idle, iowait, irq, softirq, steal,
     * guest, guest_nice but we're only interested in the first four + irq.
     *
     * Don't use %*s to skip fields or %ll to read straight into the uint64_t
     * fields, they're not allowed in C89 mode.
     */
    if (6 != sscanf(buf + len,
                    "%" PRIu64 " %" PRIu64 " %" PRIu64
                    "%" PRIu64 " %" PRIu64 " %" PRIu64,
                    &user,
                    &nice,
                    &sys,
                    &idle,
                    &dummy,
                    &irq))
      abort();

    ts.user = user * multiplier;
    ts.nice = nice * multiplier;
    ts.sys  = sys * multiplier;
    ts.idle = idle * multiplier;
    ts.irq  = irq * multiplier;
    ci[num++].cpu_times = ts;
  }
  assert(num == numcpus);

  return 0;
}


static uint64_t read_cpufreq(unsigned int cpunum) {
  uint64_t val;
  char buf[1024];
  FILE* fp;

  snprintf(buf,
           sizeof(buf),
           "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_cur_freq",
           cpunum);

  fp = uv__open_file(buf);
  if (fp == NULL)
    return 0;

  if (fscanf(fp, "%" PRIu64, &val) != 1)
    val = 0;

  fclose(fp);

  return val;
}


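/* Decide whether |ent| should be skipped.  When collecting addresses
 * (UV__EXCLUDE_IFADDR), link-layer PF_PACKET entries are skipped; when
 * collecting physical addresses (UV__EXCLUDE_IFPHYS), everything else is.
 * Interfaces that are down, not running or without an address are always
 * skipped.
 */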
static int uv__ifaddr_exclude(struct ifaddrs *ent, int exclude_type) {
  if (!((ent->ifa_flags & IFF_UP) && (ent->ifa_flags & IFF_RUNNING)))
    return 1;
  if (ent->ifa_addr == NULL)
    return 1;
  /*
   * On Linux getifaddrs returns information related to the raw underlying
   * devices. We're not interested in this information yet.
   */
  if (ent->ifa_addr->sa_family == PF_PACKET)
    return exclude_type;
  return !exclude_type;
}

int uv_interface_addresses(uv_interface_address_t** addresses, int* count) {
#ifndef HAVE_IFADDRS_H
  *count = 0;
  *addresses = NULL;
  return UV_ENOSYS;
#else
  struct ifaddrs *addrs, *ent;
  uv_interface_address_t* address;
  int i;
  struct sockaddr_ll *sll;

  *count = 0;
  *addresses = NULL;

  if (getifaddrs(&addrs))
    return UV__ERR(errno);

  /* Count the number of interfaces */
  for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
    if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFADDR))
      continue;

    (*count)++;
  }

  if (*count == 0) {
    freeifaddrs(addrs);
    return 0;
  }

  /* Make sure the memory is initialized to zero using calloc() */
  *addresses = uv__calloc(*count, sizeof(**addresses));
  if (!(*addresses)) {
    freeifaddrs(addrs);
    return UV_ENOMEM;
  }

  address = *addresses;

  for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
    if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFADDR))
      continue;

    address->name = uv__strdup(ent->ifa_name);

    if (ent->ifa_addr->sa_family == AF_INET6) {
      address->address.address6 = *((struct sockaddr_in6*) ent->ifa_addr);
    } else {
      address->address.address4 = *((struct sockaddr_in*) ent->ifa_addr);
    }

    if (ent->ifa_netmask->sa_family == AF_INET6) {
      address->netmask.netmask6 = *((struct sockaddr_in6*) ent->ifa_netmask);
    } else {
      address->netmask.netmask4 = *((struct sockaddr_in*) ent->ifa_netmask);
    }

    address->is_internal = !!(ent->ifa_flags & IFF_LOOPBACK);

    address++;
  }

  /* Fill in physical addresses for each interface */
  for (ent = addrs; ent != NULL; ent = ent->ifa_next) {
    if (uv__ifaddr_exclude(ent, UV__EXCLUDE_IFPHYS))
      continue;

    address = *addresses;

    for (i = 0; i < (*count); i++) {
      size_t namelen = strlen(ent->ifa_name);
      /* Alias interfaces share the same physical address */
      if (strncmp(address->name, ent->ifa_name, namelen) == 0 &&
          (address->name[namelen] == 0 || address->name[namelen] == ':')) {
        sll = (struct sockaddr_ll*)ent->ifa_addr;
        memcpy(address->phys_addr, sll->sll_addr, sizeof(address->phys_addr));
      }
      address++;
    }
  }

  freeifaddrs(addrs);

  return 0;
#endif
}


void uv_free_interface_addresses(uv_interface_address_t* addresses,
  int count) {
  int i;

  for (i = 0; i < count; i++) {
    uv__free(addresses[i].name);
  }

  uv__free(addresses);
}


void uv__set_process_title(const char* title) {
#if defined(PR_SET_NAME)
  prctl(PR_SET_NAME, title);  /* Only copies first 16 characters. */
#endif
}


static uint64_t uv__read_proc_meminfo(const char* what) {
  uint64_t rc;
  ssize_t n;
  char* p;
  int fd;
  char buf[4096];  /* Large enough to hold all of /proc/meminfo. */

  rc = 0;
  fd = uv__open_cloexec("/proc/meminfo", O_RDONLY);

  if (fd < 0)
    return 0;

  n = read(fd, buf, sizeof(buf) - 1);

  if (n <= 0)
    goto out;

  buf[n] = '\0';
  p = strstr(buf, what);

  if (p == NULL)
    goto out;

  p += strlen(what);

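  /* p now points just past the field name.  A /proc/meminfo line looks like
   * "MemFree:         123456 kB", so parse the value and scale it from
   * kibibytes to bytes.
   */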
  if (1 != sscanf(p, "%" PRIu64 " kB", &rc))
    goto out;

  rc *= 1024;

out:

  if (uv__close_nocheckstdio(fd))
    abort();

  return rc;
}


uint64_t uv_get_free_memory(void) {
  struct sysinfo info;
  uint64_t rc;

  rc = uv__read_proc_meminfo("MemFree:");

  if (rc != 0)
    return rc;

  if (0 == sysinfo(&info))
    return (uint64_t) info.freeram * info.mem_unit;

  return 0;
}


uint64_t uv_get_total_memory(void) {
  struct sysinfo info;
  uint64_t rc;

  rc = uv__read_proc_meminfo("MemTotal:");

  if (rc != 0)
    return rc;

  if (0 == sysinfo(&info))
    return (uint64_t) info.totalram * info.mem_unit;

  return 0;
}


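/* Read a single integer from a cgroup v1 control file, e.g.
 * /sys/fs/cgroup/memory/memory.limit_in_bytes.  Returns 0 if the file does
 * not exist or cannot be parsed.
 */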
static uint64_t uv__read_cgroups_uint64(const char* cgroup, const char* param) {
  char filename[256];
  uint64_t rc;
  int fd;
  ssize_t n;
  char buf[32];  /* Large enough to hold an encoded uint64_t. */

  snprintf(filename, 256, "/sys/fs/cgroup/%s/%s", cgroup, param);

  rc = 0;
  fd = uv__open_cloexec(filename, O_RDONLY);

  if (fd < 0)
    return 0;

  n = read(fd, buf, sizeof(buf) - 1);

  if (n > 0) {
    buf[n] = '\0';
    sscanf(buf, "%" PRIu64, &rc);
  }

  if (uv__close_nocheckstdio(fd))
    abort();

  return rc;
}


uint64_t uv_get_constrained_memory(void) {
  /*
   * This might return 0 if there was a problem getting the memory limit from
   * cgroups. This is OK because a return value of 0 signifies that the memory
   * limit is unknown.
   */
  return uv__read_cgroups_uint64("memory", "memory.limit_in_bytes");
}