1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27
28 /*
29 * Serengeti Environmental Information driver (sgenv)
30 *
31 * This driver requests the environmental properties from the SC. These
32 * request-response transactions are transferred through the SBBC mailbox,
33 * between the Domain and the SC.
34 *
35 * All sensors have the same sort of properties: Low and high limits, warning
36 * thresholds, last measured value, time of measurement, units (e.g., degrees
37 * Celsius, volts, etc.), and so on.
38 *
39 * Each sensor is named by a unique Tag. The Tag identifies the geographical
40 * location of the sensor in the Serengeti, and what it is the sensor measures.
41 *
42 * Requestable sensor properties are broken into two types: Those which are
43 * quasi-constant (infrequently change) - e.g., tolerance-defining low and high
44 * limits; and those which are volatile (typically change) - e.g., the current
45 * measurement.
46 *
47 * Unfortunately, property sets are too large to comprise a single mailbox
48 * message, so the sets are further subdivided into notionally arbitrary
49 * collections. NOTE: The SC-mailbox framework now supports fragmented messages
50 * which could allow us to request the data in larger chunks in the future.
51 *
52 * Each collection is fetched by a separate transaction.
53 *
54 * Firstly there is a transaction to obtain a list of all collections. Each non-
55 * zero key in this list is associated with one of the collections of sensors.
56 * (This sparse list of keys is then used as an index to obtain all the sensor
57 * data for each collection).
58 *
59 * For each collection, there is one request-reply transaction to obtain a list
60 * of all sensors in that collection and the limits that apply to each; and a
61 * separate request-reply transaction to obtain the measurements from the
62 * sensors in the collection.
63 *
64 * The sgenv driver assembles each property set from the constituent
65 * collections, and caches the assembled property sets into the appropriate
66 * cache (env_cache, board_cache). The caches are created at startup and are
67 * updated on receipt of events from the SC. These events (which include DR
68 * events and ENV events) notify sgenv of configuration changes and
69 * environmental state changes (such as a sensor state change, Fan speed
70 * change).
71 *
72 * The SC-APP maintains a pseudo-sensor in each collection "measuring" changes
73 * to the quasi-constants in that collection. By monitoring these pseudo-sensor
74 * measurements, the kstat driver avoids redundant or speculative re-fetches of
75 * the quasi-constant properties.
76 */
77
78 #include <sys/time.h>
79 #include <sys/errno.h>
80 #include <sys/kmem.h>
81 #include <sys/stat.h>
82 #include <sys/cmn_err.h>
83 #include <sys/disp.h>
84
85 #include <sys/conf.h>
86 #include <sys/modctl.h>
87 #include <sys/devops.h>
88 #include <sys/ddi.h>
89 #include <sys/sunddi.h>
90
91 #include <sys/sgevents.h>
92 #include <sys/sysevent.h>
93 #include <sys/sysevent/eventdefs.h>
94 #include <sys/sysevent/domain.h>
95 #include <sys/sysevent/env.h>
96
97 #include <sys/serengeti.h>
98 #include <sys/sgfrutypes.h>
99
100 #include <sys/sgsbbc.h>
101 #include <sys/sgsbbc_iosram.h>
102 #include <sys/sgsbbc_mailbox.h>
103
104 #include <sys/sbd_ioctl.h> /* sbd header files needed for board support */
105 #include <sys/sbdp_priv.h>
106 #include <sys/sbd.h>
107
108 #include <sys/sgenv_impl.h>
109
110
111 /*
112 * Global Variables - can be patched from Solaris
113 * ==============================================
114 */
115
/*
 * Longest time this driver will wait for a mailbox reply before treating
 * the request as timed out. _init() seeds it from the Serengeti-wide
 * <sbbc_mbox_default_timeout>; it can be patched after boot to tune the
 * timeout for SGENV alone.
 */
int	sgenv_max_mbox_wait_time = 0;

#ifdef DEBUG
/*
 * Selects how much debug output the driver produces.
 */
uint_t	sgenv_debug = SGENV_DEBUG_NONE;
#endif	/* DEBUG */
130
131
132 /*
133 * Module Variables
134 * ================
135 */
136
137 /*
138 * Driver entry points
139 */
140 static struct cb_ops sgenv_cb_ops = {
141 nodev, /* open() */
142 nodev, /* close() */
143 nodev, /* strategy() */
144 nodev, /* print() */
145 nodev, /* dump() */
146 nodev, /* read() */
147 nodev, /* write() */
148 nodev, /* ioctl() */
149 nodev, /* devmap() */
150 nodev, /* mmap() */
151 ddi_segmap, /* segmap() */
152 nochpoll, /* poll() */
153 ddi_prop_op, /* prop_op() */
154 NULL, /* cb_str */
155 D_NEW | D_MP /* cb_flag */
156 };
157
158
159 static struct dev_ops sgenv_ops = {
160 DEVO_REV,
161 0, /* ref count */
162 ddi_getinfo_1to1, /* getinfo() */
163 nulldev, /* identify() */
164 nulldev, /* probe() */
165 sgenv_attach, /* attach() */
166 sgenv_detach, /* detach */
167 nodev, /* reset */
168 &sgenv_cb_ops, /* pointer to cb_ops structure */
169 (struct bus_ops *)NULL,
170 nulldev, /* power() */
171 ddi_quiesce_not_needed, /* quiesce() */
172 };
173
174 /*
175 * Loadable module support.
176 */
177 extern struct mod_ops mod_driverops;
178
179 static struct modldrv modldrv = {
180 &mod_driverops, /* Type of module. This is a driver */
181 "Environmental Driver", /* Name of the module */
182 &sgenv_ops /* pointer to the dev_ops structure */
183 };
184
185 static struct modlinkage modlinkage = {
186 MODREV_1,
187 &modldrv,
188 NULL
189 };
190
191 /* Opaque state structure pointer */
192 static void *sgenv_statep;
193
194 /*
195 * <env_cache> is a cache of all the sensor readings which is persistent
196 * between kstat reads. It is created at init and gets updated upon receipt
197 * of events from the SC.
198 *
199 * The kstat_update function takes a copy of the non-zero entries in this
200 * cache and creates a temp buffer called env_cache_snapshot. The
201 * kstat_snapshot function then bcopies the env_cache_snapshot into the
202 * kstat buffer. This is done because there is no way to ensure that the
203 * env_cache won't change between the kstat_update and the kstat_snapshot
204 * which will cause problems as the update sets the ks_data_size.
205 */
206 static env_sensor_t *env_cache[SGENV_MAX_HPU_KEYS] = {NULL};
207 static void *env_cache_snapshot = NULL;
208 static size_t env_cache_snapshot_size = 0;
209
210 /*
211 * This is set to TRUE the first time env data is stored in the cache
212 * so that at least from then on, old data can be returned if a call to
213 * the mailbox fails.
214 */
215 static int env_cache_updated = FALSE;
216
217 /*
218 * This lock is needed by the variable-sized kstat which returns
219 * environmental info. It prevents data-size races with kstat clients.
220 */
221 static kmutex_t env_kstat_lock;
222
223 /*
224 * The <env_cache> can be accessed asynchronously by the polling function
225 * and the kstat_read framework. This mutex ensures that access to the data
226 * is controlled correctly.
227 */
228 static kmutex_t env_cache_lock;
229
230 /*
231 * We need to store the last time we asked the SC for environmental information
232 * so that we do not send too many requests in a short period of time.
233 */
234 static hrtime_t last_env_read_time = 0;
235
236 /*
237 * Variables to coordinate between the handlers which are triggered when
238 * the env cache needs to be updated and the thread which does the work.
239 */
240 static volatile int env_thread_run = 0;
241 static kthread_t *env_thread = NULL;
242 static kt_did_t env_thread_tid;
243
244 static kcondvar_t env_flag_cond;
245 static kmutex_t env_flag_lock;
246 static boolean_t env_cache_updating = B_FALSE;
247 static boolean_t env_cache_update_needed = B_TRUE;
248
249 /*
250 * <board_cache> is a cache of all the board status info and it is persistent
251 * between kstat reads.
252 *
253 * The kstat_update function takes a copy of the non-zero entries in this
254 * cache and copies them into the board_cache_snapshot buffer. The
255 * kstat_snapshot function then bcopies the board_cache_snapshot into the
256 * kstat buffer. This is done because there is no way to ensure that the
257 * board_cache won't change between the kstat_update and the kstat_snapshot
258 * which will cause problems as the update sets the ks_data_size.
259 */
260 static sg_board_info_t board_cache[SG_MAX_BDS] = {NULL};
261 static sg_board_info_t board_cache_snapshot[SG_MAX_BDS] = {NULL};
262 static int board_cache_updated = FALSE;
263
264 /*
265 * This mutex ensures the <board_cache> is not destroyed while the board data
266 * is being collected.
267 */
268 static kmutex_t board_cache_lock;
269
270 /*
271 * This lock is needed by the variable-sized kstat which returns
272 * board status info. It prevents data-size races with kstat clients.
273 */
274 static kmutex_t board_kstat_lock;
275
276 /*
277 * This is a count of the number of board readings were stored by
278 * the kstat_update routine - this is needed by the kstat_snapshot routine.
279 */
280 static int board_count = 0;
281 static int board_count_snapshot = 0;
282
283 /*
284 * We need to store the last time we asked the SC for board information
285 * so that we do not send too many requests in a short period of time.
286 */
287 static hrtime_t last_board_read_time = 0;
288
289 /*
290 * Variables to coordinate between the handlers which are triggered when
291 * the board cache needs to be updated and the thread which does the work.
292 */
293 static volatile int board_thread_run = 0;
294 static kthread_t *board_thread = NULL;
295 static kt_did_t board_thread_tid;
296 static kcondvar_t board_flag_cond;
297
298 static kmutex_t board_flag_lock;
299 static boolean_t board_cache_updating = B_FALSE;
300 static boolean_t board_cache_update_needed = B_TRUE;
301
302 /*
303 * Used to keep track of the number of sensors associated with each key.
304 * The sum of all the values in this array is used to set ks_data_size.
305 */
306 static int vol_sensor_count[SGENV_MAX_HPU_KEYS] = {0};
307
308 /*
309 * This variable keeps a count of the number of errors that have occurred
310 * when we make calls to the mailbox for Env or Board data.
311 */
312 static int sgenv_mbox_error_count = 0;
313
314 /*
315 * mutex which protects the keyswitch interrupt handler.
316 */
317 static kmutex_t keysw_hdlr_lock;
318
319 /*
320 * mutex which protects the env interrupt handler.
321 */
322 static kmutex_t env_hdlr_lock;
323
324 /*
325 * mutex which protects the DR handler interrupt handler.
326 */
327 static kmutex_t dr_hdlr_lock;
328
329 /*
330 * Payloads of the event handlers.
331 */
332 static sg_event_key_position_t keysw_payload;
333 static sbbc_msg_t keysw_payload_msg;
334
335 static sg_event_env_changed_t env_payload;
336 static sbbc_msg_t env_payload_msg;
337
338 static sg_event_fan_status_t fan_payload;
339 static sbbc_msg_t fan_payload_msg;
340
341 static sg_system_fru_descriptor_t dr_payload;
342 static sbbc_msg_t dr_payload_msg;
343
344 /*
345 * The following 3 arrays list all possible HPUs, Parts and Device types
346 */
347
348 /*
349 * ensure that all possible HPUs exported, as described in the main comment
350 * in <sys/sensor_tag.h>, are accounted for here.
351 */
352 static const hpu_value_t hpus[] = {
353 HPU_ENTRY(SG_HPU_TYPE_UNKNOWN),
354 HPU_ENTRY(SG_HPU_TYPE_CPU_BOARD),
355 HPU_ENTRY(SG_HPU_TYPE_PCI_IO_BOARD),
356 HPU_ENTRY(SG_HPU_TYPE_CPCI_IO_BOARD),
357 HPU_ENTRY(SG_HPU_TYPE_SP_CPCI_IO_BOARD),
358 HPU_ENTRY(SG_HPU_TYPE_REPEATER_BOARD),
359 HPU_ENTRY(SG_HPU_TYPE_L2_REPEATER_BOARD),
360 HPU_ENTRY(SG_HPU_TYPE_SYSTEM_CONTROLLER_BOARD),
361 HPU_ENTRY(SG_HPU_TYPE_SP_SYSTEM_CONTROLLER_BOARD),
362 HPU_ENTRY(SG_HPU_TYPE_A123_POWER_SUPPLY),
363 HPU_ENTRY(SG_HPU_TYPE_A138_POWER_SUPPLY),
364 HPU_ENTRY(SG_HPU_TYPE_A145_POWER_SUPPLY),
365 HPU_ENTRY(SG_HPU_TYPE_A152_POWER_SUPPLY),
366 HPU_ENTRY(SG_HPU_TYPE_A153_POWER_SUPPLY),
367 HPU_ENTRY(SG_HPU_TYPE_RACK_FAN_TRAY),
368 HPU_ENTRY(SG_HPU_TYPE_SP_FAN_TRAY),
369 HPU_ENTRY(SG_HPU_TYPE_MD_TOP_IO_FAN_TRAY),
370 HPU_ENTRY(SG_HPU_TYPE_MD_BOTTOM_IO_FAN_TRAY),
371 HPU_ENTRY(SG_HPU_TYPE_R12_THREE_FAN_TRAY),
372 HPU_ENTRY(SG_HPU_TYPE_K12_IO_ONE_FAN_TRAY),
373 HPU_ENTRY(SG_HPU_TYPE_K12_CPU_THREE_FAN_TRAY),
374 HPU_ENTRY(SG_HPU_TYPE_R24_IO_FOUR_FAN_TRAY),
375 HPU_ENTRY(SG_HPU_TYPE_R24_CPU_SIX_FAN_TRAY),
376 0, (char *)NULL
377 };
378
379 static const struct part_value parts[] = {
380 PART_VALUE(SG_SENSOR_PART_SBBC),
381 PART_VALUE(SG_SENSOR_PART_SDC),
382 PART_VALUE(SG_SENSOR_PART_AR),
383 PART_VALUE(SG_SENSOR_PART_CBH),
384 PART_VALUE(SG_SENSOR_PART_DX),
385 PART_VALUE(SG_SENSOR_PART_CHEETAH),
386 PART_VALUE(SG_SENSOR_PART_1_5_VDC),
387 PART_VALUE(SG_SENSOR_PART_3_3_VDC),
388 PART_VALUE(SG_SENSOR_PART_5_VDC),
389 PART_VALUE(SG_SENSOR_PART_12_VDC),
390 PART_VALUE(SG_SENSOR_PART_48_VDC),
391 PART_VALUE(SG_SENSOR_PART_CURRENT),
392 PART_VALUE(SG_SENSOR_PART_BOARD),
393 PART_VALUE(SG_SENSOR_PART_SCAPP),
394 PART_VALUE(SG_SENSOR_PART_SCHIZO),
395 PART_VALUE(SG_SENSOR_PART_FAN),
396 0, (char *)NULL
397 };
398
399 static const struct type_value types[] = {
400 TYPE_VALUE(SG_SENSOR_TYPE_CURRENT, SG_CURRENT_SCALE),
401 TYPE_VALUE(SG_SENSOR_TYPE_TEMPERATURE, SG_TEMPERATURE_SCALE),
402 TYPE_VALUE(SG_SENSOR_TYPE_1_5_VDC, SG_1_5_VDC_SCALE),
403 TYPE_VALUE(SG_SENSOR_TYPE_1_8_VDC, SG_1_8_VDC_SCALE),
404 TYPE_VALUE(SG_SENSOR_TYPE_3_3_VDC, SG_3_3_VDC_SCALE),
405 TYPE_VALUE(SG_SENSOR_TYPE_5_VDC, SG_5_VDC_SCALE),
406 TYPE_VALUE(SG_SENSOR_TYPE_12_VDC, SG_12_VDC_SCALE),
407 TYPE_VALUE(SG_SENSOR_TYPE_48_VDC, SG_48_VDC_SCALE),
408 TYPE_VALUE(SG_SENSOR_TYPE_ENVDB, 1),
409 TYPE_VALUE(SG_SENSOR_TYPE_COOLING, 1),
410 0, (char *)NULL
411 };
412
413 int
_init(void)414 _init(void)
415 {
416 int error = 0;
417
418 error = ddi_soft_state_init(&sgenv_statep,
419 sizeof (sgenv_soft_state_t), 1);
420
421 if (error)
422 return (error);
423
424 error = mod_install(&modlinkage);
425 if (error) {
426 ddi_soft_state_fini(&sgenv_statep);
427 return (error);
428 }
429
430 mutex_init(&env_kstat_lock, NULL, MUTEX_DEFAULT, NULL);
431 mutex_init(&env_cache_lock, NULL, MUTEX_DEFAULT, NULL);
432 mutex_init(&env_flag_lock, NULL, MUTEX_DEFAULT, NULL);
433 cv_init(&env_flag_cond, NULL, CV_DEFAULT, NULL);
434
435 mutex_init(&board_cache_lock, NULL, MUTEX_DEFAULT, NULL);
436 mutex_init(&board_kstat_lock, NULL, MUTEX_DEFAULT, NULL);
437 mutex_init(&board_flag_lock, NULL, MUTEX_DEFAULT, NULL);
438 cv_init(&board_flag_cond, NULL, CV_DEFAULT, NULL);
439
440 mutex_init(&keysw_hdlr_lock, NULL, MUTEX_DEFAULT, NULL);
441 mutex_init(&env_hdlr_lock, NULL, MUTEX_DEFAULT, NULL);
442 mutex_init(&dr_hdlr_lock, NULL, MUTEX_DEFAULT, NULL);
443
444 /* set the default timeout value */
445 sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout;
446
447 return (error);
448 }
449
450
451 int
_info(struct modinfo * modinfop)452 _info(struct modinfo *modinfop)
453 {
454 return (mod_info(&modlinkage, modinfop));
455 }
456
457
458 int
_fini(void)459 _fini(void)
460 {
461 int error = 0;
462
463 error = mod_remove(&modlinkage);
464 if (error)
465 return (error);
466
467 mutex_destroy(&env_kstat_lock);
468 mutex_destroy(&env_cache_lock);
469
470 mutex_destroy(&board_cache_lock);
471 mutex_destroy(&board_kstat_lock);
472
473 mutex_destroy(&keysw_hdlr_lock);
474 mutex_destroy(&env_hdlr_lock);
475 mutex_destroy(&dr_hdlr_lock);
476
477 ddi_soft_state_fini(&sgenv_statep);
478
479 return (error);
480 }
481
482
483 static int
sgenv_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)484 sgenv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
485 {
486 sgenv_soft_state_t *softsp;
487
488 int instance;
489 int err;
490
491 switch (cmd) {
492 case DDI_ATTACH:
493
494 instance = ddi_get_instance(dip);
495
496 /* allocate a global sgenv_soft_state structure */
497 err = ddi_soft_state_zalloc(sgenv_statep, instance);
498 if (err != DDI_SUCCESS) {
499 cmn_err(CE_WARN, "attach: could not allocate state "
500 "structure for inst %d.", instance);
501 return (DDI_FAILURE);
502 }
503
504 softsp = ddi_get_soft_state(sgenv_statep, instance);
505 if (softsp == NULL) {
506 ddi_soft_state_free(sgenv_statep, instance);
507 cmn_err(CE_WARN, "attach: could not get state "
508 "structure for inst %d.", instance);
509 return (DDI_FAILURE);
510 }
511
512 softsp->dip = dip;
513 softsp->instance = instance;
514
515 err = sgenv_add_kstats(softsp);
516 if (err != 0) {
517 /*
518 * Some of the kstats may have been created before the
519 * error occurred in sgenv_add_kstats(), so we call
520 * sgenv_remove_kstats() which removes any kstats
521 * already created.
522 */
523 sgenv_remove_kstats(softsp);
524 ddi_soft_state_free(sgenv_statep, instance);
525 return (DDI_FAILURE);
526 }
527
528 /*
529 * Before we setup the framework to read the data from the SC
530 * we need to ensure the caches are initialized correctly.
531 */
532 sgenv_init_board_cache();
533 sgenv_init_env_cache();
534
535 /*
536 * Add the threads which will update the env and board caches
537 * and post events to Sysevent Framework in the background
538 * when the interrupt handlers watching for ENV/DR events
539 * indicate to the threads that they need to do so.
540 */
541 err = sgenv_create_cache_update_threads();
542 if (err != DDI_SUCCESS) {
543 sgenv_remove_kstats(softsp);
544 ddi_soft_state_free(sgenv_statep, instance);
545 return (DDI_FAILURE);
546 }
547
548 err = ddi_create_minor_node(dip, SGENV_DRV_NAME, S_IFCHR,
549 instance, DDI_PSEUDO, NULL);
550 if (err != DDI_SUCCESS) {
551 sgenv_remove_kstats(softsp);
552 (void) sgenv_remove_cache_update_threads();
553 ddi_soft_state_free(sgenv_statep, instance);
554 return (DDI_FAILURE);
555 }
556
557 /*
558 * Add the handlers which watch for unsolicited messages
559 * and post event to Sysevent Framework.
560 */
561 err = sgenv_add_intr_handlers();
562 if (err != DDI_SUCCESS) {
563 cmn_err(CE_WARN, "Failed to add event handlers");
564 (void) sgenv_remove_intr_handlers();
565 sgenv_remove_kstats(softsp);
566 (void) sgenv_remove_cache_update_threads();
567 ddi_soft_state_free(sgenv_statep, instance);
568 return (DDI_FAILURE);
569 }
570
571 ddi_report_dev(dip);
572
573 return (DDI_SUCCESS);
574
575 case DDI_RESUME:
576 return (DDI_SUCCESS);
577
578 default:
579 return (DDI_FAILURE);
580 }
581 }
582
583
584 static int
sgenv_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)585 sgenv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
586 {
587 sgenv_soft_state_t *softsp;
588
589 int instance;
590 int err;
591
592 switch (cmd) {
593 case DDI_DETACH:
594
595 instance = ddi_get_instance(dip);
596
597 softsp = ddi_get_soft_state(sgenv_statep, instance);
598 if (softsp == NULL) {
599 cmn_err(CE_WARN, "detach: could not get state "
600 "structure for inst %d.", instance);
601 return (DDI_FAILURE);
602 }
603
604 err = sgenv_remove_cache_update_threads();
605 if (err != DDI_SUCCESS) {
606 cmn_err(CE_WARN, "Failed to remove update threads");
607 }
608
609 /*
610 * Remove the handlers which watch for unsolicited messages
611 * and post event to Sysevent Framework.
612 */
613 err = sgenv_remove_intr_handlers();
614 if (err != DDI_SUCCESS) {
615 cmn_err(CE_WARN, "Failed to remove event handlers");
616 }
617
618 sgenv_remove_kstats(softsp);
619
620 ddi_soft_state_free(sgenv_statep, instance);
621
622 ddi_remove_minor_node(dip, NULL);
623
624 return (DDI_SUCCESS);
625
626 case DDI_SUSPEND:
627 return (DDI_SUCCESS);
628
629 default:
630 return (DDI_FAILURE);
631 }
632 }
633
634
635 static int
sgenv_add_kstats(sgenv_soft_state_t * softsp)636 sgenv_add_kstats(sgenv_soft_state_t *softsp)
637 {
638 kstat_t *ksp;
639 kstat_named_t *keyswitch_named_data;
640
641 int inst = softsp->instance;
642
643 /*
644 * Create the 'keyswitch position' named kstat.
645 */
646 ksp = kstat_create(SGENV_DRV_NAME, inst, SG_KEYSWITCH_KSTAT_NAME,
647 "misc", KSTAT_TYPE_NAMED, 1, NULL);
648
649 if (ksp != NULL) {
650 /* initialize the named kstat */
651 keyswitch_named_data = (struct kstat_named *)(ksp->ks_data);
652
653 kstat_named_init(&keyswitch_named_data[0],
654 POSITION_KSTAT_NAME,
655 KSTAT_DATA_INT32);
656
657 ksp->ks_update = sgenv_keyswitch_kstat_update;
658 kstat_install(ksp);
659
660 /* update the soft state */
661 softsp->keyswitch_ksp = ksp;
662
663 } else {
664 cmn_err(CE_WARN, "Keyswitch: kstat_create failed");
665 return (-1);
666 }
667
668
669 /*
670 * Environmental Information.
671 */
672 ksp = kstat_create(SGENV_DRV_NAME, inst, SG_ENV_INFO_KSTAT_NAME,
673 "misc", KSTAT_TYPE_RAW, 0,
674 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
675
676 if (ksp != NULL) {
677 ksp->ks_data = NULL;
678 ksp->ks_data_size = 0;
679 ksp->ks_snaptime = 0;
680 ksp->ks_update = sgenv_env_info_kstat_update;
681 ksp->ks_snapshot = sgenv_env_info_kstat_snapshot;
682 ksp->ks_lock = &env_kstat_lock;
683 kstat_install(ksp);
684
685 /* update the soft state */
686 softsp->env_info_ksp = ksp;
687
688 } else {
689 cmn_err(CE_WARN, "Environmental Info: kstat_create failed");
690 return (-1);
691 }
692
693
694 /*
695 * Board Status Information.
696 */
697 ksp = kstat_create(SGENV_DRV_NAME, inst, SG_BOARD_STATUS_KSTAT_NAME,
698 "misc", KSTAT_TYPE_RAW, 0,
699 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
700
701 if (ksp != NULL) {
702 ksp->ks_data = NULL;
703 ksp->ks_data_size = 0;
704 ksp->ks_snaptime = 0;
705 ksp->ks_update = sgenv_board_info_kstat_update;
706 ksp->ks_snapshot = sgenv_board_info_kstat_snapshot;
707 ksp->ks_lock = &board_kstat_lock;
708 kstat_install(ksp);
709
710 /* update the soft state */
711 softsp->board_info_ksp = ksp;
712
713 } else {
714 cmn_err(CE_WARN, "Board Status Info: kstat_create failed");
715 return (-1);
716 }
717
718 return (0);
719 }
720
721
722 static void
sgenv_remove_kstats(sgenv_soft_state_t * softsp)723 sgenv_remove_kstats(sgenv_soft_state_t *softsp)
724 {
725 kstat_t *ksp;
726
727 ksp = softsp->keyswitch_ksp;
728 if (ksp != NULL) {
729 softsp->keyswitch_ksp = NULL;
730 kstat_delete(ksp);
731 }
732
733 ksp = softsp->env_info_ksp;
734 if (ksp != NULL) {
735 sgenv_destroy_env_cache();
736 softsp->env_info_ksp = NULL;
737 ksp->ks_lock = NULL;
738 kstat_delete(ksp);
739 }
740
741 ksp = softsp->board_info_ksp;
742 if (ksp != NULL) {
743 softsp->board_info_ksp = NULL;
744 ksp->ks_lock = NULL;
745 kstat_delete(ksp);
746 }
747 }
748
749
750 /*
751 * This function registers mailbox interrupt handlers to watch for certain
752 * unsolicited mailbox messages, which indicate that some event has occurred.
753 *
754 * Currently only the following events are handled:
755 * MBOX_EVENT_KEY_SWITCH
756 * MBOX_EVENT_ENV
757 * - Thresholds/Limits Exceeded
758 * - Fan Status changed
759 *
760 * ERRORS:
761 * We return DDI_FAILURE if we fail to register any one of the
762 * interrupt handlers.
763 */
764 static int
sgenv_add_intr_handlers(void)765 sgenv_add_intr_handlers(void)
766 {
767 int err;
768
769 /*
770 * Register an interrupt handler with the sgsbbc driver for the
771 * MBOX_EVENT_KEY_SWITCH events.
772 * - The virtual keyswitch has changed, we generate a sysevent.
773 */
774 keysw_payload_msg.msg_buf = (caddr_t)&keysw_payload;
775 keysw_payload_msg.msg_len = sizeof (keysw_payload);
776
777 err = sbbc_mbox_reg_intr(MBOX_EVENT_KEY_SWITCH, sgenv_keyswitch_handler,
778 &keysw_payload_msg, NULL, &keysw_hdlr_lock);
779 if (err != 0) {
780 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_KEY_SWITCH "
781 "handler. Err=%d", err);
782 return (DDI_FAILURE);
783 }
784
785 /*
786 * Register an interrupt handler with the sgsbbc driver for the
787 * MBOX_EVENT_ENV events.
788 * - Thresholds/Limits Exceeded, we generate a sysevent
789 * and we update our caches.
790 */
791 env_payload_msg.msg_buf = (caddr_t)&env_payload;
792 env_payload_msg.msg_len = sizeof (env_payload);
793
794 err = sbbc_mbox_reg_intr(MBOX_EVENT_ENV, sgenv_env_data_handler,
795 &env_payload_msg, NULL, &env_hdlr_lock);
796 if (err != 0) {
797 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_ENV "
798 "(env) handler. Err=%d", err);
799 return (DDI_FAILURE);
800 }
801
802 /*
803 * Register an interrupt handler with the sgsbbc driver for the
804 * MBOX_EVENT_ENV events.
805 * - Fan Status changed, we generate a sysevent, and
806 * we update the env cache only.
807 */
808 fan_payload_msg.msg_buf = (caddr_t)&fan_payload;
809 fan_payload_msg.msg_len = sizeof (fan_payload);
810
811 err = sbbc_mbox_reg_intr(MBOX_EVENT_ENV, sgenv_fan_status_handler,
812 &fan_payload_msg, NULL, &env_hdlr_lock);
813 if (err != 0) {
814 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_ENV (fan)"
815 "handler. Err=%d", err);
816 return (DDI_FAILURE);
817 }
818
819 /*
820 * Register an interrupt handler with the sgsbbc driver for the
821 * MBOX_EVENT_GENERIC events.
822 * - DR state change, we update our caches.
823 */
824 dr_payload_msg.msg_buf = (caddr_t)&dr_payload;
825 dr_payload_msg.msg_len = sizeof (dr_payload);
826
827 err = sbbc_mbox_reg_intr(MBOX_EVENT_GENERIC, sgenv_dr_event_handler,
828 &dr_payload_msg, NULL, &dr_hdlr_lock);
829 if (err != 0) {
830 cmn_err(CE_WARN, "Failed to register MBOX_EVENT_GENERIC (DR)"
831 "handler. Err=%d", err);
832 return (DDI_FAILURE);
833 }
834
835 return (DDI_SUCCESS);
836 }
837
838 /*
839 * This function unregisters the mailbox interrupt handlers.
840 *
841 * ERRORS:
842 * We return DDI_FAILURE if we fail to register any one of the
843 * interrupt handlers.
844 */
845 static int
sgenv_remove_intr_handlers(void)846 sgenv_remove_intr_handlers(void)
847 {
848 int rv = DDI_SUCCESS;
849 int err;
850
851 err = sbbc_mbox_unreg_intr(MBOX_EVENT_KEY_SWITCH,
852 sgenv_keyswitch_handler);
853 if (err != 0) {
854 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_KEY_SWITCH "
855 "handler. Err=%d", err);
856 rv = DDI_FAILURE;
857 }
858
859 err = sbbc_mbox_unreg_intr(MBOX_EVENT_ENV, sgenv_env_data_handler);
860 if (err != 0) {
861 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_ENV (env)"
862 "handler. Err=%d", err);
863 rv = DDI_FAILURE;
864 }
865
866 err = sbbc_mbox_unreg_intr(MBOX_EVENT_ENV, sgenv_fan_status_handler);
867 if (err != 0) {
868 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_ENV (fan)"
869 "handler. Err=%d", err);
870 rv = DDI_FAILURE;
871 }
872
873 err = sbbc_mbox_unreg_intr(MBOX_EVENT_GENERIC, sgenv_dr_event_handler);
874 if (err != 0) {
875 cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_GENERIC (DR) "
876 "handler. Err=%d", err);
877 rv = DDI_FAILURE;
878 }
879
880 return (rv);
881 }
882
883
884 static int
sgenv_create_cache_update_threads(void)885 sgenv_create_cache_update_threads(void)
886 {
887 DCMN_ERR_S(f, "sgenv_create_cache_update_threads()");
888
889 DCMN_ERR_THREAD(CE_NOTE, "Entering %s", f);
890
891 /* Create thread to ensure env_cache is updated */
892 env_thread_run = 1;
893
894 env_thread = thread_create(NULL, 0, sgenv_update_env_cache,
895 NULL, 0, &p0, TS_RUN, minclsyspri);
896 env_thread_tid = env_thread->t_did;
897
898 /* Create thread to ensure board_cache is updated */
899 board_thread_run = 1;
900
901 board_thread = thread_create(NULL, 0, sgenv_update_board_cache,
902 NULL, 0, &p0, TS_RUN, minclsyspri);
903 board_thread_tid = board_thread->t_did;
904
905 DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f);
906
907 return (DDI_SUCCESS);
908 }
909
910
911 static int
sgenv_remove_cache_update_threads(void)912 sgenv_remove_cache_update_threads(void)
913 {
914 DCMN_ERR_S(f, "sgenv_remove_cache_update_threads()");
915
916 DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for cache update threads", f);
917
918 /* Cause the env_cache thread to terminate. */
919 mutex_enter(&env_flag_lock);
920 env_thread_run = 0;
921 cv_signal(&env_flag_cond);
922 mutex_exit(&env_flag_lock);
923
924 thread_join(env_thread_tid);
925
926 /* Cause the board_cache thread to terminate. */
927 mutex_enter(&board_flag_lock);
928 board_thread_run = 0;
929 cv_signal(&board_flag_cond);
930 mutex_exit(&board_flag_lock);
931
932 thread_join(board_thread_tid);
933
934 DCMN_ERR_THREAD(CE_NOTE, "%s: cache update threads finished", f);
935
936 return (DDI_SUCCESS);
937 }
938
939
940 static int
sgenv_keyswitch_kstat_update(kstat_t * ksp,int rw)941 sgenv_keyswitch_kstat_update(kstat_t *ksp, int rw)
942 {
943 sg_keyswitch_kstat_t *keysw_data;
944
945 int8_t posn; /* keysw posn read from IO-SRAM */
946 int size; /* size of IO-SRAM chunk */
947 int rv = 0; /* return value of iosram_read() */
948
949 keysw_data = (sg_keyswitch_kstat_t *)ksp->ks_data;
950
951 switch (rw) {
952 case KSTAT_WRITE:
953 /*
954 * Write not permitted
955 */
956 return (EACCES);
957
958 case KSTAT_READ:
959 /*
960 * Get the size of the keyswitch IO-SRAM chunk.
961 * This should be one byte.
962 *
963 * If the size is not 1 byte we set the position to UNKNOWN
964 *
965 * Otherwise we read the keyswitch position from IO-SRAM.
966 * Then check that this is a valid keyswitch position.
967 * If it is not valid then something is corrupt and set
968 * the position to UNKNOWN.
969 */
970 size = iosram_size(SBBC_KEYSWITCH_KEY);
971 if (size != 1) {
972 posn = SG_KEYSWITCH_POSN_UNKNOWN;
973 rv = -1;
974
975 } else if ((rv = iosram_read(SBBC_KEYSWITCH_KEY, 0,
976 (char *)&posn, size)) != 0) {
977 posn = SG_KEYSWITCH_POSN_UNKNOWN;
978
979 } else {
980 /* Check posn is not corrupt */
981 switch (posn) {
982 case SG_KEYSWITCH_POSN_ON:
983 case SG_KEYSWITCH_POSN_DIAG:
984 case SG_KEYSWITCH_POSN_SECURE:
985 /* value read from kstat is OK */
986 break;
987
988 default:
989 /* value read from kstat is corrupt */
990 posn = SG_KEYSWITCH_POSN_UNKNOWN;
991 break;
992 }
993 }
994
995 /* Write position to kstat. */
996 keysw_data->keyswitch_position.value.i32 = posn;
997
998 return (rv);
999
1000 default:
1001 return (EINVAL);
1002 }
1003 }
1004
1005 static void
sgenv_init_env_cache(void)1006 sgenv_init_env_cache(void)
1007 {
1008 ASSERT(env_thread_run == 0);
1009 ASSERT(env_thread == NULL);
1010 }
1011
1012
1013 /*
1014 * This thread runs in the background and waits for an interrupt handler
1015 * registered to wait for ENV/DR events from the SC to signal/flag that we
1016 * need to update our Env Cache.
1017 */
1018 static void
sgenv_update_env_cache(void)1019 sgenv_update_env_cache(void)
1020 {
1021 DCMN_ERR_S(f, "sgenv_update_env_cache()");
1022
1023 mutex_enter(&env_flag_lock);
1024
1025 while (env_thread_run == 1) {
1026
1027 /*
1028 * We check to see if the update needed flag is set.
1029 * If it is then this means that:
1030 * 1) This is the first time through the while loop
1031 * and we need to initialize the cache.
1032 * 2) An interrupt handler was triggered while we
1033 * we were updating the env cache during the previous
1034 * iteration of the while loop and we need to refresh
1035 * the env data to ensure we are completely up to date.
1036 *
1037 * Otherwise we wait until we get a signal from one of the
1038 * interrupt handlers.
1039 */
1040 if (env_cache_update_needed) {
1041 DCMN_ERR_THREAD(CE_NOTE, "%s: update needed", f);
1042
1043 env_cache_update_needed = B_FALSE;
1044
1045 } else {
1046 DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for signal", f);
1047
1048 cv_wait(&env_flag_cond, &env_flag_lock);
1049
1050 /* Check if we are being asked to terminate */
1051 if (env_thread_run == 0) {
1052 break;
1053 }
1054
1055 env_cache_updating = B_TRUE;
1056 }
1057
1058 mutex_exit(&env_flag_lock);
1059 (void) sgenv_get_env_info_data();
1060
1061 (void) sgenv_check_sensor_thresholds();
1062 mutex_enter(&env_flag_lock);
1063
1064 if (env_cache_update_needed == B_FALSE)
1065 env_cache_updating = B_FALSE;
1066 }
1067
1068 mutex_exit(&env_flag_lock);
1069
1070 DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f);
1071
1072 env_thread_run = -1;
1073 thread_exit();
1074 }
1075
1076
1077 /*
1078 * We always return what is in the env_cache. It is up to the SC to ensure
1079 * that the env_cache is current by sending events to us when something
1080 * changes. The cache will then be updated by going to the SC to get the
1081 * new data. That way the kstat_update code can always be sure that it gets
1082 * current data without having to wait while the SC responds (slowly) to our
1083 * request for data.
1084 *
1085 * The way the update and snapshot code works, we cannot be guaranteed that
1086 * someone won't grab the env_cache_lock between the update and snapshot
1087 * calls so we use a temporary snapshot of the env_cache. We cannot hold
1088 * any locks across the calls from the update to the snapshot as we are
1089 * not guaranteed that the snapshot function will be called. So we create
1090 * the snapshot of the env_cache in the update routine and dump this to the
1091 * kstat user buffer in the snapshot routine. (There are error conditions in
1092 * which the snapshot will not be called by the kstat framework so we need
1093 * to handle these appropriately.)
1094 */
1095 static int
sgenv_env_info_kstat_update(kstat_t * ksp,int rw)1096 sgenv_env_info_kstat_update(kstat_t *ksp, int rw)
1097 {
1098 DCMN_ERR_S(f, "sgenv_env_info_kstat_update()");
1099
1100 int err = 0;
1101 int key_posn;
1102 env_sensor_t *ptr;
1103
1104 switch (rw) {
1105 case KSTAT_WRITE:
1106 /*
1107 * Write not permitted
1108 */
1109 return (EACCES);
1110
1111 case KSTAT_READ:
1112
1113 mutex_enter(&env_cache_lock);
1114 /*
1115 * We now need to ensure that there is enough room allocated
1116 * by the kstat framework to return the data via ks_data.
1117 * It is possible there may be no data in the cache but
1118 * we still return zero sized kstats to ensure no client breaks
1119 */
1120 sgenv_update_env_kstat_size(ksp);
1121
1122 /*
1123 * If the snapshot still has data (this could be because the
1124 * kstat framework discovered an error and did not call the
1125 * snapshot code which should have freed this buffer) we free
1126 * it here.
1127 */
1128 if ((env_cache_snapshot != NULL) &&
1129 (env_cache_snapshot_size > 0)) {
1130 DCMN_ERR_CACHE(CE_NOTE, "%s freeing "
1131 "env_cache_snapshot buf", f);
1132 kmem_free(env_cache_snapshot, env_cache_snapshot_size);
1133 }
1134
1135 /*
1136 * Create a new snapshot buffer based on ks_data_size
1137 */
1138 env_cache_snapshot_size = ksp->ks_data_size;
1139 env_cache_snapshot = kmem_zalloc(
1140 env_cache_snapshot_size, KM_SLEEP);
1141
1142 /*
1143 * We need to take a fresh snapshot of the env_cache here.
1144 * For each sensor collection, we check to see if there is
1145 * data in the cache (ie. != NULL). If there is, we copy it
1146 * into the snapshot.
1147 */
1148 ptr = env_cache_snapshot;
1149 for (key_posn = 0; key_posn < SGENV_MAX_HPU_KEYS; key_posn++) {
1150 if (vol_sensor_count[key_posn] <= 0)
1151 continue;
1152
1153 ASSERT(vol_sensor_count[key_posn] <=
1154 SGENV_MAX_SENSORS_PER_KEY);
1155
1156 /*
1157 * <env_cache> entry should have been allocated
1158 * in the kstat_update function already.
1159 *
1160 * If this <env_cache> entry is NULL, then
1161 * it has already been destroyed or cleared
1162 * and the sensor readings have disappeared.
1163 */
1164 if (env_cache[key_posn] == NULL) {
1165 DCMN_ERR(CE_NOTE, "!Cache entry %d has "
1166 "disappeared", key_posn);
1167 vol_sensor_count[key_posn] = 0;
1168 continue;
1169 }
1170
1171 bcopy(&env_cache[key_posn][0], ptr,
1172 sizeof (env_sensor_t) *
1173 vol_sensor_count[key_posn]);
1174 ptr += vol_sensor_count[key_posn];
1175 }
1176 mutex_exit(&env_cache_lock);
1177
1178 return (err);
1179
1180 default:
1181 return (EINVAL);
1182 }
1183 }
1184
1185 static int
sgenv_env_info_kstat_snapshot(kstat_t * ksp,void * buf,int rw)1186 sgenv_env_info_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
1187 {
1188 DCMN_ERR_S(f, "sgenv_env_info_kstat_snapshot()");
1189
1190 switch (rw) {
1191 case KSTAT_WRITE:
1192 /*
1193 * Write not permitted
1194 */
1195 return (EACCES);
1196
1197 case KSTAT_READ:
1198
1199 /*
1200 * We have taken a snapshot of the env_cache in the
1201 * update routine so we simply bcopy this into the
1202 * kstat buf. No locks needed here.
1203 */
1204 if (env_cache_snapshot_size > 0)
1205 bcopy(env_cache_snapshot, buf, env_cache_snapshot_size);
1206
1207 ksp->ks_snaptime = last_env_read_time;
1208
1209 /*
1210 * Free the memory used by the snapshot. If for some reason
1211 * the kstat framework does not call this snapshot routine,
1212 * we also have a check in the update routine so the next
1213 * time it is called it checks for this condition and frees
1214 * the snapshot buffer there.
1215 */
1216 DCMN_ERR_CACHE(CE_NOTE, "%s freeing env_cache_snapshot buf", f);
1217 kmem_free(env_cache_snapshot, env_cache_snapshot_size);
1218 env_cache_snapshot = NULL;
1219 env_cache_snapshot_size = 0;
1220
1221 return (0);
1222
1223 default:
1224 return (EINVAL);
1225 }
1226 }
1227
1228 static void
sgenv_init_board_cache(void)1229 sgenv_init_board_cache(void)
1230 {
1231 int i;
1232
1233 ASSERT(board_thread_run == 0);
1234 ASSERT(board_thread == NULL);
1235
1236 /*
1237 * Init all node-ids to be -1.
1238 */
1239 mutex_enter(&board_cache_lock);
1240 for (i = 0; i < SG_MAX_BDS; i++)
1241 board_cache[i].node_id = (-1);
1242 mutex_exit(&board_cache_lock);
1243 }
1244
1245
1246 /*
1247 * This thread runs in the background and waits for an interrupt handler
1248 * registered to wait for DR events from the SC to signal/flag that we
1249 * need to update our Board Cache.
1250 */
1251 static void
sgenv_update_board_cache(void)1252 sgenv_update_board_cache(void)
1253 {
1254 DCMN_ERR_S(f, "sgenv_update_board_cache()");
1255
1256 mutex_enter(&board_flag_lock);
1257
1258 while (board_thread_run == 1) {
1259
1260 /*
1261 * We check to see if the update needed flag is set.
1262 * If it is then this means that:
1263 * 1) This is the first time through the while loop
1264 * and we need to initialize the cache.
1265 * 2) An interrupt handler was triggered while we
1266 * we were updating the cache during the previous
1267 * iteration of the while loop and we need to refresh
1268 * the env data to ensure we are completely up to date.
1269 *
1270 * Otherwise we wait until we get a signal from one of the
1271 * interrupt handlers.
1272 */
1273 if (board_cache_update_needed) {
1274 DCMN_ERR_THREAD(CE_NOTE, "%s: update needed", f);
1275 board_cache_update_needed = B_FALSE;
1276
1277 } else {
1278 DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for signal", f);
1279
1280 cv_wait(&board_flag_cond, &board_flag_lock);
1281
1282 /* Check if we are being asked to terminate */
1283 if (board_thread_run == 0) {
1284 break;
1285 }
1286
1287 board_cache_updating = B_TRUE;
1288 }
1289
1290 mutex_exit(&board_flag_lock);
1291 (void) sgenv_get_board_info_data();
1292 mutex_enter(&board_flag_lock);
1293
1294 if (board_cache_update_needed == B_FALSE)
1295 board_cache_updating = B_FALSE;
1296 }
1297
1298 mutex_exit(&board_flag_lock);
1299
1300 DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f);
1301
1302 board_thread_run = -1;
1303 thread_exit();
1304 }
1305
1306
1307 /*
1308 * We always return what is in the board_cache. It is up to the SC to ensure
1309 * that the board_cache is current by sending events to us when something
1310 * changes. The cache will then be updated by going to the SC to get the
1311 * new data. That way the kstat_update code can always be sure that it gets
1312 * current data without having to wait while the SC responds (slowly) to our
1313 * request for data.
1314 *
1315 * The way the update and snapshot code works, we cannot be guaranteed that
1316 * someone won't grab the board_cache_lock between the update and snapshot
1317 * calls so we use a snapshot buffer of the board_cache. We cannot hold
1318 * any locks across the calls from the update to the snapshot as we are
1319 * not guaranteed that the snapshot function will be called. So we create
1320 * the snapshot of the board_cache in the update routine and dump this to the
1321 * kstat user buffer in the snapshot routine. (There are error conditions in
1322 * which the snapshot will not be called by the kstat framework so we need
1323 * to handle these appropriately.)
1324 */
1325 static int
sgenv_board_info_kstat_update(kstat_t * ksp,int rw)1326 sgenv_board_info_kstat_update(kstat_t *ksp, int rw)
1327 {
1328 int i;
1329
1330 switch (rw) {
1331 case KSTAT_WRITE:
1332 /*
1333 * Write not permitted
1334 */
1335 return (EACCES);
1336
1337 case KSTAT_READ:
1338 /*
1339 * The board_cache is created during startup, and so should be
1340 * available before a user can log in and trigger a kstat read,
1341 * but we check just in case.
1342 */
1343 if (board_cache_updated == FALSE)
1344 return (ENXIO);
1345
1346 mutex_enter(&board_cache_lock);
1347
1348 /*
1349 * Set <ks_data_size> to the new number of board readings so
1350 * that the snapshot routine can allocate the correctly sized
1351 * kstat.
1352 */
1353 ksp->ks_data_size = board_count * sizeof (sg_board_info_t);
1354
1355 board_count_snapshot = board_count;
1356
1357 /*
1358 * We are now guaranteed that that board_cache is not in flux
1359 * (as we have the lock) so we take a copy of the board_cache
1360 * into the board_cache_snapshot so that the snapshot routine
1361 * can copy it from the board_cache_snapshot into the user kstat
1362 * buffer.
1363 */
1364 for (i = 0; i < SG_MAX_BDS; i++) {
1365 board_cache_snapshot[i] = board_cache[i];
1366 }
1367
1368 mutex_exit(&board_cache_lock);
1369
1370 return (0);
1371
1372 default:
1373 return (EINVAL);
1374 }
1375 }
1376
1377 static int
sgenv_board_info_kstat_snapshot(kstat_t * ksp,void * buf,int rw)1378 sgenv_board_info_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
1379 {
1380 DCMN_ERR_S(f, "sgenv_board_info_kstat_snapshot()");
1381
1382 sg_board_info_t *bdp;
1383 int i, num_bds = 0;
1384
1385 switch (rw) {
1386 case KSTAT_WRITE:
1387 /*
1388 * Write not permitted
1389 */
1390 return (EACCES);
1391
1392 case KSTAT_READ:
1393
1394 if (board_cache_updated == FALSE) {
1395 ksp->ks_data_size = 0;
1396 ksp->ks_data = NULL;
1397 return (ENOMEM);
1398 }
1399
1400 /*
1401 * Update the snap_time with the last time we got fresh data
1402 * from the SC.
1403 */
1404 ksp->ks_snaptime = last_board_read_time;
1405
1406 ASSERT(board_count_snapshot <= SG_MAX_BDS);
1407 /*
1408 * For each entry in the board_cache_snapshot we check to see
1409 * if the node_id is != NULL before we copy it into
1410 * the kstat buf.
1411 */
1412 for (i = 0; i < SG_MAX_BDS; i++) {
1413 bdp = &board_cache_snapshot[i];
1414 DCMN_ERR_CACHE(CE_NOTE, "%s: looking at "
1415 "cache_snapshot entry[%d], node=%d",
1416 f, i, bdp->node_id);
1417 if (bdp->node_id >= 0) {
1418 /*
1419 * Need a check to ensure that the buf
1420 * is still within the allocated size.
1421 * We check how many boards are already
1422 * in the user buf before adding one.
1423 */
1424 num_bds++;
1425 if (num_bds > board_count_snapshot) {
1426 ksp->ks_data_size = 0;
1427 ksp->ks_data = NULL;
1428 DCMN_ERR(CE_WARN, "%s: buf overflow."
1429 " %d >= %d.",
1430 f, num_bds, board_count_snapshot);
1431 return (EIO);
1432 }
1433
1434 DCMN_ERR_CACHE(CE_NOTE, "%s: about to bcopy"
1435 " cache_snapshot entry[%d], node=%d,"
1436 " board=%d", f, i, bdp->node_id,
1437 bdp->board_num);
1438 bcopy(bdp, buf, sizeof (sg_board_info_t));
1439 buf = ((sg_board_info_t *)buf) + 1;
1440 }
1441 }
1442 return (0);
1443
1444 default:
1445 return (EINVAL);
1446 }
1447 }
1448
1449
1450 /*
1451 * This function coordinates reading the env data from the SC.
1452 *
1453 * ERROR:
1454 * If an error occurs while making a call to the mailbox and we have data
1455 * in the cache from a previous call to the SC, we return an error of 0.
1456 * That way the kstat framework will return the old data instead of
1457 * returning an error and an empty kstat.
1458 */
1459 static int
sgenv_get_env_info_data(void)1460 sgenv_get_env_info_data(void)
1461 {
1462 DCMN_ERR_S(f, "sgenv_get_env_info_data()");
1463
1464 envresp_key_t new_keys[SGENV_MAX_HPU_KEYS] = {0};
1465 envresp_key_t old_key;
1466 envresp_key_t key;
1467
1468 int i;
1469
1470 int err = 0; /* return value of func's which get env data */
1471 int status = 0; /* reason why env data func returned an error */
1472
1473 DCMN_ERR_EVENT(CE_NOTE, "%s: entered.", f);
1474
1475 err = sgenv_get_hpu_keys(new_keys, &status);
1476
1477 if (err != 0) {
1478 /*
1479 * If we get an error getting the key values, then we return
1480 * as we cannot proceed any farther. If there is old env data
1481 * in the cache, then we return zero so that the kstat
1482 * framework will export the old data.
1483 */
1484 if (env_cache_updated == FALSE) {
1485 sgenv_mbox_error_msg("HPU Keys", err, status);
1486 return (err);
1487 } else {
1488 sgenv_mbox_error_msg("HPU Keys", err, status);
1489 return (0);
1490 }
1491 }
1492
1493
1494 for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) {
1495
1496 if (vol_sensor_count[i] == 0) {
1497 /* empty collection */
1498 old_key = 0;
1499 } else {
1500 /*
1501 * populated collection:
1502 * (assert size is OK, and 1st sensor is pseudo-sensor)
1503 */
1504 ASSERT(env_cache[i] != NULL);
1505 ASSERT(env_cache[i][0].sd_id.id.sensor_part ==
1506 SG_SENSOR_PART_SCAPP);
1507 ASSERT(env_cache[i][0].sd_id.id.sensor_type ==
1508 SG_SENSOR_TYPE_ENVDB);
1509 ASSERT(SG_INFO_VALUESTATUS(env_cache[i][0].sd_infostamp)
1510 == SG_INFO_VALUE_OK);
1511
1512 old_key = env_cache[i][0].sd_value;
1513 }
1514
1515 key = new_keys[i];
1516
1517 /*
1518 * No data is associated with this key position and there was
1519 * no data on the previous read either so we simply continue
1520 * to the next key position.
1521 */
1522 if ((key == 0) && (old_key == 0)) {
1523 ASSERT(env_cache[i] == NULL);
1524 continue;
1525 }
1526
1527
1528 /*
1529 * We need to grab this lock every time we are going to
1530 * update a HPU. However, a kstat_read can grab
1531 * the env_cache_lock when it wants to get a snapshot of
1532 * the env_cache. This has the affect of stopping the
1533 * active env_cache writer after they have updated the
1534 * active HPU, allowing the kstat_read to get a dump of
1535 * the env_cache, then the env_cache writer can resume
1536 * updating the cache. For performance it is more important
1537 * that the kstat_read completes quickly so we allow the
1538 * kstat_read to interrupt the updating of the env_cache.
1539 * The updating can take anything from a few seconds to
1540 * several minutes to complete.
1541 */
1542 mutex_enter(&env_cache_lock);
1543
1544 /*
1545 * If the key just read is zero, then the
1546 * group of sensors have been removed by
1547 * some means and we need to zero out
1548 * the env_cache. (this ensures that data
1549 * belonging to a removed board is not
1550 * returned)
1551 */
1552 if (key == 0) {
1553 ASSERT(old_key != 0);
1554 (void) sgenv_clear_env_cache_entry(i);
1555 mutex_exit(&env_cache_lock);
1556 continue;
1557 }
1558
1559 /*
1560 * Check to see if this key has changed since
1561 * the last read.
1562 *
1563 * If it has changed, we need to update everything.
1564 *
1565 * If it hasn't we simply read the volatiles
1566 * and check to see if the constants have changed.
1567 */
1568 if (key != old_key) {
1569 /*
1570 * If the key is non-zero, then a new HPU has
1571 * been added to the system or it has changed
1572 * somehow and we need to re-read everything.
1573 * (we also need to zero out the env_cache as
1574 * there may be less sensors returned now and
1575 * the old ones may not be overwritten)
1576 */
1577
1578 /*
1579 * If the <env_cache> has not already been
1580 * allocated for this key position then we
1581 * go ahead and allocate it.
1582 */
1583 if (env_cache[i] == NULL) {
1584 err = sgenv_create_env_cache_entry(i);
1585 if (err == DDI_FAILURE) {
1586 mutex_exit(&env_cache_lock);
1587 continue;
1588 }
1589 }
1590
1591 err = sgenv_get_env_data(new_keys[i], i,
1592 SG_GET_ENV_CONSTANTS, &status);
1593 if (err) {
1594 err = sgenv_handle_env_data_error(err, status,
1595 i, old_key, "Constant Data");
1596 mutex_exit(&env_cache_lock);
1597 if (err != DDI_FAILURE) {
1598 continue;
1599 } else if (env_cache_updated == TRUE) {
1600 return (0);
1601 } else {
1602 return (DDI_FAILURE);
1603 }
1604 }
1605
1606 err = sgenv_get_env_data(new_keys[i], i,
1607 SG_GET_ENV_THRESHOLDS, &status);
1608 if (err) {
1609 err = sgenv_handle_env_data_error(err, status,
1610 i, old_key, "Threshold Data");
1611 mutex_exit(&env_cache_lock);
1612 if (err != DDI_FAILURE) {
1613 continue;
1614 } else if (env_cache_updated == TRUE) {
1615 return (0);
1616 } else {
1617 return (DDI_FAILURE);
1618 }
1619 }
1620
1621 err = sgenv_get_env_data(new_keys[i], i,
1622 SG_GET_ENV_VOLATILES, &status);
1623 if (err) {
1624 err = sgenv_handle_env_data_error(err, status,
1625 i, old_key, "Volatile Data (fresh)");
1626 mutex_exit(&env_cache_lock);
1627 if (err != DDI_FAILURE) {
1628 continue;
1629 } else if (env_cache_updated == TRUE) {
1630 return (0);
1631 } else {
1632 return (DDI_FAILURE);
1633 }
1634 }
1635
1636 /*
1637 * As we have successfully got env data for a HPU,
1638 * we ensure <env_cache_updated> is set to TRUE so that
1639 * in the future, if an error occurs during the mailbox
1640 * transfer, we know that there is old data for at
1641 * least one HPU in the <env_cache> which could be
1642 * returned instead of returning an error to the kstat
1643 * framework indicating that we have no data to return.
1644 */
1645 env_cache_updated = TRUE;
1646 last_env_read_time = gethrtime();
1647
1648 } else {
1649 /*
1650 * key == old_key
1651 *
1652 * Handle the case when the value of the old key and
1653 * the new key are identical.
1654 */
1655 ASSERT(env_cache[i] != NULL);
1656
1657 /*
1658 * If the keys are identical, then the quasi-constants
1659 * should not have changed (and so don't need updating).
1660 * Similarly for the threshold readings.
1661 */
1662
1663 /* Update the volatile data */
1664 err = sgenv_get_env_data(new_keys[i], i,
1665 SG_GET_ENV_VOLATILES, &status);
1666 if (err) {
1667 err = sgenv_handle_env_data_error(err, status,
1668 i, old_key, "Volatile Data (update)");
1669 mutex_exit(&env_cache_lock);
1670 if (err == DDI_FAILURE) {
1671 return (0);
1672 } else {
1673 continue;
1674 }
1675 }
1676
1677 }
1678 mutex_exit(&env_cache_lock);
1679 }
1680
1681 return (0);
1682 }
1683
1684
1685 static int
sgenv_get_board_info_data(void)1686 sgenv_get_board_info_data(void)
1687 {
1688 /*
1689 * This array keeps track of the valid nodes in a system. A call is
1690 * made to OBP to get the "nodeid" property from all the ssm nodes,
1691 * and for each nodeid found, that position in the array is set to
1692 * TRUE. For a Serengeti only one position in the array will be TRUE.
1693 */
1694 static uint_t node_present[SSM_MAX_INSTANCES] = {SGENV_NO_NODE_EXISTS};
1695
1696 static fn_t f = "sgenv_get_board_info_data()";
1697 static int first_time = TRUE;
1698
1699 sbbc_msg_t req;
1700 sbbc_msg_t resp;
1701 int node; /* loop index */
1702 int board; /* loop index */
1703 show_board_t show_bd, *shbp = &show_bd;
1704 info_t inform;
1705 int status; /* msg_status returned by response */
1706 int rv = 0; /* return value of call to mailbox */
1707 sg_board_info_t *ptr;
1708
1709 DCMN_ERR_EVENT(CE_NOTE, "%s: entered.", f);
1710
1711 ASSERT(board_cache != NULL);
1712
1713 if (first_time) {
1714 sgenv_set_valid_node_positions(node_present);
1715 first_time = FALSE;
1716 }
1717
1718 for (node = 0; node < SSM_MAX_INSTANCES; node++) {
1719
1720 if (node_present[node] == SGENV_NO_NODE_EXISTS)
1721 continue;
1722
1723 for (board = 0; board < SG_MAX_BDS; board++) {
1724
1725 /*
1726 * If we have discovered in a previous call to the SC
1727 * that there is no board in this slot on this type of
1728 * chassis then we don't waste resources asking the SC
1729 * for nonexistent data.
1730 */
1731 if ((node_present[node] & (1 << board)) == 0)
1732 continue;
1733
1734 inform.board = board;
1735 inform.node = node;
1736 inform.revision = 0xdead;
1737
1738 req.msg_type.type = DR_MBOX;
1739 req.msg_type.sub_type = DR_MBOX_SHOW_BOARD;
1740 req.msg_status = SG_MBOX_STATUS_SUCCESS;
1741 req.msg_len = sizeof (info_t);
1742 req.msg_bytes = sizeof (info_t);
1743 req.msg_buf = (caddr_t)&inform;
1744
1745 bzero(shbp, sizeof (show_board_t));
1746 shbp->s_cond = -1;
1747 shbp->s_power = -1;
1748 shbp->s_assigned = -1;
1749 shbp->s_claimed = -1;
1750 shbp->s_present = -1;
1751
1752 resp.msg_type.type = DR_MBOX;
1753 resp.msg_type.sub_type = DR_MBOX_SHOW_BOARD;
1754 resp.msg_bytes = sizeof (show_board_t);
1755 resp.msg_status = SG_MBOX_STATUS_SUCCESS;
1756 resp.msg_len = sizeof (show_board_t);
1757 resp.msg_buf = (caddr_t)shbp;
1758
1759
1760 /*
1761 * We want to avoid the case where an invalid time
1762 * is specified by a user (by patching the
1763 * global variable <sgenv_max_mbox_wait_time>).
1764 *
1765 * Any incorrect values are reset to the default time.
1766 */
1767 if (sgenv_max_mbox_wait_time <=
1768 max(sbbc_mbox_min_timeout, 0))
1769 sgenv_max_mbox_wait_time =
1770 sbbc_mbox_default_timeout;
1771
1772 rv = sbbc_mbox_request_response(&req, &resp,
1773 sgenv_max_mbox_wait_time);
1774 status = resp.msg_status;
1775
1776 if ((rv) || (status != SG_MBOX_STATUS_SUCCESS)) {
1777 /*
1778 * errors from Solaris sgsbbc driver
1779 */
1780 if (status > SG_MBOX_STATUS_SUCCESS) {
1781 sgenv_mbox_error_msg("Board Info", rv,
1782 resp.msg_status);
1783 return (rv);
1784 }
1785
1786 /*
1787 * errors from SCAPP
1788 */
1789 if (status == SG_MBOX_STATUS_ILLEGAL_NODE) {
1790 sgenv_mbox_error_msg("Board Info", rv,
1791 resp.msg_status);
1792 node_present[node] =
1793 SGENV_NO_NODE_EXISTS;
1794
1795 /*
1796 * No point looping through the rest of
1797 * the boards associated with this node.
1798 */
1799 break;
1800
1801 } else if (status ==
1802 SG_MBOX_STATUS_ILLEGAL_SLOT) {
1803
1804 /*
1805 * We clear the bit representing <board>
1806 * in <node> to indicate that this slot
1807 * cannot exist on this chassis.
1808 */
1809 node_present[node] &= (~(1 << board) &
1810 SGENV_NODE_TYPE_DS);
1811 continue;
1812
1813 } else if (status ==
1814 SG_MBOX_STATUS_BOARD_ACCESS_DENIED) {
1815 /*
1816 * We cannot access data for this slot,
1817 * however we may be able to do so in
1818 * the future. We do nothing.
1819 */
1820 rv = rv;
1821 } else {
1822 char err_msg[40];
1823
1824 (void) sprintf(err_msg,
1825 "Board data for "
1826 "Node%d/Slot%d", node, board);
1827 sgenv_mbox_error_msg(err_msg, rv,
1828 resp.msg_status);
1829
1830 if (rv == 0)
1831 rv = status;
1832
1833 continue;
1834 }
1835 }
1836
1837 mutex_enter(&board_cache_lock);
1838 ptr = &board_cache[board];
1839
1840 /*
1841 * Check if the SC returns data for this board.
1842 */
1843 if (shbp->s_assigned == -1) {
1844 /*
1845 * If this cache entry used to have data and
1846 * now doesn't we decrement the board_count
1847 * clear the env_cache. The board must have
1848 * been removed.
1849 */
1850 if (ptr->node_id != -1) {
1851 board_count--;
1852
1853 /*
1854 * clear board_cache entry by
1855 * setting node_id to -1;
1856 */
1857 ptr->node_id = -1;
1858 DCMN_ERR_CACHE(CE_NOTE, "%s: "
1859 "Clearing cache line %d [%p]",
1860 f, board, (void *)ptr);
1861 }
1862 } else {
1863 /*
1864 * If this cache entry was previously empty
1865 * and we now have data for it we increment
1866 * the board_count. A new board must have
1867 * been added.
1868 */
1869 if (ptr->node_id == -1)
1870 board_count++;
1871 /*
1872 * update the board_cache entry
1873 */
1874 DCMN_ERR_CACHE(CE_NOTE, "%s: "
1875 "Writing data for bd=%d into "
1876 " the board_cache at [%p]",
1877 f, board, (void *)ptr);
1878 ptr->node_id = node;
1879 ptr->board_num = board;
1880 ptr->condition = shbp->s_cond;
1881 ptr->assigned = shbp->s_assigned;
1882 ptr->claimed = shbp->s_claimed;
1883 ptr->present = shbp->s_present;
1884 ptr->led.led_status =
1885 shbp->s_ledstatus;
1886 last_board_read_time = gethrtime();
1887 }
1888 mutex_exit(&board_cache_lock);
1889 } /* board */
1890 } /* node */
1891
1892 /*
1893 * Indicate that have managed to store valid data in the <board_cache>
1894 * at least once.
1895 */
1896 if (board_count > 0)
1897 board_cache_updated = TRUE;
1898
1899
1900 return (rv);
1901 }
1902
1903
1904 static int
sgenv_get_hpu_keys(envresp_key_t * new,int * status)1905 sgenv_get_hpu_keys(envresp_key_t *new, int *status)
1906 {
1907 sbbc_msg_t req; /* request */
1908 sbbc_msg_t resp; /* response */
1909
1910 int rv; /* return value from call to mbox */
1911
1912 req.msg_type.type = SG_ENV;
1913 req.msg_type.sub_type = SG_GET_ENV_HPU_KEYS;
1914 req.msg_status = SG_MBOX_STATUS_SUCCESS;
1915 req.msg_len = 0;
1916 req.msg_bytes = 0;
1917
1918 resp.msg_type.type = SG_ENV;
1919 resp.msg_type.sub_type = SG_GET_ENV_HPU_KEYS;
1920 resp.msg_status = SG_MBOX_STATUS_SUCCESS;
1921 resp.msg_len = sizeof (envresp_key_t) * SGENV_MAX_HPU_KEYS;
1922 resp.msg_bytes = 0;
1923 resp.msg_buf = (caddr_t)new;
1924
1925 /*
1926 * We want to avoid the case where an invalid time
1927 * is specified by a user (by patching the
1928 * global variable <sgenv_max_mbox_wait_time>).
1929 *
1930 * Any incorrect values are reset to the default time.
1931 */
1932 if (sgenv_max_mbox_wait_time <= max(sbbc_mbox_min_timeout, 0))
1933 sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout;
1934
1935 rv = sbbc_mbox_request_response(&req, &resp, sgenv_max_mbox_wait_time);
1936
1937 *status = resp.msg_status;
1938
1939 return (rv);
1940 }
1941
1942
1943 static int
sgenv_get_env_data(envresp_key_t key,int key_posn,uint16_t flag,int * status)1944 sgenv_get_env_data(envresp_key_t key, int key_posn, uint16_t flag, int *status)
1945 {
1946 /*
1947 * Only one of these buffers is ever going to be used in a call
1948 * so to save kernel stack space we use a union.
1949 */
1950 union {
1951 envresp_constants_t con[SGENV_MAX_SENSORS_PER_KEY];
1952 envresp_volatiles_t vol[SGENV_MAX_SENSORS_PER_KEY];
1953 envresp_thresholds_t thr[SGENV_MAX_SENSORS_PER_KEY];
1954 } buf;
1955
1956 sbbc_msg_t req; /* request */
1957 sbbc_msg_t resp; /* response */
1958
1959 int i; /* loop variable for mbox msg_buf */
1960 int rv; /* return value from call to mbox */
1961
1962 ASSERT(MUTEX_HELD(&env_cache_lock));
1963 ASSERT(env_cache[key_posn] != NULL);
1964
1965 if (flag == SG_GET_ENV_CONSTANTS) {
1966 resp.msg_len = sizeof (buf.con);
1967 resp.msg_buf = (caddr_t)buf.con;
1968
1969 } else if (flag == SG_GET_ENV_VOLATILES) {
1970 resp.msg_len = sizeof (buf.vol);
1971 resp.msg_buf = (caddr_t)buf.vol;
1972
1973 } else if (flag == SG_GET_ENV_THRESHOLDS) {
1974 resp.msg_len = sizeof (buf.thr);
1975 resp.msg_buf = (caddr_t)buf.thr;
1976
1977 } else {
1978 *status = EINVAL;
1979 return (-1);
1980 }
1981
1982 req.msg_type.type = SG_ENV;
1983 req.msg_type.sub_type = flag;
1984 req.msg_status = SG_MBOX_STATUS_SUCCESS;
1985 req.msg_len = 0;
1986 req.msg_bytes = 0;
1987 req.msg_data[0] = key;
1988
1989 resp.msg_type.type = SG_ENV;
1990 resp.msg_type.sub_type = flag;
1991 resp.msg_status = SG_MBOX_STATUS_SUCCESS;
1992 resp.msg_bytes = 0;
1993
1994 /*
1995 * We want to avoid the case where an invalid time
1996 * is specified by a user (by patching the
1997 * global variable <sgenv_max_mbox_wait_time>).
1998 *
1999 * Any incorrect values are reset to the default time.
2000 */
2001 if (sgenv_max_mbox_wait_time <= max(sbbc_mbox_min_timeout, 0))
2002 sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout;
2003
2004
2005 rv = sbbc_mbox_request_response(&req, &resp, sgenv_max_mbox_wait_time);
2006
2007 *status = resp.msg_status;
2008
2009 /*
2010 * We now check that the data returned is valid.
2011 */
2012 if (rv != 0) {
2013 /*
2014 * The SBBC driver encountered an error.
2015 */
2016 return (rv);
2017
2018 } else {
2019 /*
2020 * The SC encountered an error.
2021 */
2022 switch (*status) {
2023 case SG_MBOX_STATUS_SUCCESS:
2024 /*
2025 * No problems encountered - continue and return the
2026 * new data.
2027 */
2028 break;
2029
2030 case ETIMEDOUT:
2031 /*
2032 * For some reason the mailbox failed to return data
2033 * and instead timed out so we return ETIMEDOUT
2034 */
2035 return (ETIMEDOUT);
2036
2037 case ENXIO:
2038 /*
2039 * no sensors associated with this key, this may have
2040 * changed since we read the keys.
2041 */
2042 return (ENXIO);
2043
2044 default:
2045 /*
2046 * The contents of the mbox message contain corrupt
2047 * data. Flag this as an error to be returned.
2048 */
2049 SGENV_PRINT_MBOX_MSG((&resp), "Env info problem");
2050 return (EINVAL);
2051 }
2052 }
2053
2054 /*
2055 * Depending on the type of data returned, save the constant/volatile
2056 * data returned in the mailbox message into the <env_cache>.
2057 */
2058 for (i = 0; i < resp.msg_data[0]; i++) {
2059
2060 if (flag == SG_GET_ENV_CONSTANTS) {
2061 env_cache[key_posn][i].sd_id.tag_id =
2062 buf.con[i].id.tag_id;
2063 env_cache[key_posn][i].sd_lo =
2064 buf.con[i].lo;
2065 env_cache[key_posn][i].sd_hi =
2066 buf.con[i].hi;
2067
2068 } else if (flag == SG_GET_ENV_VOLATILES) {
2069 env_cache[key_posn][i].sd_value =
2070 buf.vol[i].value;
2071 env_cache[key_posn][i].sd_infostamp =
2072 buf.vol[i].info;
2073
2074 sgenv_set_sensor_status(&env_cache[key_posn][i]);
2075
2076 } else if (flag == SG_GET_ENV_THRESHOLDS) {
2077 env_cache[key_posn][i].sd_lo_warn =
2078 buf.thr[i].lo_warn;
2079 env_cache[key_posn][i].sd_hi_warn =
2080 buf.thr[i].hi_warn;
2081 }
2082 }
2083
2084 if (flag == SG_GET_ENV_VOLATILES)
2085 vol_sensor_count[key_posn] = resp.msg_data[0];
2086
2087 return (rv);
2088 }
2089
2090
2091 /*
2092 * This function handles any errors received from the mailbox framework while
2093 * getting environmental data.
2094 *
2095 * INPUT PARAMETERS
2096 * err - return value from call to mailbox framework.
2097 * status - message status returned by mailbox framework.
2098 * key - key from previous (if any) reading of env data.
2099 * Needed to see if we have old data in the <env_cache>.
2100 * str - String indicating what type of env request failed.
2101 *
2102 * RETURN VALUES
2103 * rv == DDI_FAILURE - there is no point in continuing processing
2104 * the data, we should exit from the kstat
2105 * framework.
2106 * rv != DDI_FAILURE - error has been handled correctly, continue
2107 * processing the data returned from the SC.
2108 */
2109 static int
sgenv_handle_env_data_error(int err,int status,int key_posn,envresp_key_t key,char * str)2110 sgenv_handle_env_data_error(int err, int status, int key_posn,
2111 envresp_key_t key, char *str)
2112 {
2113 int rv = DDI_SUCCESS;
2114
2115 ASSERT(str != (char *)NULL);
2116
2117 switch (err) {
2118 case ENXIO:
2119 /*
2120 * The SC has changed the env data associated with this key
2121 * since we started getting the data. We cannot tell if the
2122 * data has disappeared due to the removal of the board from
2123 * our Domain or just that the data has been updated. We
2124 * simply return the last known data (if possible) and the
2125 * next time we request the env data, the SC will have
2126 * finished processing this board so we will receive the
2127 * correct key values and we can get the correct data.
2128 */
2129 DCMN_ERR_CACHE(CE_NOTE, "key @ posn %d has changed from %d"
2130 " while %s", key_posn, key, str);
2131 rv = ENXIO;
2132 break;
2133
2134 default:
2135 sgenv_mbox_error_msg(str, err, status);
2136 rv = DDI_FAILURE;
2137 break;
2138 }
2139
2140 /*
2141 * If there was no data in the <env_cache>, we need to clear the data
2142 * just added as the <env_cache> will only be partially filled.
2143 */
2144 if (key == 0)
2145 sgenv_clear_env_cache_entry(key_posn);
2146
2147 return (rv);
2148 }
2149
2150
2151 /*
2152 * If the sensor readings for a particular collection of HPUs become invalid,
2153 * then we clear the cache by freeing up the memory.
2154 */
2155 static void
sgenv_clear_env_cache_entry(int key_posn)2156 sgenv_clear_env_cache_entry(int key_posn)
2157 {
2158 ASSERT(MUTEX_HELD(&env_cache_lock));
2159
2160 if (env_cache[key_posn] != NULL) {
2161 kmem_free(env_cache[key_posn], sizeof (env_sensor_t) *
2162 SGENV_MAX_SENSORS_PER_KEY);
2163 env_cache[key_posn] = NULL;
2164 vol_sensor_count[key_posn] = 0;
2165 }
2166 }
2167
2168
2169 static void
sgenv_mbox_error_msg(char * str,int err,int status)2170 sgenv_mbox_error_msg(char *str, int err, int status)
2171 {
2172 /*
2173 * We update the count of errors we have encountered during calls to
2174 * the mailbox framework (unless we will cause a wraparound)
2175 */
2176 if (sgenv_mbox_error_count < INT_MAX)
2177 sgenv_mbox_error_count++;
2178
2179 #ifdef DEBUG
2180 if ((sgenv_debug & SGENV_DEBUG_MSG) == 0)
2181 return;
2182
2183 ASSERT(str != NULL);
2184
2185 switch (err) {
2186 case ENOTSUP:
2187 DCMN_ERR(CE_WARN, "!This system configuration does not "
2188 "support SGENV");
2189 break;
2190 case ETIMEDOUT:
2191 DCMN_ERR(CE_WARN, "!Mailbox timed out while servicing "
2192 "SGENV request for %s", str);
2193 break;
2194 default:
2195 DCMN_ERR(CE_WARN, "!Error occurred reading %s, Errno=%d,"
2196 " Status=%d", str, err, status);
2197 break;
2198 }
2199 #endif
2200 }
2201
2202
2203 /*
2204 * INPUT PARAMETERS
2205 * key_posn - The position in the env_cache for which we want to
2206 * allocate space for a HPU's env data.
2207 *
2208 * ERROR VALUES
2209 * DDI_FAILURE - We failed to allocate memory for this cache entry.
2210 * There is no point asking the SC for env data for this
2211 * HPU as we will have nowhere to store it.
2212 */
2213 static int
sgenv_create_env_cache_entry(int key_posn)2214 sgenv_create_env_cache_entry(int key_posn)
2215 {
2216 int i; /* used to loop thru each sensor to set the status */
2217
2218 ASSERT(key_posn < SGENV_MAX_HPU_KEYS);
2219 ASSERT(key_posn >= 0);
2220
2221 env_cache[key_posn] = (env_sensor_t *)kmem_zalloc(
2222 sizeof (env_sensor_t) * SGENV_MAX_SENSORS_PER_KEY, KM_NOSLEEP);
2223 if (env_cache[key_posn] == NULL) {
2224 cmn_err(CE_WARN, "Failed to allocate memory for env_cache[%d]",
2225 key_posn);
2226 return (DDI_FAILURE);
2227 }
2228
2229 for (i = 0; i < SGENV_MAX_SENSORS_PER_KEY; i++)
2230 env_cache[key_posn][i].sd_status = SG_SENSOR_STATUS_OK;
2231
2232 return (DDI_SUCCESS);
2233 }
2234
2235
2236 static void
sgenv_destroy_env_cache(void)2237 sgenv_destroy_env_cache(void)
2238 {
2239 int i;
2240
2241 ASSERT(MUTEX_HELD(&env_cache_lock) == FALSE);
2242 mutex_enter(&env_cache_lock);
2243 for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) {
2244 if (env_cache[i] != NULL) {
2245 kmem_free(env_cache[i], sizeof (env_sensor_t) *
2246 SGENV_MAX_SENSORS_PER_KEY);
2247 env_cache[i] = NULL;
2248 vol_sensor_count[i] = 0;
2249 }
2250 }
2251 env_cache_updated = FALSE;
2252
2253 mutex_exit(&env_cache_lock);
2254 }
2255
2256 static void
sgenv_update_env_kstat_size(kstat_t * ksp)2257 sgenv_update_env_kstat_size(kstat_t *ksp)
2258 {
2259 int i;
2260
2261 ASSERT(MUTEX_HELD(&env_cache_lock));
2262
2263 /* reinitialize this and recount number of sensors */
2264 ksp->ks_data_size = 0;
2265
2266 for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) {
2267 if (vol_sensor_count[i] <= 0)
2268 continue;
2269
2270 ASSERT(vol_sensor_count[i] <= SGENV_MAX_SENSORS_PER_KEY);
2271
2272 /*
2273 * increment ksp->ks_data_size by the number of
2274 * sensors in the collection <i>.
2275 */
2276 ksp->ks_data_size += vol_sensor_count[i] *
2277 sizeof (env_sensor_t);
2278 }
2279 ASSERT(ksp->ks_data_size >= 0);
2280 }
2281
2282
2283 /*
2284 * This function is triggered by the thread that updates the env_cache.
2285 * It checks for any sensors which have exceeded their limits/thresholds
2286 * and generates sysevents for the sensor values that have changed.
2287 */
2288 /*ARGSUSED*/
2289 static uint_t
sgenv_check_sensor_thresholds(void)2290 sgenv_check_sensor_thresholds(void)
2291 {
2292 DCMN_ERR_S(f, "sgenv_poll_env()");
2293
2294 int key; /* loop through keys */
2295 int i; /* loops through each sensor for each <key> */
2296
2297 env_sensor_t sensor;
2298 env_sensor_status_t status;
2299
2300 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f);
2301
2302 mutex_enter(&env_cache_lock);
2303
2304 for (key = 0; key < SGENV_MAX_HPU_KEYS; key++) {
2305
2306 if (vol_sensor_count[key] == 0)
2307 continue;
2308
2309 for (i = 0; i < vol_sensor_count[key]; i++) {
2310 sensor = env_cache[key][i];
2311 status = sensor.sd_status;
2312
2313 if (SG_GET_SENSOR_STATUS(status) ==
2314 SG_GET_PREV_SENSOR_STATUS(status)) {
2315 continue;
2316 }
2317
2318 /*
2319 * This sensor has changed in status since the last
2320 * time we polled - we need to inform the sysevent
2321 * framework.
2322 */
2323 switch (sensor.sd_id.id.sensor_type) {
2324 /*
2325 * we don't care about the pseudo sensors and
2326 * the Fan Status is notified by a separate
2327 * unsolicited event so we simply get the next
2328 * reading
2329 */
2330 case SG_SENSOR_TYPE_ENVDB:
2331 case SG_SENSOR_TYPE_COOLING:
2332 continue;
2333
2334 /*
2335 * We have handled all the special cases by now.
2336 */
2337 default:
2338 (void) sgenv_process_threshold_event(sensor);
2339 break;
2340 }
2341
2342 SGENV_PRINT_POLL_INFO(sensor);
2343 }
2344 }
2345 mutex_exit(&env_cache_lock);
2346
2347 return (DDI_SUCCESS);
2348 }
2349
2350
2351 /*
2352 * This function is passed in an array of length SSM_MAX_INSTANCES and
2353 * it searches OBP to for ssm nodes, and for each one if finds, it sets the
2354 * corresponding position in the array to TRUE.
2355 */
2356 static void
sgenv_set_valid_node_positions(uint_t * node_present)2357 sgenv_set_valid_node_positions(uint_t *node_present)
2358 {
2359 dev_info_t *rdip; /* root dev info ptr */
2360 dev_info_t *dip;
2361
2362 ASSERT(node_present != NULL);
2363
2364 rdip = ddi_root_node();
2365
2366 for (dip = ddi_get_child(rdip); dip != NULL;
2367 dip = ddi_get_next_sibling(dip)) {
2368 if (strncmp("ssm", ddi_node_name(dip), 3) == 0) {
2369 int value;
2370
2371 value = ddi_getprop(DDI_DEV_T_ANY, dip,
2372 DDI_PROP_DONTPASS, "nodeid", 0);
2373
2374 /*
2375 * If we get a valid nodeID which has not already
2376 * been found in a previous call to this function,
2377 * then we set all 10 LSB bits to indicate there may
2378 * be a board present in each slot.
2379 *
2380 * It is the job of sgenv_get_board_info_data() to weed
2381 * out the invalid cases when we don't have a
2382 * DS chassis.
2383 *
2384 * NOTE: We make the assumption that a chassis cannot
2385 * be DR'ed out, which is true for a Serengeti.
2386 * By the time WildCat need this functionality Solaris
2387 * will be able to know what kind of a chassis is
2388 * present and there will be no need to try and work
2389 * this out from the msg_status from the mailbox.
2390 */
2391 if ((value >= 0) &&
2392 (value < SSM_MAX_INSTANCES) &&
2393 (node_present[value] == SGENV_NO_NODE_EXISTS)) {
2394 node_present[value] = SGENV_NODE_TYPE_DS;
2395 }
2396
2397 }
2398 }
2399 }
2400
2401
2402 static void
sgenv_set_sensor_status(env_sensor_t * sensor)2403 sgenv_set_sensor_status(env_sensor_t *sensor)
2404 {
2405 env_sensor_status_t *status;
2406
2407 ASSERT(sensor != NULL);
2408 status = &sensor->sd_status;
2409
2410 /*
2411 * Save the previous status so we can compare them later
2412 */
2413 SG_SET_PREV_SENSOR_STATUS(*status, *status);
2414
2415 switch (sensor->sd_id.id.sensor_type) {
2416 case SG_SENSOR_TYPE_ENVDB:
2417 /*
2418 * We want the status of this sensor to always be OK
2419 * The concept of limits/thresholds do not exist for it.
2420 */
2421 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_OK);
2422 break;
2423
2424 case SG_SENSOR_TYPE_COOLING:
2425 /*
2426 * Fans have no concept of limits/thresholds, they have a state
2427 * which we store in the <sd_status> field so that we can see
2428 * when this state is changed.
2429 */
2430 if (sensor->sd_value == SGENV_FAN_SPEED_HIGH) {
2431 SG_SET_SENSOR_STATUS(*status,
2432 SG_SENSOR_STATUS_FAN_HIGH);
2433
2434 } else if (sensor->sd_value == SGENV_FAN_SPEED_LOW) {
2435 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_FAN_LOW);
2436
2437 } else if (sensor->sd_value == SGENV_FAN_SPEED_OFF) {
2438 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_FAN_OFF);
2439
2440 } else {
2441 SG_SET_SENSOR_STATUS(*status,
2442 SG_SENSOR_STATUS_FAN_FAIL);
2443 }
2444
2445 /*
2446 * If this is the first time this fan status has been read,
2447 * then we need to initialize the previous reading to be the
2448 * same as the current reading so that an event is not
2449 * triggered.
2450 *
2451 * [ When the env_cache is being created, the status of the
2452 * sensors is set to SG_SENSOR_STATUS_OK, which is not a
2453 * valid Fan status ].
2454 */
2455 if (SG_GET_PREV_SENSOR_STATUS(*status) == SG_SENSOR_STATUS_OK) {
2456 SG_SET_PREV_SENSOR_STATUS(*status, *status);
2457 }
2458
2459 break;
2460
2461 default:
2462 if (sensor->sd_value > sensor->sd_hi) {
2463 SG_SET_SENSOR_STATUS(*status,
2464 SG_SENSOR_STATUS_HI_DANGER);
2465
2466 } else if (sensor->sd_value > sensor->sd_hi_warn) {
2467 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_HI_WARN);
2468
2469 } else if (sensor->sd_value < sensor->sd_lo) {
2470 SG_SET_SENSOR_STATUS(*status,
2471 SG_SENSOR_STATUS_LO_DANGER);
2472
2473 } else if (sensor->sd_value < sensor->sd_lo_warn) {
2474 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_LO_WARN);
2475
2476 } else {
2477 SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_OK);
2478 }
2479 break;
2480 }
2481 }
2482
2483
2484
2485
2486 /*
2487 * This function, when given an integer arg describing a HPU type,
2488 * returns the descriptive string associated with this HPU type.
2489 */
2490 static const char *
sgenv_get_hpu_id_str(uint_t hpu_type)2491 sgenv_get_hpu_id_str(uint_t hpu_type)
2492 {
2493 const hpu_value_t *hpu_list = hpus;
2494
2495 while (hpu_list->name != (char *)NULL) {
2496 if (hpu_list->value == hpu_type)
2497 return (hpu_list->IDstr);
2498 else
2499 hpu_list++;
2500 }
2501 return ((char *)NULL);
2502 }
2503
2504
2505 /*
2506 * This function, when given an integer arg describing a sensor part,
2507 * returns the descriptive string associated with this sensor part.
2508 */
2509 static const char *
sgenv_get_part_str(uint_t sensor_part)2510 sgenv_get_part_str(uint_t sensor_part)
2511 {
2512 const part_value_t *part_list = parts;
2513
2514 while (part_list->name != (char *)NULL) {
2515 if (part_list->value == sensor_part)
2516 return (part_list->name);
2517 else
2518 part_list++;
2519 }
2520 return ((char *)NULL);
2521 }
2522
2523
2524 /*
2525 * This function, when given an integer arg describing a sensor type,
2526 * returns the descriptive string associated with this sensor type.
2527 */
2528 static const char *
sgenv_get_type_str(uint_t sensor_type)2529 sgenv_get_type_str(uint_t sensor_type)
2530 {
2531 const type_value_t *type_list = types;
2532
2533 while (type_list->name != (char *)NULL) {
2534 if (type_list->value == sensor_type)
2535 return (type_list->name);
2536 else
2537 type_list++;
2538 }
2539 return ((char *)NULL);
2540 }
2541
2542
2543 /*
2544 * This function takes a sensor TagID and generates a string describing
2545 * where in the system the sensor is.
2546 */
2547 static void
sgenv_tagid_to_string(sensor_id_t id,char * str)2548 sgenv_tagid_to_string(sensor_id_t id, char *str)
2549 {
2550 const char *hpu_str;
2551 const char *part_str;
2552 const char *type_str;
2553
2554 ASSERT(str != NULL);
2555
2556 hpu_str = sgenv_get_hpu_id_str(id.id.hpu_type);
2557 part_str = sgenv_get_part_str(id.id.sensor_part);
2558 type_str = sgenv_get_type_str(id.id.sensor_type);
2559
2560 (void) sprintf(str,
2561 "Sensor: Node=%d, Board=%s%d, Device=%s%d, Type=%s%d: reading has ",
2562 id.id.node_id,
2563 ((hpu_str != NULL) ? hpu_str : ""),
2564 id.id.hpu_slot,
2565 ((part_str != NULL) ? part_str : ""),
2566 id.id.sensor_partnum,
2567 ((type_str != NULL) ? type_str : ""),
2568 id.id.sensor_typenum);
2569
2570 }
2571
2572
2573 /*
2574 * This interrupt handler watches for unsolicited mailbox messages from the SC
2575 * telling it that the Keyswitch Position had changed. It then informs the
2576 * Sysevent Framework of this change.
2577 */
2578 static uint_t
sgenv_keyswitch_handler(char * arg)2579 sgenv_keyswitch_handler(char *arg)
2580 {
2581 DCMN_ERR_S(f, "sgenv_keyswitch_handler()");
2582
2583 sysevent_t *ev = NULL;
2584 sysevent_id_t eid;
2585 sysevent_value_t se_val;
2586 sysevent_attr_list_t *ev_attr_list = NULL;
2587 sg_event_key_position_t *payload = NULL;
2588 sbbc_msg_t *msg = NULL;
2589 int err;
2590
2591 DCMN_ERR_EVENT(CE_NOTE, "%s called", f);
2592
2593 if (arg == NULL) {
2594 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f);
2595 return (DDI_INTR_CLAIMED);
2596 }
2597
2598 msg = (sbbc_msg_t *)arg;
2599 if (msg->msg_buf == NULL) {
2600 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f);
2601 return (DDI_INTR_CLAIMED);
2602 }
2603
2604 payload = (sg_event_key_position_t *)msg->msg_buf;
2605 if (payload == NULL) {
2606 DCMN_ERR_EVENT(CE_NOTE, "%s: payload == NULL", f);
2607 return (DDI_INTR_CLAIMED);
2608 }
2609
2610 DCMN_ERR_EVENT(CE_NOTE, "Key posn = %d", (int)*payload);
2611
2612
2613 /*
2614 * Allocate memory for sysevent buffer.
2615 */
2616 ev = sysevent_alloc(EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE,
2617 EP_SGENV, SE_NOSLEEP);
2618 if (ev == NULL) {
2619 cmn_err(CE_WARN, "%s: Failed to alloc mem for %s/%s event",
2620 f, EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE);
2621 return (DDI_INTR_CLAIMED);
2622 }
2623
2624
2625 /*
2626 * Set the DOMAIN_WHAT_CHANGED attribute.
2627 */
2628 se_val.value_type = SE_DATA_TYPE_STRING;
2629 se_val.value.sv_string = DOMAIN_KEYSWITCH;
2630 err = sysevent_add_attr(&ev_attr_list, DOMAIN_WHAT_CHANGED,
2631 &se_val, SE_NOSLEEP);
2632 if (err != 0) {
2633 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2634 DOMAIN_WHAT_CHANGED, EC_DOMAIN,
2635 ESC_DOMAIN_STATE_CHANGE);
2636 sysevent_free(ev);
2637 return (DDI_INTR_CLAIMED);
2638 }
2639
2640
2641 /*
2642 * Log this event with sysevent framework.
2643 */
2644 if (sysevent_attach_attributes(ev, ev_attr_list) != 0) {
2645 cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event",
2646 EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE);
2647 sysevent_free_attr(ev_attr_list);
2648 sysevent_free(ev);
2649 return (DDI_INTR_CLAIMED);
2650 }
2651 err = log_sysevent(ev, SE_NOSLEEP, &eid);
2652 if (err != 0) {
2653 cmn_err(CE_WARN, "Failed to log %s/%s event",
2654 EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE);
2655 sysevent_free(ev);
2656 return (DDI_INTR_CLAIMED);
2657 }
2658
2659 /* clean up */
2660 sysevent_free(ev);
2661
2662 return (DDI_INTR_CLAIMED);
2663 }
2664
2665
2666 /*
2667 * This interrupt handler watches for unsolicited mailbox messages from the SC
2668 * telling it that an environmental sensor has exceeded a threshold/limit level
2669 * or has returned to normal having previously exceeded a threshold/limit level.
2670 * It then informs the Sysevent Framework of this change and updates the
2671 * env_cache.
2672 */
2673 static uint_t
sgenv_env_data_handler(char * arg)2674 sgenv_env_data_handler(char *arg)
2675 {
2676 DCMN_ERR_S(f, "sgenv_env_data_handler()");
2677
2678 sg_event_env_changed_t *payload = NULL;
2679 sbbc_msg_t *msg = NULL;
2680
2681 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f);
2682
2683 if (arg == NULL) {
2684 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f);
2685 return (DDI_INTR_CLAIMED);
2686 }
2687
2688 msg = (sbbc_msg_t *)arg;
2689
2690 if (msg->msg_buf == NULL) {
2691 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f);
2692 return (DDI_INTR_CLAIMED);
2693 }
2694
2695 payload = (sg_event_env_changed_t *)msg->msg_buf;
2696
2697 /*
2698 * We check the first field of the msg_buf to see if the event_type
2699 * is SC_EVENT_ENV, if it is then we handle the event.
2700 */
2701 if (payload->event_type != SC_EVENT_ENV) {
2702 return (DDI_INTR_CLAIMED);
2703 }
2704
2705 /*
2706 * We now need to signal to the env background thread to ask the SC
2707 * for env readings and discover which sensor caused the SC to send
2708 * the ENV event before sending a sysevent to userland.
2709 */
2710 sgenv_indicate_cache_update_needed(ENV_CACHE);
2711
2712 return (DDI_INTR_CLAIMED);
2713 }
2714
2715
2716 /*
2717 * This interrupt handler watches for unsolicited mailbox messages from the SC
2718 * telling it that the status of a fan has changed. We register a sysevent
2719 * and trigger a softint to update the env cache.
2720 */
2721 static uint_t
sgenv_fan_status_handler(char * arg)2722 sgenv_fan_status_handler(char *arg)
2723 {
2724 DCMN_ERR_S(f, "sgenv_fan_status_handler()");
2725
2726 sysevent_t *ev = NULL;
2727 sysevent_id_t eid;
2728 sysevent_value_t se_val;
2729 sysevent_attr_list_t *ev_attr_list = NULL;
2730 sg_event_fan_status_t *payload = NULL;
2731 sbbc_msg_t *msg = NULL;
2732 char fan_str[MAXNAMELEN];
2733 int err;
2734
2735 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f);
2736
2737 if (arg == NULL) {
2738 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f);
2739 return (DDI_INTR_CLAIMED);
2740 }
2741
2742 msg = (sbbc_msg_t *)arg;
2743
2744 /*
2745 * We check the first field of the msg_buf to see if the event_type
2746 * is SC_EVENT_FAN
2747 */
2748 if (msg->msg_buf == NULL) {
2749 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f);
2750 return (DDI_INTR_CLAIMED);
2751 }
2752
2753 payload = (sg_event_fan_status_t *)msg->msg_buf;
2754
2755 /*
2756 * If another type of ENV Event triggered this handler then we simply
2757 * return now.
2758 */
2759 if (payload->event_type != SC_EVENT_FAN) {
2760 return (DDI_INTR_CLAIMED);
2761 }
2762
2763 /*
2764 * Allocate memory for sysevent buffer.
2765 */
2766 ev = sysevent_alloc(EC_ENV, ESC_ENV_FAN, EP_SGENV, SE_NOSLEEP);
2767 if (ev == NULL) {
2768 cmn_err(CE_WARN, "%s: Failed to alloc mem for %s/%s event",
2769 f, EC_ENV, ESC_ENV_FAN);
2770 return (DDI_INTR_CLAIMED);
2771 }
2772
2773
2774 /*
2775 * Set the following attributes for this event:
2776 *
2777 * ENV_FRU_ID
2778 * ENV_FRU_RESOURCE_ID
2779 * ENV_FRU_DEVICE
2780 * ENV_FRU_STATE
2781 * ENV_MSG
2782 *
2783 */
2784 se_val.value_type = SE_DATA_TYPE_STRING;
2785 se_val.value.sv_string = ENV_RESERVED_ATTR;
2786 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_ID, &se_val, SE_NOSLEEP);
2787 if (err != 0) {
2788 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2789 ENV_FRU_ID, EC_ENV, ESC_ENV_FAN);
2790 sysevent_free(ev);
2791 return (DDI_INTR_CLAIMED);
2792 }
2793
2794 se_val.value_type = SE_DATA_TYPE_STRING;
2795 se_val.value.sv_string = ENV_RESERVED_ATTR;
2796 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_RESOURCE_ID,
2797 &se_val, SE_NOSLEEP);
2798 if (err != 0) {
2799 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2800 ENV_FRU_RESOURCE_ID, EC_ENV, ESC_ENV_FAN);
2801 sysevent_free_attr(ev_attr_list);
2802 sysevent_free(ev);
2803 return (DDI_INTR_CLAIMED);
2804 }
2805
2806 se_val.value_type = SE_DATA_TYPE_STRING;
2807 se_val.value.sv_string = ENV_RESERVED_ATTR;
2808 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_DEVICE,
2809 &se_val, SE_NOSLEEP);
2810 if (err != 0) {
2811 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2812 ENV_FRU_DEVICE, EC_ENV, ESC_ENV_FAN);
2813 sysevent_free_attr(ev_attr_list);
2814 sysevent_free(ev);
2815 return (DDI_INTR_CLAIMED);
2816 }
2817
2818 /*
2819 * Checks the fan to see if it has failed.
2820 */
2821 se_val.value_type = SE_DATA_TYPE_INT32;
2822 switch (payload->fan_speed) {
2823 case SGENV_FAN_SPEED_OFF:
2824 case SGENV_FAN_SPEED_LOW:
2825 case SGENV_FAN_SPEED_HIGH:
2826 se_val.value.sv_int32 = ENV_OK;
2827 break;
2828
2829 case SGENV_FAN_SPEED_UNKNOWN:
2830 default:
2831 se_val.value.sv_int32 = ENV_FAILED;
2832 break;
2833 }
2834
2835 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_STATE,
2836 &se_val, SE_NOSLEEP);
2837 if (err != 0) {
2838 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2839 ENV_FRU_STATE, EC_ENV, ESC_ENV_FAN);
2840 sysevent_free_attr(ev_attr_list);
2841 sysevent_free(ev);
2842 return (DDI_INTR_CLAIMED);
2843 }
2844
2845
2846 /*
2847 * Create the message to be sent to sysevent.
2848 */
2849 (void) sprintf(fan_str,
2850 "The status of the fan in Node%d/Slot%d is now ",
2851 payload->node_id, payload->slot_number);
2852 switch (payload->fan_speed) {
2853 case SGENV_FAN_SPEED_OFF:
2854 (void) strcat(fan_str, SGENV_FAN_SPEED_OFF_STR);
2855 break;
2856
2857 case SGENV_FAN_SPEED_LOW:
2858 (void) strcat(fan_str, SGENV_FAN_SPEED_LOW_STR);
2859 break;
2860
2861 case SGENV_FAN_SPEED_HIGH:
2862 (void) strcat(fan_str, SGENV_FAN_SPEED_HIGH_STR);
2863 break;
2864
2865 case SGENV_FAN_SPEED_UNKNOWN:
2866 default:
2867 (void) strcat(fan_str, SGENV_FAN_SPEED_UNKNOWN_STR);
2868 break;
2869 }
2870
2871 DCMN_ERR_EVENT(CE_NOTE, "Fan: %s", fan_str);
2872
2873 se_val.value_type = SE_DATA_TYPE_STRING;
2874 se_val.value.sv_string = fan_str;
2875 err = sysevent_add_attr(&ev_attr_list, ENV_MSG, &se_val, SE_NOSLEEP);
2876 if (err != 0) {
2877 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2878 ENV_MSG, EC_ENV, ESC_ENV_FAN);
2879 sysevent_free_attr(ev_attr_list);
2880 sysevent_free(ev);
2881 return (DDI_INTR_CLAIMED);
2882 }
2883
2884
2885 /*
2886 * Log this event with sysevent framework.
2887 */
2888 if (sysevent_attach_attributes(ev, ev_attr_list) != 0) {
2889 cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event",
2890 EC_ENV, ESC_ENV_FAN);
2891 sysevent_free_attr(ev_attr_list);
2892 sysevent_free(ev);
2893 return (DDI_INTR_CLAIMED);
2894 }
2895 err = log_sysevent(ev, SE_NOSLEEP, &eid);
2896 if (err != 0) {
2897 cmn_err(CE_WARN, "Failed to log %s/%s event",
2898 EC_ENV, ESC_ENV_FAN);
2899 sysevent_free(ev);
2900 return (DDI_INTR_CLAIMED);
2901 }
2902 sysevent_free(ev);
2903
2904 /*
2905 * We now need to signal to the env background thread to ask the SC
2906 * for env readings and discover which sensor caused the SC to send
2907 * the ENV event before sending a sysevent to userland.
2908 */
2909 sgenv_indicate_cache_update_needed(ENV_CACHE);
2910
2911 return (DDI_INTR_CLAIMED);
2912 }
2913
2914
2915 /*
2916 * This function informs the Sysevent Framework that a temperature, voltage
2917 * or current reading for a sensor has exceeded its threshold/limit value or
2918 * that the reading has returned to a safe value having exceeded its
2919 * threshold/limit value previously.
2920 */
2921 static int
sgenv_process_threshold_event(env_sensor_t sensor)2922 sgenv_process_threshold_event(env_sensor_t sensor)
2923 {
2924 DCMN_ERR_S(f, "sgenv_process_threshold_event()");
2925
2926 sysevent_t *ev = NULL;
2927 sysevent_id_t eid;
2928 sysevent_value_t se_val;
2929 sysevent_attr_list_t *ev_attr_list = NULL;
2930 int err;
2931
2932 char sensor_str[MAX_TAG_ID_STR_LEN]; /* holds the sensor TagID */
2933
2934 /*
2935 * This function handles the case when a temperature reading passes
2936 * a threshold/limit level and also the case when there are power
2937 * fluctuations (voltage/current readings pass a threshold/limit level)
2938 * so we need to work out which case it is.
2939 *
2940 * if <temp_event_type> is TRUE, then need to handle an event
2941 * of type ESC_ENV_TEMP.
2942 */
2943 int temp_event_type;
2944
2945 switch (sensor.sd_id.id.sensor_type) {
2946 case SG_SENSOR_TYPE_TEMPERATURE:
2947 temp_event_type = TRUE;
2948 ev = sysevent_alloc(EC_ENV, ESC_ENV_TEMP, EP_SGENV, SE_NOSLEEP);
2949 if (ev == NULL) {
2950 cmn_err(CE_WARN, "Failed to allocate sysevent buffer "
2951 "for %s/%s event", EC_ENV, ESC_ENV_TEMP);
2952 return (DDI_FAILURE);
2953 }
2954 break;
2955
2956 default:
2957 temp_event_type = FALSE;
2958 ev = sysevent_alloc(EC_ENV, ESC_ENV_POWER,
2959 EP_SGENV, SE_NOSLEEP);
2960 if (ev == NULL) {
2961 cmn_err(CE_WARN, "Failed to allocate sysevent buffer "
2962 "for %s/%s event", EC_ENV, ESC_ENV_POWER);
2963 return (DDI_FAILURE);
2964 }
2965 break;
2966 }
2967
2968
2969 /*
2970 * Set the following attributes for this event:
2971 *
2972 * ENV_FRU_ID
2973 * ENV_FRU_RESOURCE_ID
2974 * ENV_FRU_DEVICE
2975 * ENV_FRU_STATE
2976 * ENV_MSG
2977 *
2978 */
2979 se_val.value_type = SE_DATA_TYPE_STRING;
2980 se_val.value.sv_string = ENV_RESERVED_ATTR;
2981 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_ID, &se_val, SE_NOSLEEP);
2982 if (err != 0) {
2983 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2984 ENV_FRU_ID, EC_ENV,
2985 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
2986 sysevent_free(ev);
2987 return (DDI_FAILURE);
2988 }
2989
2990 se_val.value_type = SE_DATA_TYPE_STRING;
2991 se_val.value.sv_string = ENV_RESERVED_ATTR;
2992 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_RESOURCE_ID,
2993 &se_val, SE_NOSLEEP);
2994 if (err != 0) {
2995 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2996 ENV_FRU_RESOURCE_ID, EC_ENV,
2997 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
2998 sysevent_free_attr(ev_attr_list);
2999 sysevent_free(ev);
3000 return (DDI_FAILURE);
3001 }
3002
3003 se_val.value_type = SE_DATA_TYPE_STRING;
3004 se_val.value.sv_string = ENV_RESERVED_ATTR;
3005 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_DEVICE,
3006 &se_val, SE_NOSLEEP);
3007 if (err != 0) {
3008 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
3009 ENV_FRU_DEVICE, EC_ENV,
3010 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
3011 sysevent_free_attr(ev_attr_list);
3012 sysevent_free(ev);
3013 return (DDI_FAILURE);
3014 }
3015
3016
3017 /*
3018 * We need to find out the status of the reading.
3019 */
3020 se_val.value_type = SE_DATA_TYPE_INT32;
3021 switch (SG_GET_SENSOR_STATUS(sensor.sd_status)) {
3022 case SG_SENSOR_STATUS_OK:
3023 se_val.value.sv_int32 = ENV_OK;
3024 break;
3025
3026 case SG_SENSOR_STATUS_LO_WARN:
3027 case SG_SENSOR_STATUS_HI_WARN:
3028 se_val.value.sv_int32 = ENV_WARNING;
3029 break;
3030
3031 case SG_SENSOR_STATUS_LO_DANGER:
3032 case SG_SENSOR_STATUS_HI_DANGER:
3033 default:
3034 se_val.value.sv_int32 = ENV_FAILED;
3035 break;
3036 }
3037
3038 /*
3039 * Add ENV_FRU_STATE attribute.
3040 */
3041 err = sysevent_add_attr(&ev_attr_list, ENV_FRU_STATE,
3042 &se_val, SE_NOSLEEP);
3043 if (err != 0) {
3044 cmn_err(CE_WARN, "Failed to add attr[%s] for %s/%s event "
3045 "(Err=%d)", ENV_FRU_STATE, EC_ENV,
3046 (temp_event_type ? ESC_ENV_TEMP: ESC_ENV_POWER),
3047 err);
3048 sysevent_free_attr(ev_attr_list);
3049 sysevent_free(ev);
3050 return (DDI_FAILURE);
3051 }
3052
3053
3054 /*
3055 * Save the sensor TagID as a string so that a meaningful message
3056 * can be passed to as part of the ENV_MSG attribute.
3057 */
3058 sgenv_tagid_to_string(sensor.sd_id, sensor_str);
3059
3060 /*
3061 * We need to add a string stating what type of event occurred.
3062 */
3063 switch (SG_GET_SENSOR_STATUS(sensor.sd_status)) {
3064 case SG_SENSOR_STATUS_OK:
3065 (void) strcat(sensor_str, SGENV_EVENT_MSG_OK);
3066 break;
3067
3068 case SG_SENSOR_STATUS_LO_WARN:
3069 (void) strcat(sensor_str, SGENV_EVENT_MSG_LO_WARN);
3070 break;
3071
3072 case SG_SENSOR_STATUS_HI_WARN:
3073 (void) strcat(sensor_str, SGENV_EVENT_MSG_HI_WARN);
3074 break;
3075
3076 case SG_SENSOR_STATUS_LO_DANGER:
3077 (void) strcat(sensor_str, SGENV_EVENT_MSG_LO_DANGER);
3078 break;
3079
3080 case SG_SENSOR_STATUS_HI_DANGER:
3081 (void) strcat(sensor_str, SGENV_EVENT_MSG_HI_DANGER);
3082 break;
3083
3084 default:
3085 DCMN_ERR_EVENT(CE_NOTE, "%s: Unknown sensor status", f);
3086 (void) strcat(sensor_str, SGENV_EVENT_MSG_UNKNOWN);
3087 break;
3088 }
3089
3090 DCMN_ERR_EVENT(CE_NOTE, "Temp/Power: %s", sensor_str);
3091
3092 /*
3093 * Add ENV_MSG attribute.
3094 */
3095 se_val.value_type = SE_DATA_TYPE_STRING;
3096 se_val.value.sv_string = sensor_str;
3097 err = sysevent_add_attr(&ev_attr_list, ENV_MSG, &se_val, SE_NOSLEEP);
3098 if (err != 0) {
3099 cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
3100 ENV_MSG, EC_ENV,
3101 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
3102 sysevent_free_attr(ev_attr_list);
3103 sysevent_free(ev);
3104 return (DDI_FAILURE);
3105 }
3106
3107
3108 /*
3109 * Log this event with sysevent framework.
3110 */
3111 if (sysevent_attach_attributes(ev, ev_attr_list) != 0) {
3112 cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event",
3113 EC_ENV,
3114 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
3115 sysevent_free_attr(ev_attr_list);
3116 sysevent_free(ev);
3117 return (DDI_FAILURE);
3118 }
3119 err = log_sysevent(ev, SE_NOSLEEP, &eid);
3120 if (err != 0) {
3121 cmn_err(CE_WARN, "Failed to log %s/%s event", EC_ENV,
3122 (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
3123 sysevent_free(ev);
3124 return (DDI_FAILURE);
3125 }
3126 sysevent_free(ev);
3127
3128 return (DDI_SUCCESS);
3129 }
3130
3131
3132 /*
3133 * This function gets called when sgenv is notified of a DR event.
3134 * We need to update the board and env caches to ensure that they
3135 * now contain the latest system information..
3136 */
3137 static uint_t
sgenv_dr_event_handler(char * arg)3138 sgenv_dr_event_handler(char *arg)
3139 {
3140 DCMN_ERR_S(f, "sgenv_dr_event_handler()");
3141
3142 sg_system_fru_descriptor_t *payload = NULL;
3143 sbbc_msg_t *msg = NULL;
3144
3145 DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f);
3146 DCMN_ERR_EVENT(CE_NOTE, "%s: Start: %lld", f, gethrtime());
3147
3148
3149 if (arg == NULL) {
3150 DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f);
3151 return (DDI_INTR_CLAIMED);
3152 }
3153
3154 msg = (sbbc_msg_t *)arg;
3155
3156 if (msg->msg_buf == NULL) {
3157 DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f);
3158 return (DDI_INTR_CLAIMED);
3159 }
3160
3161 payload = (sg_system_fru_descriptor_t *)msg->msg_buf;
3162
3163 /*
3164 * We check the event_details field of the msg_buf to see if
3165 * we need to invalidate the caches
3166 */
3167 switch (payload->event_details) {
3168 case SG_EVT_BOARD_ABSENT:
3169 case SG_EVT_BOARD_PRESENT:
3170 case SG_EVT_UNASSIGN:
3171 case SG_EVT_ASSIGN:
3172 case SG_EVT_UNAVAILABLE:
3173 case SG_EVT_AVAILABLE:
3174 case SG_EVT_POWER_OFF:
3175 case SG_EVT_POWER_ON:
3176 case SG_EVT_PASSED_TEST:
3177 case SG_EVT_FAILED_TEST:
3178 /*
3179 * We now need to signal to the background threads to poll the
3180 * SC for env readings and board info which may have changed
3181 * as a result of the DR changes. This will cause the
3182 * env_cache and the board_cache to be updated.
3183 */
3184 DCMN_ERR_EVENT(CE_NOTE, "%s: about to signal to background "
3185 "threads due to event %d.", f, payload->event_details);
3186
3187 sgenv_indicate_cache_update_needed(ENV_CACHE);
3188 sgenv_indicate_cache_update_needed(BOARD_CACHE);
3189
3190 break;
3191
3192 default:
3193 DCMN_ERR_EVENT(CE_NOTE, "%s: Unknown DR event type.", f);
3194 break;
3195 }
3196
3197 DCMN_ERR_EVENT(CE_NOTE, "%s: Finish: %lld", f, gethrtime());
3198
3199 return (DDI_INTR_CLAIMED);
3200 }
3201
3202
3203 /*
3204 * This function is called by the interrupt handlers watching for ENV/DR events
3205 * from the SC. It indicates to the thread responsible for the cache specified
3206 * that it needs to update its data.
3207 */
3208 static void
sgenv_indicate_cache_update_needed(int cache_type)3209 sgenv_indicate_cache_update_needed(int cache_type)
3210 {
3211 DCMN_ERR_S(f, "sgenv_indicate_cache_update_needed()");
3212
3213 /*
3214 * If the cache is already being updated, we set a flag to
3215 * inform the thread that it needs to reread the data when
3216 * it is finished as we cannot be sure if the data was read
3217 * before or after the time this handler was triggered.
3218 *
3219 * Otherwise the thread is waiting for us and we signal
3220 * to it to start reading the data.
3221 */
3222 switch (cache_type) {
3223 case ENV_CACHE:
3224 mutex_enter(&env_flag_lock);
3225 if (env_cache_updating) {
3226 DCMN_ERR_THREAD(CE_NOTE, "%s: Thread already "
3227 "updating env cache", f);
3228 env_cache_update_needed = B_TRUE;
3229
3230 } else {
3231 DCMN_ERR_THREAD(CE_NOTE, "%s: Sending signal "
3232 "to env thread", f);
3233 cv_signal(&env_flag_cond);
3234 }
3235 mutex_exit(&env_flag_lock);
3236 break;
3237
3238 case BOARD_CACHE:
3239 mutex_enter(&board_flag_lock);
3240 if (board_cache_updating) {
3241 DCMN_ERR_THREAD(CE_NOTE, "%s: Thread already "
3242 "updating board cache", f);
3243 board_cache_update_needed = B_TRUE;
3244
3245 } else {
3246 DCMN_ERR_THREAD(CE_NOTE, "%s: Sending signal "
3247 "to board thread", f);
3248 cv_signal(&board_flag_cond);
3249 }
3250 mutex_exit(&board_flag_lock);
3251 break;
3252
3253 default:
3254 DCMN_ERR(CE_NOTE, "%s: Unknown cache type:0x%x", f, cache_type);
3255 break;
3256 }
3257 }
3258