Home | History | Annotate | Download | only in diskomizer
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 #pragma ident	"@(#)device_control.c	1.17	09/05/26 SMI"
     23 
     24 /*
     25  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     26  * Use is subject to license terms.
     27  */
     28 /*
     29  *	Device controls
     30  *
 *	This is a state machine implementation.  There are 5 states:
     32  *	DEV_NOT_READY
     33  *	DEV_RUNNING
     34  *	DEV_STOPPING
     35  *	DEV_STOPPED
     36  *	DEV_STARTING
     37  *
     38  *	From DEV_NOT_READY you can only go to DEV_STOPPED.
     39  * 	From DEV_STOPPED you can only go to DEV_STARTING.
     40  *	From DEV_STARTING you can only go to DEV_RUNNING.
     41  *	From DEV_RUNNING you can only go to DEV_STOPPING.
     42  *	From DEV_STOPPING you can only go to DEV_STOPPED.
     43  *
 *	The complication is that, to prevent the device state that
 *	is held in shared memory and protected by a lock from becoming
 *	"hot", the device states are cached in each process and the
 *	shared state is only checked when the time to live expires.
     48  *
 *	So it is possible for the local state in a process to be
 *	one state but the shared state to be another.  For example,
 *	when moving from DEV_STOPPED to DEV_STARTING, the first process
 *	to change its state will change both its own state and the
 *	shared state.  When moving from DEV_STARTING to DEV_RUNNING it
 *	is the last process to change state that changes the shared state.
     55  *
     56  */
     57 #include "diskomizer64mpism.h"
     58 #include "device_control.h"
     59 #include "args.h"
     60 #include "timeval.h"
     61 #include <diskomizer/log.h>
     62 #include "list_ops.h"
     63 #include "locks.h"
     64 
     65 static void
     66 do_restart_stopped_devices(off64_t start, struct device *device,
     67 	struct timeval *now);
     68 
     69 restart_stopped_devices_t restart_stopped_devices =
     70 	do_restart_stopped_devices;
     71 
     72 static int not_running_devices;
     73 
     74 struct set_dev_args {
     75 	struct timeval *now;
     76 	dev_state newstate;
     77 	dev_state oldstate;
     78 };
     79 
     80 int
     81 all_running(void)
     82 {
     83 	return (not_running_devices == 0 ? 1 : 0);
     84 }
     85 
     86 static long long
     87 my_llrand(void)
     88 {
     89 	union {
     90 		long long ll;
     91 		ulong_t l[sizeof (long long)/sizeof (ulong_t)];
     92 	} u;
     93 	int i;
     94 	for (i = 0; i < (sizeof (long long)/sizeof (ulong_t)); i++)
     95 		u.l[i] = my_lrand();
     96 
     97 	return (u.ll);
     98 }
     99 static long long
    100 ll_mod(long long a, long long b)
    101 {
    102 	long long x = a % b;
    103 
    104 	assert(x < b);
    105 	return (x);
    106 #ifdef _NEVER
    107 	long long tmp = b;
    108 
    109 	while (tmp < a/2)
    110 		tmp *= 2;
    111 	while (tmp != b) {
    112 		if (a > tmp)
    113 			a -= tmp;
    114 		tmp = tmp / 2;
    115 	}
    116 	return (a);
    117 #endif
    118 }
    119 struct timeval
    120 set_ttl(struct timeval tv, long long max, long long min)
    121 {
    122 	long long diff = max - min;
    123 	long long rand = my_llrand();
    124 
    125 	if (diff <= 0)
    126 		return (timeval_llong_add(&tv, min));
    127 	else
    128 		return (timeval_llong_add(&tv, min + ll_mod(rand, diff)));
    129 }
    130 
    131 void
    132 init_device_control(struct device *devices)
    133 {
    134 	int ndevices = how_many_devices(devices);
    135 	struct device_control *control;
    136 
    137 	control = (struct device_control *)alloc_mem(ndevices,
    138 	    sizeof (struct device_control));
    139 
    140 	while (devices != NULL) {
    141 		(void) init_shared_lock(&control->lock);
    142 		devices->state = control->state = DEV_STOPPED;
    143 		control->processes_stopped = opts.nprocs;
    144 		devices->control = control++;
    145 		not_running_devices++;
    146 		devices->control->state_ttl = devices->state_ttl;
    147 		devices = devices->next;
    148 	}
    149 }
    150 
    151 static void *
    152 do_dev_control(struct device *device, void *(*func)(struct device *, void *),
    153 	void *arg)
    154 {
    155 	void *res;
    156 	if (device->control == NULL)
    157 		return (NULL);
    158 	pthread_mutex_lock(&device->control->lock);
    159 	res = func(device, arg);
    160 	pthread_mutex_unlock(&device->control->lock);
    161 	return (res);
    162 }
    163 #ifdef NOT_USED
    164 static void *
    165 get_state(struct device *device, void *x)
    166 {
    167 	return ((void*)device->control->state);
    168 }
    169 #endif
    170 static void *
    171 set_state(struct device *device, void *arg)
    172 {
    173 	struct set_dev_args *dap = (struct set_dev_args *)arg;
    174 	char buf[128];
    175 
    176 	dev_state state = dap->newstate;
    177 	if (dap->oldstate != device->control->state) {
    178 		if (device->control->state == dap->newstate)
    179 			device->state = state;
    180 		return ((void *) device->control->state);
    181 	}
    182 	device->state = state;
    183 	switch (state) {
    184 	case DEV_RUNNING:
    185 		assert(device->control->state != DEV_STOPPED);
    186 		assert(device->control->state != DEV_STOPPING);
    187 		assert(device->control->state != DEV_NOT_READY);
    188 		if ((--device->control->processes_stopped) == 0) {
    189 			device->control->state = DEV_RUNNING;
    190 			device->state_ttl = device->control->state_ttl =
    191 			    set_ttl(*dap->now, opts.expert_max_active_time,
    192 			    opts.expert_min_active_time);
    193 			if (would_stop_before(
    194 			    device->control->state_ttl.tv_sec)) {
    195 				plog(LOG_NOTICE,
    196 				    "%-*s DEV_RUNNING\n",
    197 				    longest_logical_name(),
    198 				    device->logicalname);
    199 			} else {
    200 				(void) strftime(buf, 128, TIME_FORMAT,
    201 				    localtime(
    202 				    &device->control->state_ttl.tv_sec));
    203 				plog(LOG_NOTICE,
    204 				    "%-*s DEV_RUNNING  stop at  %s\n",
    205 				    longest_logical_name(),
    206 				    device->logicalname, buf);
    207 			}
    208 		}
    209 		if (--not_running_devices == 0) {
    210 			restart_stopped_devices =
    211 			    (restart_stopped_devices_t)nop;
    212 		}
    213 		break;
    214 	case DEV_STARTING:
    215 		assert(device->control->state != DEV_RUNNING);
    216 		assert(device->control->state != DEV_STOPPING);
    217 		assert(device->control->state != DEV_NOT_READY);
    218 		(void) strftime(buf, 128, TIME_FORMAT,
    219 		    localtime(&dap->now->tv_sec));
    220 		plog(LOG_NOTICE, "%-*s DEV_STARTING time now %s\n",
    221 		    longest_logical_name(), device->logicalname, buf);
    222 		device->control->state = state;
    223 		break;
    224 	case DEV_STOPPED:
    225 		assert(device->control->state != DEV_RUNNING);
    226 		assert(device->control->state != DEV_STARTING);
    227 		/*
    228 		 * The order of the controls in this if is important.
    229 		 * the ++device->control->processes_stopped must be last
    230 		 * so it only gets incremented when the other two are not
    231 		 * true.
    232 		 */
    233 		if (device->control->state == DEV_STOPPED ||
    234 		    device->control->state == DEV_NOT_READY ||
    235 		    (++device->control->processes_stopped) == opts.nprocs) {
    236 			device->state_ttl = device->control->state_ttl =
    237 			    set_ttl(*dap->now, opts.expert_max_idle_time,
    238 			    opts.expert_min_idle_time);
    239 			(void) strftime(buf, 128, TIME_FORMAT,
    240 			    localtime(&device->state_ttl.tv_sec));
    241 			device->control->state = DEV_STOPPED;
    242 			plog(LOG_NOTICE, "%-*s DEV_STOPPED  start at %s\n",
    243 			    longest_logical_name(),
    244 			    device->logicalname, buf);
    245 		} else {
    246 			device->state_ttl.tv_sec = -1;
    247 		}
    248 		break;
    249 	case DEV_STOPPING:
    250 		assert(device->control->state != DEV_STOPPED);
    251 		assert(device->control->state != DEV_STARTING);
    252 		assert(device->control->state != DEV_NOT_READY);
    253 		(void) strftime(buf, 128, TIME_FORMAT,
    254 		    localtime(&dap->now->tv_sec));
    255 		plog(LOG_NOTICE, "%-*s DEV_STOPPING time now %s\n",
    256 		    longest_logical_name(), device->logicalname, buf);
    257 		device->control->state = state;
    258 		if (not_running_devices++ == 0) {
    259 			restart_stopped_devices = do_restart_stopped_devices;
    260 		}
    261 		break;
    262 	default:
    263 		abort(); /* This should NEVER happen */
    264 	}
    265 	return ((void *) device->control->state);
    266 }
    267 /*
    268  * check_ttl
    269  *
    270  * Check if the local time to live on the device has expired if we are
    271  * currently in the global state given.
    272  *
    273  *	return 1 if it has and 0 if it has not or we are not in that
    274  *	global state.
    275  */
    276 static int
    277 check_ttl(struct device *device, struct timeval *now, dev_state state)
    278 {
    279 	char ret_zero;
    280 	if (timeval_gt(device->state_ttl, *now))
    281 		return (0);
    282 	pthread_mutex_lock(&device->control->lock);
    283 	device->state_ttl = device->control->state_ttl;
    284 	ret_zero = (device->control->state == state ? 0 : 1);
    285 	pthread_mutex_unlock(&device->control->lock);
    286 	return (ret_zero == 1 ? 0 : timeval_lt(device->state_ttl, *now));
    287 }
    288 static void
    289 update_device_ttl(struct device *device, dev_state state)
    290 {
    291 	pthread_mutex_lock(&device->control->lock);
    292 
    293 	if (device->control->state != state)
    294 		device->state_ttl = device->control->state_ttl;
    295 
    296 	pthread_mutex_unlock(&device->control->lock);
    297 }
    298 
    299 dev_state
    300 get_dev_state(struct device *device, struct timeval *now)
    301 {
    302 	if (device->state == DEV_NOT_READY) {
    303 		pthread_mutex_lock(&device->control->lock);
    304 		device->state = device->control->state;
    305 		pthread_mutex_unlock(&device->control->lock);
    306 	}
    307 	if (device->state == DEV_RUNNING && device->state_ttl.tv_sec != 0 &&
    308 	    check_ttl(device, now, DEV_RUNNING)) {
    309 		dev_state x;
    310 		pthread_mutex_lock(&device->control->lock);
    311 		x = device->control->state;
    312 		if (x == DEV_RUNNING || x == DEV_STOPPING) {
    313 			struct set_dev_args da;
    314 			da.now = now;
    315 			da.oldstate = DEV_RUNNING;
    316 			da.newstate = DEV_STOPPING;
    317 			x = (dev_state) set_state(device, (void *)&da);
    318 			if (x == DEV_STOPPING)
    319 				device->state = DEV_STOPPING;
    320 		}
    321 		pthread_mutex_unlock(&device->control->lock);
    322 	} else if (device->state == DEV_STOPPED &&
    323 	    device->state_ttl.tv_sec == -1) {
    324 		update_device_ttl(device, DEV_STOPPING);
    325 	}
    326 
    327 	return (device->state);
    328 }
    329 dev_state
    330 set_dev_state(struct device *device, dev_state oldstate,
    331 	dev_state newstate, struct timeval *now)
    332 {
    333 	struct set_dev_args da;
    334 	da.now = now;
    335 	da.oldstate = oldstate;
    336 	da.newstate = newstate;
    337 	return ((dev_state) do_dev_control(device, set_state, (void *)&da));
    338 }
    339 static void
    340 start_io(off64_t start, struct device *device, struct timeval *now)
    341 {
    342 	struct aio_str *aio_resp;
    343 
    344 	/* NEED TO SET BOTH states to STARTING */
    345 	aio_resp = pop_from_aio_list(&device->stopped_ios);
    346 	if (aio_resp != NULL) {
    347 		if (set_dev_state(device, DEV_STOPPED, DEV_STARTING, now) !=
    348 		    DEV_STARTING)
    349 			abort();
    350 		if (aio_resp->fd != NULL)
    351 			add_to_aio_list(&aio_resp->fd->all_aios, aio_resp);
    352 		(void) aio_resp->handler(aio_resp, start);
    353 	}
    354 	if (device->stopped_ios.head == NULL) {
    355 		assert(device->state == DEV_STARTING);
    356 		(void) set_dev_state(device, DEV_STARTING, DEV_RUNNING, now);
    357 	}
    358 }
    359 
    360 static void
    361 do_restart_stopped_devices(off64_t start, struct device *device,
    362 	struct timeval *now)
    363 {
    364 	while (device != NULL) {
    365 		check_exit_flag();
    366 
    367 		if (device->state == DEV_NOT_READY) {
    368 			(void) get_dev_state(device, now);
    369 		}
    370 		if (device->state == DEV_STOPPED) {
    371 			if (device->state_ttl.tv_sec == -1) {
    372 				update_device_ttl(device, DEV_STOPPING);
    373 			}
    374 			if (device->state_ttl.tv_sec != -1 &&
    375 			    check_ttl(device, now, DEV_STOPPED)) {
    376 				(void) set_dev_state(device, DEV_STOPPED,
    377 				    DEV_STARTING, now);
    378 			}
    379 		}
    380 		if (device->state == DEV_STARTING)
    381 			start_io(start, device, now);
    382 		device = device->next;
    383 	}
    384 }
    385