Home | History | Annotate | Download | only in configd
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 /*
     30  * sqlite is not compatible with _FILE_OFFSET_BITS=64, but we need to
     31  * be able to statvfs(2) possibly large systems.  This define gives us
     32  * access to the transitional interfaces.  See lfcompile64(5) for how
     33  * _LARGEFILE64_SOURCE works.
     34  */
     35 #define	_LARGEFILE64_SOURCE
     36 
     37 #include <assert.h>
     38 #include <door.h>
     39 #include <dirent.h>
     40 #include <errno.h>
     41 #include <fcntl.h>
     42 #include <limits.h>
     43 #include <pthread.h>
     44 #include <stdarg.h>
     45 #include <stdio.h>
     46 #include <stdlib.h>
     47 #include <string.h>
     48 #include <sys/stat.h>
     49 #include <sys/statvfs.h>
     50 #include <unistd.h>
     51 #include <zone.h>
     52 #include <libscf_priv.h>
     53 
     54 #include "configd.h"
     55 #include "repcache_protocol.h"
     56 
     57 #include <sqlite.h>
     58 #include <sqlite-misc.h>
     59 
     60 /*
     61  * This file has two purposes:
     62  *
     63  * 1. It contains the database schema, and the code for setting up our backend
     64  *    databases, including installing said schema.
     65  *
     66  * 2. It provides a simplified interface to the SQL database library, and
     67  *    synchronizes MT access to the database.
     68  */
     69 
/*
 * Running totals for one class of backend activity.  Updated by
 * UPDATE_TOTALS_WR(), which bumps the count and adds the elapsed
 * gethrtime()/gethrvtime() deltas.
 */
typedef struct backend_spent {
	uint64_t bs_count;	/* number of operations accounted */
	hrtime_t bs_time;	/* summed gethrtime() deltas */
	hrtime_t bs_vtime;	/* summed gethrvtime() deltas */
} backend_spent_t;
     75 
/*
 * The two activities we account for on each backend.  A sqlite_backend_t
 * carries two of these (be_totals[]), one for reading and one for writing.
 */
typedef struct backend_totals {
	backend_spent_t	bt_lock;	/* waiting for lock */
	backend_spent_t	bt_exec;	/* time spent executing SQL */
} backend_totals_t;
     80 
/*
 * State for a single backend database.  There is one of these per backend
 * type (see be_info[] and bes[]).
 */
typedef struct sqlite_backend {
	pthread_mutex_t	be_lock;
	pthread_t	be_thread;	/* thread holding lock */
	struct sqlite	*be_db;
	const char	*be_path;	/* path to db */
	int		be_readonly;	/* readonly at start, and still is */
	int		be_writing;	/* held for writing */
	backend_type_t	be_type;	/* type of db */
	hrtime_t	be_lastcheck;	/* time of last read-only check */
	backend_totals_t be_totals[2];	/* one for reading, one for writing */
} sqlite_backend_t;
     92 
/*
 * Handle for a backend transaction.  bt_type is compared against the
 * BACKEND_TYPE_* values (see backend_is_upgraded()).
 */
struct backend_tx {
	sqlite_backend_t	*bt_be;		/* backend being operated on */
	int			bt_readonly;
	int			bt_type;	/* BACKEND_TYPE_* of bt_be */
	int			bt_full;	/* SQLITE_FULL during tx */
};
     99 
    100 #define	UPDATE_TOTALS_WR(sb, writing, field, ts, vts) { \
    101 	backend_spent_t *__bsp = &(sb)->be_totals[!!(writing)].field; \
    102 	__bsp->bs_count++;						\
    103 	__bsp->bs_time += (gethrtime() - ts);				\
    104 	__bsp->bs_vtime += (gethrvtime() - vts);			\
    105 }
    106 
    107 #define	UPDATE_TOTALS(sb, field, ts, vts) \
    108 	UPDATE_TOTALS_WR(sb, (sb)->be_writing, field, ts, vts)
    109 
/*
 * A query under construction: a character buffer and a size.
 * NOTE(review): whether bq_size is the allocated size or the used length
 * is decided by code outside this view -- confirm before relying on it.
 */
struct backend_query {
	char	*bq_buf;
	size_t	bq_size;
};
    114 
/*
 * One table of the schema.  Arrays of these are terminated by an entry
 * whose members are both NULL.
 */
struct backend_tbl_info {
	const char *bti_name;	/* table name */
	const char *bti_cols;	/* comma-separated column definitions */
};
    119 
/*
 * One index of the schema.  Arrays of these are terminated by an entry
 * whose members are all NULL.
 */
struct backend_idx_info {
	const char *bxi_tbl;	/* table being indexed */
	const char *bxi_idx;	/* short name of the index */
	const char *bxi_cols;	/* comma-separated indexed columns */
};
    125 
/*
 * Panic coordination.  The first thread into backend_panic() records itself
 * in backend_panic_thread while holding backend_panic_lock; any later caller
 * sees a non-zero backend_panic_thread and waits forever on
 * backend_panic_cv.
 */
static pthread_mutex_t backend_panic_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t backend_panic_cv = PTHREAD_COND_INITIALIZER;
pthread_t backend_panic_thread = 0;

/* Debugging knobs. */
int backend_do_trace = 0;		/* invoke tracing callback */
int backend_print_trace = 0;		/* tracing callback prints SQL */
int backend_panic_abort = 0;		/* abort when panicking */
    133 
/*
 * Interval between read-only checks while starting up, as an hrtime_t
 * (two times NANOSEC).
 */
#define	BACKEND_READONLY_CHECK_INTERVAL	(2 * (hrtime_t)NANOSEC)
    136 
/*
 * Any incompatible change to the below schema should bump the version number.
 * The schema has been changed to support value ordering, but this change
 * is backwards-compatible - i.e. a previous svc.configd can use a
 * repository database with the new schema perfectly well.  As a result,
 * the schema version has not been updated, allowing downgrade of systems
 * without losing repository data.
 */
#define	BACKEND_SCHEMA_VERSION		5
    146 
/*
 * Tables specific to the BACKEND_TYPE_NORMAL database.  Each entry pairs a
 * table name with the text of its column definitions; the array ends with
 * a { NULL, NULL } sentinel.
 */
static struct backend_tbl_info tbls_normal[] = { /* BACKEND_TYPE_NORMAL */
	/*
	 * service_tbl holds all services.  svc_id is the identifier of the
	 * service.
	 */
	{
		"service_tbl",
		"svc_id          INTEGER PRIMARY KEY,"
		"svc_name        CHAR(256) NOT NULL"
	},

	/*
	 * instance_tbl holds all of the instances.  The parent service id
	 * is instance_svc.
	 */
	{
		"instance_tbl",
		"instance_id     INTEGER PRIMARY KEY,"
		"instance_name   CHAR(256) NOT NULL,"
		"instance_svc    INTEGER NOT NULL"
	},

	/*
	 * snapshot_lnk_tbl links (instance, snapshot name) with snapshots.
	 */
	{
		"snapshot_lnk_tbl",
		"lnk_id          INTEGER PRIMARY KEY,"
		"lnk_inst_id     INTEGER NOT NULL,"
		"lnk_snap_name   CHAR(256) NOT NULL,"
		"lnk_snap_id     INTEGER NOT NULL"
	},

	/*
	 * snaplevel_tbl maps a snapshot id to a set of named, ordered
	 * snaplevels.
	 */
	{
		"snaplevel_tbl",
		"snap_id                 INTEGER NOT NULL,"
		"snap_level_num          INTEGER NOT NULL,"
		"snap_level_id           INTEGER NOT NULL,"
		"snap_level_service_id   INTEGER NOT NULL,"
		"snap_level_service      CHAR(256) NOT NULL,"
		"snap_level_instance_id  INTEGER NULL,"
		"snap_level_instance     CHAR(256) NULL"
	},

	/*
	 * snaplevel_lnk_tbl links snaplevels to property groups.
	 * snaplvl_pg_* is identical to the original property group,
	 * and snaplvl_gen_id overrides the generation number.
	 * The service/instance ids are as in the snaplevel.
	 */
	{
		"snaplevel_lnk_tbl",
		"snaplvl_level_id INTEGER NOT NULL,"
		"snaplvl_pg_id    INTEGER NOT NULL,"
		"snaplvl_pg_name  CHAR(256) NOT NULL,"
		"snaplvl_pg_type  CHAR(256) NOT NULL,"
		"snaplvl_pg_flags INTEGER NOT NULL,"
		"snaplvl_gen_id   INTEGER NOT NULL"
	},

	{ NULL, NULL }
};
    213 
/*
 * Indexes on the BACKEND_TYPE_NORMAL-only tables.  Each entry is
 * { table, index name, indexed columns }; the array ends with an
 * all-NULL sentinel.
 */
static struct backend_idx_info idxs_normal[] = { /* BACKEND_TYPE_NORMAL */
	{ "service_tbl",	"name",	"svc_name" },
	{ "instance_tbl",	"name",	"instance_svc, instance_name" },
	{ "snapshot_lnk_tbl",	"name",	"lnk_inst_id, lnk_snap_name" },
	{ "snapshot_lnk_tbl",	"snapid", "lnk_snap_id" },
	{ "snaplevel_tbl",	"id",	"snap_id" },
	{ "snaplevel_lnk_tbl",	"id",	"snaplvl_pg_id" },
	{ "snaplevel_lnk_tbl",	"level", "snaplvl_level_id" },
	{ NULL, NULL, NULL }
};
    224 
/*
 * The non-persistent backend has no tables of its own; this list holds
 * only the terminating sentinel.
 */
static struct backend_tbl_info tbls_np[] = { /* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL }
};
    228 
/*
 * The non-persistent backend has no indexes of its own; this list holds
 * only the terminating sentinel.
 */
static struct backend_idx_info idxs_np[] = {	/* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL, NULL }
};
    232 
/*
 * Tables present in every backend type.  Same layout as the per-type lists:
 * { table name, column definitions }, terminated by { NULL, NULL }.
 */
static struct backend_tbl_info tbls_common[] = { /* all backend types */
	/*
	 * pg_tbl defines property groups.  They are associated with a single
	 * service or instance.  The pg_gen_id links them with the latest
	 * "edited" version of its properties.
	 */
	{
		"pg_tbl",
		"pg_id           INTEGER PRIMARY KEY,"
		"pg_parent_id    INTEGER NOT NULL,"
		"pg_name         CHAR(256) NOT NULL,"
		"pg_type         CHAR(256) NOT NULL,"
		"pg_flags        INTEGER NOT NULL,"
		"pg_gen_id       INTEGER NOT NULL"
	},

	/*
	 * prop_lnk_tbl links a particular pg_id and gen_id to a set of
	 * (prop_name, prop_type, val_id) trios.
	 */
	{
		"prop_lnk_tbl",
		"lnk_prop_id     INTEGER PRIMARY KEY,"
		"lnk_pg_id       INTEGER NOT NULL,"
		"lnk_gen_id      INTEGER NOT NULL,"
		"lnk_prop_name   CHAR(256) NOT NULL,"
		"lnk_prop_type   CHAR(2) NOT NULL,"
		"lnk_val_id      INTEGER"
	},

	/*
	 * value_tbl maps a value_id to a set of values.  For any given
	 * value_id, value_type is constant.  The table definition here
	 * is repeated in backend_check_upgrade(), and must be kept in sync.
	 */
	{
		"value_tbl",
		"value_id        INTEGER NOT NULL,"
		"value_type      CHAR(1) NOT NULL,"
		"value_value     VARCHAR NOT NULL,"
		"value_order     INTEGER DEFAULT 0"
	},

	/*
	 * id_tbl has one row per id space
	 */
	{
		"id_tbl",
		"id_name         STRING NOT NULL,"
		"id_next         INTEGER NOT NULL"
	},

	/*
	 * schema_version has a single row, which contains
	 * BACKEND_SCHEMA_VERSION at the time of creation.
	 */
	{
		"schema_version",
		"schema_version  INTEGER"
	},
	{ NULL, NULL }
};
    295 
/*
 * Indexes on the tables common to all backend types, as
 * { table, index name, indexed columns } with an all-NULL sentinel.
 *
 * The indexing of value_tbl is repeated in backend_check_upgrade() and
 * must be kept in sync with the indexing specification here.
 */
static struct backend_idx_info idxs_common[] = { /* all backend types */
	{ "pg_tbl",		"parent", "pg_parent_id" },
	{ "pg_tbl",		"name",	"pg_parent_id, pg_name" },
	{ "pg_tbl",		"type",	"pg_parent_id, pg_type" },
	{ "prop_lnk_tbl",	"base",	"lnk_pg_id, lnk_gen_id" },
	{ "prop_lnk_tbl",	"val",	"lnk_val_id" },
	{ "value_tbl",		"id",	"value_id" },
	{ "id_tbl",		"id",	"id_name" },
	{ NULL, NULL, NULL }
};
    310 
/*
 * Argument block for run_single_int_callback().
 */
struct run_single_int_info {
	uint32_t	*rs_out;	/* where the parsed value is stored */
	int		rs_result;	/* set to _SUCCESS once a value is stored */
};
    315 
    316 /*ARGSUSED*/
    317 static int
    318 run_single_int_callback(void *arg, int columns, char **vals, char **names)
    319 {
    320 	struct run_single_int_info *info = arg;
    321 	uint32_t val;
    322 
    323 	char *endptr = vals[0];
    324 
    325 	assert(info->rs_result != REP_PROTOCOL_SUCCESS);
    326 	assert(columns == 1);
    327 
    328 	if (vals[0] == NULL)
    329 		return (BACKEND_CALLBACK_CONTINUE);
    330 
    331 	errno = 0;
    332 	val = strtoul(vals[0], &endptr, 10);
    333 	if ((val == 0 && endptr == vals[0]) || *endptr != 0 || errno != 0)
    334 		backend_panic("malformed integer \"%20s\"", vals[0]);
    335 
    336 	*info->rs_out = val;
    337 	info->rs_result = REP_PROTOCOL_SUCCESS;
    338 	return (BACKEND_CALLBACK_CONTINUE);
    339 }
    340 
/*
 * Query callback which ignores its arguments and unconditionally returns
 * BACKEND_CALLBACK_ABORT, so that a query producing any row at all is
 * cut short on the first one.
 */
/*ARGSUSED*/
int
backend_fail_if_seen(void *arg, int columns, char **vals, char **names)
{
	return (BACKEND_CALLBACK_ABORT);
}
    347 
    348 /*
    349  * check to see if we can successfully start a transaction;  if not, the
    350  * filesystem is mounted read-only.
    351  */
    352 static int
    353 backend_is_readonly(struct sqlite *db, const char *path)
    354 {
    355 	int r;
    356 	statvfs64_t stat;
    357 
    358 	if (statvfs64(path, &stat) == 0 && (stat.f_flag & ST_RDONLY))
    359 		return (SQLITE_READONLY);
    360 
    361 	r = sqlite_exec(db,
    362 	    "BEGIN TRANSACTION; "
    363 	    "UPDATE schema_version SET schema_version = schema_version; ",
    364 	    NULL, NULL, NULL);
    365 	(void) sqlite_exec(db, "ROLLBACK TRANSACTION", NULL, NULL, NULL);
    366 	return (r);
    367 }
    368 
    369 static void
    370 backend_trace_sql(void *arg, const char *sql)
    371 {
    372 	sqlite_backend_t *be = arg;
    373 
    374 	if (backend_print_trace) {
    375 		(void) fprintf(stderr, "%d: %s\n", be->be_type, sql);
    376 	}
    377 }
    378 
/*
 * Per-type backend state, indexed by backend type.  bes[i] may be NULL
 * (backend_panic() checks for that before touching an entry).
 * NOTE(review): presumably bes[i] points at be_info[i] once that backend
 * has been set up -- the assignment is outside this view; confirm.
 */
static sqlite_backend_t be_info[BACKEND_TYPE_TOTAL];
static sqlite_backend_t *bes[BACKEND_TYPE_TOTAL];
    381 
/*
 * Whether the normal repository is considered upgraded; starts out B_TRUE
 * for a native build and B_FALSE otherwise.
 *
 * For a native build, repositories are created from scratch, so upgrade
 * is not an issue.  This variable is implicitly protected by
 * bes[BACKEND_TYPE_NORMAL]->be_lock.
 */
#ifdef NATIVE_BUILD
static boolean_t be_normal_upgraded = B_TRUE;
#else
static boolean_t be_normal_upgraded = B_FALSE;
#endif	/* NATIVE_BUILD */
    392 
    393 /*
    394  * Has backend been upgraded? In nonpersistent case, answer is always
    395  * yes.
    396  */
    397 boolean_t
    398 backend_is_upgraded(backend_tx_t *bt)
    399 {
    400 	if (bt->bt_type == BACKEND_TYPE_NONPERSIST)
    401 		return (B_TRUE);
    402 	return (be_normal_upgraded);
    403 }
    404 
    405 #define	BACKEND_PANIC_TIMEOUT	(50 * MILLISEC)
    406 /*
    407  * backend_panic() -- some kind of database problem or corruption has been hit.
    408  * We attempt to quiesce the other database users -- all of the backend sql
    409  * entry points will call backend_panic(NULL) if a panic is in progress, as
    410  * will any attempt to start a transaction.
    411  *
    412  * We give threads holding a backend lock 50ms (BACKEND_PANIC_TIMEOUT) to
    413  * either drop the lock or call backend_panic().  If they don't respond in
    414  * time, we'll just exit anyway.
    415  */
    416 void
    417 backend_panic(const char *format, ...)
    418 {
    419 	int i;
    420 	va_list args;
    421 	int failed = 0;
    422 
    423 	(void) pthread_mutex_lock(&backend_panic_lock);
    424 	if (backend_panic_thread != 0) {
    425 		(void) pthread_mutex_unlock(&backend_panic_lock);
    426 		/*
    427 		 * first, drop any backend locks we're holding, then
    428 		 * sleep forever on the panic_cv.
    429 		 */
    430 		for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
    431 			if (bes[i] != NULL &&
    432 			    bes[i]->be_thread == pthread_self())
    433 				(void) pthread_mutex_unlock(&bes[i]->be_lock);
    434 		}
    435 		(void) pthread_mutex_lock(&backend_panic_lock);
    436 		for (;;)
    437 			(void) pthread_cond_wait(&backend_panic_cv,
    438 			    &backend_panic_lock);
    439 	}
    440 	backend_panic_thread = pthread_self();
    441 	(void) pthread_mutex_unlock(&backend_panic_lock);
    442 
    443 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
    444 		if (bes[i] != NULL && bes[i]->be_thread == pthread_self())
    445 			(void) pthread_mutex_unlock(&bes[i]->be_lock);
    446 	}
    447 
    448 	va_start(args, format);
    449 	configd_vcritical(format, args);
    450 	va_end(args);
    451 
    452 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
    453 		timespec_t rel;
    454 
    455 		rel.tv_sec = 0;
    456 		rel.tv_nsec = BACKEND_PANIC_TIMEOUT;
    457 
    458 		if (bes[i] != NULL && bes[i]->be_thread != pthread_self()) {
    459 			if (pthread_mutex_reltimedlock_np(&bes[i]->be_lock,
    460 			    &rel) != 0)
    461 				failed++;
    462 		}
    463 	}
    464 	if (failed) {
    465 		configd_critical("unable to quiesce database\n");
    466 	}
    467 
    468 	if (backend_panic_abort)
    469 		abort();
    470 
    471 	exit(CONFIGD_EXIT_DATABASE_BAD);
    472 }
    473 
    474 /*
    475  * Returns
    476  *   _SUCCESS
    477  *   _DONE - callback aborted query
    478  *   _NO_RESOURCES - out of memory (_FULL & _TOOBIG?)
    479  */
    480 static int
    481 backend_error(sqlite_backend_t *be, int error, char *errmsg)
    482 {
    483 	if (error == SQLITE_OK)
    484 		return (REP_PROTOCOL_SUCCESS);
    485 
    486 	switch (error) {
    487 	case SQLITE_ABORT:
    488 		free(errmsg);
    489 		return (REP_PROTOCOL_DONE);
    490 
    491 	case SQLITE_NOMEM:
    492 	case SQLITE_FULL:
    493 	case SQLITE_TOOBIG:
    494 		free(errmsg);
    495 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
    496 
    497 	default:
    498 		backend_panic("%s: db error: %s", be->be_path, errmsg);
    499 		/*NOTREACHED*/
    500 	}
    501 }
    502 
    503 static void
    504 backend_backup_cleanup(const char **out_arg, ssize_t out_sz)
    505 {
    506 	char **out = (char **)out_arg;
    507 
    508 	while (out_sz-- > 0)
    509 		free(*out++);
    510 	free(out_arg);
    511 }
    512 
/*
 * Builds an inverse-time-sorted (newest first) array of the names of the
 * existing backup files.  The path is a single buffer, and the pointers
 * look like:
 *
 *	/this/is/a/full/path/to/repository-name-YYYYMMDD_HHMMSS
 *	^pathname		^	       ^(pathname+pathlen)
 *				basename
 *
 * dirname will either be pathname, or ".".
 *
 * pathname is modified temporarily (NUL bytes are written at
 * pathname[pathlen] and at basename[0]) but is put back before returning.
 *
 * On success *out_arg is a malloc'd array of strdup'd directory entry
 * names (not full paths), or NULL if nothing matched; the caller releases
 * it with backend_backup_cleanup().  On failure *out_arg is set to NULL.
 *
 * Returns the number of elements in the array, 0 if there are no previous
 * backups, or -1 on error.
 */
static ssize_t
backend_backup_get_prev(char *pathname, size_t pathlen, const char ***out_arg)
{
	char b_start, b_end;		/* bytes saved while path is munged */
	DIR *dir;
	char **out = NULL;
	char *name, *p;
	char *dirname, *basename;
	char *pathend;
	struct dirent *ent;

	size_t count = 0;
	size_t baselen;

	/*
	 * year, month, day, hour, min, sec, plus an '_'.
	 */
	const size_t ndigits = 4 + 5*2 + 1;
	const size_t baroffset = 4 + 2*2;	/* index of the '_' */

	size_t idx;

	/* cut the timestamp suffix off so only the base path remains */
	pathend = pathname + pathlen;
	b_end = *pathend;
	*pathend = '\0';

	basename = strrchr(pathname, '/');

	if (basename != NULL) {
		assert(pathend > pathname && basename < pathend);
		basename++;
		dirname = pathname;
	} else {
		basename = pathname;
		dirname = ".";
	}

	baselen = strlen(basename);

	/*
	 * munge the string temporarily for the opendir(), then restore it.
	 */
	b_start = basename[0];

	basename[0] = '\0';
	dir = opendir(dirname);
	basename[0] = b_start;		/* restore path */

	if (dir == NULL)
		goto fail;


	while ((ent = readdir(dir)) != NULL) {
		/*
		 * Must match:
		 *	basename-YYYYMMDD_HHMMSS
		 * or we ignore it.
		 */
		if (strncmp(ent->d_name, basename, baselen) != 0)
			continue;

		name = ent->d_name;
		if (name[baselen] != '-')
			continue;

		p = name + baselen + 1;

		/* stops early on the first character that does not fit */
		for (idx = 0; idx < ndigits; idx++) {
			char c = p[idx];
			if (idx == baroffset && c != '_')
				break;
			if (idx != baroffset && (c < '0' || c > '9'))
				break;
		}
		if (idx != ndigits || p[idx] != '\0')
			continue;

		/*
		 * We have a match.  insertion-sort it into our list.
		 */
		name = strdup(name);
		if (name == NULL)
			goto fail_closedir;
		p = strrchr(name, '-');

		/*
		 * Walk the list carrying `name'.  Whenever the carried entry
		 * sorts after the one in the current slot the two are
		 * swapped, so whatever is still being carried at the end
		 * belongs in the new last slot.
		 */
		for (idx = 0; idx < count; idx++) {
			char *tmp = out[idx];
			char *tp = strrchr(tmp, '-');

			int cmp = strcmp(p, tp);
			if (cmp == 0)
				cmp = strcmp(name, tmp);

			if (cmp == 0) {
				/* exact duplicate; drop it */
				free(name);
				name = NULL;
				break;
			} else if (cmp > 0) {
				out[idx] = name;
				name = tmp;
				p = tp;
			}
		}

		if (idx == count) {
			/* grow by one and append the carried entry */
			char **new_out = realloc(out,
			    (count + 1) * sizeof (*out));

			if (new_out == NULL) {
				free(name);
				goto fail_closedir;
			}

			out = new_out;
			out[count++] = name;
		} else {
			assert(name == NULL);
		}
	}
	(void) closedir(dir);

	/* basename + baselen is pathend; put the saved byte back */
	basename[baselen] = b_end;

	*out_arg = (const char **)out;
	return (count);

fail_closedir:
	(void) closedir(dir);
fail:
	basename[0] = b_start;
	*pathend = b_end;

	backend_backup_cleanup((const char **)out, count);

	*out_arg = NULL;
	return (-1);
}
    663 
    664 /*
    665  * Copies the repository path into out, a buffer of out_len bytes,
    666  * removes the ".db" (or whatever) extension, and, if name is non-NULL,
    667  * appends "-name" to it.  If name is non-NULL, it can fail with:
    668  *
    669  *	_TRUNCATED	will not fit in buffer.
    670  *	_BAD_REQUEST	name is not a valid identifier
    671  */
    672 static rep_protocol_responseid_t
    673 backend_backup_base(sqlite_backend_t *be, const char *name,
    674     char *out, size_t out_len)
    675 {
    676 	char *p, *q;
    677 	size_t len;
    678 
    679 	/*
    680 	 * for paths of the form /path/to/foo.db, we truncate at the final
    681 	 * '.'.
    682 	 */
    683 	(void) strlcpy(out, be->be_path, out_len);
    684 
    685 	p = strrchr(out, '/');
    686 	q = strrchr(out, '.');
    687 
    688 	if (p != NULL && q != NULL && q > p)
    689 		*q = 0;
    690 
    691 	if (name != NULL) {
    692 		len = strlen(out);
    693 		assert(len < out_len);
    694 
    695 		out += len;
    696 		out_len -= len;
    697 
    698 		len = strlen(name);
    699 
    700 		/*
    701 		 * verify that the name tag is entirely alphabetic,
    702 		 * non-empty, and not too long.
    703 		 */
    704 		if (len == 0 || len >= REP_PROTOCOL_NAME_LEN ||
    705 		    uu_check_name(name, UU_NAME_DOMAIN) < 0)
    706 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
    707 
    708 		if (snprintf(out, out_len, "-%s", name) >= out_len)
    709 			return (REP_PROTOCOL_FAIL_TRUNCATED);
    710 	}
    711 
    712 	return (REP_PROTOCOL_SUCCESS);
    713 }
    714 
    715 /*
    716  * See if a backup is needed.  We do a backup unless both files are
    717  * byte-for-byte identical.
    718  */
    719 static int
    720 backend_check_backup_needed(const char *rep_name, const char *backup_name)
    721 {
    722 	int repfd = open(rep_name, O_RDONLY);
    723 	int fd = open(backup_name, O_RDONLY);
    724 	struct stat s_rep, s_backup;
    725 	int c1, c2;
    726 
    727 	FILE *f_rep = NULL;
    728 	FILE *f_backup = NULL;
    729 
    730 	if (repfd < 0 || fd < 0)
    731 		goto fail;
    732 
    733 	if (fstat(repfd, &s_rep) < 0 || fstat(fd, &s_backup) < 0)
    734 		goto fail;
    735 
    736 	/*
    737 	 * if they are the same file, we need to do a backup to break the
    738 	 * hard link or symlink involved.
    739 	 */
    740 	if (s_rep.st_ino == s_backup.st_ino && s_rep.st_dev == s_backup.st_dev)
    741 		goto fail;
    742 
    743 	if (s_rep.st_size != s_backup.st_size)
    744 		goto fail;
    745 
    746 	if ((f_rep = fdopen(repfd, "r")) == NULL ||
    747 	    (f_backup = fdopen(fd, "r")) == NULL)
    748 		goto fail;
    749 
    750 	do {
    751 		c1 = getc(f_rep);
    752 		c2 = getc(f_backup);
    753 		if (c1 != c2)
    754 			goto fail;
    755 	} while (c1 != EOF);
    756 
    757 	if (!ferror(f_rep) && !ferror(f_backup)) {
    758 		(void) fclose(f_rep);
    759 		(void) fclose(f_backup);
    760 		(void) close(repfd);
    761 		(void) close(fd);
    762 		return (0);
    763 	}
    764 
    765 fail:
    766 	if (f_rep != NULL)
    767 		(void) fclose(f_rep);
    768 	if (f_backup != NULL)
    769 		(void) fclose(f_backup);
    770 	if (repfd >= 0)
    771 		(void) close(repfd);
    772 	if (fd >= 0)
    773 		(void) close(fd);
    774 	return (1);
    775 }
    776 
    777 /*
    778  * This interface is called to perform the actual copy
    779  *
    780  * Return:
    781  *	_FAIL_UNKNOWN		read/write fails
    782  *	_FAIL_NO_RESOURCES	out of memory
    783  *	_SUCCESS		copy succeeds
    784  */
    785 static rep_protocol_responseid_t
    786 backend_do_copy(const char *src, int srcfd, const char *dst,
    787     int dstfd, size_t *sz)
    788 {
    789 	char *buf;
    790 	off_t nrd, nwr, n, r_off = 0, w_off = 0;
    791 
    792 	if ((buf = malloc(8192)) == NULL)
    793 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
    794 
    795 	while ((nrd = read(srcfd, buf, 8192)) != 0) {
    796 		if (nrd < 0) {
    797 			if (errno == EINTR)
    798 				continue;
    799 
    800 			configd_critical(
    801 			    "Backend copy failed: fails to read from %s "
    802 			    "at offset %d: %s\n", src, r_off, strerror(errno));
    803 			free(buf);
    804 			return (REP_PROTOCOL_FAIL_UNKNOWN);
    805 		}
    806 
    807 		r_off += nrd;
    808 
    809 		nwr = 0;
    810 		do {
    811 			if ((n = write(dstfd, &buf[nwr], nrd - nwr)) < 0) {
    812 				if (errno == EINTR)
    813 					continue;
    814 
    815 				configd_critical(
    816 				    "Backend copy failed: fails to write to %s "
    817 				    "at offset %d: %s\n", dst, w_off,
    818 				    strerror(errno));
    819 				free(buf);
    820 				return (REP_PROTOCOL_FAIL_UNKNOWN);
    821 			}
    822 
    823 			nwr += n;
    824 			w_off += n;
    825 
    826 		} while (nwr < nrd);
    827 	}
    828 
    829 	if (sz)
    830 		*sz = w_off;
    831 
    832 	free(buf);
    833 	return (REP_PROTOCOL_SUCCESS);
    834 }
    835 
    836 /*
    837  * Can return:
    838  *	_BAD_REQUEST		name is not valid
    839  *	_TRUNCATED		name is too long for current repository path
    840  *	_UNKNOWN		failed for unknown reason (details written to
    841  *				console)
    842  *	_BACKEND_READONLY	backend is not writable
    843  *	_NO_RESOURCES		out of memory
    844  *	_SUCCESS		Backup completed successfully.
    845  */
    846 static rep_protocol_responseid_t
    847 backend_create_backup_locked(sqlite_backend_t *be, const char *name)
    848 {
    849 	const char **old_list;
    850 	ssize_t old_sz;
    851 	ssize_t old_max = max_repository_backups;
    852 	ssize_t cur;
    853 	char *finalname;
    854 	char *finalpath;
    855 	char *tmppath;
    856 	int infd, outfd;
    857 	size_t len;
    858 	time_t now;
    859 	struct tm now_tm;
    860 	rep_protocol_responseid_t result;
    861 
    862 	if ((finalpath = malloc(PATH_MAX)) == NULL)
    863 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
    864 
    865 	if ((tmppath = malloc(PATH_MAX)) == NULL) {
    866 		free(finalpath);
    867 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
    868 	}
    869 
    870 	if (be->be_readonly) {
    871 		result = REP_PROTOCOL_FAIL_BACKEND_READONLY;
    872 		goto out;
    873 	}
    874 
    875 	result = backend_backup_base(be, name, finalpath, PATH_MAX);
    876 	if (result != REP_PROTOCOL_SUCCESS)
    877 		goto out;
    878 
    879 	if (!backend_check_backup_needed(be->be_path, finalpath)) {
    880 		result = REP_PROTOCOL_SUCCESS;
    881 		goto out;
    882 	}
    883 
    884 	/*
    885 	 * remember the original length, and the basename location
    886 	 */
    887 	len = strlen(finalpath);
    888 	finalname = strrchr(finalpath, '/');
    889 	if (finalname != NULL)
    890 		finalname++;
    891 	else
    892 		finalname = finalpath;
    893 
    894 	(void) strlcpy(tmppath, finalpath, PATH_MAX);
    895 	if (strlcat(tmppath, "-tmpXXXXXX", PATH_MAX) >= PATH_MAX) {
    896 		result = REP_PROTOCOL_FAIL_TRUNCATED;
    897 		goto out;
    898 	}
    899 
    900 	now = time(NULL);
    901 	if (localtime_r(&now, &now_tm) == NULL) {
    902 		configd_critical(
    903 		    "\"%s\" backup failed: localtime(3C) failed: %s\n", name,
    904 		    be->be_path, strerror(errno));
    905 		result = REP_PROTOCOL_FAIL_UNKNOWN;
    906 		goto out;
    907 	}
    908 
    909 	if (strftime(finalpath + len, PATH_MAX - len,
    910 	    "-%Y""%m""%d""_""%H""%M""%S", &now_tm) >= PATH_MAX - len) {
    911 		result = REP_PROTOCOL_FAIL_TRUNCATED;
    912 		goto out;
    913 	}
    914 
    915 	infd = open(be->be_path, O_RDONLY);
    916 	if (infd < 0) {
    917 		configd_critical("\"%s\" backup failed: opening %s: %s\n", name,
    918 		    be->be_path, strerror(errno));
    919 		result = REP_PROTOCOL_FAIL_UNKNOWN;
    920 		goto out;
    921 	}
    922 
    923 	outfd = mkstemp(tmppath);
    924 	if (outfd < 0) {
    925 		configd_critical("\"%s\" backup failed: mkstemp(%s): %s\n",
    926 		    name, tmppath, strerror(errno));
    927 		(void) close(infd);
    928 		result = REP_PROTOCOL_FAIL_UNKNOWN;
    929 		goto out;
    930 	}
    931 
    932 	if ((result = backend_do_copy((const char *)be->be_path, infd,
    933 	    (const char *)tmppath, outfd, NULL)) != REP_PROTOCOL_SUCCESS)
    934 		goto fail;
    935 
    936 	/*
    937 	 * grab the old list before doing our re-name.
    938 	 */
    939 	if (old_max > 0)
    940 		old_sz = backend_backup_get_prev(finalpath, len, &old_list);
    941 
    942 	if (rename(tmppath, finalpath) < 0) {
    943 		configd_critical(
    944 		    "\"%s\" backup failed: rename(%s, %s): %s\n",
    945 		    name, tmppath, finalpath, strerror(errno));
    946 		result = REP_PROTOCOL_FAIL_UNKNOWN;
    947 		goto fail;
    948 	}
    949 
    950 	tmppath[len] = 0;	/* strip -XXXXXX, for reference symlink */
    951 
    952 	(void) unlink(tmppath);
    953 	if (symlink(finalname, tmppath) < 0) {
    954 		configd_critical(
    955 		    "\"%s\" backup completed, but updating "
    956 		    "\"%s\" symlink to \"%s\" failed: %s\n",
    957 		    name, tmppath, finalname, strerror(errno));
    958 	}
    959 
    960 	if (old_max > 0 && old_sz > 0) {
    961 		/* unlink all but the first (old_max - 1) files */
    962 		for (cur = old_max - 1; cur < old_sz; cur++) {
    963 			(void) strlcpy(finalname, old_list[cur],
    964 			    PATH_MAX - (finalname - finalpath));
    965 			if (unlink(finalpath) < 0)
    966 				configd_critical(
    967 				    "\"%s\" backup completed, but removing old "
    968 				    "file \"%s\" failed: %s\n",
    969 				    name, finalpath, strerror(errno));
    970 		}
    971 
    972 		backend_backup_cleanup(old_list, old_sz);
    973 	}
    974 
    975 	result = REP_PROTOCOL_SUCCESS;
    976 
    977 fail:
    978 	(void) close(infd);
    979 	(void) close(outfd);
    980 	if (result != REP_PROTOCOL_SUCCESS)
    981 		(void) unlink(tmppath);
    982 
    983 out:
    984 	free(finalpath);
    985 	free(tmppath);
    986 
    987 	return (result);
    988 }
    989 
    990 /*
    991  * Check if value_tbl has been upgraded in the main database,  and
    992  * if not (if the value_order column is not present),  and do_upgrade is true,
    993  * upgrade value_tbl in repository to contain the additional value_order
    994  * column. The version of sqlite used means ALTER TABLE is not
    995  * available, so we cannot simply use "ALTER TABLE value_tbl ADD COLUMN".
    996  * Rather we need to create a temporary table with the additional column,
    997  * import the value_tbl, drop the original value_tbl, recreate the value_tbl
    998  * with the additional column, import the values from value_tbl_tmp,
    999  * reindex and finally drop value_tbl_tmp.  During boot, we wish to check
   1000  * if the repository has been upgraded before it is writable,  so that
   1001  * property value retrieval can use the appropriate form of the SELECT
   1002  * statement that retrieves property values.  As a result, we need to check
   1003  * if the repository has been upgraded prior to the point when we can
   1004  * actually carry out the update.
   1005  */
   1006 void
   1007 backend_check_upgrade(sqlite_backend_t *be, boolean_t do_upgrade)
   1008 {
   1009 	char *errp;
   1010 	int r;
   1011 
   1012 	if (be_normal_upgraded)
   1013 		return;
   1014 	/*
   1015 	 * Test if upgrade is needed. If value_order column does not exist,
   1016 	 * we need to upgrade the schema.
   1017 	 */
   1018 	r = sqlite_exec(be->be_db, "SELECT value_order FROM value_tbl LIMIT 1;",
   1019 	    NULL, NULL, NULL);
   1020 	if (r == SQLITE_ERROR && do_upgrade) {
   1021 		/* No value_order column - needs upgrade */
   1022 		configd_info("Upgrading SMF repository format...");
   1023 		r = sqlite_exec(be->be_db,
   1024 		    "BEGIN TRANSACTION; "
   1025 		    "CREATE TABLE value_tbl_tmp ( "
   1026 		    "value_id   INTEGER NOT NULL, "
   1027 		    "value_type CHAR(1) NOT NULL, "
   1028 		    "value_value VARCHAR NOT NULL, "
   1029 		    "value_order INTEGER DEFAULT 0); "
   1030 		    "INSERT INTO value_tbl_tmp "
   1031 		    "(value_id, value_type, value_value) "
   1032 		    "SELECT value_id, value_type, value_value FROM value_tbl; "
   1033 		    "DROP TABLE value_tbl; "
   1034 		    "CREATE TABLE value_tbl( "
   1035 		    "value_id   INTEGER NOT NULL, "
   1036 		    "value_type CHAR(1) NOT NULL, "
   1037 		    "value_value VARCHAR NOT NULL, "
   1038 		    "value_order INTEGER DEFAULT 0); "
   1039 		    "INSERT INTO value_tbl SELECT * FROM value_tbl_tmp; "
   1040 		    "CREATE INDEX value_tbl_id ON value_tbl (value_id); "
   1041 		    "DROP TABLE value_tbl_tmp; "
   1042 		    "COMMIT TRANSACTION; "
   1043 		    "VACUUM; ",
   1044 		    NULL, NULL, &errp);
   1045 		if (r == SQLITE_OK) {
   1046 			configd_info("SMF repository upgrade is complete.");
   1047 		} else {
   1048 			backend_panic("%s: repository upgrade failed: %s",
   1049 			    be->be_path, errp);
   1050 			/* NOTREACHED */
   1051 		}
   1052 	}
   1053 	if (r == SQLITE_OK)
   1054 		be_normal_upgraded = B_TRUE;
   1055 	else
   1056 		be_normal_upgraded = B_FALSE;
   1057 }
   1058 
   1059 static int
   1060 backend_check_readonly(sqlite_backend_t *be, int writing, hrtime_t t)
   1061 {
   1062 	char *errp;
   1063 	struct sqlite *new;
   1064 	int r;
   1065 
   1066 	assert(be->be_readonly);
   1067 	assert(be == bes[BACKEND_TYPE_NORMAL]);
   1068 
   1069 	/*
   1070 	 * If we don't *need* to be writable, only check every once in a
   1071 	 * while.
   1072 	 */
   1073 	if (!writing) {
   1074 		if ((uint64_t)(t - be->be_lastcheck) <
   1075 		    BACKEND_READONLY_CHECK_INTERVAL)
   1076 			return (REP_PROTOCOL_SUCCESS);
   1077 		be->be_lastcheck = t;
   1078 	}
   1079 
   1080 	new = sqlite_open(be->be_path, 0600, &errp);
   1081 	if (new == NULL) {
   1082 		backend_panic("reopening %s: %s\n", be->be_path, errp);
   1083 		/*NOTREACHED*/
   1084 	}
   1085 	r = backend_is_readonly(new, be->be_path);
   1086 
   1087 	if (r != SQLITE_OK) {
   1088 		sqlite_close(new);
   1089 		if (writing)
   1090 			return (REP_PROTOCOL_FAIL_BACKEND_READONLY);
   1091 		return (REP_PROTOCOL_SUCCESS);
   1092 	}
   1093 
   1094 	/*
   1095 	 * We can write!  Swap the db handles, mark ourself writable,
   1096 	 * upgrade if necessary,  and make a backup.
   1097 	 */
   1098 	sqlite_close(be->be_db);
   1099 	be->be_db = new;
   1100 	be->be_readonly = 0;
   1101 
   1102 	if (be->be_type == BACKEND_TYPE_NORMAL)
   1103 		backend_check_upgrade(be, B_TRUE);
   1104 
   1105 	if (backend_create_backup_locked(be, REPOSITORY_BOOT_BACKUP) !=
   1106 	    REP_PROTOCOL_SUCCESS) {
   1107 		configd_critical(
   1108 		    "unable to create \"%s\" backup of \"%s\"\n",
   1109 		    REPOSITORY_BOOT_BACKUP, be->be_path);
   1110 	}
   1111 
   1112 	return (REP_PROTOCOL_SUCCESS);
   1113 }
   1114 
   1115 /*
   1116  * If t is not BACKEND_TYPE_NORMAL, can fail with
   1117  *   _BACKEND_ACCESS - backend does not exist
   1118  *
   1119  * If writing is nonzero, can also fail with
   1120  *   _BACKEND_READONLY - backend is read-only
   1121  */
   1122 static int
   1123 backend_lock(backend_type_t t, int writing, sqlite_backend_t **bep)
   1124 {
   1125 	sqlite_backend_t *be = NULL;
   1126 	hrtime_t ts, vts;
   1127 
   1128 	*bep = NULL;
   1129 
   1130 	assert(t == BACKEND_TYPE_NORMAL ||
   1131 	    t == BACKEND_TYPE_NONPERSIST);
   1132 
   1133 	be = bes[t];
   1134 	if (t == BACKEND_TYPE_NORMAL)
   1135 		assert(be != NULL);		/* should always be there */
   1136 
   1137 	if (be == NULL)
   1138 		return (REP_PROTOCOL_FAIL_BACKEND_ACCESS);
   1139 
   1140 	if (backend_panic_thread != 0)
   1141 		backend_panic(NULL);		/* don't proceed */
   1142 
   1143 	ts = gethrtime();
   1144 	vts = gethrvtime();
   1145 	(void) pthread_mutex_lock(&be->be_lock);
   1146 	UPDATE_TOTALS_WR(be, writing, bt_lock, ts, vts);
   1147 
   1148 	if (backend_panic_thread != 0) {
   1149 		(void) pthread_mutex_unlock(&be->be_lock);
   1150 		backend_panic(NULL);		/* don't proceed */
   1151 	}
   1152 	be->be_thread = pthread_self();
   1153 
   1154 	if (be->be_readonly) {
   1155 		int r;
   1156 		assert(t == BACKEND_TYPE_NORMAL);
   1157 
   1158 		r = backend_check_readonly(be, writing, ts);
   1159 		if (r != REP_PROTOCOL_SUCCESS) {
   1160 			be->be_thread = 0;
   1161 			(void) pthread_mutex_unlock(&be->be_lock);
   1162 			return (r);
   1163 		}
   1164 	}
   1165 
   1166 	if (backend_do_trace)
   1167 		(void) sqlite_trace(be->be_db, backend_trace_sql, be);
   1168 	else
   1169 		(void) sqlite_trace(be->be_db, NULL, NULL);
   1170 
   1171 	be->be_writing = writing;
   1172 	*bep = be;
   1173 	return (REP_PROTOCOL_SUCCESS);
   1174 }
   1175 
   1176 static void
   1177 backend_unlock(sqlite_backend_t *be)
   1178 {
   1179 	be->be_writing = 0;
   1180 	be->be_thread = 0;
   1181 	(void) pthread_mutex_unlock(&be->be_lock);
   1182 }
   1183 
   1184 static void
   1185 backend_destroy(sqlite_backend_t *be)
   1186 {
   1187 	if (be->be_db != NULL) {
   1188 		sqlite_close(be->be_db);
   1189 		be->be_db = NULL;
   1190 	}
   1191 	be->be_thread = 0;
   1192 	(void) pthread_mutex_unlock(&be->be_lock);
   1193 	(void) pthread_mutex_destroy(&be->be_lock);
   1194 }
   1195 
   1196 static void
   1197 backend_create_finish(backend_type_t backend_id, sqlite_backend_t *be)
   1198 {
   1199 	assert(MUTEX_HELD(&be->be_lock));
   1200 	assert(be == &be_info[backend_id]);
   1201 
   1202 	bes[backend_id] = be;
   1203 	(void) pthread_mutex_unlock(&be->be_lock);
   1204 }
   1205 
   1206 static int
   1207 backend_fd_write(int fd, const char *mess)
   1208 {
   1209 	int len = strlen(mess);
   1210 	int written;
   1211 
   1212 	while (len > 0) {
   1213 		if ((written = write(fd, mess, len)) < 0)
   1214 			return (-1);
   1215 		mess += written;
   1216 		len -= written;
   1217 	}
   1218 	return (0);
   1219 }
   1220 
   1221 /*
   1222  * Can return:
   1223  *	_BAD_REQUEST		name is not valid
   1224  *	_TRUNCATED		name is too long for current repository path
   1225  *	_UNKNOWN		failed for unknown reason (details written to
   1226  *				console)
   1227  *	_BACKEND_READONLY	backend is not writable
   1228  *	_NO_RESOURCES		out of memory
   1229  *	_SUCCESS		Backup completed successfully.
   1230  */
   1231 rep_protocol_responseid_t
   1232 backend_create_backup(const char *name)
   1233 {
   1234 	rep_protocol_responseid_t result;
   1235 	sqlite_backend_t *be;
   1236 
   1237 	result = backend_lock(BACKEND_TYPE_NORMAL, 0, &be);
   1238 	assert(result == REP_PROTOCOL_SUCCESS);
   1239 
   1240 	result = backend_create_backup_locked(be, name);
   1241 	backend_unlock(be);
   1242 
   1243 	return (result);
   1244 }
   1245 
   1246 /*
   1247  * Copy the repository.  If the sw_back flag is not set, we are
   1248  * copying the repository from the default location under /etc/svc to
   1249  * the tmpfs /etc/svc/volatile location.  If the flag is set, we are
   1250  * copying back to the /etc/svc location from the volatile location
   1251  * after manifest-import is completed.
   1252  *
   1253  * Can return:
   1254  *
   1255  *	REP_PROTOCOL_SUCCESS		successful copy and rename
   1256  *	REP_PROTOCOL_FAIL_UNKNOWN	file operation error
   1257  *	REP_PROTOCOL_FAIL_NO_RESOURCES	out of memory
   1258  */
   1259 static rep_protocol_responseid_t
   1260 backend_switch_copy(const char *src, const char *dst, int sw_back)
   1261 {
   1262 	int srcfd, dstfd;
   1263 	char *tmppath = malloc(PATH_MAX);
   1264 	rep_protocol_responseid_t res = REP_PROTOCOL_SUCCESS;
   1265 	struct stat s_buf;
   1266 	size_t cpsz, sz;
   1267 
   1268 	if (tmppath == NULL) {
   1269 		res = REP_PROTOCOL_FAIL_NO_RESOURCES;
   1270 		goto out;
   1271 	}
   1272 
   1273 	/*
   1274 	 * Create and open the related db files
   1275 	 */
   1276 	(void) strlcpy(tmppath, dst, PATH_MAX);
   1277 	sz = strlcat(tmppath, "-XXXXXX", PATH_MAX);
   1278 	assert(sz < PATH_MAX);
   1279 	if (sz >= PATH_MAX) {
   1280 		configd_critical(
   1281 		    "Backend copy failed: strlcat %s: overflow\n", tmppath);
   1282 		abort();
   1283 	}
   1284 
   1285 	if ((dstfd = mkstemp(tmppath)) < 0) {
   1286 		configd_critical("Backend copy failed: mkstemp %s: %s\n",
   1287 		    tmppath, strerror(errno));
   1288 		res = REP_PROTOCOL_FAIL_UNKNOWN;
   1289 		goto out;
   1290 	}
   1291 
   1292 	if ((srcfd = open(src, O_RDONLY)) < 0) {
   1293 		configd_critical("Backend copy failed: opening %s: %s\n",
   1294 		    src, strerror(errno));
   1295 		res = REP_PROTOCOL_FAIL_UNKNOWN;
   1296 		goto errexit;
   1297 	}
   1298 
   1299 	/*
   1300 	 * fstat the backend before copy for sanity check.
   1301 	 */
   1302 	if (fstat(srcfd, &s_buf) < 0) {
   1303 		configd_critical("Backend copy failed: fstat %s: %s\n",
   1304 		    src, strerror(errno));
   1305 		res = REP_PROTOCOL_FAIL_UNKNOWN;
   1306 		goto errexit;
   1307 	}
   1308 
   1309 	if ((res = backend_do_copy(src, srcfd, dst, dstfd, &cpsz)) !=
   1310 	    REP_PROTOCOL_SUCCESS)
   1311 		goto errexit;
   1312 
   1313 	if (cpsz != s_buf.st_size) {
   1314 		configd_critical("Backend copy failed: incomplete copy\n");
   1315 		res = REP_PROTOCOL_FAIL_UNKNOWN;
   1316 		goto errexit;
   1317 	}
   1318 
   1319 	/*
   1320 	 * Rename tmppath to dst
   1321 	 */
   1322 	if (rename(tmppath, dst) < 0) {
   1323 		configd_critical(
   1324 		    "Backend copy failed: rename %s to %s: %s\n",
   1325 		    tmppath, dst, strerror(errno));
   1326 		res = REP_PROTOCOL_FAIL_UNKNOWN;
   1327 	}
   1328 
   1329 errexit:
   1330 	if (res != REP_PROTOCOL_SUCCESS && unlink(tmppath) < 0)
   1331 		configd_critical(
   1332 		    "Backend copy failed: remove %s: %s\n",
   1333 		    tmppath, strerror(errno));
   1334 
   1335 	(void) close(srcfd);
   1336 	(void) close(dstfd);
   1337 
   1338 out:
   1339 	free(tmppath);
   1340 	if (sw_back) {
   1341 		if (unlink(src) < 0)
   1342 			configd_critical(
   1343 			    "Backend copy failed: remove %s: %s\n",
   1344 			    src, strerror(errno));
   1345 	}
   1346 
   1347 	return (res);
   1348 }
   1349 
   1350 /*
   1351  * Perform sanity check on the repository.
   1352  * Return 0 if check succeeds or -1 if fails.
   1353  */
   1354 static int
   1355 backend_switch_check(struct sqlite *be_db, char **errp)
   1356 {
   1357 	struct run_single_int_info info;
   1358 	uint32_t val = -1UL;
   1359 	int r;
   1360 
   1361 	info.rs_out = &val;
   1362 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
   1363 
   1364 	r = sqlite_exec(be_db,
   1365 	    "SELECT schema_version FROM schema_version;",
   1366 	    run_single_int_callback, &info, errp);
   1367 
   1368 	if (r == SQLITE_OK &&
   1369 	    info.rs_result != REP_PROTOCOL_FAIL_NOT_FOUND &&
   1370 	    val == BACKEND_SCHEMA_VERSION)
   1371 		return (0);
   1372 	else
   1373 		return (-1);
   1374 }
   1375 
   1376 /*
   1377  * Backend switch entry point.  It is called to perform the backend copy and
   1378  * switch from src to dst.  First, it blocks all other clients from accessing
   1379  * the repository by calling backend_lock to lock the repository.  Upon
   1380  * successful lock, copying and switching of the repository are performed.
   1381  *
   1382  * Can return:
   1383  *	REP_PROTOCOL_SUCCESS			successful switch
   1384  *	REP_PROTOCOL_FAIL_BACKEND_ACCESS	backen access fails
   1385  *	REP_PROTOCOL_FAIL_BACKEND_READONLY	backend is not writable
   1386  *	REP_PROTOCOL_FAIL_UNKNOWN		file operation error
   1387  *	REP_PROTOCOL_FAIL_NO_RESOURCES		out of memory
   1388  */
   1389 rep_protocol_responseid_t
   1390 backend_switch(int sw_back)
   1391 {
   1392 	rep_protocol_responseid_t result;
   1393 	sqlite_backend_t *be;
   1394 	struct sqlite *new;
   1395 	char *errp;
   1396 	const char *dst;
   1397 
   1398 	result = backend_lock(BACKEND_TYPE_NORMAL, 1, &be);
   1399 	if (result != REP_PROTOCOL_SUCCESS)
   1400 		return (result);
   1401 
   1402 	if (sw_back) {
   1403 		dst = REPOSITORY_DB;
   1404 	} else {
   1405 		dst = FAST_REPOSITORY_DB;
   1406 	}
   1407 
   1408 	/*
   1409 	 * Do the actual copy and rename
   1410 	 */
   1411 	result = backend_switch_copy(be->be_path, dst, sw_back);
   1412 	if (result != REP_PROTOCOL_SUCCESS) {
   1413 		goto errout;
   1414 	}
   1415 
   1416 	/*
   1417 	 * Do the backend sanity check and switch
   1418 	 */
   1419 	new = sqlite_open(dst, 0600, &errp);
   1420 	if (new != NULL) {
   1421 		/*
   1422 		 * Sanity check
   1423 		 */
   1424 		if (backend_switch_check(new, &errp) == 0) {
   1425 			free((char *)be->be_path);
   1426 			be->be_path = strdup(dst);
   1427 			if (be->be_path == NULL) {
   1428 				configd_critical(
   1429 				    "Backend switch failed: strdup %s: %s\n",
   1430 				    dst, strerror(errno));
   1431 				result = REP_PROTOCOL_FAIL_NO_RESOURCES;
   1432 				sqlite_close(new);
   1433 			} else {
   1434 				sqlite_close(be->be_db);
   1435 				be->be_db = new;
   1436 			}
   1437 		} else {
   1438 			configd_critical(
   1439 			    "Backend switch failed: integrity check %s: %s\n",
   1440 			    dst, errp);
   1441 			result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
   1442 		}
   1443 	} else {
   1444 		configd_critical("Backend switch failed: sqlite_open %s: %s\n",
   1445 		    dst, errp);
   1446 		result = REP_PROTOCOL_FAIL_BACKEND_ACCESS;
   1447 	}
   1448 
   1449 errout:
   1450 	backend_unlock(be);
   1451 	return (result);
   1452 }
   1453 
   1454 /*
   1455  * This routine is called to attempt the recovery of
   1456  * the most recent valid repository if possible when configd
   1457  * is restarted for some reasons or when system crashes
   1458  * during the switch operation.  The repository databases
   1459  * referenced here are indicators of successful switch
   1460  * operations.
   1461  */
   1462 static void
   1463 backend_switch_recovery(void)
   1464 {
   1465 	const char *fast_db = FAST_REPOSITORY_DB;
   1466 	char *errp;
   1467 	struct stat s_buf;
   1468 	struct sqlite *be_db;
   1469 
   1470 
   1471 	/*
   1472 	 * A good transient db containing most recent data can
   1473 	 * exist if system or svc.configd crashes during the
   1474 	 * switch operation.  If that is the case, check its
   1475 	 * integrity and use it.
   1476 	 */
   1477 	if (stat(fast_db, &s_buf) < 0) {
   1478 		return;
   1479 	}
   1480 
   1481 	/*
   1482 	 * Do sanity check on the db
   1483 	 */
   1484 	be_db = sqlite_open(fast_db, 0600, &errp);
   1485 
   1486 	if (be_db != NULL) {
   1487 		if (backend_switch_check(be_db, &errp) == 0)
   1488 			(void) backend_switch_copy(fast_db, REPOSITORY_DB, 1);
   1489 	}
   1490 
   1491 	(void) unlink(fast_db);
   1492 }
   1493 
   1494 /*ARGSUSED*/
   1495 static int
   1496 backend_integrity_callback(void *private, int narg, char **vals, char **cols)
   1497 {
   1498 	char **out = private;
   1499 	char *old = *out;
   1500 	char *new;
   1501 	const char *info;
   1502 	size_t len;
   1503 	int x;
   1504 
   1505 	for (x = 0; x < narg; x++) {
   1506 		if ((info = vals[x]) != NULL &&
   1507 		    strcmp(info, "ok") != 0) {
   1508 			len = (old == NULL)? 0 : strlen(old);
   1509 			len += strlen(info) + 2;	/* '\n' + '\0' */
   1510 
   1511 			new = realloc(old, len);
   1512 			if (new == NULL)
   1513 				return (BACKEND_CALLBACK_ABORT);
   1514 			if (old == NULL)
   1515 				new[0] = 0;
   1516 			old = *out = new;
   1517 			(void) strlcat(new, info, len);
   1518 			(void) strlcat(new, "\n", len);
   1519 		}
   1520 	}
   1521 	return (BACKEND_CALLBACK_CONTINUE);
   1522 }
   1523 
/*
 * Result codes for backend_create().
 */
#define	BACKEND_CREATE_LOCKED		-2
#define	BACKEND_CREATE_FAIL		-1
#define	BACKEND_CREATE_SUCCESS		0
#define	BACKEND_CREATE_READONLY		1
#define	BACKEND_CREATE_NEED_INIT	2
/*
 * Set up the be_info[] slot for backend_id on top of the sqlite database
 * db_file: initialize and take the backend's lock, open the database,
 * verify the schema version, run an integrity check, check (without
 * performing) the value_tbl upgrade for the normal backend, and finally
 * determine whether the database is writable.
 *
 * Returns:
 *   BACKEND_CREATE_SUCCESS	backend is usable and writable; *bep is set
 *   BACKEND_CREATE_READONLY	backend is usable but read-only; *bep is set
 *				and be_readonly is 1
 *   BACKEND_CREATE_NEED_INIT	neither schema_version nor id_tbl exists;
 *				*bep is set
 *   BACKEND_CREATE_LOCKED	sqlite reported SQLITE_BUSY or SQLITE_LOCKED;
 *				*bep is NULL and the backend is destroyed
 *   BACKEND_CREATE_FAIL	any other failure (out of memory, schema
 *				version mismatch, integrity failure); *bep is
 *				NULL and the backend is destroyed
 *
 * In the three cases where *bep is set, this function returns with
 * be_lock still held; in the other two, backend_destroy() has released
 * and destroyed it.
 */
static int
backend_create(backend_type_t backend_id, const char *db_file,
    sqlite_backend_t **bep)
{
	char *errp;
	char *integrity_results = NULL;
	sqlite_backend_t *be;
	int r;
	uint32_t val = -1UL;
	struct run_single_int_info info;
	int fd;

	assert(backend_id >= 0 && backend_id < BACKEND_TYPE_TOTAL);

	be = &be_info[backend_id];

	assert(be->be_db == NULL);

	(void) pthread_mutex_init(&be->be_lock, NULL);
	(void) pthread_mutex_lock(&be->be_lock);

	be->be_type = backend_id;
	be->be_path = strdup(db_file);
	if (be->be_path == NULL) {
		perror("malloc");
		goto fail;
	}

	be->be_db = sqlite_open(be->be_path, 0600, &errp);

	if (be->be_db == NULL) {
		/* out of memory is a plain failure, not a damaged database */
		if (strstr(errp, "out of memory") != NULL) {
			configd_critical("%s: %s\n", db_file, errp);
			free(errp);

			goto fail;
		}

		/* report it as an integrity failure */
		integrity_results = errp;
		errp = NULL;
		goto integrity_fail;
	}

	/*
	 * check if we are inited and of the correct schema version
	 *
	 */
	info.rs_out = &val;
	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;

	r = sqlite_exec(be->be_db, "SELECT schema_version FROM schema_version;",
	    run_single_int_callback, &info, &errp);
	if (r == SQLITE_ERROR &&
	    strcmp("no such table: schema_version", errp) == 0) {
		free(errp);
		/*
		 * Could be an empty repository, could be pre-schema_version
		 * schema.  Check for id_tbl, which has always been there.
		 */
		r = sqlite_exec(be->be_db, "SELECT count() FROM id_tbl;",
		    NULL, NULL, &errp);
		if (r == SQLITE_ERROR &&
		    strcmp("no such table: id_tbl", errp) == 0) {
			/* empty repository: the caller must initialize it */
			free(errp);
			*bep = be;
			return (BACKEND_CREATE_NEED_INIT);
		}

		configd_critical("%s: schema version mismatch\n", db_file);
		goto fail;
	}
	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
		free(errp);
		*bep = NULL;
		backend_destroy(be);
		return (BACKEND_CREATE_LOCKED);
	}
	if (r == SQLITE_OK) {
		if (info.rs_result == REP_PROTOCOL_FAIL_NOT_FOUND ||
		    val != BACKEND_SCHEMA_VERSION) {
			configd_critical("%s: schema version mismatch\n",
			    db_file);
			goto fail;
		}
	}
	/*
	 * Any other result from the schema_version query falls through to
	 * the integrity check below.
	 */

	/*
	 * pull in the whole database sequentially.
	 */
	if ((fd = open(db_file, O_RDONLY)) >= 0) {
		size_t sz = 64 * 1024;
		char *buffer = malloc(sz);
		if (buffer != NULL) {
			/* the data itself is discarded */
			while (read(fd, buffer, sz) > 0)
				;
			free(buffer);
		}
		(void) close(fd);
	}

	/*
	 * run an integrity check
	 */
	r = sqlite_exec(be->be_db, "PRAGMA integrity_check;",
	    backend_integrity_callback, &integrity_results, &errp);

	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
		free(errp);
		*bep = NULL;
		backend_destroy(be);
		return (BACKEND_CREATE_LOCKED);
	}
	if (r == SQLITE_ABORT) {
		/*
		 * NOTE(review): presumably SQLITE_ABORT here means
		 * backend_integrity_callback() ran out of memory; any text
		 * it had already collected is replaced by this literal.
		 */
		free(errp);
		errp = NULL;
		integrity_results = "out of memory running integrity check\n";
	} else if (r != SQLITE_OK && integrity_results == NULL) {
		/* no callback output: report sqlite's own error text */
		integrity_results = errp;
		errp = NULL;
	}

integrity_fail:
	if (integrity_results != NULL) {
		/*
		 * Try to append the details to the error file.  fname is
		 * reset to NULL if the file cannot be opened or written, in
		 * which case the messages below do not refer to it.
		 */
		const char *fname = "/etc/svc/volatile/db_errors";
		if ((fd = open(fname, O_CREAT|O_WRONLY|O_APPEND, 0600)) < 0) {
			fname = NULL;
		} else {
			if (backend_fd_write(fd, "\n\n") < 0 ||
			    backend_fd_write(fd, db_file) < 0 ||
			    backend_fd_write(fd,
			    ": PRAGMA integrity_check; failed.  Results:\n") <
			    0 || backend_fd_write(fd, integrity_results) < 0 ||
			    backend_fd_write(fd, "\n\n") < 0) {
				fname = NULL;
			}
			(void) close(fd);
		}

		/*
		 * The long recovery message on stderr is only printed for
		 * the persistent backend when is_main_repository is set;
		 * everything else gets a one-line critical message.
		 */
		if (!is_main_repository ||
		    backend_id == BACKEND_TYPE_NONPERSIST) {
			if (fname != NULL)
				configd_critical(
				    "%s: integrity check failed. Details in "
				    "%s\n", db_file, fname);
			else
				configd_critical(
				    "%s: integrity check failed.\n",
				    db_file);
		} else {
			(void) fprintf(stderr,
"\n"
"svc.configd: smf(5) database integrity check of:\n"
"\n"
"    %s\n"
"\n"
"  failed. The database might be damaged or a media error might have\n"
"  prevented it from being verified.  Additional information useful to\n"
"  your service provider%s%s\n"
"\n"
"  The system will not be able to boot until you have restored a working\n"
"  database.  svc.startd(1M) will provide a sulogin(1M) prompt for recovery\n"
"  purposes.  The command:\n"
"\n"
"    /lib/svc/bin/restore_repository\n"
"\n"
"  can be run to restore a backup version of your repository.  See\n"
"  http://sun.com/msg/SMF-8000-MY for more information.\n"
"\n",
			    db_file,
			    (fname == NULL)? ":\n\n" : " is in:\n\n    ",
			    (fname == NULL)? integrity_results : fname);
		}
		free(errp);
		goto fail;
	}

	/*
	 * Simply do check if backend has been upgraded.  We do not wish
	 * to actually carry out upgrade here - the main repository may
	 * not be writable at this point.  Actual upgrade is carried out
	 * via backend_check_readonly().  This check is done so that
	 * we determine repository state - upgraded or not - and then
	 * the appropriate SELECT statement (value-ordered or not)
	 * can be used when retrieving property values early in boot.
	 */
	if (backend_id == BACKEND_TYPE_NORMAL)
		backend_check_upgrade(be, B_FALSE);
	/*
	 * check if we are writable
	 */
	r = backend_is_readonly(be->be_db, be->be_path);

	if (r == SQLITE_BUSY || r == SQLITE_LOCKED) {
		free(errp);
		*bep = NULL;
		backend_destroy(be);
		return (BACKEND_CREATE_LOCKED);
	}
	/* SQLITE_FULL is not treated as read-only here */
	if (r != SQLITE_OK && r != SQLITE_FULL) {
		free(errp);
		be->be_readonly = 1;
		*bep = be;
		return (BACKEND_CREATE_READONLY);
	}

	*bep = be;
	return (BACKEND_CREATE_SUCCESS);

fail:
	/* common failure exit: nothing is handed back to the caller */
	*bep = NULL;
	backend_destroy(be);
	return (BACKEND_CREATE_FAIL);
}
   1743 
   1744 /*
   1745  * (arg & -arg) is, through the magic of twos-complement arithmetic, the
   1746  * lowest set bit in arg.
   1747  */
   1748 static size_t
   1749 round_up_to_p2(size_t arg)
   1750 {
   1751 	/*
   1752 	 * Don't allow a zero result.
   1753 	 */
   1754 	assert(arg > 0 && ((ssize_t)arg > 0));
   1755 
   1756 	while ((arg & (arg - 1)) != 0)
   1757 		arg += (arg & -arg);
   1758 
   1759 	return (arg);
   1760 }
   1761 
   1762 /*
   1763  * Returns
   1764  *   _NO_RESOURCES - out of memory
   1765  *   _BACKEND_ACCESS - backend type t (other than _NORMAL) doesn't exist
   1766  *   _DONE - callback aborted query
   1767  *   _SUCCESS
   1768  */
   1769 int
   1770 backend_run(backend_type_t t, backend_query_t *q,
   1771     backend_run_callback_f *cb, void *data)
   1772 {
   1773 	char *errmsg = NULL;
   1774 	int ret;
   1775 	sqlite_backend_t *be;
   1776 	hrtime_t ts, vts;
   1777 
   1778 	if (q == NULL || q->bq_buf == NULL)
   1779 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
   1780 
   1781 	if ((ret = backend_lock(t, 0, &be)) != REP_PROTOCOL_SUCCESS)
   1782 		return (ret);
   1783 
   1784 	ts = gethrtime();
   1785 	vts = gethrvtime();
   1786 	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
   1787 	UPDATE_TOTALS(be, bt_exec, ts, vts);
   1788 	ret = backend_error(be, ret, errmsg);
   1789 	backend_unlock(be);
   1790 
   1791 	return (ret);
   1792 }
   1793 
   1794 /*
   1795  * Starts a "read-only" transaction -- i.e., locks out writers as long
   1796  * as it is active.
   1797  *
   1798  * Fails with
   1799  *   _NO_RESOURCES - out of memory
   1800  *
   1801  * If t is not _NORMAL, can also fail with
   1802  *   _BACKEND_ACCESS - backend does not exist
   1803  *
   1804  * If writable is true, can also fail with
   1805  *   _BACKEND_READONLY
   1806  */
   1807 static int
   1808 backend_tx_begin_common(backend_type_t t, backend_tx_t **txp, int writable)
   1809 {
   1810 	backend_tx_t *ret;
   1811 	sqlite_backend_t *be;
   1812 	int r;
   1813 
   1814 	*txp = NULL;
   1815 
   1816 	ret = uu_zalloc(sizeof (*ret));
   1817 	if (ret == NULL)
   1818 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
   1819 
   1820 	if ((r = backend_lock(t, writable, &be)) != REP_PROTOCOL_SUCCESS) {
   1821 		uu_free(ret);
   1822 		return (r);
   1823 	}
   1824 
   1825 	ret->bt_be = be;
   1826 	ret->bt_readonly = !writable;
   1827 	ret->bt_type = t;
   1828 	ret->bt_full = 0;
   1829 
   1830 	*txp = ret;
   1831 	return (REP_PROTOCOL_SUCCESS);
   1832 }
   1833 
   1834 int
   1835 backend_tx_begin_ro(backend_type_t t, backend_tx_t **txp)
   1836 {
   1837 	return (backend_tx_begin_common(t, txp, 0));
   1838 }
   1839 
   1840 static void
   1841 backend_tx_end(backend_tx_t *tx)
   1842 {
   1843 	sqlite_backend_t *be;
   1844 
   1845 	be = tx->bt_be;
   1846 
   1847 	if (tx->bt_full) {
   1848 		struct sqlite *new;
   1849 
   1850 		/*
   1851 		 * sqlite tends to be sticky with SQLITE_FULL, so we try
   1852 		 * to get a fresh database handle if we got a FULL warning
   1853 		 * along the way.  If that fails, no harm done.
   1854 		 */
   1855 		new = sqlite_open(be->be_path, 0600, NULL);
   1856 		if (new != NULL) {
   1857 			sqlite_close(be->be_db);
   1858 			be->be_db = new;
   1859 		}
   1860 	}
   1861 	backend_unlock(be);
   1862 	tx->bt_be = NULL;
   1863 	uu_free(tx);
   1864 }
   1865 
   1866 void
   1867 backend_tx_end_ro(backend_tx_t *tx)
   1868 {
   1869 	assert(tx->bt_readonly);
   1870 	backend_tx_end(tx);
   1871 }
   1872 
   1873 /*
   1874  * Fails with
   1875  *   _NO_RESOURCES - out of memory
   1876  *   _BACKEND_ACCESS
   1877  *   _BACKEND_READONLY
   1878  */
   1879 int
   1880 backend_tx_begin(backend_type_t t, backend_tx_t **txp)
   1881 {
   1882 	int r;
   1883 	char *errmsg;
   1884 	hrtime_t ts, vts;
   1885 
   1886 	r = backend_tx_begin_common(t, txp, 1);
   1887 	if (r != REP_PROTOCOL_SUCCESS)
   1888 		return (r);
   1889 
   1890 	ts = gethrtime();
   1891 	vts = gethrvtime();
   1892 	r = sqlite_exec((*txp)->bt_be->be_db, "BEGIN TRANSACTION", NULL, NULL,
   1893 	    &errmsg);
   1894 	UPDATE_TOTALS((*txp)->bt_be, bt_exec, ts, vts);
   1895 	if (r == SQLITE_FULL)
   1896 		(*txp)->bt_full = 1;
   1897 	r = backend_error((*txp)->bt_be, r, errmsg);
   1898 
   1899 	if (r != REP_PROTOCOL_SUCCESS) {
   1900 		assert(r != REP_PROTOCOL_DONE);
   1901 		(void) sqlite_exec((*txp)->bt_be->be_db,
   1902 		    "ROLLBACK TRANSACTION", NULL, NULL, NULL);
   1903 		backend_tx_end(*txp);
   1904 		*txp = NULL;
   1905 		return (r);
   1906 	}
   1907 
   1908 	(*txp)->bt_readonly = 0;
   1909 
   1910 	return (REP_PROTOCOL_SUCCESS);
   1911 }
   1912 
   1913 void
   1914 backend_tx_rollback(backend_tx_t *tx)
   1915 {
   1916 	int r;
   1917 	char *errmsg;
   1918 	sqlite_backend_t *be;
   1919 	hrtime_t ts, vts;
   1920 
   1921 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
   1922 	be = tx->bt_be;
   1923 
   1924 	ts = gethrtime();
   1925 	vts = gethrvtime();
   1926 	r = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
   1927 	    &errmsg);
   1928 	UPDATE_TOTALS(be, bt_exec, ts, vts);
   1929 	if (r == SQLITE_FULL)
   1930 		tx->bt_full = 1;
   1931 	(void) backend_error(be, r, errmsg);
   1932 
   1933 	backend_tx_end(tx);
   1934 }
   1935 
   1936 /*
   1937  * Fails with
   1938  *   _NO_RESOURCES - out of memory
   1939  */
   1940 int
   1941 backend_tx_commit(backend_tx_t *tx)
   1942 {
   1943 	int r, r2;
   1944 	char *errmsg;
   1945 	sqlite_backend_t *be;
   1946 	hrtime_t ts, vts;
   1947 
   1948 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
   1949 	be = tx->bt_be;
   1950 	ts = gethrtime();
   1951 	vts = gethrvtime();
   1952 	r = sqlite_exec(be->be_db, "COMMIT TRANSACTION", NULL, NULL,
   1953 	    &errmsg);
   1954 	UPDATE_TOTALS(be, bt_exec, ts, vts);
   1955 	if (r == SQLITE_FULL)
   1956 		tx->bt_full = 1;
   1957 
   1958 	r = backend_error(be, r, errmsg);
   1959 	assert(r != REP_PROTOCOL_DONE);
   1960 
   1961 	if (r != REP_PROTOCOL_SUCCESS) {
   1962 		r2 = sqlite_exec(be->be_db, "ROLLBACK TRANSACTION", NULL, NULL,
   1963 		    &errmsg);
   1964 		r2 = backend_error(be, r2, errmsg);
   1965 		if (r2 != REP_PROTOCOL_SUCCESS)
   1966 			backend_panic("cannot rollback failed commit");
   1967 
   1968 		backend_tx_end(tx);
   1969 		return (r);
   1970 	}
   1971 	backend_tx_end(tx);
   1972 	return (REP_PROTOCOL_SUCCESS);
   1973 }
   1974 
   1975 static const char *
   1976 id_space_to_name(enum id_space id)
   1977 {
   1978 	switch (id) {
   1979 	case BACKEND_ID_SERVICE_INSTANCE:
   1980 		return ("SI");
   1981 	case BACKEND_ID_PROPERTYGRP:
   1982 		return ("PG");
   1983 	case BACKEND_ID_GENERATION:
   1984 		return ("GEN");
   1985 	case BACKEND_ID_PROPERTY:
   1986 		return ("PROP");
   1987 	case BACKEND_ID_VALUE:
   1988 		return ("VAL");
   1989 	case BACKEND_ID_SNAPNAME:
   1990 		return ("SNAME");
   1991 	case BACKEND_ID_SNAPSHOT:
   1992 		return ("SHOT");
   1993 	case BACKEND_ID_SNAPLEVEL:
   1994 		return ("SLVL");
   1995 	default:
   1996 		abort();
   1997 		/*NOTREACHED*/
   1998 	}
   1999 }
   2000 
   2001 /*
   2002  * Returns a new id or 0 if the id argument is invalid or the query fails.
   2003  */
   2004 uint32_t
   2005 backend_new_id(backend_tx_t *tx, enum id_space id)
   2006 {
   2007 	struct run_single_int_info info;
   2008 	uint32_t new_id = 0;
   2009 	const char *name = id_space_to_name(id);
   2010 	char *errmsg;
   2011 	int ret;
   2012 	sqlite_backend_t *be;
   2013 	hrtime_t ts, vts;
   2014 
   2015 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
   2016 	be = tx->bt_be;
   2017 
   2018 	info.rs_out = &new_id;
   2019 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
   2020 
   2021 	ts = gethrtime();
   2022 	vts = gethrvtime();
   2023 	ret = sqlite_exec_printf(be->be_db,
   2024 	    "SELECT id_next FROM id_tbl WHERE (id_name = '%q');"
   2025 	    "UPDATE id_tbl SET id_next = id_next + 1 WHERE (id_name = '%q');",
   2026 	    run_single_int_callback, &info, &errmsg, name, name);
   2027 	UPDATE_TOTALS(be, bt_exec, ts, vts);
   2028 	if (ret == SQLITE_FULL)
   2029 		tx->bt_full = 1;
   2030 
   2031 	ret = backend_error(be, ret, errmsg);
   2032 
   2033 	if (ret != REP_PROTOCOL_SUCCESS) {
   2034 		return (0);
   2035 	}
   2036 
   2037 	return (new_id);
   2038 }
   2039 
   2040 /*
   2041  * Returns
   2042  *   _NO_RESOURCES - out of memory
   2043  *   _DONE - callback aborted query
   2044  *   _SUCCESS
   2045  */
   2046 int
   2047 backend_tx_run(backend_tx_t *tx, backend_query_t *q,
   2048     backend_run_callback_f *cb, void *data)
   2049 {
   2050 	char *errmsg = NULL;
   2051 	int ret;
   2052 	sqlite_backend_t *be;
   2053 	hrtime_t ts, vts;
   2054 
   2055 	assert(tx != NULL && tx->bt_be != NULL);
   2056 	be = tx->bt_be;
   2057 
   2058 	if (q == NULL || q->bq_buf == NULL)
   2059 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
   2060 
   2061 	ts = gethrtime();
   2062 	vts = gethrvtime();
   2063 	ret = sqlite_exec(be->be_db, q->bq_buf, cb, data, &errmsg);
   2064 	UPDATE_TOTALS(be, bt_exec, ts, vts);
   2065 	if (ret == SQLITE_FULL)
   2066 		tx->bt_full = 1;
   2067 	ret = backend_error(be, ret, errmsg);
   2068 
   2069 	return (ret);
   2070 }
   2071 
   2072 /*
   2073  * Returns
   2074  *   _NO_RESOURCES - out of memory
   2075  *   _NOT_FOUND - the query returned no results
   2076  *   _SUCCESS - the query returned a single integer
   2077  */
   2078 int
   2079 backend_tx_run_single_int(backend_tx_t *tx, backend_query_t *q, uint32_t *buf)
   2080 {
   2081 	struct run_single_int_info info;
   2082 	int ret;
   2083 
   2084 	info.rs_out = buf;
   2085 	info.rs_result = REP_PROTOCOL_FAIL_NOT_FOUND;
   2086 
   2087 	ret = backend_tx_run(tx, q, run_single_int_callback, &info);
   2088 	assert(ret != REP_PROTOCOL_DONE);
   2089 
   2090 	if (ret != REP_PROTOCOL_SUCCESS)
   2091 		return (ret);
   2092 
   2093 	return (info.rs_result);
   2094 }
   2095 
   2096 /*
   2097  * Fails with
   2098  *   _NO_RESOURCES - out of memory
   2099  */
   2100 int
   2101 backend_tx_run_update(backend_tx_t *tx, const char *format, ...)
   2102 {
   2103 	va_list a;
   2104 	char *errmsg;
   2105 	int ret;
   2106 	sqlite_backend_t *be;
   2107 	hrtime_t ts, vts;
   2108 
   2109 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
   2110 	be = tx->bt_be;
   2111 
   2112 	va_start(a, format);
   2113 	ts = gethrtime();
   2114 	vts = gethrvtime();
   2115 	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
   2116 	UPDATE_TOTALS(be, bt_exec, ts, vts);
   2117 	if (ret == SQLITE_FULL)
   2118 		tx->bt_full = 1;
   2119 	va_end(a);
   2120 	ret = backend_error(be, ret, errmsg);
   2121 	assert(ret != REP_PROTOCOL_DONE);
   2122 
   2123 	return (ret);
   2124 }
   2125 
   2126 /*
   2127  * returns REP_PROTOCOL_FAIL_NOT_FOUND if no changes occured
   2128  */
   2129 int
   2130 backend_tx_run_update_changed(backend_tx_t *tx, const char *format, ...)
   2131 {
   2132 	va_list a;
   2133 	char *errmsg;
   2134 	int ret;
   2135 	sqlite_backend_t *be;
   2136 	hrtime_t ts, vts;
   2137 
   2138 	assert(tx != NULL && tx->bt_be != NULL && !tx->bt_readonly);
   2139 	be = tx->bt_be;
   2140 
   2141 	va_start(a, format);
   2142 	ts = gethrtime();
   2143 	vts = gethrvtime();
   2144 	ret = sqlite_exec_vprintf(be->be_db, format, NULL, NULL, &errmsg, a);
   2145 	UPDATE_TOTALS(be, bt_exec, ts, vts);
   2146 	if (ret == SQLITE_FULL)
   2147 		tx->bt_full = 1;
   2148 	va_end(a);
   2149 
   2150 	ret = backend_error(be, ret, errmsg);
   2151 
   2152 	return (ret);
   2153 }
   2154 
   2155 #define	BACKEND_ADD_SCHEMA(be, file, tbls, idxs) \
   2156 	(backend_add_schema((be), (file), \
   2157 	    (tbls), sizeof (tbls) / sizeof (*(tbls)), \
   2158 	    (idxs), sizeof (idxs) / sizeof (*(idxs))))
   2159 
   2160 static int
   2161 backend_add_schema(sqlite_backend_t *be, const char *file,
   2162     struct backend_tbl_info *tbls, int tbl_count,
   2163     struct backend_idx_info *idxs, int idx_count)
   2164 {
   2165 	int i;
   2166 	char *errmsg;
   2167 	int ret;
   2168 
   2169 	/*
   2170 	 * Create the tables.
   2171 	 */
   2172 	for (i = 0; i < tbl_count; i++) {
   2173 		if (tbls[i].bti_name == NULL) {
   2174 			assert(i + 1 == tbl_count);
   2175 			break;
   2176 		}
   2177 		ret = sqlite_exec_printf(be->be_db,
   2178 		    "CREATE TABLE %s (%s);\n",
   2179 		    NULL, NULL, &errmsg, tbls[i].bti_name, tbls[i].bti_cols);
   2180 
   2181 		if (ret != SQLITE_OK) {
   2182 			configd_critical(
   2183 			    "%s: %s table creation fails: %s\n", file,
   2184 			    tbls[i].bti_name, errmsg);
   2185 			free(errmsg);
   2186 			return (-1);
   2187 		}
   2188 	}
   2189 
   2190 	/*
   2191 	 * Make indices on key tables and columns.
   2192 	 */
   2193 	for (i = 0; i < idx_count; i++) {
   2194 		if (idxs[i].bxi_tbl == NULL) {
   2195 			assert(i + 1 == idx_count);
   2196 			break;
   2197 		}
   2198 
   2199 		ret = sqlite_exec_printf(be->be_db,
   2200 		    "CREATE INDEX %s_%s ON %s (%s);\n",
   2201 		    NULL, NULL, &errmsg, idxs[i].bxi_tbl, idxs[i].bxi_idx,
   2202 		    idxs[i].bxi_tbl, idxs[i].bxi_cols);
   2203 
   2204 		if (ret != SQLITE_OK) {
   2205 			configd_critical(
   2206 			    "%s: %s_%s index creation fails: %s\n", file,
   2207 			    idxs[i].bxi_tbl, idxs[i].bxi_idx, errmsg);
   2208 			free(errmsg);
   2209 			return (-1);
   2210 		}
   2211 	}
   2212 	return (0);
   2213 }
   2214 
   2215 static int
   2216 backend_init_schema(sqlite_backend_t *be, const char *db_file, backend_type_t t)
   2217 {
   2218 	int i;
   2219 	char *errmsg;
   2220 	int ret;
   2221 
   2222 	assert(t == BACKEND_TYPE_NORMAL || t == BACKEND_TYPE_NONPERSIST);
   2223 
   2224 	if (t == BACKEND_TYPE_NORMAL) {
   2225 		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_normal, idxs_normal);
   2226 	} else if (t == BACKEND_TYPE_NONPERSIST) {
   2227 		ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_np, idxs_np);
   2228 	} else {
   2229 		abort();		/* can't happen */
   2230 	}
   2231 
   2232 	if (ret < 0) {
   2233 		return (ret);
   2234 	}
   2235 
   2236 	ret = BACKEND_ADD_SCHEMA(be, db_file, tbls_common, idxs_common);
   2237 	if (ret < 0) {
   2238 		return (ret);
   2239 	}
   2240 
   2241 	/*
   2242 	 * Add the schema version to the table
   2243 	 */
   2244 	ret = sqlite_exec_printf(be->be_db,
   2245 	    "INSERT INTO schema_version (schema_version) VALUES (%d)",
   2246 	    NULL, NULL, &errmsg, BACKEND_SCHEMA_VERSION);
   2247 	if (ret != SQLITE_OK) {
   2248 		configd_critical(
   2249 		    "setting schema version fails: %s\n", errmsg);
   2250 		free(errmsg);
   2251 	}
   2252 
   2253 	/*
   2254 	 * Populate id_tbl with initial IDs.
   2255 	 */
   2256 	for (i = 0; i < BACKEND_ID_INVALID; i++) {
   2257 		const char *name = id_space_to_name(i);
   2258 
   2259 		ret = sqlite_exec_printf(be->be_db,
   2260 		    "INSERT INTO id_tbl (id_name, id_next) "
   2261 		    "VALUES ('%q', %d);", NULL, NULL, &errmsg, name, 1);
   2262 		if (ret != SQLITE_OK) {
   2263 			configd_critical(
   2264 			    "id insertion for %s fails: %s\n", name, errmsg);
   2265 			free(errmsg);
   2266 			return (-1);
   2267 		}
   2268 	}
   2269 	/*
   2270 	 * Set the persistance of the database.  The normal database is marked
   2271 	 * "synchronous", so that all writes are synchronized to stable storage
   2272 	 * before proceeding.
   2273 	 */
   2274 	ret = sqlite_exec_printf(be->be_db,
   2275 	    "PRAGMA default_synchronous = %s; PRAGMA synchronous = %s;",
   2276 	    NULL, NULL, &errmsg,
   2277 	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF",
   2278 	    (t == BACKEND_TYPE_NORMAL)? "ON" : "OFF");
   2279 	if (ret != SQLITE_OK) {
   2280 		configd_critical("pragma setting fails: %s\n", errmsg);
   2281 		free(errmsg);
   2282 		return (-1);
   2283 	}
   2284 
   2285 	return (0);
   2286 }
   2287 
   2288 int
   2289 backend_init(const char *db_file, const char *npdb_file, int have_np)
   2290 {
   2291 	sqlite_backend_t *be;
   2292 	int r;
   2293 	int writable_persist = 1;
   2294 
   2295 	/* set up our temporary directory */
   2296 	sqlite_temp_directory = "/etc/svc/volatile";
   2297 
   2298 	if (strcmp(SQLITE_VERSION, sqlite_version) != 0) {
   2299 		configd_critical("Mismatched link!  (%s should be %s)\n",
   2300 		    sqlite_version, SQLITE_VERSION);
   2301 		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
   2302 	}
   2303 
   2304 	/*
   2305 	 * If the system crashed during a backend switch, there might
   2306 	 * be a leftover transient database which contains useful
   2307 	 * information which can be used for recovery.
   2308 	 */
   2309 	backend_switch_recovery();
   2310 
   2311 	if (db_file == NULL)
   2312 		db_file = REPOSITORY_DB;
   2313 	if (strcmp(db_file, REPOSITORY_DB) != 0) {
   2314 		is_main_repository = 0;
   2315 	}
   2316 
   2317 	r = backend_create(BACKEND_TYPE_NORMAL, db_file, &be);
   2318 	switch (r) {
   2319 	case BACKEND_CREATE_FAIL:
   2320 		return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
   2321 	case BACKEND_CREATE_LOCKED:
   2322 		return (CONFIGD_EXIT_DATABASE_LOCKED);
   2323 	case BACKEND_CREATE_SUCCESS:
   2324 		break;		/* success */
   2325 	case BACKEND_CREATE_READONLY:
   2326 		writable_persist = 0;
   2327 		break;
   2328 	case BACKEND_CREATE_NEED_INIT:
   2329 		if (backend_init_schema(be, db_file, BACKEND_TYPE_NORMAL)) {
   2330 			backend_destroy(be);
   2331 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
   2332 		}
   2333 		break;
   2334 	default:
   2335 		abort();
   2336 		/*NOTREACHED*/
   2337 	}
   2338 	backend_create_finish(BACKEND_TYPE_NORMAL, be);
   2339 
   2340 	if (have_np) {
   2341 		if (npdb_file == NULL)
   2342 			npdb_file = NONPERSIST_DB;
   2343 
   2344 		r = backend_create(BACKEND_TYPE_NONPERSIST, npdb_file, &be);
   2345 		switch (r) {
   2346 		case BACKEND_CREATE_SUCCESS:
   2347 			break;		/* success */
   2348 		case BACKEND_CREATE_FAIL:
   2349 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
   2350 		case BACKEND_CREATE_LOCKED:
   2351 			return (CONFIGD_EXIT_DATABASE_LOCKED);
   2352 		case BACKEND_CREATE_READONLY:
   2353 			configd_critical("%s: unable to write\n", npdb_file);
   2354 			return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
   2355 		case BACKEND_CREATE_NEED_INIT:
   2356 			if (backend_init_schema(be, db_file,
   2357 			    BACKEND_TYPE_NONPERSIST)) {
   2358 				backend_destroy(be);
   2359 				return (CONFIGD_EXIT_DATABASE_INIT_FAILED);
   2360 			}
   2361 			break;
   2362 		default:
   2363 			abort();
   2364 			/*NOTREACHED*/
   2365 		}
   2366 		backend_create_finish(BACKEND_TYPE_NONPERSIST, be);
   2367 
   2368 		/*
   2369 		 * If we started up with a writable filesystem, but the
   2370 		 * non-persistent database needed initialization, we
   2371 		 * are booting a non-global zone, so do a backup.
   2372 		 */
   2373 		if (r == BACKEND_CREATE_NEED_INIT && writable_persist &&
   2374 		    backend_lock(BACKEND_TYPE_NORMAL, 0, &be) ==
   2375 		    REP_PROTOCOL_SUCCESS) {
   2376 			if (backend_create_backup_locked(be,
   2377 			    REPOSITORY_BOOT_BACKUP) != REP_PROTOCOL_SUCCESS) {
   2378 				configd_critical(
   2379 				    "unable to create \"%s\" backup of "
   2380 				    "\"%s\"\n", REPOSITORY_BOOT_BACKUP,
   2381 				    be->be_path);
   2382 			}
   2383 			backend_unlock(be);
   2384 		}
   2385 	}
   2386 
   2387 	/*
   2388 	 * If the persistent backend is writable at this point, upgrade it.
   2389 	 * This can occur in a few cases, most notably on UFS roots if
   2390 	 * we are operating on the backend from another root, as is the case
   2391 	 * during alternate-root BFU.
   2392 	 *
   2393 	 * Otherwise, upgrade will occur via backend_check_readonly() when
   2394 	 * the repository is re-opened read-write.
   2395 	 */
   2396 	if (writable_persist) {
   2397 		r = backend_lock(BACKEND_TYPE_NORMAL, 1, &be);
   2398 		assert(r == REP_PROTOCOL_SUCCESS);
   2399 		backend_check_upgrade(be, B_TRUE);
   2400 		backend_unlock(be);
   2401 	}
   2402 
   2403 	return (CONFIGD_EXIT_OKAY);
   2404 }
   2405 
   2406 /*
   2407  * quiesce all database activity prior to exiting
   2408  */
   2409 void
   2410 backend_fini(void)
   2411 {
   2412 	sqlite_backend_t *be_normal, *be_np;
   2413 
   2414 	(void) backend_lock(BACKEND_TYPE_NORMAL, 1, &be_normal);
   2415 	(void) backend_lock(BACKEND_TYPE_NONPERSIST, 1, &be_np);
   2416 }
   2417 
   2418 #define	QUERY_BASE	128
   2419 backend_query_t *
   2420 backend_query_alloc(void)
   2421 {
   2422 	backend_query_t *q;
   2423 	q = calloc(1, sizeof (backend_query_t));
   2424 	if (q != NULL) {
   2425 		q->bq_size = QUERY_BASE;
   2426 		q->bq_buf = calloc(1, q->bq_size);
   2427 		if (q->bq_buf == NULL) {
   2428 			q->bq_size = 0;
   2429 		}
   2430 
   2431 	}
   2432 	return (q);
   2433 }
   2434 
   2435 void
   2436 backend_query_append(backend_query_t *q, const char *value)
   2437 {
   2438 	char *alloc;
   2439 	int count;
   2440 	size_t size, old_len;
   2441 
   2442 	if (q == NULL) {
   2443 		/* We'll discover the error when we try to run the query. */
   2444 		return;
   2445 	}
   2446 
   2447 	while (q->bq_buf != NULL) {
   2448 		old_len = strlen(q->bq_buf);
   2449 		size = q->bq_size;
   2450 		count = strlcat(q->bq_buf, value, size);
   2451 
   2452 		if (count < size)
   2453 			break;				/* success */
   2454 
   2455 		q->bq_buf[old_len] = 0;
   2456 		size = round_up_to_p2(count + 1);
   2457 
   2458 		assert(size > q->bq_size);
   2459 		alloc = realloc(q->bq_buf, size);
   2460 		if (alloc == NULL) {
   2461 			free(q->bq_buf);
   2462 			q->bq_buf = NULL;
   2463 			break;				/* can't grow */
   2464 		}
   2465 
   2466 		q->bq_buf = alloc;
   2467 		q->bq_size = size;
   2468 	}
   2469 }
   2470 
   2471 void
   2472 backend_query_add(backend_query_t *q, const char *format, ...)
   2473 {
   2474 	va_list args;
   2475 	char *new;
   2476 
   2477 	if (q == NULL || q->bq_buf == NULL)
   2478 		return;
   2479 
   2480 	va_start(args, format);
   2481 	new = sqlite_vmprintf(format, args);
   2482 	va_end(args);
   2483 
   2484 	if (new == NULL) {
   2485 		free(q->bq_buf);
   2486 		q->bq_buf = NULL;
   2487 		return;
   2488 	}
   2489 
   2490 	backend_query_append(q, new);
   2491 
   2492 	free(new);
   2493 }
   2494 
   2495 void
   2496 backend_query_free(backend_query_t *q)
   2497 {
   2498 	if (q != NULL) {
   2499 		if (q->bq_buf != NULL) {
   2500 			free(q->bq_buf);
   2501 		}
   2502 		free(q);
   2503 	}
   2504 }
   2505