Home | History | Annotate | Download | only in common
      1     0   stevel /*
      2     0   stevel  * CDDL HEADER START
      3     0   stevel  *
      4     0   stevel  * The contents of this file are subject to the terms of the
      5  3866      raf  * Common Development and Distribution License (the "License").
      6  3866      raf  * You may not use this file except in compliance with the License.
      7     0   stevel  *
      8     0   stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9     0   stevel  * or http://www.opensolaris.org/os/licensing.
     10     0   stevel  * See the License for the specific language governing permissions
     11     0   stevel  * and limitations under the License.
     12     0   stevel  *
     13     0   stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14     0   stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15     0   stevel  * If applicable, add the following below this CDDL HEADER, with the
     16     0   stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17     0   stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18     0   stevel  *
     19     0   stevel  * CDDL HEADER END
     20     0   stevel  */
     21  3866      raf 
     22     0   stevel /*
     23  8754    Surya  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24     0   stevel  * Use is subject to license terms.
     25     0   stevel  */
     26     0   stevel 
     27     0   stevel #include <mtmalloc.h>
     28     0   stevel #include "mtmalloc_impl.h"
     29     0   stevel #include <unistd.h>
     30     0   stevel #include <synch.h>
     31     0   stevel #include <thread.h>
     32  3866      raf #include <pthread.h>
     33     0   stevel #include <stdio.h>
     34     0   stevel #include <limits.h>
     35     0   stevel #include <errno.h>
     36     0   stevel #include <string.h>
     37     0   stevel #include <strings.h>
     38     0   stevel #include <sys/param.h>
     39     0   stevel #include <sys/sysmacros.h>
     40     0   stevel 
     41     0   stevel /*
     42     0   stevel  * To turn on the asserts just compile -DDEBUG
     43     0   stevel  */
     44     0   stevel 
     45     0   stevel #ifndef	DEBUG
     46     0   stevel #define	NDEBUG
     47     0   stevel #endif
     48     0   stevel 
     49     0   stevel #include <assert.h>
     50     0   stevel 
     51     0   stevel /*
     52     0   stevel  * The MT hot malloc implementation contained herein is designed to be
     53     0   stevel  * plug-compatible with the libc version of malloc. It is not intended
     54     0   stevel  * to replace that implementation until we decide that it is ok to break
     55     0   stevel  * customer apps (Solaris 3.0).
     56     0   stevel  *
     57     0   stevel  * For requests up to 2^^16, the allocator initializes itself into NCPUS
     58     0   stevel  * worth of chains of caches. When a memory request is made, the calling thread
     59     0   stevel  * is vectored into one of NCPUS worth of caches.  The LWP id gives us a cheap,
     60     0   stevel  * contention-reducing index to use, eventually, this should be replaced with
     61     0   stevel  * the actual CPU sequence number, when an interface to get it is available.
     62     0   stevel  *
     63     0   stevel  * Once the thread is vectored into one of the list of caches the real
     64     0   stevel  * allocation of the memory begins. The size is determined to figure out which
     65     0   stevel  * bucket the allocation should be satisfied from. The management of free
     66     0   stevel  * buckets is done via a bitmask. A free bucket is represented by a 1. The
     67     0   stevel  * first free bit represents the first free bucket. The position of the bit,
     68     0   stevel  * represents the position of the bucket in the arena.
     69     0   stevel  *
     70     0   stevel  * When the memory from the arena is handed out, the address of the cache
     71     0   stevel  * control structure is written in the word preceding the returned memory.
     72     0   stevel  * This cache control address is used during free() to mark the buffer free
     73     0   stevel  * in the cache control structure.
     74     0   stevel  *
     75     0   stevel  * When all available memory in a cache has been depleted, a new chunk of memory
     76     0   stevel  * is allocated via sbrk(). The new cache is allocated from this chunk of memory
     77     0   stevel  * and initialized in the function create_cache(). New caches are installed at
     78     0   stevel  * the front of a singly linked list of the same size memory pools. This helps
     79     0   stevel  * to ensure that there will tend to be available memory in the beginning of the
     80     0   stevel  * list.
     81     0   stevel  *
     82     0   stevel  * Long linked lists hurt performance. To decrease this effect, there is a
     83     0   stevel  * tunable, requestsize, that bumps up the sbrk allocation size and thus
     84     0   stevel  * increases the number of available blocks within an arena.  We also keep
     85     0   stevel  * a "hint" for each cache list, which is the last cache in the list allocated
     86     0   stevel  * from.  This lowers the cost of searching if there are a lot of fully
     87     0   stevel  * allocated blocks at the front of the list.
     88     0   stevel  *
     89     0   stevel  * For requests greater than 2^^16 (oversize allocations), there are two pieces
     90     0   stevel  * of overhead. There is the OVERHEAD used to hold the cache addr
     91     0   stevel  * (&oversize_list), plus an oversize_t structure to further describe the block.
     92     0   stevel  *
     93     0   stevel  * The oversize list is kept as defragmented as possible by coalescing
     94     0   stevel  * freed oversized allocations with adjacent neighbors.
     95     0   stevel  *
     96     0   stevel  * Addresses handed out are stored in a hash table, and are aligned on
     97     0   stevel  * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded-up
     98     0   stevel  * where necessary in order to achieve this. This eases the implementation of
     99     0   stevel  * MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
    100     0   stevel  *
    101     0   stevel  * A memalign allocation takes memalign header overhead.  There's two
    102     0   stevel  * types of memalign headers distinguished by MTMALLOC_MEMALIGN_MAGIC
    103     0   stevel  * and MTMALLOC_MEMALIGN_MIN_MAGIC.  When the size of memory taken to
    104     0   stevel  * get to the aligned address from malloc'ed address is the minimum size
    105     0   stevel  * OVERHEAD, we create a header taking only one OVERHEAD space with magic
    106     0   stevel  * number MTMALLOC_MEMALIGN_MIN_MAGIC, and we know by subtracting OVERHEAD
    107     0   stevel  * from memaligned address, we can get to the malloc'ed address. Otherwise,
    108     0   stevel  * we create a memalign header taking two OVERHEAD space, one stores
    109     0   stevel  * MTMALLOC_MEMALIGN_MAGIC magic number, the other one points back to the
    110     0   stevel  * malloc'ed address.
    111     0   stevel  */
    112     0   stevel 
    113     0   stevel #if defined(__i386) || defined(__amd64)
    114     0   stevel #include <arpa/inet.h>	/* for htonl() */
    115     0   stevel #endif
    116     0   stevel 
    117     0   stevel static void * morecore(size_t);
    118     0   stevel static void create_cache(cache_t *, size_t bufsize, uint_t hunks);
    119     0   stevel static void * malloc_internal(size_t, percpu_t *);
    120     0   stevel static void * oversize(size_t);
    121     0   stevel static oversize_t *find_oversize(size_t);
    122     0   stevel static void add_oversize(oversize_t *);
    123     0   stevel static void copy_pattern(uint32_t, void *, size_t);
    124     0   stevel static void * verify_pattern(uint32_t, void *, size_t);
    125     0   stevel static void reinit_cpu_list(void);
    126     0   stevel static void reinit_cache(cache_t *);
    127     0   stevel static void free_oversize(oversize_t *);
    128     0   stevel static oversize_t *oversize_header_alloc(uintptr_t, size_t);
    129     0   stevel 
    130     0   stevel /*
    131     0   stevel  * oversize hash table stuff
    132     0   stevel  */
    133     0   stevel #define	NUM_BUCKETS	67	/* must be prime */
    134     0   stevel #define	HASH_OVERSIZE(caddr)	((uintptr_t)(caddr) % NUM_BUCKETS)
    135     0   stevel oversize_t *ovsz_hashtab[NUM_BUCKETS];
    136     0   stevel 
    137     0   stevel #define	ALIGN(x, a)	((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
    138     0   stevel 			& ~((uintptr_t)(a) - 1)))
    139     0   stevel 
    140     0   stevel /* need this to deal with little endianness of x86 */
    141     0   stevel #if defined(__i386) || defined(__amd64)
    142     0   stevel #define	FLIP_EM(x)	htonl((x))
    143     0   stevel #else
    144     0   stevel #define	FLIP_EM(x)	(x)
    145     0   stevel #endif
    146     0   stevel 
    147     0   stevel #define	INSERT_ONLY			0
    148     0   stevel #define	COALESCE_LEFT			0x00000001
    149     0   stevel #define	COALESCE_RIGHT			0x00000002
    150     0   stevel #define	COALESCE_WITH_BOTH_SIDES	(COALESCE_LEFT | COALESCE_RIGHT)
    151     0   stevel 
    152     0   stevel #define	OVERHEAD	8	/* size needed to write cache addr */
    153     0   stevel #define	HUNKSIZE	8192	/* just a multiplier */
    154     0   stevel 
    155     0   stevel #define	MAX_CACHED_SHIFT	16	/* 64K is the max cached size */
    156     0   stevel #define	MAX_CACHED		(1 << MAX_CACHED_SHIFT)
    157     0   stevel #define	MIN_CACHED_SHIFT	4	/* smaller requests rounded up */
    158     0   stevel #define	MTMALLOC_MIN_ALIGN	8	/* min guaranteed alignment */
    159  1412  rm88369 
    160  1412  rm88369 /* maximum size before overflow */
    161  1412  rm88369 #define	MAX_MTMALLOC	(SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \
    162  1412  rm88369 			- OVSZ_HEADER_SIZE)
    163     0   stevel 
    164     0   stevel #define	NUM_CACHES	(MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
    165     0   stevel #define	CACHELIST_SIZE	ALIGN(NUM_CACHES * sizeof (cache_head_t), \
    166     0   stevel     CACHE_COHERENCY_UNIT)
    167     0   stevel 
    168     0   stevel #define	MINSIZE		9	/* for requestsize, tunable */
    169     0   stevel #define	MAXSIZE		256	/* arbitrary, big enough, for requestsize */
    170     0   stevel 
    171     0   stevel #define	FREEPATTERN	0xdeadbeef /* debug fill pattern for free buf */
    172     0   stevel #define	INITPATTERN	0xbaddcafe /* debug fill pattern for new buf */
    173     0   stevel 
    174     0   stevel #define	misaligned(p)	((unsigned)(p) & (sizeof (int) - 1))
    175     0   stevel #define	IS_OVERSIZE(x, y)	(((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))
    176     0   stevel 
    177     0   stevel static long requestsize = MINSIZE; /* 9 pages per cache; tunable; 9 is min */
    178     0   stevel 
    179     0   stevel static uint_t cpu_mask;
    180     0   stevel static curcpu_func curcpu;
    181     0   stevel 
    182     0   stevel static int32_t debugopt;
    183     0   stevel static int32_t reinit;
    184     0   stevel 
    185     0   stevel static percpu_t *cpu_list;
    186     0   stevel static oversize_t oversize_list;
    187  3866      raf static mutex_t oversize_lock = DEFAULTMUTEX;
    188     0   stevel 
    189  3866      raf static int ncpus = 0;
    190     0   stevel 
    191     0   stevel #define	MTMALLOC_OVERSIZE_MAGIC		((uintptr_t)&oversize_list)
    192     0   stevel #define	MTMALLOC_MEMALIGN_MAGIC		((uintptr_t)&oversize_list + 1)
    193     0   stevel #define	MTMALLOC_MEMALIGN_MIN_MAGIC	((uintptr_t)&oversize_list + 2)
    194     0   stevel 
    195     0   stevel /*
    196     0   stevel  * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
    197     0   stevel  * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
    198     0   stevel  * this is achieved.
    199     0   stevel  */
    200     0   stevel #define	OVSZ_SIZE		(ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
    201     0   stevel #define	OVSZ_HEADER_SIZE	(OVSZ_SIZE + OVERHEAD)
    202     0   stevel 
    203     0   stevel /*
    204     0   stevel  * memalign header takes 2 OVERHEAD space.  One for memalign magic, and the
    205     0   stevel  * other one points back to the start address of originally allocated space.
    206     0   stevel  */
    207     0   stevel #define	MEMALIGN_HEADER_SIZE	2 * OVERHEAD
    208     0   stevel #define	MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
    209     0   stevel 	if (shift == OVERHEAD)\
    210     0   stevel 		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
    211     0   stevel 			MTMALLOC_MEMALIGN_MIN_MAGIC; \
    212     0   stevel 	else {\
    213     0   stevel 		*((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
    214     0   stevel 			MTMALLOC_MEMALIGN_MAGIC; \
    215     0   stevel 		*((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \
    216     0   stevel 			(uintptr_t)malloc_addr; \
    217     0   stevel 	}
    218  7166      raf 
    219  7166      raf /*
    220  7166      raf  * Add big to the oversize hash table at the head of the relevant bucket.
    221  7166      raf  */
    222  7166      raf static void
    223  7166      raf insert_hash(oversize_t *big)
    224  7166      raf {
    225  7166      raf 	caddr_t ret = big->addr;
    226  7166      raf 	int bucket = HASH_OVERSIZE(ret);
    227  7166      raf 
    228  7166      raf 	assert(MUTEX_HELD(&oversize_lock));
    229  7166      raf 	big->hash_next = ovsz_hashtab[bucket];
    230  7166      raf 	ovsz_hashtab[bucket] = big;
    231  7166      raf }
    232     0   stevel 
    233     0   stevel void *
    234     0   stevel malloc(size_t bytes)
    235     0   stevel {
    236     0   stevel 	percpu_t *list_rotor;
    237     0   stevel 	uint_t	list_index;
    238     0   stevel 
    239     0   stevel 	if (bytes > MAX_CACHED)
    240     0   stevel 		return (oversize(bytes));
    241     0   stevel 
    242     0   stevel 	list_index = (curcpu() & cpu_mask);
    243     0   stevel 
    244     0   stevel 	list_rotor = &cpu_list[list_index];
    245     0   stevel 
    246     0   stevel 	return (malloc_internal(bytes, list_rotor));
    247     0   stevel }
    248     0   stevel 
    249     0   stevel void *
    250     0   stevel realloc(void * ptr, size_t bytes)
    251     0   stevel {
    252     0   stevel 	void *new, *data_ptr;
    253     0   stevel 	cache_t *cacheptr;
    254     0   stevel 	caddr_t mem;
    255     0   stevel 	size_t shift = 0;
    256     0   stevel 
    257     0   stevel 	if (ptr == NULL)
    258     0   stevel 		return (malloc(bytes));
    259     0   stevel 
    260     0   stevel 	if (bytes == 0) {
    261     0   stevel 		free(ptr);
    262     0   stevel 		return (NULL);
    263     0   stevel 	}
    264     0   stevel 
    265     0   stevel 	data_ptr = ptr;
    266     0   stevel 	mem = (caddr_t)ptr - OVERHEAD;
    267     0   stevel 
    268  8754    Surya 	/*
    269  8754    Surya 	 * Optimization possibility :
    270  8754    Surya 	 *	p = malloc(64);
    271  8754    Surya 	 *	q = realloc(p, 64);
    272  8754    Surya 	 * q can be same as p.
    273  8754    Surya 	 * Apply this optimization for the normal
    274  8754    Surya 	 * sized caches for now.
    275  8754    Surya 	 */
    276  8754    Surya 	if (*(uintptr_t *)mem < MTMALLOC_OVERSIZE_MAGIC ||
    277  8754    Surya 	    *(uintptr_t *)mem > MTMALLOC_MEMALIGN_MIN_MAGIC) {
    278  8754    Surya 		cacheptr = (cache_t *)*(uintptr_t *)mem;
    279  8754    Surya 		if (bytes <= (cacheptr->mt_size - OVERHEAD))
    280  8754    Surya 			return (ptr);
    281  8754    Surya 	}
    282  8754    Surya 
    283     0   stevel 	new = malloc(bytes);
    284     0   stevel 
    285     0   stevel 	if (new == NULL)
    286     0   stevel 		return (NULL);
    287     0   stevel 
    288     0   stevel 	/*
    289     0   stevel 	 * If new == ptr, ptr has previously been freed. Passing a freed pointer
    290     0   stevel 	 * to realloc() is not allowed - unless the caller specifically states
    291     0   stevel 	 * otherwise, in which case we must avoid freeing ptr (ie new) before we
    292     0   stevel 	 * return new. There is (obviously) no requirement to memcpy() ptr to
    293     0   stevel 	 * new before we return.
    294     0   stevel 	 */
    295     0   stevel 	if (new == ptr) {
    296     0   stevel 		if (!(debugopt & MTDOUBLEFREE))
    297     0   stevel 			abort();
    298     0   stevel 		return (new);
    299     0   stevel 	}
    300     0   stevel 
    301     0   stevel 	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
    302     0   stevel 		mem -= OVERHEAD;
    303     0   stevel 		ptr = (void *)*(uintptr_t *)mem;
    304     0   stevel 		mem = (caddr_t)ptr - OVERHEAD;
    305     0   stevel 		shift = (size_t)((uintptr_t)data_ptr - (uintptr_t)ptr);
    306     0   stevel 	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
    307     0   stevel 		ptr = (void *) mem;
    308     0   stevel 		mem -= OVERHEAD;
    309     0   stevel 		shift = OVERHEAD;
    310     0   stevel 	}
    311     0   stevel 
    312     0   stevel 	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
    313     0   stevel 		oversize_t *old;
    314     0   stevel 
    315     0   stevel 		old = (oversize_t *)(mem - OVSZ_SIZE);
    316     0   stevel 		(void) memcpy(new, data_ptr, MIN(bytes, old->size - shift));
    317     0   stevel 		free(ptr);
    318     0   stevel 		return (new);
    319     0   stevel 	}
    320     0   stevel 
    321     0   stevel 	cacheptr = (cache_t *)*(uintptr_t *)mem;
    322     0   stevel 
    323     0   stevel 	(void) memcpy(new, data_ptr,
    324  8754    Surya 	    MIN(cacheptr->mt_size - OVERHEAD - shift, bytes));
    325     0   stevel 	free(ptr);
    326     0   stevel 
    327     0   stevel 	return (new);
    328     0   stevel }
    329     0   stevel 
    330     0   stevel void *
    331     0   stevel calloc(size_t nelem, size_t bytes)
    332     0   stevel {
    333     0   stevel 	void * ptr;
    334     0   stevel 	size_t size = nelem * bytes;
    335     0   stevel 
    336     0   stevel 	ptr = malloc(size);
    337     0   stevel 	if (ptr == NULL)
    338     0   stevel 		return (NULL);
    339  3866      raf 	(void) memset(ptr, 0, size);
    340     0   stevel 
    341     0   stevel 	return (ptr);
    342     0   stevel }
    343     0   stevel 
    344     0   stevel void
    345     0   stevel free(void * ptr)
    346     0   stevel {
    347     0   stevel 	cache_t *cacheptr;
    348     0   stevel 	caddr_t mem;
    349     0   stevel 	int32_t i;
    350     0   stevel 	caddr_t freeblocks;
    351     0   stevel 	uintptr_t offset;
    352     0   stevel 	uchar_t mask;
    353     0   stevel 	int32_t which_bit, num_bytes;
    354     0   stevel 
    355     0   stevel 	if (ptr == NULL)
    356     0   stevel 		return;
    357     0   stevel 
    358     0   stevel 	mem = (caddr_t)ptr - OVERHEAD;
    359     0   stevel 
    360     0   stevel 	if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MAGIC) {
    361     0   stevel 		mem -= OVERHEAD;
    362     0   stevel 		ptr = (void *)*(uintptr_t *)mem;
    363     0   stevel 		mem = (caddr_t)ptr - OVERHEAD;
    364     0   stevel 	} else if (*(uintptr_t *)mem == MTMALLOC_MEMALIGN_MIN_MAGIC) {
    365     0   stevel 		ptr = (void *) mem;
    366     0   stevel 		mem -= OVERHEAD;
    367     0   stevel 	}
    368     0   stevel 
    369     0   stevel 	if (*(uintptr_t *)mem == MTMALLOC_OVERSIZE_MAGIC) {
    370     0   stevel 		oversize_t *big, **opp;
    371     0   stevel 		int bucket;
    372     0   stevel 
    373     0   stevel 		big = (oversize_t *)(mem - OVSZ_SIZE);
    374     0   stevel 		(void) mutex_lock(&oversize_lock);
    375     0   stevel 
    376     0   stevel 		bucket = HASH_OVERSIZE(big->addr);
    377     0   stevel 		for (opp = &ovsz_hashtab[bucket]; *opp != NULL;
    378     0   stevel 		    opp = &(*opp)->hash_next)
    379     0   stevel 			if (*opp == big)
    380     0   stevel 				break;
    381     0   stevel 
    382     0   stevel 		if (*opp == NULL) {
    383     0   stevel 			if (!(debugopt & MTDOUBLEFREE))
    384     0   stevel 				abort();
    385     0   stevel 			(void) mutex_unlock(&oversize_lock);
    386     0   stevel 			return;
    387     0   stevel 		}
    388     0   stevel 
    389     0   stevel 		*opp = big->hash_next;	/* remove big from the hash table */
    390     0   stevel 		big->hash_next = NULL;
    391     0   stevel 
    392     0   stevel 		if (debugopt & MTDEBUGPATTERN)
    393     0   stevel 			copy_pattern(FREEPATTERN, ptr, big->size);
    394     0   stevel 		add_oversize(big);
    395     0   stevel 		(void) mutex_unlock(&oversize_lock);
    396     0   stevel 		return;
    397     0   stevel 	}
    398     0   stevel 
    399     0   stevel 	cacheptr = (cache_t *)*(uintptr_t *)mem;
    400     0   stevel 	freeblocks = cacheptr->mt_freelist;
    401     0   stevel 
    402     0   stevel 	/*
    403     0   stevel 	 * This is the distance measured in bits into the arena.
    404     0   stevel 	 * The value of offset is in bytes but there is a 1-1 correlation
    405     0   stevel 	 * between distance into the arena and distance into the
    406     0   stevel 	 * freelist bitmask.
    407     0   stevel 	 */
    408     0   stevel 	offset = mem - cacheptr->mt_arena;
    409     0   stevel 
    410     0   stevel 	/*
    411     0   stevel 	 * i is total number of bits to offset into freelist bitmask.
    412     0   stevel 	 */
    413     0   stevel 
    414     0   stevel 	i = offset / cacheptr->mt_size;
    415     0   stevel 
    416     0   stevel 	num_bytes = i >> 3;
    417     0   stevel 
    418     0   stevel 	/*
    419     0   stevel 	 * which_bit is the bit offset into the byte in the freelist.
    420     0   stevel 	 * if our freelist bitmask looks like 0xf3 and we are freeing
    421     0   stevel 	 * block 5 (ie: the 6th block) our mask will be 0xf7 after
    422     0   stevel 	 * the free. Things go left to right that's why the mask is 0x80
    423     0   stevel 	 * and not 0x01.
    424     0   stevel 	 */
    425     0   stevel 	which_bit = i - (num_bytes << 3);
    426     0   stevel 
    427     0   stevel 	mask = 0x80 >> which_bit;
    428     0   stevel 
    429     0   stevel 	freeblocks += num_bytes;
    430     0   stevel 
    431     0   stevel 	if (debugopt & MTDEBUGPATTERN)
    432     0   stevel 		copy_pattern(FREEPATTERN, ptr, cacheptr->mt_size - OVERHEAD);
    433     0   stevel 
    434     0   stevel 	(void) mutex_lock(&cacheptr->mt_cache_lock);
    435     0   stevel 
    436     0   stevel 	if (*freeblocks & mask) {
    437     0   stevel 		if (!(debugopt & MTDOUBLEFREE))
    438     0   stevel 			abort();
    439     0   stevel 	} else {
    440     0   stevel 		*freeblocks |= mask;
    441     0   stevel 		cacheptr->mt_nfree++;
    442     0   stevel 	}
    443     0   stevel 
    444     0   stevel 	(void) mutex_unlock(&cacheptr->mt_cache_lock);
    445     0   stevel }
    446     0   stevel 
    447     0   stevel void *
    448     0   stevel memalign(size_t alignment, size_t size)
    449     0   stevel {
    450     0   stevel 	size_t alloc_size;
    451     0   stevel 	uintptr_t offset;
    452     0   stevel 	void *alloc_buf;
    453     0   stevel 	void *ret_buf;
    454     0   stevel 
    455  8754    Surya 	if (size == 0 || alignment == 0 || misaligned(alignment) ||
    456  8754    Surya 	    (alignment & (alignment - 1)) != 0) {
    457     0   stevel 		errno = EINVAL;
    458     0   stevel 		return (NULL);
    459     0   stevel 	}
    460     0   stevel 
    461     0   stevel 	/* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
    462     0   stevel 	if (alignment <= MTMALLOC_MIN_ALIGN)
    463     0   stevel 		return (malloc(size));
    464     0   stevel 
    465     0   stevel 	alloc_size = size + alignment - MTMALLOC_MIN_ALIGN;
    466     0   stevel 
    467     0   stevel 	if (alloc_size < size) { /* overflow */
    468     0   stevel 		errno = ENOMEM;
    469     0   stevel 		return (NULL);
    470     0   stevel 	}
    471     0   stevel 
    472     0   stevel 	alloc_buf = malloc(alloc_size);
    473     0   stevel 
    474     0   stevel 	if (alloc_buf == NULL)
    475     0   stevel 		/* malloc sets errno */
    476     0   stevel 		return (NULL);
    477     0   stevel 
    478     0   stevel 	/*
    479     0   stevel 	 * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
    480     0   stevel 	 * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since
    481     0   stevel 	 * we will use alloc_size to return the excess fragments to the free
    482     0   stevel 	 * list, we also round-up alloc_size if necessary.
    483     0   stevel 	 */
    484     0   stevel 	if ((alloc_size > MAX_CACHED) &&
    485     0   stevel 	    (alloc_size & (MTMALLOC_MIN_ALIGN - 1)))
    486     0   stevel 		alloc_size = ALIGN(alloc_size, MTMALLOC_MIN_ALIGN);
    487     0   stevel 
    488     0   stevel 	if ((offset = (uintptr_t)alloc_buf & (alignment - 1)) == 0) {
    489     0   stevel 		/* aligned correctly */
    490     0   stevel 
    491     0   stevel 		size_t frag_size = alloc_size -
    492  8754    Surya 		    (size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
    493     0   stevel 
    494     0   stevel 		/*
    495     0   stevel 		 * If the leftover piece of the memory > MAX_CACHED,
    496     0   stevel 		 * split off the piece and return it back to the freelist.
    497     0   stevel 		 */
    498     0   stevel 		if (IS_OVERSIZE(frag_size, alloc_size)) {
    499     0   stevel 			oversize_t *orig, *tail;
    500     0   stevel 			uintptr_t taddr;
    501     0   stevel 			size_t data_size;
    502     0   stevel 			taddr = ALIGN((uintptr_t)alloc_buf + size,
    503  8754    Surya 			    MTMALLOC_MIN_ALIGN);
    504     0   stevel 			data_size = taddr - (uintptr_t)alloc_buf;
    505     0   stevel 			orig = (oversize_t *)((uintptr_t)alloc_buf -
    506  8754    Surya 			    OVSZ_HEADER_SIZE);
    507     0   stevel 			frag_size = orig->size - data_size -
    508  8754    Surya 			    OVSZ_HEADER_SIZE;
    509     0   stevel 			orig->size = data_size;
    510     0   stevel 			tail = oversize_header_alloc(taddr, frag_size);
    511     0   stevel 			free_oversize(tail);
    512     0   stevel 		}
    513     0   stevel 		ret_buf = alloc_buf;
    514     0   stevel 	} else {
    515     0   stevel 		uchar_t	oversize_bits = 0;
    516     0   stevel 		size_t	head_sz, data_sz, tail_sz;
    517     0   stevel 		uintptr_t ret_addr, taddr, shift, tshift;
    518  7166      raf 		oversize_t *orig, *tail, *big;
    519     0   stevel 		size_t tsize;
    520     0   stevel 
    521     0   stevel 		/* needs to be aligned */
    522     0   stevel 		shift = alignment - offset;
    523     0   stevel 
    524     0   stevel 		assert(shift >= MTMALLOC_MIN_ALIGN);
    525     0   stevel 
    526     0   stevel 		ret_addr = ((uintptr_t)alloc_buf + shift);
    527     0   stevel 		ret_buf = (void *)ret_addr;
    528     0   stevel 
    529     0   stevel 		if (alloc_size <= MAX_CACHED) {
    530     0   stevel 			MEMALIGN_HEADER_ALLOC(ret_addr, shift, alloc_buf);
    531     0   stevel 			return (ret_buf);
    532     0   stevel 		}
    533     0   stevel 
    534     0   stevel 		/*
    535     0   stevel 		 * Only check for the fragments when the memory is allocated
    536     0   stevel 		 * from oversize_list.  Split off a fragment and return it
    537     0   stevel 		 * to the oversize freelist when it's > MAX_CACHED.
    538     0   stevel 		 */
    539     0   stevel 
    540     0   stevel 		head_sz = shift - MAX(MEMALIGN_HEADER_SIZE, OVSZ_HEADER_SIZE);
    541     0   stevel 
    542     0   stevel 		tail_sz = alloc_size -
    543  8754    Surya 		    (shift + size + MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
    544     0   stevel 
    545     0   stevel 		oversize_bits |= IS_OVERSIZE(head_sz, alloc_size) |
    546  8754    Surya 		    IS_OVERSIZE(size, alloc_size) << DATA_SHIFT |
    547  8754    Surya 		    IS_OVERSIZE(tail_sz, alloc_size) << TAIL_SHIFT;
    548     0   stevel 
    549     0   stevel 		switch (oversize_bits) {
    550     0   stevel 			case NONE_OVERSIZE:
    551     0   stevel 			case DATA_OVERSIZE:
    552     0   stevel 				MEMALIGN_HEADER_ALLOC(ret_addr, shift,
    553  8754    Surya 				    alloc_buf);
    554     0   stevel 				break;
    555     0   stevel 			case HEAD_OVERSIZE:
    556     0   stevel 				/*
    557     0   stevel 				 * If we can extend data > MAX_CACHED and have
    558     0   stevel 				 * head still > MAX_CACHED, we split head-end
    559     0   stevel 				 * as the case of head-end and data oversized,
    560     0   stevel 				 * otherwise just create memalign header.
    561     0   stevel 				 */
    562     0   stevel 				tsize = (shift + size) - (MAX_CACHED + 8 +
    563  8754    Surya 				    MTMALLOC_MIN_ALIGN + OVSZ_HEADER_SIZE);
    564     0   stevel 
    565     0   stevel 				if (!IS_OVERSIZE(tsize, alloc_size)) {
    566     0   stevel 					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
    567  8754    Surya 					    alloc_buf);
    568     0   stevel 					break;
    569     0   stevel 				} else {
    570     0   stevel 					tsize += OVSZ_HEADER_SIZE;
    571     0   stevel 					taddr = ALIGN((uintptr_t)alloc_buf +
    572  8754    Surya 					    tsize, MTMALLOC_MIN_ALIGN);
    573     0   stevel 					tshift = ret_addr - taddr;
    574     0   stevel 					MEMALIGN_HEADER_ALLOC(ret_addr, tshift,
    575  8754    Surya 					    taddr);
    576     0   stevel 					ret_addr = taddr;
    577     0   stevel 					shift = ret_addr - (uintptr_t)alloc_buf;
    578     0   stevel 				}
    579     0   stevel 				/* FALLTHROUGH */
    580     0   stevel 			case HEAD_AND_DATA_OVERSIZE:
    581     0   stevel 				/*
    582     0   stevel 				 * Split off the head fragment and
    583     0   stevel 				 * return it back to oversize freelist.
    584     0   stevel 				 * Create oversize header for the piece
    585     0   stevel 				 * of (data + tail fragment).
    586     0   stevel 				 */
    587     0   stevel 				orig = (oversize_t *)((uintptr_t)alloc_buf -
    588  8754    Surya 				    OVSZ_HEADER_SIZE);
    589  7166      raf 				big = oversize_header_alloc(ret_addr -
    590  8754    Surya 				    OVSZ_HEADER_SIZE, (orig->size - shift));
    591  7166      raf 				(void) mutex_lock(&oversize_lock);
    592  7166      raf 				insert_hash(big);
    593  7166      raf 				(void) mutex_unlock(&oversize_lock);
    594     0   stevel 				orig->size = shift - OVSZ_HEADER_SIZE;
    595     0   stevel 
    596     0   stevel 				/* free up the head fragment */
    597     0   stevel 				free_oversize(orig);
    598     0   stevel 				break;
    599     0   stevel 			case TAIL_OVERSIZE:
    600     0   stevel 				/*
    601     0   stevel 				 * If we can extend data > MAX_CACHED and have
    602     0   stevel 				 * tail-end still > MAX_CACHED, we split tail
    603     0   stevel 				 * end, otherwise just create memalign header.
    604     0   stevel 				 */
    605     0   stevel 				orig = (oversize_t *)((uintptr_t)alloc_buf -
    606  8754    Surya 				    OVSZ_HEADER_SIZE);
    607     0   stevel 				tsize =  orig->size - (MAX_CACHED + 8 +
    608  8754    Surya 				    shift + OVSZ_HEADER_SIZE +
    609  8754    Surya 				    MTMALLOC_MIN_ALIGN);
    610     0   stevel 				if (!IS_OVERSIZE(tsize, alloc_size)) {
    611     0   stevel 					MEMALIGN_HEADER_ALLOC(ret_addr, shift,
    612  8754    Surya 					    alloc_buf);
    613     0   stevel 					break;
    614     0   stevel 				} else {
    615     0   stevel 					size = MAX_CACHED + 8;
    616     0   stevel 				}
    617     0   stevel 				/* FALLTHROUGH */
    618     0   stevel 			case DATA_AND_TAIL_OVERSIZE:
    619     0   stevel 				/*
    620     0   stevel 				 * Split off the tail fragment and
    621     0   stevel 				 * return it back to oversize freelist.
    622     0   stevel 				 * Create memalign header and adjust
    623     0   stevel 				 * the size for the piece of
    624     0   stevel 				 * (head fragment + data).
    625     0   stevel 				 */
    626     0   stevel 				taddr = ALIGN(ret_addr + size,
    627  8754    Surya 				    MTMALLOC_MIN_ALIGN);
    628     0   stevel 				data_sz = (size_t)(taddr -
    629  8754    Surya 				    (uintptr_t)alloc_buf);
    630     0   stevel 				orig = (oversize_t *)((uintptr_t)alloc_buf -
    631  8754    Surya 				    OVSZ_HEADER_SIZE);
    632     0   stevel 				tsize = orig->size - data_sz;
    633     0   stevel 				orig->size = data_sz;
    634     0   stevel 				MEMALIGN_HEADER_ALLOC(ret_buf, shift,
    635  8754    Surya 				    alloc_buf);
    636     0   stevel 				tsize -= OVSZ_HEADER_SIZE;
    637     0   stevel 				tail = oversize_header_alloc(taddr,  tsize);
    638     0   stevel 				free_oversize(tail);
    639     0   stevel 				break;
    640     0   stevel 			case HEAD_AND_TAIL_OVERSIZE:
    641     0   stevel 				/*
    642     0   stevel 				 * Split off the head fragment.
    643     0   stevel 				 * We try to free up tail-end when we can
    644     0   stevel 				 * extend data size to (MAX_CACHED + 8)
    645     0   stevel 				 * and remain tail-end oversized.
    646     0   stevel 				 * The bottom line is all split pieces
    647     0   stevel 				 * should be oversize in size.
    648     0   stevel 				 */
    649     0   stevel 				orig = (oversize_t *)((uintptr_t)alloc_buf -
    650  8754    Surya 				    OVSZ_HEADER_SIZE);
    651     0   stevel 				tsize =  orig->size - (MAX_CACHED + 8 +
    652  8754    Surya 				    OVSZ_HEADER_SIZE + shift +
    653  8754    Surya 				    MTMALLOC_MIN_ALIGN);
    654     0   stevel 
    655     0   stevel 				if (!IS_OVERSIZE(tsize, alloc_size)) {
    656     0   stevel 					/*
    657     0   stevel 					 * If the chunk is not big enough
    658     0   stevel 					 * to make both data and tail oversize
    659     0   stevel 					 * we just keep them as one piece.
    660     0   stevel 					 */
    661  7166      raf 					big = oversize_header_alloc(ret_addr -
    662  8754    Surya 					    OVSZ_HEADER_SIZE,
    663  8754    Surya 					    orig->size - shift);
    664  7166      raf 					(void) mutex_lock(&oversize_lock);
    665  7166      raf 					insert_hash(big);
    666  7166      raf 					(void) mutex_unlock(&oversize_lock);
    667  8754    Surya 					orig->size = shift - OVSZ_HEADER_SIZE;
    668     0   stevel 					free_oversize(orig);
    669     0   stevel 					break;
    670     0   stevel 				} else {
    671     0   stevel 					/*
    672     0   stevel 					 * extend data size > MAX_CACHED
    673     0   stevel 					 * and handle it as head, data, tail
    674     0   stevel 					 * are all oversized.
    675     0   stevel 					 */
    676     0   stevel 					size = MAX_CACHED + 8;
    677     0   stevel 				}
    678     0   stevel 				/* FALLTHROUGH */
    679     0   stevel 			case ALL_OVERSIZE:
    680     0   stevel 				/*
    681     0   stevel 				 * split off the head and tail fragments,
    682     0   stevel 				 * return them back to the oversize freelist.
    683     0   stevel 				 * Alloc oversize header for data seg.
    684     0   stevel 				 */
    685     0   stevel 				orig = (oversize_t *)((uintptr_t)alloc_buf -
    686  8754    Surya 				    OVSZ_HEADER_SIZE);
    687     0   stevel 				tsize = orig->size;
    688     0   stevel 				orig->size = shift - OVSZ_HEADER_SIZE;
    689     0   stevel 				free_oversize(orig);
    690     0   stevel 
    691     0   stevel 				taddr = ALIGN(ret_addr + size,
    692  8754    Surya 				    MTMALLOC_MIN_ALIGN);
    693     0   stevel 				data_sz = taddr - ret_addr;
    694     0   stevel 				assert(tsize > (shift + data_sz +
    695  8754    Surya 				    OVSZ_HEADER_SIZE));
    696     0   stevel 				tail_sz = tsize -
    697  8754    Surya 				    (shift + data_sz + OVSZ_HEADER_SIZE);
    698     0   stevel 
    699     0   stevel 				/* create oversize header for data seg */
    700  7166      raf 				big = oversize_header_alloc(ret_addr -
    701  8754    Surya 				    OVSZ_HEADER_SIZE, data_sz);
    702  7166      raf 				(void) mutex_lock(&oversize_lock);
    703  7166      raf 				insert_hash(big);
    704  7166      raf 				(void) mutex_unlock(&oversize_lock);
    705     0   stevel 
    706     0   stevel 				/* create oversize header for tail fragment */
    707     0   stevel 				tail = oversize_header_alloc(taddr, tail_sz);
    708     0   stevel 				free_oversize(tail);
    709     0   stevel 				break;
    710     0   stevel 			default:
    711     0   stevel 				/* should not reach here */
    712     0   stevel 				assert(0);
    713     0   stevel 		}
    714     0   stevel 	}
    715     0   stevel 	return (ret_buf);
    716     0   stevel }
    717     0   stevel 
    718     0   stevel 
    /*
     * valloc - allocate size bytes aligned on a page boundary.
     *
     * Returns NULL when size is 0.  Otherwise the request is handed to
     * memalign() with the system page size as the alignment and memalign()'s
     * result is returned unchanged.
     *
     * The page size is obtained from sysconf(_SC_PAGESIZE) on first use and
     * cached.  A failed lookup (sysconf() returning a value <= 0) is never
     * cached: the call fails with errno set to EINVAL and the next call
     * repeats the lookup.
     */
    void *
    valloc(size_t size)
    {
    	static size_t pagesize;

    	if (size == 0)
    		return (NULL);

    	if (pagesize == 0) {
    		/* sysconf() returns a long; -1 signals failure */
    		long pgsz = sysconf(_SC_PAGESIZE);

    		if (pgsz <= 0) {
    			errno = EINVAL;
    			return (NULL);
    		}
    		pagesize = (size_t)pgsz;
    	}

    	return (memalign(pagesize, size));
    }
    732     0   stevel 
    733     0   stevel void
    734     0   stevel mallocctl(int cmd, long value)
    735     0   stevel {
    736     0   stevel 	switch (cmd) {
    737     0   stevel 
    738     0   stevel 	case MTDEBUGPATTERN:
    739     0   stevel 		/*
    740     0   stevel 		 * Reinitialize free blocks in case malloc() is called prior
    741     0   stevel 		 * to mallocctl().
    742     0   stevel 		 */
    743     0   stevel 		if (value && !(debugopt & cmd)) {
    744     0   stevel 			reinit++;
    745     0   stevel 			debugopt |= cmd;
    746     0   stevel 			reinit_cpu_list();
    747     0   stevel 		}
    748     0   stevel 		/*FALLTHRU*/
    749     0   stevel 	case MTDOUBLEFREE:
    750     0   stevel 	case MTINITBUFFER:
    751     0   stevel 		if (value)
    752     0   stevel 			debugopt |= cmd;
    753     0   stevel 		else
    754     0   stevel 			debugopt &= ~cmd;
    755     0   stevel 		break;
    756     0   stevel 	case MTCHUNKSIZE:
    757     0   stevel 		if (value >= MINSIZE && value <= MAXSIZE)
    758     0   stevel 			requestsize = value;
    759     0   stevel 		break;
    760     0   stevel 	default:
    761     0   stevel 		break;
    762     0   stevel 	}
    763     0   stevel }
    764     0   stevel 
    765     0   stevel /*
    766  3866      raf  * Initialization function, called from the init section of the library.
    767  3866      raf  * No locking is required here because we are single-threaded during
    768  3866      raf  * library initialization.
    769     0   stevel  */
    770  3866      raf static void
    771     0   stevel setup_caches(void)
    772     0   stevel {
    773     0   stevel 	uintptr_t oldbrk;
    774     0   stevel 	uintptr_t newbrk;
    775     0   stevel 
    776     0   stevel 	size_t cache_space_needed;
    777     0   stevel 	size_t padding;
    778     0   stevel 
    779     0   stevel 	curcpu_func new_curcpu;
    780     0   stevel 	uint_t new_cpu_mask;
    781     0   stevel 	percpu_t *new_cpu_list;
    782     0   stevel 
    783     0   stevel 	uint_t i, j;
    784     0   stevel 	uintptr_t list_addr;
    785     0   stevel 
    786  3866      raf 	/*
    787  3866      raf 	 * Get a decent "current cpu identifier", to be used to reduce
    788  3866      raf 	 * contention.  Eventually, this should be replaced by an interface
    789  3866      raf 	 * to get the actual CPU sequence number in libthread/liblwp.
    790  3866      raf 	 */
    791  3866      raf 	new_curcpu = (curcpu_func)thr_self;
    792  3866      raf 	if ((ncpus = 2 * sysconf(_SC_NPROCESSORS_CONF)) <= 0)
    793  3866      raf 		ncpus = 4; /* decent default value */
    794     0   stevel 
    795     0   stevel 	/* round ncpus up to a power of 2 */
    796     0   stevel 	while (ncpus & (ncpus - 1))
    797     0   stevel 		ncpus++;
    798     0   stevel 
    799     0   stevel 	new_cpu_mask = ncpus - 1;	/* create the cpu mask */
    800     0   stevel 
    801     0   stevel 	/*
    802     0   stevel 	 * We now do some magic with the brk.  What we want to get in the
    803     0   stevel 	 * end is a bunch of well-aligned stuff in a big initial allocation.
    804     0   stevel 	 * Along the way, we do sanity checks to make sure no one else has
    805     0   stevel 	 * touched the brk (which shouldn't happen, but it's always good to
    806     0   stevel 	 * check)
    807     0   stevel 	 *
    808     0   stevel 	 * First, make sure sbrk is sane, and store the current brk in oldbrk.
    809     0   stevel 	 */
    810     0   stevel 	oldbrk = (uintptr_t)sbrk(0);
    811  3866      raf 	if ((void *)oldbrk == (void *)-1)
    812  3866      raf 		abort();	/* sbrk is broken -- we're doomed. */
    813     0   stevel 
    814     0   stevel 	/*
    815     0   stevel 	 * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
    816     0   stevel 	 * the percpu structures and cache lists will be properly aligned.
    817     0   stevel 	 *
    818     0   stevel 	 *   2.  All hunks will be page-aligned, assuming HUNKSIZE >= PAGESIZE,
    819     0   stevel 	 *	so they can be paged out individually.
    820     0   stevel 	 */
    821     0   stevel 	newbrk = ALIGN(oldbrk, CACHE_COHERENCY_UNIT);
    822  3866      raf 	if (newbrk != oldbrk && (uintptr_t)sbrk(newbrk - oldbrk) != oldbrk)
    823  3866      raf 		abort();	/* sbrk is broken -- we're doomed. */
    824     0   stevel 
    825     0   stevel 	/*
    826     0   stevel 	 * For each cpu, there is one percpu_t and a list of caches
    827     0   stevel 	 */
    828     0   stevel 	cache_space_needed = ncpus * (sizeof (percpu_t) + CACHELIST_SIZE);
    829     0   stevel 
    830     0   stevel 	new_cpu_list = (percpu_t *)sbrk(cache_space_needed);
    831     0   stevel 
    832     0   stevel 	if (new_cpu_list == (percpu_t *)-1 ||
    833  3866      raf 	    (uintptr_t)new_cpu_list != newbrk)
    834  3866      raf 		abort();	/* sbrk is broken -- we're doomed. */
    835     0   stevel 
    836     0   stevel 	/*
    837     0   stevel 	 * Finally, align the brk to HUNKSIZE so that all hunks are
    838     0   stevel 	 * page-aligned, to avoid edge-effects.
    839     0   stevel 	 */
    840     0   stevel 
    841     0   stevel 	newbrk = (uintptr_t)new_cpu_list + cache_space_needed;
    842     0   stevel 
    843     0   stevel 	padding = ALIGN(newbrk, HUNKSIZE) - newbrk;
    844     0   stevel 
    845  3866      raf 	if (padding > 0 && (uintptr_t)sbrk(padding) != newbrk)
    846  3866      raf 		abort();	/* sbrk is broken -- we're doomed. */
    847     0   stevel 
    848     0   stevel 	list_addr = ((uintptr_t)new_cpu_list + (sizeof (percpu_t) * ncpus));
    849     0   stevel 
    850     0   stevel 	/* initialize the percpu list */
    851     0   stevel 	for (i = 0; i < ncpus; i++) {
    852     0   stevel 		new_cpu_list[i].mt_caches = (cache_head_t *)list_addr;
    853     0   stevel 		for (j = 0; j < NUM_CACHES; j++) {
    854     0   stevel 			new_cpu_list[i].mt_caches[j].mt_cache = NULL;
    855     0   stevel 			new_cpu_list[i].mt_caches[j].mt_hint = NULL;
    856     0   stevel 		}
    857     0   stevel 
    858  3866      raf 		(void) mutex_init(&new_cpu_list[i].mt_parent_lock,
    859  3866      raf 		    USYNC_THREAD, NULL);
    860     0   stevel 
    861     0   stevel 		/* get the correct cache list alignment */
    862     0   stevel 		list_addr += CACHELIST_SIZE;
    863     0   stevel 	}
    864     0   stevel 
    865     0   stevel 	/*
    866     0   stevel 	 * Initialize oversize listhead
    867     0   stevel 	 */
    868     0   stevel 	oversize_list.next_bysize = &oversize_list;
    869     0   stevel 	oversize_list.prev_bysize = &oversize_list;
    870     0   stevel 	oversize_list.next_byaddr = &oversize_list;
    871     0   stevel 	oversize_list.prev_byaddr = &oversize_list;
    872     0   stevel 	oversize_list.addr = NULL;
    873     0   stevel 	oversize_list.size = 0;		/* sentinal */
    874     0   stevel 
    875     0   stevel 	/*
    876  3866      raf 	 * Now install the global variables.
    877     0   stevel 	 */
    878     0   stevel 	curcpu = new_curcpu;
    879     0   stevel 	cpu_mask = new_cpu_mask;
    880     0   stevel 	cpu_list = new_cpu_list;
    881     0   stevel }
    882     0   stevel 
    883     0   stevel static void
    884     0   stevel create_cache(cache_t *cp, size_t size, uint_t chunksize)
    885     0   stevel {
    886     0   stevel 	long nblocks;
    887     0   stevel 
    888  3866      raf 	(void) mutex_init(&cp->mt_cache_lock, USYNC_THREAD, NULL);
    889     0   stevel 	cp->mt_size = size;
    890     0   stevel 	cp->mt_freelist = ((caddr_t)cp + sizeof (cache_t));
    891     0   stevel 	cp->mt_span = chunksize * HUNKSIZE - sizeof (cache_t);
    892     0   stevel 	cp->mt_hunks = chunksize;
    893     0   stevel 	/*
    894     0   stevel 	 * rough calculation. We will need to adjust later.
    895     0   stevel 	 */
    896     0   stevel 	nblocks = cp->mt_span / cp->mt_size;
    897     0   stevel 	nblocks >>= 3;
    898     0   stevel 	if (nblocks == 0) { /* less than 8 free blocks in this pool */
    899     0   stevel 		int32_t numblocks = 0;
    900     0   stevel 		long i = cp->mt_span;
    901     0   stevel 		size_t sub = cp->mt_size;
    902     0   stevel 		uchar_t mask = 0;
    903     0   stevel 
    904     0   stevel 		while (i > sub) {
    905     0   stevel 			numblocks++;
    906     0   stevel 			i -= sub;
    907     0   stevel 		}
    908     0   stevel 		nblocks = numblocks;
    909     0   stevel 		cp->mt_arena = (caddr_t)ALIGN(cp->mt_freelist + 8, 8);
    910     0   stevel 		cp->mt_nfree = numblocks;
    911     0   stevel 		while (numblocks--) {
    912     0   stevel 			mask |= 0x80 >> numblocks;
    913     0   stevel 		}
    914     0   stevel 		*(cp->mt_freelist) = mask;
    915     0   stevel 	} else {
    916     0   stevel 		cp->mt_arena = (caddr_t)ALIGN((caddr_t)cp->mt_freelist +
    917  8754    Surya 		    nblocks, 32);
    918     0   stevel 		/* recompute nblocks */
    919     0   stevel 		nblocks = (uintptr_t)((caddr_t)cp->mt_freelist +
    920  8754    Surya 		    cp->mt_span - cp->mt_arena) / cp->mt_size;
    921     0   stevel 		cp->mt_nfree = ((nblocks >> 3) << 3);
    922     0   stevel 		/* Set everything to free */
    923     0   stevel 		(void) memset(cp->mt_freelist, 0xff, nblocks >> 3);
    924     0   stevel 	}
    925     0   stevel 
    926     0   stevel 	if (debugopt & MTDEBUGPATTERN)
    927     0   stevel 		copy_pattern(FREEPATTERN, cp->mt_arena, cp->mt_size * nblocks);
    928     0   stevel 
    929     0   stevel 	cp->mt_next = NULL;
    930     0   stevel }
    931     0   stevel 
    932     0   stevel static void
    933     0   stevel reinit_cpu_list(void)
    934     0   stevel {
    935     0   stevel 	oversize_t *wp = oversize_list.next_bysize;
    936     0   stevel 	percpu_t *cpuptr;
    937     0   stevel 	cache_t *thiscache;
    938     0   stevel 	cache_head_t *cachehead;
    939     0   stevel 
    940     0   stevel 	/* Reinitialize free oversize blocks. */
    941     0   stevel 	(void) mutex_lock(&oversize_lock);
    942     0   stevel 	if (debugopt & MTDEBUGPATTERN)
    943     0   stevel 		for (; wp != &oversize_list; wp = wp->next_bysize)
    944     0   stevel 			copy_pattern(FREEPATTERN, wp->addr, wp->size);
    945     0   stevel 	(void) mutex_unlock(&oversize_lock);
    946     0   stevel 
    947     0   stevel 	/* Reinitialize free blocks. */
    948     0   stevel 	for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
    949     0   stevel 		(void) mutex_lock(&cpuptr->mt_parent_lock);
    950     0   stevel 		for (cachehead = &cpuptr->mt_caches[0]; cachehead <
    951  8754    Surya 		    &cpuptr->mt_caches[NUM_CACHES]; cachehead++) {
    952     0   stevel 			for (thiscache = cachehead->mt_cache; thiscache != NULL;
    953  8754    Surya 			    thiscache = thiscache->mt_next) {
    954     0   stevel 				(void) mutex_lock(&thiscache->mt_cache_lock);
    955     0   stevel 				if (thiscache->mt_nfree == 0) {
    956     0   stevel 					(void) mutex_unlock(
    957     0   stevel 					    &thiscache->mt_cache_lock);
    958     0   stevel 					continue;
    959     0   stevel 				}
    960     0   stevel 				if (thiscache != NULL)
    961     0   stevel 					reinit_cache(thiscache);
    962     0   stevel 				(void) mutex_unlock(&thiscache->mt_cache_lock);
    963     0   stevel 			}
    964     0   stevel 		}
    965     0   stevel 		(void) mutex_unlock(&cpuptr->mt_parent_lock);
    966     0   stevel 	}
    967     0   stevel 	reinit = 0;
    968     0   stevel }
    969     0   stevel 
    970     0   stevel static void
    971     0   stevel reinit_cache(cache_t *thiscache)
    972     0   stevel {
    973     0   stevel 	uint32_t *freeblocks; /* not a uintptr_t on purpose */
    974     0   stevel 	int32_t i, n;
    975     0   stevel 	caddr_t ret;
    976     0   stevel 
    977     0   stevel 	freeblocks = (uint32_t *)thiscache->mt_freelist;
    978     0   stevel 	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
    979     0   stevel 		if (*freeblocks & 0xffffffff) {
    980  8754    Surya 			for (i = 0; i < 32; i++) {
    981  8754    Surya 				if (FLIP_EM(*freeblocks) & (0x80000000 >> i)) {
    982  8754    Surya 					n = (uintptr_t)(((freeblocks -
    983  8754    Surya 					    (uint32_t *)thiscache->mt_freelist)
    984  8754    Surya 					    << 5) + i) * thiscache->mt_size;
    985  8754    Surya 					ret = thiscache->mt_arena + n;
    986  8754    Surya 					ret += OVERHEAD;
    987  8754    Surya 					copy_pattern(FREEPATTERN, ret,
    988  8754    Surya 					    thiscache->mt_size);
    989  8754    Surya 				}
    990     0   stevel 			}
    991     0   stevel 		}
    992     0   stevel 		freeblocks++;
    993     0   stevel 	}
    994     0   stevel }
    995     0   stevel 
    996     0   stevel static void *
    997     0   stevel malloc_internal(size_t size, percpu_t *cpuptr)
    998     0   stevel {
    999     0   stevel 	cache_head_t *cachehead;
   1000     0   stevel 	cache_t *thiscache, *hintcache;
   1001     0   stevel 	int32_t i, n, logsz, bucket;
   1002     0   stevel 	uint32_t index;
   1003     0   stevel 	uint32_t *freeblocks; /* not a uintptr_t on purpose */
   1004     0   stevel 	caddr_t ret;
   1005     0   stevel 
   1006     0   stevel 	logsz = MIN_CACHED_SHIFT;
   1007     0   stevel 
   1008     0   stevel 	while (size > (1 << logsz))
   1009     0   stevel 		logsz++;
   1010     0   stevel 
   1011     0   stevel 	bucket = logsz - MIN_CACHED_SHIFT;
   1012     0   stevel 
   1013     0   stevel 	(void) mutex_lock(&cpuptr->mt_parent_lock);
   1014     0   stevel 
   1015     0   stevel 	/*
   1016     0   stevel 	 * Find a cache of the appropriate size with free buffers.
   1017     0   stevel 	 *
   1018     0   stevel 	 * We don't need to lock each cache as we check their mt_nfree count,
   1019     0   stevel 	 * since:
   1020     0   stevel 	 *	1.  We are only looking for caches with mt_nfree > 0.  If a
   1021     0   stevel 	 *	   free happens during our search, it will increment mt_nfree,
   1022     0   stevel 	 *	   which will not effect the test.
   1023     0   stevel 	 *	2.  Allocations can decrement mt_nfree, but they can't happen
   1024     0   stevel 	 *	   as long as we hold mt_parent_lock.
   1025     0   stevel 	 */
   1026     0   stevel 
   1027     0   stevel 	cachehead = &cpuptr->mt_caches[bucket];
   1028     0   stevel 
   1029     0   stevel 	/* Search through the list, starting at the mt_hint */
   1030     0   stevel 	thiscache = cachehead->mt_hint;
   1031     0   stevel 
   1032     0   stevel 	while (thiscache != NULL && thiscache->mt_nfree == 0)
   1033     0   stevel 		thiscache = thiscache->mt_next;
   1034     0   stevel 
   1035     0   stevel 	if (thiscache == NULL) {
   1036     0   stevel 		/* wrap around -- search up to the hint */
   1037     0   stevel 		thiscache = cachehead->mt_cache;
   1038     0   stevel 		hintcache = cachehead->mt_hint;
   1039     0   stevel 
   1040     0   stevel 		while (thiscache != NULL && thiscache != hintcache &&
   1041     0   stevel 		    thiscache->mt_nfree == 0)
   1042     0   stevel 			thiscache = thiscache->mt_next;
   1043     0   stevel 
   1044     0   stevel 		if (thiscache == hintcache)
   1045     0   stevel 			thiscache = NULL;
   1046     0   stevel 	}
   1047     0   stevel 
   1048     0   stevel 
   1049     0   stevel 	if (thiscache == NULL) { /* there are no free caches */
   1050     0   stevel 		int32_t thisrequest = requestsize;
   1051     0   stevel 		int32_t buffer_size = (1 << logsz) + OVERHEAD;
   1052     0   stevel 
   1053     0   stevel 		thiscache = (cache_t *)morecore(thisrequest * HUNKSIZE);
   1054     0   stevel 
   1055     0   stevel 		if (thiscache == (cache_t *)-1) {
   1056  8754    Surya 			(void) mutex_unlock(&cpuptr->mt_parent_lock);
   1057  8754    Surya 			errno = EAGAIN;
   1058  8754    Surya 			return (NULL);
   1059     0   stevel 		}
   1060     0   stevel 		create_cache(thiscache, buffer_size, thisrequest);
   1061     0   stevel 
   1062     0   stevel 		/* link in the new block at the beginning of the list */
   1063     0   stevel 		thiscache->mt_next = cachehead->mt_cache;
   1064     0   stevel 		cachehead->mt_cache = thiscache;
   1065     0   stevel 	}
   1066     0   stevel 
   1067     0   stevel 	/* update the hint to the cache we found or created */
   1068     0   stevel 	cachehead->mt_hint = thiscache;
   1069     0   stevel 
   1070     0   stevel 	/* thiscache now points to a cache with available space */
   1071     0   stevel 	(void) mutex_lock(&thiscache->mt_cache_lock);
   1072     0   stevel 
   1073     0   stevel 	freeblocks = (uint32_t *)thiscache->mt_freelist;
   1074     0   stevel 	while (freeblocks < (uint32_t *)thiscache->mt_arena) {
   1075     0   stevel 		if (*freeblocks & 0xffffffff)
   1076     0   stevel 			break;
   1077     0   stevel 		freeblocks++;
   1078     0   stevel 		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
   1079     0   stevel 		    *freeblocks & 0xffffffff)
   1080     0   stevel 			break;
   1081     0   stevel 		freeblocks++;
   1082     0   stevel 		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
   1083     0   stevel 		    *freeblocks & 0xffffffff)
   1084     0   stevel 			break;
   1085     0   stevel 		freeblocks++;
   1086     0   stevel 		if (freeblocks < (uint32_t *)thiscache->mt_arena &&
   1087     0   stevel 		    *freeblocks & 0xffffffff)
   1088     0   stevel 			break;
   1089     0   stevel 		freeblocks++;
   1090     0   stevel 	}
   1091     0   stevel 
   1092     0   stevel 	/*
   1093     0   stevel 	 * the offset from mt_freelist to freeblocks is the offset into
   1094     0   stevel 	 * the arena. Be sure to include the offset into freeblocks
   1095     0   stevel 	 * of the bitmask. n is the offset.
   1096     0   stevel 	 */
   1097     0   stevel 	for (i = 0; i < 32; ) {
   1098     0   stevel 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
   1099     0   stevel 			break;
   1100     0   stevel 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
   1101     0   stevel 			break;
   1102     0   stevel 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
   1103     0   stevel 			break;
   1104     0   stevel 		if (FLIP_EM(*freeblocks) & (0x80000000 >> i++))
   1105     0   stevel 			break;
   1106     0   stevel 	}
   1107     0   stevel 	index = 0x80000000 >> --i;
   1108     0   stevel 
   1109     0   stevel 
   1110     0   stevel 	*freeblocks &= FLIP_EM(~index);
   1111     0   stevel 
   1112     0   stevel 	thiscache->mt_nfree--;
   1113     0   stevel 
   1114     0   stevel 	(void) mutex_unlock(&thiscache->mt_cache_lock);
   1115     0   stevel 	(void) mutex_unlock(&cpuptr->mt_parent_lock);
   1116     0   stevel 
   1117     0   stevel 	n = (uintptr_t)(((freeblocks - (uint32_t *)thiscache->mt_freelist) << 5)
   1118  8754    Surya 	    + i) * thiscache->mt_size;
   1119     0   stevel 	/*
   1120     0   stevel 	 * Now you have the offset in n, you've changed the free mask
   1121     0   stevel 	 * in the freelist. Nothing left to do but find the block
   1122     0   stevel 	 * in the arena and put the value of thiscache in the word
   1123     0   stevel 	 * ahead of the handed out address and return the memory
   1124     0   stevel 	 * back to the user.
   1125     0   stevel 	 */
   1126     0   stevel 	ret = thiscache->mt_arena + n;
   1127     0   stevel 
   1128     0   stevel 	/* Store the cache addr for this buf. Makes free go fast. */
   1129     0   stevel 	*(uintptr_t *)ret = (uintptr_t)thiscache;
   1130     0   stevel 
   1131     0   stevel 	/*
   1132     0   stevel 	 * This assert makes sure we don't hand out memory that is not
   1133     0   stevel 	 * owned by this cache.
   1134     0   stevel 	 */
   1135     0   stevel 	assert(ret + thiscache->mt_size <= thiscache->mt_freelist +
   1136  8754    Surya 	    thiscache->mt_span);
   1137     0   stevel 
   1138     0   stevel 	ret += OVERHEAD;
   1139     0   stevel 
   1140     0   stevel 	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
   1141     0   stevel 
   1142     0   stevel 	if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
   1143     0   stevel 		if (verify_pattern(FREEPATTERN, ret, size))
   1144     0   stevel 			abort();	/* reference after free */
   1145     0   stevel 
   1146     0   stevel 	if (debugopt & MTINITBUFFER)
   1147     0   stevel 		copy_pattern(INITPATTERN, ret, size);
   1148     0   stevel 	return ((void *)ret);
   1149     0   stevel }
   1150     0   stevel 
   /*
    * Grow the heap by bytes using sbrk().
    *
    * Returns the start of the new region, or (void *)-1 on failure.  A
    * request of exactly ULONG_MAX bytes is refused outright.
    *
    * sbrk() is handed a signed increment, so a request above LONG_MAX is
    * made in pieces: LONG_MAX bytes first, then the remainder.  If a piece
    * fails after the break has already moved, the first LONG_MAX bytes are
    * given back before failure is reported.
    */
   static void *
   morecore(size_t bytes)
   {
   	void *base;
   	intptr_t step;

   	/* the common case fits into a single call */
   	if (bytes <= LONG_MAX)
   		return (sbrk(bytes));

   	if (bytes == ULONG_MAX)
   		return ((void *)-1);

   	base = sbrk(0);
   	for (step = LONG_MAX; step > 0; step = bytes) {
   		if (sbrk(step) == (void *)-1) {
   			/* undo the part that did succeed */
   			if (base != sbrk(0))
   				(void) sbrk(-LONG_MAX);
   			return ((void *)-1);
   		}
   		bytes -= LONG_MAX;
   	}

   	return (base);
   }
   1181     0   stevel 
   1182     0   stevel 
   1183     0   stevel static void *
   1184     0   stevel oversize(size_t size)
   1185     0   stevel {
   1186     0   stevel 	caddr_t ret;
   1187     0   stevel 	oversize_t *big;
   1188     0   stevel 
   1189  1412  rm88369 	/* make sure we will not overflow */
   1190  1412  rm88369 	if (size > MAX_MTMALLOC) {
   1191  1412  rm88369 		errno = ENOMEM;
   1192  1412  rm88369 		return (NULL);
   1193  1412  rm88369 	}
   1194     0   stevel 
   1195     0   stevel 	/*
   1196     0   stevel 	 * Since we ensure every address we hand back is
   1197     0   stevel 	 * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
   1198     0   stevel 	 * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
   1199     0   stevel 	 * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN,
   1200     0   stevel 	 * particularly where coalescing occurs.
   1201     0   stevel 	 */
   1202     0   stevel 	size = ALIGN(size, MTMALLOC_MIN_ALIGN);
   1203  1412  rm88369 
   1204  1412  rm88369 	/*
   1205  1412  rm88369 	 * The idea with the global lock is that we are sure to
   1206  1412  rm88369 	 * block in the kernel anyway since given an oversize alloc
   1207  1412  rm88369 	 * we are sure to have to call morecore();
   1208  1412  rm88369 	 */
   1209  1412  rm88369 	(void) mutex_lock(&oversize_lock);
   1210     0   stevel 
   1211     0   stevel 	if ((big = find_oversize(size)) != NULL) {
   1212     0   stevel 		if (reinit == 0 && (debugopt & MTDEBUGPATTERN))
   1213     0   stevel 			if (verify_pattern(FREEPATTERN, big->addr, size))
   1214     0   stevel 				abort();	/* reference after free */
   1215     0   stevel 	} else {
   1216     0   stevel 		/* Get more 8-byte aligned memory from heap */
   1217     0   stevel 		ret = morecore(size + OVSZ_HEADER_SIZE);
   1218     0   stevel 		if (ret == (caddr_t)-1) {
   1219     0   stevel 			(void) mutex_unlock(&oversize_lock);
   1220     0   stevel 			errno = ENOMEM;
   1221     0   stevel 			return (NULL);
   1222     0   stevel 		}
   1223     0   stevel 		big = oversize_header_alloc((uintptr_t)ret, size);
   1224     0   stevel 	}
   1225     0   stevel 	ret = big->addr;
   1226     0   stevel 
   1227  7166      raf 	insert_hash(big);
   1228     0   stevel 
   1229     0   stevel 	if (debugopt & MTINITBUFFER)
   1230     0   stevel 		copy_pattern(INITPATTERN, ret, size);
   1231     0   stevel 
   1232     0   stevel 	(void) mutex_unlock(&oversize_lock);
   1233     0   stevel 	assert(((uintptr_t)ret & 7) == 0); /* are we 8 byte aligned */
   1234     0   stevel 	return ((void *)ret);
   1235     0   stevel }
   1236     0   stevel 
   1237     0   stevel static void
   1238     0   stevel insert_oversize(oversize_t *op, oversize_t *nx)
   1239     0   stevel {
   1240     0   stevel 	oversize_t *sp;
   1241     0   stevel 
   1242     0   stevel 	/* locate correct insertion point in size-ordered list */
   1243     0   stevel 	for (sp = oversize_list.next_bysize;
   1244     0   stevel 	    sp != &oversize_list && (op->size > sp->size);
   1245     0   stevel 	    sp = sp->next_bysize)
   1246     0   stevel 		;
   1247     0   stevel 
   1248     0   stevel 	/* link into size-ordered list */
   1249     0   stevel 	op->next_bysize = sp;
   1250     0   stevel 	op->prev_bysize = sp->prev_bysize;
   1251     0   stevel 	op->prev_bysize->next_bysize = op;
   1252     0   stevel 	op->next_bysize->prev_bysize = op;
   1253     0   stevel 
   1254     0   stevel 	/*
   1255     0   stevel 	 * link item into address-ordered list
   1256     0   stevel 	 * (caller provides insertion point as an optimization)
   1257     0   stevel 	 */
   1258     0   stevel 	op->next_byaddr = nx;
   1259     0   stevel 	op->prev_byaddr = nx->prev_byaddr;
   1260     0   stevel 	op->prev_byaddr->next_byaddr = op;
   1261     0   stevel 	op->next_byaddr->prev_byaddr = op;
   1262     0   stevel 
   1263     0   stevel }
   1264     0   stevel 
   1265     0   stevel static void
   1266     0   stevel unlink_oversize(oversize_t *lp)
   1267     0   stevel {
   1268     0   stevel 	/* unlink from address list */
   1269     0   stevel 	lp->prev_byaddr->next_byaddr = lp->next_byaddr;
   1270     0   stevel 	lp->next_byaddr->prev_byaddr = lp->prev_byaddr;
   1271     0   stevel 
   1272     0   stevel 	/* unlink from size list */
   1273     0   stevel 	lp->prev_bysize->next_bysize = lp->next_bysize;
   1274     0   stevel 	lp->next_bysize->prev_bysize = lp->prev_bysize;
   1275     0   stevel }
   1276     0   stevel 
   1277     0   stevel static void
   1278     0   stevel position_oversize_by_size(oversize_t *op)
   1279     0   stevel {
   1280     0   stevel 	oversize_t *sp;
   1281     0   stevel 
   1282     0   stevel 	if (op->size > op->next_bysize->size ||
   1283     0   stevel 	    op->size < op->prev_bysize->size) {
   1284     0   stevel 
   1285     0   stevel 		/* unlink from size list */
   1286     0   stevel 		op->prev_bysize->next_bysize = op->next_bysize;
   1287     0   stevel 		op->next_bysize->prev_bysize = op->prev_bysize;
   1288     0   stevel 
   1289     0   stevel 		/* locate correct insertion point in size-ordered list */
   1290     0   stevel 		for (sp = oversize_list.next_bysize;
   1291     0   stevel 		    sp != &oversize_list && (op->size > sp->size);
   1292     0   stevel 		    sp = sp->next_bysize)
   1293     0   stevel 			;
   1294     0   stevel 
   1295     0   stevel 		/* link into size-ordered list */
   1296     0   stevel 		op->next_bysize = sp;
   1297     0   stevel 		op->prev_bysize = sp->prev_bysize;
   1298     0   stevel 		op->prev_bysize->next_bysize = op;
   1299     0   stevel 		op->next_bysize->prev_bysize = op;
   1300     0   stevel 	}
   1301     0   stevel }
   1302     0   stevel 
   1303     0   stevel static void
   1304     0   stevel add_oversize(oversize_t *lp)
   1305     0   stevel {
   1306     0   stevel 	int merge_flags = INSERT_ONLY;
   1307     0   stevel 	oversize_t *nx;  	/* ptr to item right of insertion point */
   1308     0   stevel 	oversize_t *pv;  	/* ptr to item left of insertion point */
   1309     0   stevel 	uint_t size_lp, size_pv, size_nx;
   1310     0   stevel 	uintptr_t endp_lp, endp_pv, endp_nx;
   1311     0   stevel 
   1312     0   stevel 	/*
   1313     0   stevel 	 * Locate insertion point in address-ordered list
   1314     0   stevel 	 */
   1315     0   stevel 
   1316     0   stevel 	for (nx = oversize_list.next_byaddr;
   1317     0   stevel 	    nx != &oversize_list && (lp->addr > nx->addr);
   1318     0   stevel 	    nx = nx->next_byaddr)
   1319     0   stevel 		;
   1320     0   stevel 
   1321     0   stevel 	/*
   1322     0   stevel 	 * Determine how to add chunk to oversize freelist
   1323     0   stevel 	 */
   1324     0   stevel 
   1325     0   stevel 	size_lp = OVSZ_HEADER_SIZE + lp->size;
   1326     0   stevel 	endp_lp = ALIGN((uintptr_t)lp + size_lp, MTMALLOC_MIN_ALIGN);
   1327     0   stevel 	size_lp = endp_lp - (uintptr_t)lp;
   1328     0   stevel 
   1329     0   stevel 	pv = nx->prev_byaddr;
   1330     0   stevel 
   1331     0   stevel 	if (pv->size) {
   1332     0   stevel 
   1333     0   stevel 		size_pv = OVSZ_HEADER_SIZE + pv->size;
   1334     0   stevel 		endp_pv = ALIGN((uintptr_t)pv + size_pv,
   1335     0   stevel 		    MTMALLOC_MIN_ALIGN);
   1336     0   stevel 		size_pv = endp_pv - (uintptr_t)pv;
   1337     0   stevel 
   1338     0   stevel 		/* Check for adjacency with left chunk */
   1339     0   stevel 		if ((uintptr_t)lp == endp_pv)
   1340     0   stevel 			merge_flags |= COALESCE_LEFT;
   1341     0   stevel 	}
   1342     0   stevel 
   1343     0   stevel 	if (nx->size) {
   1344     0   stevel 
   1345  8754    Surya 		/* Check for adjacency with right chunk */
   1346  8754    Surya 		if ((uintptr_t)nx == endp_lp) {
   1347  8754    Surya 			size_nx = OVSZ_HEADER_SIZE + nx->size;
   1348  8754    Surya 			endp_nx = ALIGN((uintptr_t)nx + size_nx,
   1349  8754    Surya 			    MTMALLOC_MIN_ALIGN);
   1350  8754    Surya 			size_nx = endp_nx - (uintptr_t)nx;
   1351  8754    Surya 			merge_flags |= COALESCE_RIGHT;
   1352  8754    Surya 		}
   1353     0   stevel 	}
   1354     0   stevel 
   1355     0   stevel 	/*
   1356     0   stevel 	 * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with
   1357     0   stevel 	 * FREEPATTERN for lp->size bytes. If we can merge, the oversize
   1358     0   stevel 	 * header(s) that will also become part of the memory available for
   1359     0   stevel 	 * reallocation (ie lp and/or nx) must also be overwritten with
   1360     0   stevel 	 * FREEPATTERN or we will SIGABRT when this memory is next reallocated.
   1361     0   stevel 	 */
   1362     0   stevel 	switch (merge_flags) {
   1363     0   stevel 
   1364     0   stevel 	case INSERT_ONLY:		/* Coalescing not possible */
   1365     0   stevel 		insert_oversize(lp, nx);
   1366     0   stevel 		break;
   1367     0   stevel 	case COALESCE_LEFT:
   1368     0   stevel 		pv->size += size_lp;
   1369     0   stevel 		position_oversize_by_size(pv);
   1370     0   stevel 		if (debugopt & MTDEBUGPATTERN)
   1371     0   stevel 			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
   1372     0   stevel 		break;
   1373     0   stevel 	case COALESCE_RIGHT:
   1374     0   stevel 		unlink_oversize(nx);
   1375     0   stevel 		lp->size += size_nx;
   1376     0   stevel 		insert_oversize(lp, pv->next_byaddr);
   1377     0   stevel 		if (debugopt & MTDEBUGPATTERN)
   1378     0   stevel 			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
   1379     0   stevel 		break;
   1380     0   stevel 	case COALESCE_WITH_BOTH_SIDES:	/* Merge (with right) to the left */
   1381     0   stevel 		pv->size += size_lp + size_nx;
   1382     0   stevel 		unlink_oversize(nx);
   1383     0   stevel 		position_oversize_by_size(pv);
   1384     0   stevel 		if (debugopt & MTDEBUGPATTERN) {
   1385     0   stevel 			copy_pattern(FREEPATTERN, lp, OVSZ_HEADER_SIZE);
   1386     0   stevel 			copy_pattern(FREEPATTERN, nx, OVSZ_HEADER_SIZE);
   1387     0   stevel 		}
   1388     0   stevel 		break;
   1389     0   stevel 	}
   1390     0   stevel }
   1391     0   stevel 
   1392     0   stevel /*
   1393     0   stevel  * Find memory on our list that is at least size big. If we find a block that is
   1394     0   stevel  * big enough, we break it up and return the associated oversize_t struct back
   1395     0   stevel  * to the calling client. Any leftover piece of that block is returned to the
   1396     0   stevel  * freelist.
   1397     0   stevel  */
   1398     0   stevel static oversize_t *
   1399     0   stevel find_oversize(size_t size)
   1400     0   stevel {
   1401     0   stevel 	oversize_t *wp = oversize_list.next_bysize;
   1402     0   stevel 	while (wp != &oversize_list && size > wp->size)
   1403     0   stevel 		wp = wp->next_bysize;
   1404     0   stevel 
   1405     0   stevel 	if (wp == &oversize_list) /* empty list or nothing big enough */
   1406     0   stevel 		return (NULL);
   1407     0   stevel 	/* breaking up a chunk of memory */
   1408     0   stevel 	if ((long)((wp->size - (size + OVSZ_HEADER_SIZE + MTMALLOC_MIN_ALIGN)))
   1409     0   stevel 	    > MAX_CACHED) {
   1410     0   stevel 		caddr_t off;
   1411     0   stevel 		oversize_t *np;
   1412     0   stevel 		size_t osize;
   1413     0   stevel 		off = (caddr_t)ALIGN(wp->addr + size,
   1414     0   stevel 		    MTMALLOC_MIN_ALIGN);
   1415     0   stevel 		osize = wp->size;
   1416     0   stevel 		wp->size = (size_t)(off - wp->addr);
   1417     0   stevel 		np = oversize_header_alloc((uintptr_t)off,
   1418     0   stevel 		    osize - (wp->size + OVSZ_HEADER_SIZE));
   1419     0   stevel 		if ((long)np->size < 0)
   1420     0   stevel 			abort();
   1421     0   stevel 		unlink_oversize(wp);
   1422     0   stevel 		add_oversize(np);
   1423     0   stevel 	} else {
   1424     0   stevel 		unlink_oversize(wp);
   1425     0   stevel 	}
   1426     0   stevel 	return (wp);
   1427     0   stevel }
   1428     0   stevel 
   1429     0   stevel static void
   1430     0   stevel copy_pattern(uint32_t pattern, void *buf_arg, size_t size)
   1431     0   stevel {
   1432     0   stevel 	uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
   1433     0   stevel 	uint32_t *buf = buf_arg;
   1434     0   stevel 
   1435     0   stevel 	while (buf < bufend - 3) {
   1436     0   stevel 		buf[3] = buf[2] = buf[1] = buf[0] = pattern;
   1437     0   stevel 		buf += 4;
   1438     0   stevel 	}
   1439     0   stevel 	while (buf < bufend)
   1440     0   stevel 		*buf++ = pattern;
   1441     0   stevel }
   1442     0   stevel 
   1443     0   stevel static void *
   1444     0   stevel verify_pattern(uint32_t pattern, void *buf_arg, size_t size)
   1445     0   stevel {
   1446     0   stevel 	uint32_t *bufend = (uint32_t *)((char *)buf_arg + size);
   1447     0   stevel 	uint32_t *buf;
   1448     0   stevel 
   1449     0   stevel 	for (buf = buf_arg; buf < bufend; buf++)
   1450     0   stevel 		if (*buf != pattern)
   1451     0   stevel 			return (buf);
   1452     0   stevel 	return (NULL);
   1453     0   stevel }
   1454     0   stevel 
   1455     0   stevel static void
   1456     0   stevel free_oversize(oversize_t *ovp)
   1457     0   stevel {
   1458     0   stevel 	assert(((uintptr_t)ovp->addr & 7) == 0); /* are we 8 byte aligned */
   1459     0   stevel 	assert(ovp->size > MAX_CACHED);
   1460     0   stevel 
   1461     0   stevel 	ovp->next_bysize = ovp->prev_bysize = NULL;
   1462     0   stevel 	ovp->next_byaddr = ovp->prev_byaddr = NULL;
   1463     0   stevel 	(void) mutex_lock(&oversize_lock);
   1464     0   stevel 	add_oversize(ovp);
   1465     0   stevel 	(void) mutex_unlock(&oversize_lock);
   1466     0   stevel }
   1467     0   stevel 
   1468     0   stevel static oversize_t *
   1469     0   stevel oversize_header_alloc(uintptr_t mem, size_t size)
   1470     0   stevel {
   1471     0   stevel 	oversize_t *ovsz_hdr;
   1472     0   stevel 
   1473     0   stevel 	assert(size > MAX_CACHED);
   1474     0   stevel 
   1475     0   stevel 	ovsz_hdr = (oversize_t *)mem;
   1476     0   stevel 	ovsz_hdr->prev_bysize = NULL;
   1477     0   stevel 	ovsz_hdr->next_bysize = NULL;
   1478     0   stevel 	ovsz_hdr->prev_byaddr = NULL;
   1479     0   stevel 	ovsz_hdr->next_byaddr = NULL;
   1480     0   stevel 	ovsz_hdr->hash_next = NULL;
   1481     0   stevel 	ovsz_hdr->size = size;
   1482     0   stevel 	mem += OVSZ_SIZE;
   1483     0   stevel 	*(uintptr_t *)mem = MTMALLOC_OVERSIZE_MAGIC;
   1484     0   stevel 	mem += OVERHEAD;
   1485     0   stevel 	assert(((uintptr_t)mem & 7) == 0); /* are we 8 byte aligned */
   1486     0   stevel 	ovsz_hdr->addr = (caddr_t)mem;
   1487     0   stevel 	return (ovsz_hdr);
   1488     0   stevel }
   1489  3866      raf 
   1490  3866      raf static void
   1491  3866      raf malloc_prepare()
   1492  3866      raf {
   1493  3866      raf 	percpu_t *cpuptr;
   1494  3866      raf 	cache_head_t *cachehead;
   1495  3866      raf 	cache_t *thiscache;
   1496  3866      raf 
   1497  3866      raf 	(void) mutex_lock(&oversize_lock);
   1498  3866      raf 	for (cpuptr = &cpu_list[0]; cpuptr < &cpu_list[ncpus]; cpuptr++) {
   1499  3866      raf 		(void) mutex_lock(&cpuptr->mt_parent_lock);
   1500  3866      raf 		for (cachehead = &cpuptr->mt_caches[0];
   1501  3866      raf 		    cachehead < &cpuptr->mt_caches[NUM_CACHES];
   1502  3866      raf 		    cachehead++) {
   1503  3866      raf 			for (thiscache = cachehead->mt_cache;
   1504  3866      raf 			    thiscache != NULL;
   1505  3866      raf 			    thiscache = thiscache->mt_next) {
   1506  3866      raf 				(void) mutex_lock(
   1507  3866      raf 				    &thiscache->mt_cache_lock);
   1508  3866      raf 			}
   1509  3866      raf 		}
   1510  3866      raf 	}
   1511  3866      raf }
   1512  3866      raf 
   1513  3866      raf static void
   1514  3866      raf malloc_release()
   1515  3866      raf {
   1516  3866      raf 	percpu_t *cpuptr;
   1517  3866      raf 	cache_head_t *cachehead;
   1518  3866      raf 	cache_t *thiscache;
   1519  3866      raf 
   1520  3866      raf 	for (cpuptr = &cpu_list[ncpus - 1]; cpuptr >= &cpu_list[0]; cpuptr--) {
   1521  3866      raf 		for (cachehead = &cpuptr->mt_caches[NUM_CACHES - 1];
   1522  3866      raf 		    cachehead >= &cpuptr->mt_caches[0];
   1523  3866      raf 		    cachehead--) {
   1524  3866      raf 			for (thiscache = cachehead->mt_cache;
   1525  3866      raf 			    thiscache != NULL;
   1526  3866      raf 			    thiscache = thiscache->mt_next) {
   1527  3866      raf 				(void) mutex_unlock(
   1528  3866      raf 				    &thiscache->mt_cache_lock);
   1529  3866      raf 			}
   1530  3866      raf 		}
   1531  3866      raf 		(void) mutex_unlock(&cpuptr->mt_parent_lock);
   1532  3866      raf 	}
   1533  3866      raf 	(void) mutex_unlock(&oversize_lock);
   1534  3866      raf }
   1535  3866      raf 
   1536  3866      raf #pragma init(malloc_init)
   1537  3866      raf static void
   1538  3866      raf malloc_init(void)
   1539  3866      raf {
   1540  3866      raf 	/*
   1541  3866      raf 	 * This works in the init section for this library
   1542  3866      raf 	 * because setup_caches() doesn't call anything in libc
   1543  3866      raf 	 * that calls malloc().  If it did, disaster would ensue.
   1544  3866      raf 	 *
   1545  3866      raf 	 * For this to work properly, this library must be the first
   1546  3866      raf 	 * one to have its init section called (after libc) by the
   1547  3866      raf 	 * dynamic linker.  If some other library's init section
   1548  3866      raf 	 * ran first and called malloc(), disaster would ensue.
   1549  3866      raf 	 * Because this is an interposer library for malloc(), the
   1550  3866      raf 	 * dynamic linker arranges for its init section to run first.
   1551  3866      raf 	 */
   1552  3866      raf 	(void) setup_caches();
   1553  3866      raf 
   1554  3866      raf 	(void) pthread_atfork(malloc_prepare, malloc_release, malloc_release);
   1555  3866      raf }
   1556