Home | History | Annotate | Download | only in ml
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Copyright (c) 2009, Intel Corporation
     28  * All rights reserved.
     29  */
     30 
     31 /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
     32 /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
     33 /*         All Rights Reserved						*/
     34 
     35 /*       Copyright (c) 1987, 1988 Microsoft Corporation			*/
     36 /*         All Rights Reserved						*/
     37 
     38 #include <sys/errno.h>
     39 #include <sys/asm_linkage.h>
     40 
     41 #if defined(__lint)
     42 #include <sys/types.h>
     43 #include <sys/systm.h>
     44 #else	/* __lint */
     45 #include "assym.h"
     46 #endif	/* __lint */
     47 
     48 #define	KCOPY_MIN_SIZE	128	/* Must be >= 16 bytes */
     49 #define	XCOPY_MIN_SIZE	128	/* Must be >= 16 bytes */
     50 /*
     51  * Non-temporal access (NTA) alignment requirement
     52  */
     53 #define	NTA_ALIGN_SIZE	4	/* Must be at least 4-byte aligned */
     54 #define	NTA_ALIGN_MASK	_CONST(NTA_ALIGN_SIZE-1)
     55 #define	COUNT_ALIGN_SIZE	16	/* Must be at least 16-byte aligned */
     56 #define	COUNT_ALIGN_MASK	_CONST(COUNT_ALIGN_SIZE-1)
     57 
     58 /*
     59  * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
     60  * "rep smovq" for large sizes. Performance data shows that many calls to
     61  * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for
     62  * these small sizes unrolled code is used. For medium sizes loops writing
     63  * 64-bytes per loop are used. Transition points were determined experimentally.
     64  */
     65 #define BZERO_USE_REP	(1024)
     66 #define BCOPY_DFLT_REP	(128)
     67 #define	BCOPY_NHM_REP	(768)
     68 
     69 /*
     70  * Copy a block of storage, returning an error code if `from' or
     71  * `to' takes a kernel pagefault which cannot be resolved.
     72  * Returns errno value on pagefault error, 0 if all ok
     73  */
     74 
     75 #if defined(__lint)
     76 
     77 /* ARGSUSED */
        /*
         * Lint-only C stub: supplies the C prototype of kcopy() and a trivial
         * body for lint.  The real implementation is the assembly in the
         * !__lint branch below.
         */
     78 int
     79 kcopy(const void *from, void *to, size_t count)
     80 { return (0); }
     81 
     82 #else	/* __lint */
     83 
     84 	.globl	kernelbase
     85 	.globl	postbootkernelbase
     86 
     87 #if defined(__amd64)
     88 
     89 	ENTRY(kcopy)
        	/*
        	 * int kcopy(const void *from, void *to, size_t count)
        	 *
        	 * amd64 register use:
        	 *	%rdi = from, %rsi = to, %rdx = count (handed unchanged
        	 *	to bcopy_altentry)
        	 *	%rcx = address installed as the thread's lofault
        	 *	%r9  = current thread pointer, %r11 = saved lofault
        	 *
        	 * Returns 0 in %eax after a successful copy.  After an
        	 * unresolved fault, execution resumes at _kcopy_copyerr with
        	 * the errno value already in %rax.
        	 */
     90 	pushq	%rbp
     91 	movq	%rsp, %rbp
     92 #ifdef DEBUG
        	/*
        	 * Panic unless both from and to are at or above
        	 * postbootkernelbase.
        	 */
     93 	cmpq	postbootkernelbase(%rip), %rdi 		/* %rdi = from */
     94 	jb	0f
     95 	cmpq	postbootkernelbase(%rip), %rsi		/* %rsi = to */
     96 	jnb	1f
     97 0:	leaq	.kcopy_panic_msg(%rip), %rdi
     98 	xorl	%eax, %eax		/* panic is varargs: no SSE args */
     99 	call	panic
    100 1:
    101 #endif
    102 	/*
    103 	 * pass lofault value as 4th argument to do_copy_fault
    104 	 */
    105 	leaq	_kcopy_copyerr(%rip), %rcx
    106 	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread addr */
    107 
        	/*
        	 * do_copy_fault is also entered by a jump from kcopy_nta, which
        	 * arrives with %rcx (lofault value) and %r9 (thread) already set
        	 * and with the same %rbp frame established.
        	 */
    108 do_copy_fault:
    109 	movq	T_LOFAULT(%r9), %r11	/* save the current lofault */
    110 	movq	%rcx, T_LOFAULT(%r9)	/* new lofault */
    111 	call	bcopy_altentry		/* must leave %r9 and %r11 intact */
    112 	xorl	%eax, %eax		/* return 0 (success) */
    113 
    114 	/*
    115 	 * A fault during do_copy_fault is indicated through an errno value
    116 	 * in %rax and we iretq from the trap handler to here.
    117 	 */
    118 _kcopy_copyerr:
    119 	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
        	/*
        	 * leave reloads %rsp from %rbp, so whatever is still on the
        	 * stack below the frame (e.g. the bcopy_altentry return address
        	 * on the fault path) is dropped here.
        	 */
    120 	leave
    121 	ret
    122 	SET_SIZE(kcopy)
    123 
    124 #elif defined(__i386)
    125 
    126 #define	ARG_FROM	8
    127 #define	ARG_TO		12
    128 #define	ARG_COUNT	16
    129 
    130 	ENTRY(kcopy)
        	/*
        	 * int kcopy(const void *from, void *to, size_t count)
        	 *
        	 * i386 version: arguments are read from the stack through the
        	 * ARG_FROM/ARG_TO/ARG_COUNT offsets relative to %ebp.
        	 *	%eax = lofault value on entry to do_copy_fault, return
        	 *	       value on exit (0, or errno after a fault)
        	 *	%edx = current thread pointer
        	 */
    131 #ifdef DEBUG
        	/*
        	 * Temporary frame so the ARG_* offsets are valid for the checks;
        	 * it is popped again at 1: before the real frame is built.
        	 * Panic unless both from and to are >= postbootkernelbase.
        	 */
    132 	pushl	%ebp
    133 	movl	%esp, %ebp
    134 	movl	postbootkernelbase, %eax
    135 	cmpl	%eax, ARG_FROM(%ebp)
    136 	jb	0f
    137 	cmpl	%eax, ARG_TO(%ebp)
    138 	jnb	1f
    139 0:	pushl	$.kcopy_panic_msg
    140 	call	panic
    141 1:	popl	%ebp
    142 #endif
    143 	lea	_kcopy_copyerr, %eax	/* lofault value */
    144 	movl	%gs:CPU_THREAD, %edx
    145 
    146 do_copy_fault:
    147 	pushl	%ebp
    148 	movl	%esp, %ebp		/* setup stack frame */
    149 	pushl	%esi
    150 	pushl	%edi			/* save registers */
    151 
    152 	movl	T_LOFAULT(%edx), %edi
    153 	pushl	%edi			/* save the current lofault */
    154 	movl	%eax, T_LOFAULT(%edx)	/* new lofault */
    155 
        	/* copy count / 4 dwords, then the remaining count % 4 bytes */
    156 	movl	ARG_COUNT(%ebp), %ecx
    157 	movl	ARG_FROM(%ebp), %esi
    158 	movl	ARG_TO(%ebp), %edi
    159 	shrl	$2, %ecx		/* word count */
    160 	rep
    161 	  smovl
    162 	movl	ARG_COUNT(%ebp), %ecx
    163 	andl	$3, %ecx		/* bytes left over */
    164 	rep
    165 	  smovb
    166 	xorl	%eax, %eax		/* return 0 (success) */
    167 
    168 	/*
    169 	 * A fault during do_copy_fault is indicated through an errno value
    170 	 * in %eax and we iret from the trap handler to here.
    171 	 */
    172 _kcopy_copyerr:
        	/* unwind in reverse push order: lofault, %edi, %esi, %ebp */
    173 	popl	%ecx
    174 	popl	%edi
    175 	movl	%ecx, T_LOFAULT(%edx)	/* restore the original lofault */
    176 	popl	%esi
    177 	popl	%ebp
    178 	ret
    179 	SET_SIZE(kcopy)
    180 
    181 #undef	ARG_FROM
    182 #undef	ARG_TO
    183 #undef	ARG_COUNT
    184 
    185 #endif	/* __i386 */
    186 #endif	/* __lint */
    187 
    188 #if defined(__lint)
    189 
    190 /*
    191  * Copy a block of storage.  Similar to kcopy but uses non-temporal
    192  * instructions.
    193  */
    194 
    195 /* ARGSUSED */
        /*
         * Lint-only C stub: supplies the C prototype of kcopy_nta() and a
         * trivial body for lint.  The real implementation is the assembly in
         * the !__lint branch below.
         */
    196 int
    197 kcopy_nta(const void *from, void *to, size_t count, int copy_cached)
    198 { return (0); }
    199 
    200 #else	/* __lint */
    201 
    202 #if defined(__amd64)
    203 
    204 #define	COPY_LOOP_INIT(src, dst, cnt)	\
    205 	addq	cnt, src;			\
    206 	addq	cnt, dst;			\
    207 	shrq	$3, cnt;			\
    208 	neg	cnt
    209 
        	/*
        	 * COPY_LOOP_INIT (above) prepares a negative-index loop: src and
        	 * dst are advanced by cnt bytes so they point just past the end
        	 * of each buffer, and cnt is converted to the negated number of
        	 * 8-byte units.  The loop body then indexes (base, cnt, 8) while
        	 * cnt counts up towards zero.
        	 */
    210 	/* Copy 16 bytes per loop.  Uses %rax and %r8 */
        	/*
        	 * COPY_LOOP_BODY prefetches 0x100 bytes ahead of the current
        	 * source position, loads two quadwords, stores them with movnti
        	 * and adds 2 to cnt.  The final addq is the last flag-setting
        	 * instruction, so the invoker can test ZF (jnz) to loop until
        	 * cnt reaches zero.
        	 */
    211 #define	COPY_LOOP_BODY(src, dst, cnt)	\
    212 	prefetchnta	0x100(src, cnt, 8);	\
    213 	movq	(src, cnt, 8), %rax;		\
    214 	movq	0x8(src, cnt, 8), %r8;		\
    215 	movnti	%rax, (dst, cnt, 8);		\
    216 	movnti	%r8, 0x8(dst, cnt, 8);		\
    217 	addq	$2, cnt
    218 
    219 	ENTRY(kcopy_nta)
        	/*
        	 * int kcopy_nta(const void *from, void *to, size_t count,
        	 *     int copy_cached)
        	 *
        	 * amd64 register use:
        	 *	%rdi = from, %rsi = to, %rdx = count
        	 *	%ecx = copy_cached on entry; %rcx is then reused for the
        	 *	       lofault value
        	 *	%r9  = current thread pointer, %r11 = saved lofault
        	 *	%r10 = scratch for the alignment test
        	 *
        	 * The non-temporal loop is used only when copy_cached is 0,
        	 * count >= KCOPY_MIN_SIZE, from/to are NTA_ALIGN_SIZE aligned and
        	 * count is COUNT_ALIGN_SIZE aligned.  Every other case jumps to
        	 * do_copy_fault (the regular kcopy path).
        	 *
        	 * Returns 0 in %eax on success; after an unresolved fault
        	 * execution resumes at _kcopy_nta_copyerr with errno in %rax.
        	 */
    220 	pushq	%rbp
    221 	movq	%rsp, %rbp
    222 #ifdef DEBUG
        	/*
        	 * Panic unless both from and to are at or above
        	 * postbootkernelbase.
        	 */
    223 	cmpq	postbootkernelbase(%rip), %rdi 		/* %rdi = from */
    224 	jb	0f
    225 	cmpq	postbootkernelbase(%rip), %rsi		/* %rsi = to */
    226 	jnb	1f
    227 0:	leaq	.kcopy_panic_msg(%rip), %rdi
    228 	xorl	%eax, %eax		/* panic is varargs: no SSE args */
    229 	call	panic
    230 1:
    231 #endif
    232 
    233 	movq	%gs:CPU_THREAD, %r9
        	/*
        	 * copy_cached is a C int, so only %ecx is defined by the ABI;
        	 * the upper half of %rcx may hold anything.  Test 32 bits only.
        	 */
    234 	cmpl	$0, %ecx		/* No non-temporal access? */
    235 	/*
    236 	 * pass lofault value as 4th argument to do_copy_fault
    237 	 */
    238 	leaq	_kcopy_nta_copyerr(%rip), %rcx	/* doesn't set rflags */
    239 	jnz	do_copy_fault		/* use regular access */
    240 	/*
    241 	 * Make sure cnt is >= KCOPY_MIN_SIZE
    242 	 */
    243 	cmpq	$KCOPY_MIN_SIZE, %rdx
    244 	jb	do_copy_fault
    245 
    246 	/*
    247 	 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
    248 	 * count is COUNT_ALIGN_SIZE aligned.
    249 	 */
    250 	movq	%rdi, %r10
    251 	orq	%rsi, %r10
    252 	andq	$NTA_ALIGN_MASK, %r10
    253 	orq	%rdx, %r10
    254 	andq	$COUNT_ALIGN_MASK, %r10
    255 	jnz	do_copy_fault
    256 
        	/*
        	 * Alternate entry: expects %rdi/%rsi/%rdx as above, %rcx = the
        	 * lofault value to install, and an %rbp frame already set up
        	 * (the exit path below uses leave).
        	 */
    257 	ALTENTRY(do_copy_fault_nta)
    258 	movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
    259 	movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
    260 	movq    %rcx, T_LOFAULT(%r9)    /* new lofault */
    261 
    262 	/*
    263 	 * COPY_LOOP_BODY uses %rax and %r8
    264 	 */
    265 	COPY_LOOP_INIT(%rdi, %rsi, %rdx)
    266 2:	COPY_LOOP_BODY(%rdi, %rsi, %rdx)
    267 	jnz	2b			/* until the negative index hits 0 */
    268 
    269 	mfence				/* order the movnti stores */
    270 	xorl	%eax, %eax		/* return 0 (success) */
    271 
    272 _kcopy_nta_copyerr:
    273 	movq	%r11, T_LOFAULT(%r9)    /* restore original lofault */
    274 	leave
    275 	ret
    276 	SET_SIZE(do_copy_fault_nta)
    277 	SET_SIZE(kcopy_nta)
    278 
    279 #elif defined(__i386)
    280 
    281 #define	ARG_FROM	8
    282 #define	ARG_TO		12
    283 #define	ARG_COUNT	16
    284 
    285 #define	COPY_LOOP_INIT(src, dst, cnt)	\
    286 	addl	cnt, src;			\
    287 	addl	cnt, dst;			\
    288 	shrl	$3, cnt;			\
    289 	neg	cnt
    290 
        	/*
        	 * COPY_LOOP_INIT (above): advance src and dst by cnt bytes so
        	 * they point just past the end of each buffer, then turn cnt
        	 * into the negated number of 8-byte units.
        	 *
        	 * COPY_LOOP_BODY (below): copy 16 bytes per iteration as four
        	 * 32-bit loads and movnti stores, using %esi as the scratch
        	 * register, so %esi must not be passed as src, dst or cnt.
        	 * cnt is advanced by 2; the final addl leaves ZF set once cnt
        	 * reaches zero.
        	 */
    291 #define	COPY_LOOP_BODY(src, dst, cnt)	\
    292 	prefetchnta	0x100(src, cnt, 8);	\
    293 	movl	(src, cnt, 8), %esi;		\
    294 	movnti	%esi, (dst, cnt, 8);		\
    295 	movl	0x4(src, cnt, 8), %esi;		\
    296 	movnti	%esi, 0x4(dst, cnt, 8);		\
    297 	movl	0x8(src, cnt, 8), %esi;		\
    298 	movnti	%esi, 0x8(dst, cnt, 8);		\
    299 	movl	0xc(src, cnt, 8), %esi;		\
    300 	movnti	%esi, 0xc(dst, cnt, 8);		\
    301 	addl	$2, cnt
    302 
    303 	/*
    304 	 * kcopy_nta is not implemented for 32-bit as no performance
    305 	 * improvement was shown.  We simply jump directly to kcopy,
    306 	 * which ignores the 4th argument (copy_cached).
    307 	 */
    308 	ENTRY(kcopy_nta)
    309 	jmp	kcopy
    310 
        	/*
        	 * NOTE(review): the lea below follows an unconditional jmp and
        	 * has no label of its own, so it is not reached by fall-through
        	 * in this view.  Code entering at do_copy_fault_nta must supply
        	 * the lofault value in %eax itself.
        	 */
    311 	lea	_kcopy_nta_copyerr, %eax	/* lofault value */
    312 	ALTENTRY(do_copy_fault_nta)
    313 	pushl	%ebp
    314 	movl	%esp, %ebp		/* setup stack frame */
    315 	pushl	%esi
    316 	pushl	%edi
    317 
    318 	movl	%gs:CPU_THREAD, %edx
    319 	movl	T_LOFAULT(%edx), %edi
    320 	pushl	%edi			/* save the current lofault */
    321 	movl	%eax, T_LOFAULT(%edx)	/* new lofault */
    322 
    323 	/* COPY_LOOP_BODY needs to use %esi */
    324 	movl	ARG_COUNT(%ebp), %ecx
    325 	movl	ARG_FROM(%ebp), %edi	/* %edi = src */
    326 	movl	ARG_TO(%ebp), %eax	/* %eax = dst */
    327 	COPY_LOOP_INIT(%edi, %eax, %ecx)
    328 1:	COPY_LOOP_BODY(%edi, %eax, %ecx)
    329 	jnz	1b			/* until the negative index hits 0 */
    330 	mfence
    331 
    332 	xorl	%eax, %eax		/* return 0 (success) */
    333 _kcopy_nta_copyerr:
        	/* unwind: saved lofault, %edi, %esi, then the %ebp frame */
    334 	popl	%ecx
    335 	popl	%edi
    336 	movl	%ecx, T_LOFAULT(%edx)	/* restore the original lofault */
    337 	popl	%esi
    338 	leave
    339 	ret
    340 	SET_SIZE(do_copy_fault_nta)
    341 	SET_SIZE(kcopy_nta)
    342 
    343 #undef	ARG_FROM
    344 #undef	ARG_TO
    345 #undef	ARG_COUNT
    346 
    347 #endif	/* __i386 */
    348 #endif	/* __lint */
    349 
    350 #if defined(__lint)
    351 
    352 /* ARGSUSED */
        /*
         * Lint-only C stub: supplies the C prototype of bcopy() and an empty
         * body for lint.  The real implementation is the assembly in the
         * !__lint branch below.
         */
    353 void
    354 bcopy(const void *from, void *to, size_t count)
    355 {}
    356 
    357 #else	/* __lint */
    358 
    359 #if defined(__amd64)
    360 
    361 	ENTRY(bcopy)
        	/*
        	 * void bcopy(const void *from, void *to, size_t count)
        	 *
        	 * amd64 register use:
        	 *	%rdi = from, %rsi = to, %rdx = count
        	 *	%rcx, %r8, %r10 = scratch
        	 *
        	 * Counts below 0x50 are dispatched through the L(fwdPxQx) jump
        	 * table to straight-line code; larger counts go to
        	 * bcopy_ck_size.
        	 */
    362 #ifdef DEBUG
        	/*
        	 * A zero count is always accepted.  Otherwise panic unless both
        	 * from and to are at or above postbootkernelbase.
        	 */
    363 	orq	%rdx, %rdx		/* %rdx = count */
    364 	jz	1f
    365 	cmpq	postbootkernelbase(%rip), %rdi		/* %rdi = from */
    366 	jb	0f
    367 	cmpq	postbootkernelbase(%rip), %rsi		/* %rsi = to */
    368 	jnb	1f
    369 0:	leaq	.bcopy_panic_msg(%rip), %rdi
    370 	jmp	call_panic		/* setup stack and call panic */
    371 1:
    372 #endif
    373 	/*
    374 	 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
    375 	 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
    376 	 * uses these registers in future they must be saved and restored.
    377 	 */
    378 	ALTENTRY(bcopy_altentry)
    379 do_copy:
    380 #define	L(s) .bcopy/**/s
        	/*
        	 * count is a size_t, so compare unsigned.  A signed test would
        	 * treat a count with bit 63 set as "small" and use it as a
        	 * negative index into the jump table below.
        	 */
    381 	cmpq	$0x50, %rdx		/* 80 */
    382 	jae	bcopy_ck_size
    383 
    384 	/*
    385 	 * Performance data shows many callers copy small buffers. So for
    386 	 * best perf for these sizes unrolled code is used. Store data without
    387 	 * worrying about alignment.
    388 	 */
        	/*
        	 * Point %rdi/%rsi just past the end of each buffer (the unrolled
        	 * code uses negative offsets), then jump to table base plus the
        	 * signed 32-bit offset stored in entry number count.
        	 */
    389 	leaq	L(fwdPxQx)(%rip), %r10
    390 	addq	%rdx, %rdi
    391 	addq	%rdx, %rsi
    392 	movslq	(%r10,%rdx,4), %rcx
    393 	leaq	(%rcx,%r10,1), %r10
    394 	jmpq	*%r10
    395 
    396 	.p2align 4
        	/*
        	 * Jump table for copies of 0-79 bytes, indexed by byte count.
        	 * Each entry is the 32-bit offset of its target relative to
        	 * L(fwdPxQx).  Entry n selects L(PxQy) with x = n % 8 and
        	 * y = n / 8, i.e. y quadwords plus x trailing bytes.
        	 */
    397 L(fwdPxQx):
    398 	.int       L(P0Q0)-L(fwdPxQx)	/* 0 */
    399 	.int       L(P1Q0)-L(fwdPxQx)
    400 	.int       L(P2Q0)-L(fwdPxQx)
    401 	.int       L(P3Q0)-L(fwdPxQx)
    402 	.int       L(P4Q0)-L(fwdPxQx)
    403 	.int       L(P5Q0)-L(fwdPxQx)
    404 	.int       L(P6Q0)-L(fwdPxQx)
    405 	.int       L(P7Q0)-L(fwdPxQx)
    406 
    407 	.int       L(P0Q1)-L(fwdPxQx)	/* 8 */
    408 	.int       L(P1Q1)-L(fwdPxQx)
    409 	.int       L(P2Q1)-L(fwdPxQx)
    410 	.int       L(P3Q1)-L(fwdPxQx)
    411 	.int       L(P4Q1)-L(fwdPxQx)
    412 	.int       L(P5Q1)-L(fwdPxQx)
    413 	.int       L(P6Q1)-L(fwdPxQx)
    414 	.int       L(P7Q1)-L(fwdPxQx)
    415 
    416 	.int       L(P0Q2)-L(fwdPxQx)	/* 16 */
    417 	.int       L(P1Q2)-L(fwdPxQx)
    418 	.int       L(P2Q2)-L(fwdPxQx)
    419 	.int       L(P3Q2)-L(fwdPxQx)
    420 	.int       L(P4Q2)-L(fwdPxQx)
    421 	.int       L(P5Q2)-L(fwdPxQx)
    422 	.int       L(P6Q2)-L(fwdPxQx)
    423 	.int       L(P7Q2)-L(fwdPxQx)
    424 
    425 	.int       L(P0Q3)-L(fwdPxQx)	/* 24 */
    426 	.int       L(P1Q3)-L(fwdPxQx)
    427 	.int       L(P2Q3)-L(fwdPxQx)
    428 	.int       L(P3Q3)-L(fwdPxQx)
    429 	.int       L(P4Q3)-L(fwdPxQx)
    430 	.int       L(P5Q3)-L(fwdPxQx)
    431 	.int       L(P6Q3)-L(fwdPxQx)
    432 	.int       L(P7Q3)-L(fwdPxQx)
    433 
    434 	.int       L(P0Q4)-L(fwdPxQx)	/* 32 */
    435 	.int       L(P1Q4)-L(fwdPxQx)
    436 	.int       L(P2Q4)-L(fwdPxQx)
    437 	.int       L(P3Q4)-L(fwdPxQx)
    438 	.int       L(P4Q4)-L(fwdPxQx)
    439 	.int       L(P5Q4)-L(fwdPxQx)
    440 	.int       L(P6Q4)-L(fwdPxQx)
    441 	.int       L(P7Q4)-L(fwdPxQx)
    442 
    443 	.int       L(P0Q5)-L(fwdPxQx)	/* 40 */
    444 	.int       L(P1Q5)-L(fwdPxQx)
    445 	.int       L(P2Q5)-L(fwdPxQx)
    446 	.int       L(P3Q5)-L(fwdPxQx)
    447 	.int       L(P4Q5)-L(fwdPxQx)
    448 	.int       L(P5Q5)-L(fwdPxQx)
    449 	.int       L(P6Q5)-L(fwdPxQx)
    450 	.int       L(P7Q5)-L(fwdPxQx)
    451 
    452 	.int       L(P0Q6)-L(fwdPxQx)	/* 48 */
    453 	.int       L(P1Q6)-L(fwdPxQx)
    454 	.int       L(P2Q6)-L(fwdPxQx)
    455 	.int       L(P3Q6)-L(fwdPxQx)
    456 	.int       L(P4Q6)-L(fwdPxQx)
    457 	.int       L(P5Q6)-L(fwdPxQx)
    458 	.int       L(P6Q6)-L(fwdPxQx)
    459 	.int       L(P7Q6)-L(fwdPxQx)
    460 
    461 	.int       L(P0Q7)-L(fwdPxQx)	/* 56 */
    462 	.int       L(P1Q7)-L(fwdPxQx)
    463 	.int       L(P2Q7)-L(fwdPxQx)
    464 	.int       L(P3Q7)-L(fwdPxQx)
    465 	.int       L(P4Q7)-L(fwdPxQx)
    466 	.int       L(P5Q7)-L(fwdPxQx)
    467 	.int       L(P6Q7)-L(fwdPxQx)
    468 	.int       L(P7Q7)-L(fwdPxQx)
    469 
    470 	.int       L(P0Q8)-L(fwdPxQx)	/* 64 */
    471 	.int       L(P1Q8)-L(fwdPxQx)
    472 	.int       L(P2Q8)-L(fwdPxQx)
    473 	.int       L(P3Q8)-L(fwdPxQx)
    474 	.int       L(P4Q8)-L(fwdPxQx)
    475 	.int       L(P5Q8)-L(fwdPxQx)
    476 	.int       L(P6Q8)-L(fwdPxQx)
    477 	.int       L(P7Q8)-L(fwdPxQx)
    478 
    479 	.int       L(P0Q9)-L(fwdPxQx)	/* 72 */
    480 	.int       L(P1Q9)-L(fwdPxQx)
    481 	.int       L(P2Q9)-L(fwdPxQx)
    482 	.int       L(P3Q9)-L(fwdPxQx)
    483 	.int       L(P4Q9)-L(fwdPxQx)
    484 	.int       L(P5Q9)-L(fwdPxQx)
    485 	.int       L(P6Q9)-L(fwdPxQx)
    486 	.int       L(P7Q9)-L(fwdPxQx)	/* 79 */
    487 
    488 	.p2align 4
        	/*
        	 * Fall-through ladder for counts that are a multiple of 8.
        	 * Entering at L(P0Qn) copies n quadwords.  %rdi/%rsi point just
        	 * past the end of source/destination, hence the negative
        	 * offsets.  Scratch: %rcx, %r8, %r10.
        	 */
    489 L(P0Q9):
    490 	mov    -0x48(%rdi), %rcx
    491 	mov    %rcx, -0x48(%rsi)
    492 L(P0Q8):
    493 	mov    -0x40(%rdi), %r10
    494 	mov    %r10, -0x40(%rsi)
    495 L(P0Q7):
    496 	mov    -0x38(%rdi), %r8
    497 	mov    %r8, -0x38(%rsi)
    498 L(P0Q6):
    499 	mov    -0x30(%rdi), %rcx
    500 	mov    %rcx, -0x30(%rsi)
    501 L(P0Q5):
    502 	mov    -0x28(%rdi), %r10
    503 	mov    %r10, -0x28(%rsi)
    504 L(P0Q4):
    505 	mov    -0x20(%rdi), %r8
    506 	mov    %r8, -0x20(%rsi)
    507 L(P0Q3):
    508 	mov    -0x18(%rdi), %rcx
    509 	mov    %rcx, -0x18(%rsi)
    510 L(P0Q2):
    511 	mov    -0x10(%rdi), %r10
    512 	mov    %r10, -0x10(%rsi)
    513 L(P0Q1):
    514 	mov    -0x8(%rdi), %r8
    515 	mov    %r8, -0x8(%rsi)
    516 L(P0Q0):
    517 	ret
    518 
    519 	.p2align 4
        	/*
        	 * Fall-through ladder for counts of 8n + 1 bytes.  Entering at
        	 * L(P1Qn) copies n quadwords and then the final byte.  Offsets
        	 * are relative to the end pointers in %rdi/%rsi.
        	 */
    520 L(P1Q9):
    521 	mov    -0x49(%rdi), %r8
    522 	mov    %r8, -0x49(%rsi)
    523 L(P1Q8):
    524 	mov    -0x41(%rdi), %rcx
    525 	mov    %rcx, -0x41(%rsi)
    526 L(P1Q7):
    527 	mov    -0x39(%rdi), %r10
    528 	mov    %r10, -0x39(%rsi)
    529 L(P1Q6):
    530 	mov    -0x31(%rdi), %r8
    531 	mov    %r8, -0x31(%rsi)
    532 L(P1Q5):
    533 	mov    -0x29(%rdi), %rcx
    534 	mov    %rcx, -0x29(%rsi)
    535 L(P1Q4):
    536 	mov    -0x21(%rdi), %r10
    537 	mov    %r10, -0x21(%rsi)
    538 L(P1Q3):
    539 	mov    -0x19(%rdi), %r8
    540 	mov    %r8, -0x19(%rsi)
    541 L(P1Q2):
    542 	mov    -0x11(%rdi), %rcx
    543 	mov    %rcx, -0x11(%rsi)
    544 L(P1Q1):
    545 	mov    -0x9(%rdi), %r10
    546 	mov    %r10, -0x9(%rsi)
    547 L(P1Q0):
    548 	movzbq -0x1(%rdi), %r8		/* last byte */
    549 	mov    %r8b, -0x1(%rsi)
    550 	ret
    551 
    552 	.p2align 4
        	/*
        	 * Fall-through ladder for counts of 8n + 2 bytes.  Entering at
        	 * L(P2Qn) copies n quadwords and then the final 2 bytes.
        	 * Offsets are relative to the end pointers in %rdi/%rsi.
        	 */
    553 L(P2Q9):
    554 	mov    -0x4a(%rdi), %r8
    555 	mov    %r8, -0x4a(%rsi)
    556 L(P2Q8):
    557 	mov    -0x42(%rdi), %rcx
    558 	mov    %rcx, -0x42(%rsi)
    559 L(P2Q7):
    560 	mov    -0x3a(%rdi), %r10
    561 	mov    %r10, -0x3a(%rsi)
    562 L(P2Q6):
    563 	mov    -0x32(%rdi), %r8
    564 	mov    %r8, -0x32(%rsi)
    565 L(P2Q5):
    566 	mov    -0x2a(%rdi), %rcx
    567 	mov    %rcx, -0x2a(%rsi)
    568 L(P2Q4):
    569 	mov    -0x22(%rdi), %r10
    570 	mov    %r10, -0x22(%rsi)
    571 L(P2Q3):
    572 	mov    -0x1a(%rdi), %r8
    573 	mov    %r8, -0x1a(%rsi)
    574 L(P2Q2):
    575 	mov    -0x12(%rdi), %rcx
    576 	mov    %rcx, -0x12(%rsi)
    577 L(P2Q1):
    578 	mov    -0xa(%rdi), %r10
    579 	mov    %r10, -0xa(%rsi)
    580 L(P2Q0):
    581 	movzwq -0x2(%rdi), %r8		/* last 2 bytes */
    582 	mov    %r8w, -0x2(%rsi)
    583 	ret
    584 
    585 	.p2align 4
        	/*
        	 * Fall-through ladder for counts of 8n + 3 bytes.  Entering at
        	 * L(P3Qn) copies n quadwords and then the final 3 bytes as a
        	 * 2-byte plus a 1-byte move.  Offsets are relative to the end
        	 * pointers in %rdi/%rsi.
        	 */
    586 L(P3Q9):
    587 	mov    -0x4b(%rdi), %r8
    588 	mov    %r8, -0x4b(%rsi)
    589 L(P3Q8):
    590 	mov    -0x43(%rdi), %rcx
    591 	mov    %rcx, -0x43(%rsi)
    592 L(P3Q7):
    593 	mov    -0x3b(%rdi), %r10
    594 	mov    %r10, -0x3b(%rsi)
    595 L(P3Q6):
    596 	mov    -0x33(%rdi), %r8
    597 	mov    %r8, -0x33(%rsi)
    598 L(P3Q5):
    599 	mov    -0x2b(%rdi), %rcx
    600 	mov    %rcx, -0x2b(%rsi)
    601 L(P3Q4):
    602 	mov    -0x23(%rdi), %r10
    603 	mov    %r10, -0x23(%rsi)
    604 L(P3Q3):
    605 	mov    -0x1b(%rdi), %r8
    606 	mov    %r8, -0x1b(%rsi)
    607 L(P3Q2):
    608 	mov    -0x13(%rdi), %rcx
    609 	mov    %rcx, -0x13(%rsi)
    610 L(P3Q1):
    611 	mov    -0xb(%rdi), %r10
    612 	mov    %r10, -0xb(%rsi)
    613 	/*
    614 	 * These trailing loads/stores have to do all their loads 1st,
    615 	 * then do the stores.
    616 	 */
    617 L(P3Q0):
    618 	movzwq -0x3(%rdi), %r8
    619 	movzbq -0x1(%rdi), %r10
    620 	mov    %r8w, -0x3(%rsi)
    621 	mov    %r10b, -0x1(%rsi)
    622 	ret
    623 
    624 	.p2align 4
        	/*
        	 * Fall-through ladder for counts of 8n + 4 bytes.  Entering at
        	 * L(P4Qn) copies n quadwords and then the final 4 bytes.
        	 * Offsets are relative to the end pointers in %rdi/%rsi.
        	 */
    625 L(P4Q9):
    626 	mov    -0x4c(%rdi), %r8
    627 	mov    %r8, -0x4c(%rsi)
    628 L(P4Q8):
    629 	mov    -0x44(%rdi), %rcx
    630 	mov    %rcx, -0x44(%rsi)
    631 L(P4Q7):
    632 	mov    -0x3c(%rdi), %r10
    633 	mov    %r10, -0x3c(%rsi)
    634 L(P4Q6):
    635 	mov    -0x34(%rdi), %r8
    636 	mov    %r8, -0x34(%rsi)
    637 L(P4Q5):
    638 	mov    -0x2c(%rdi), %rcx
    639 	mov    %rcx, -0x2c(%rsi)
    640 L(P4Q4):
    641 	mov    -0x24(%rdi), %r10
    642 	mov    %r10, -0x24(%rsi)
    643 L(P4Q3):
    644 	mov    -0x1c(%rdi), %r8
    645 	mov    %r8, -0x1c(%rsi)
    646 L(P4Q2):
    647 	mov    -0x14(%rdi), %rcx
    648 	mov    %rcx, -0x14(%rsi)
    649 L(P4Q1):
    650 	mov    -0xc(%rdi), %r10
    651 	mov    %r10, -0xc(%rsi)
    652 L(P4Q0):
    653 	mov    -0x4(%rdi), %r8d		/* last 4 bytes */
    654 	mov    %r8d, -0x4(%rsi)
    655 	ret
    656 
    657 	.p2align 4
        	/*
        	 * Fall-through ladder for counts of 8n + 5 bytes.  Entering at
        	 * L(P5Qn) copies n quadwords and then the final 5 bytes as a
        	 * 4-byte plus a 1-byte move (both loads before both stores).
        	 * Offsets are relative to the end pointers in %rdi/%rsi.
        	 */
    658 L(P5Q9):
    659 	mov    -0x4d(%rdi), %r8
    660 	mov    %r8, -0x4d(%rsi)
    661 L(P5Q8):
    662 	mov    -0x45(%rdi), %rcx
    663 	mov    %rcx, -0x45(%rsi)
    664 L(P5Q7):
    665 	mov    -0x3d(%rdi), %r10
    666 	mov    %r10, -0x3d(%rsi)
    667 L(P5Q6):
    668 	mov    -0x35(%rdi), %r8
    669 	mov    %r8, -0x35(%rsi)
    670 L(P5Q5):
    671 	mov    -0x2d(%rdi), %rcx
    672 	mov    %rcx, -0x2d(%rsi)
    673 L(P5Q4):
    674 	mov    -0x25(%rdi), %r10
    675 	mov    %r10, -0x25(%rsi)
    676 L(P5Q3):
    677 	mov    -0x1d(%rdi), %r8
    678 	mov    %r8, -0x1d(%rsi)
    679 L(P5Q2):
    680 	mov    -0x15(%rdi), %rcx
    681 	mov    %rcx, -0x15(%rsi)
    682 L(P5Q1):
    683 	mov    -0xd(%rdi), %r10
    684 	mov    %r10, -0xd(%rsi)
    685 L(P5Q0):
    686 	mov    -0x5(%rdi), %r8d
    687 	movzbq -0x1(%rdi), %r10
    688 	mov    %r8d, -0x5(%rsi)
    689 	mov    %r10b, -0x1(%rsi)
    690 	ret
    691 
    692 	.p2align 4
        	/*
        	 * Fall-through ladder for counts of 8n + 6 bytes.  Entering at
        	 * L(P6Qn) copies n quadwords and then the final 6 bytes as a
        	 * 4-byte plus a 2-byte move (both loads before both stores).
        	 * Offsets are relative to the end pointers in %rdi/%rsi.
        	 */
    693 L(P6Q9):
    694 	mov    -0x4e(%rdi), %r8
    695 	mov    %r8, -0x4e(%rsi)
    696 L(P6Q8):
    697 	mov    -0x46(%rdi), %rcx
    698 	mov    %rcx, -0x46(%rsi)
    699 L(P6Q7):
    700 	mov    -0x3e(%rdi), %r10
    701 	mov    %r10, -0x3e(%rsi)
    702 L(P6Q6):
    703 	mov    -0x36(%rdi), %r8
    704 	mov    %r8, -0x36(%rsi)
    705 L(P6Q5):
    706 	mov    -0x2e(%rdi), %rcx
    707 	mov    %rcx, -0x2e(%rsi)
    708 L(P6Q4):
    709 	mov    -0x26(%rdi), %r10
    710 	mov    %r10, -0x26(%rsi)
    711 L(P6Q3):
    712 	mov    -0x1e(%rdi), %r8
    713 	mov    %r8, -0x1e(%rsi)
    714 L(P6Q2):
    715 	mov    -0x16(%rdi), %rcx
    716 	mov    %rcx, -0x16(%rsi)
    717 L(P6Q1):
    718 	mov    -0xe(%rdi), %r10
    719 	mov    %r10, -0xe(%rsi)
    720 L(P6Q0):
    721 	mov    -0x6(%rdi), %r8d
    722 	movzwq -0x2(%rdi), %r10
    723 	mov    %r8d, -0x6(%rsi)
    724 	mov    %r10w, -0x2(%rsi)
    725 	ret
    726 
    727 	.p2align 4
        	/*
        	 * Fall-through ladder for counts of 8n + 7 bytes.  Entering at
        	 * L(P7Qn) copies n quadwords and then the final 7 bytes as
        	 * 4-byte, 2-byte and 1-byte moves (all loads before all stores).
        	 * Offsets are relative to the end pointers in %rdi/%rsi.
        	 */
    728 L(P7Q9):
    729 	mov    -0x4f(%rdi), %r8
    730 	mov    %r8, -0x4f(%rsi)
    731 L(P7Q8):
    732 	mov    -0x47(%rdi), %rcx
    733 	mov    %rcx, -0x47(%rsi)
    734 L(P7Q7):
    735 	mov    -0x3f(%rdi), %r10
    736 	mov    %r10, -0x3f(%rsi)
    737 L(P7Q6):
    738 	mov    -0x37(%rdi), %r8
    739 	mov    %r8, -0x37(%rsi)
    740 L(P7Q5):
    741 	mov    -0x2f(%rdi), %rcx
    742 	mov    %rcx, -0x2f(%rsi)
    743 L(P7Q4):
    744 	mov    -0x27(%rdi), %r10
    745 	mov    %r10, -0x27(%rsi)
    746 L(P7Q3):
    747 	mov    -0x1f(%rdi), %r8
    748 	mov    %r8, -0x1f(%rsi)
    749 L(P7Q2):
    750 	mov    -0x17(%rdi), %rcx
    751 	mov    %rcx, -0x17(%rsi)
    752 L(P7Q1):
    753 	mov    -0xf(%rdi), %r10
    754 	mov    %r10, -0xf(%rsi)
    755 L(P7Q0):
    756 	mov    -0x7(%rdi), %r8d
    757 	movzwq -0x3(%rdi), %r10
    758 	movzbq -0x1(%rdi), %rcx
    759 	mov    %r8d, -0x7(%rsi)
    760 	mov    %r10w, -0x3(%rsi)
    761 	mov    %cl, -0x1(%rsi)
    762 	ret
    763 
    764 	/*
    765 	 * For large sizes rep smovq is fastest.
    766 	 * Transition point determined experimentally as measured on
    767 	 * Intel Xeon processors (incl. Nehalem and previous generations) and
    768 	 * AMD Opteron. The transition value is patched at boot time to avoid
    769 	 * memory reference hit.
    770 	 */
        	/*
        	 * NOTE(review): nothing in this view branches to
        	 * bcopy_patch_start, and the code before it ends in ret.
        	 * Presumably the bytes between bcopy_patch_start and
        	 * bcopy_patch_end are a template (compare against
        	 * BCOPY_NHM_REP) that boot code copies over the compare at
        	 * bcopy_ck_size - confirm against the patching code.
        	 */
    771 	.globl bcopy_patch_start
    772 bcopy_patch_start:
    773 	cmpq	$BCOPY_NHM_REP, %rdx
    774 	.globl bcopy_patch_end
    775 bcopy_patch_end:
    776 
    777 	.p2align 4
        	/*
        	 * bcopy_ck_size: reached with %rdx (count) >= 0x50.
        	 *	%rdi = source, %rsi = destination, %r8 = scratch
        	 *
        	 * Counts at or above the rep threshold go to L(use_rep).
        	 * Smaller counts first copy 1, 2 and/or 4 bytes as needed to
        	 * bring the destination (%rsi) to an 8-byte boundary, then enter
        	 * L(aligned_loop).
        	 */
    778 	.globl bcopy_ck_size
    779 bcopy_ck_size:
        	/* count is a size_t: the threshold test must be unsigned */
    780 	cmpq	$BCOPY_DFLT_REP, %rdx
    781 	jae	L(use_rep)
    782 
    783 	/*
    784 	 * Align to a 8-byte boundary. Avoids penalties from unaligned stores
    785 	 * as well as from stores spanning cachelines.
    786 	 */
    787 	test	$0x7, %rsi
    788 	jz	L(aligned_loop)
    789 	test	$0x1, %rsi
    790 	jz	2f
    791 	movzbq	(%rdi), %r8		/* odd address: move 1 byte */
    792 	dec	%rdx
    793 	inc	%rdi
    794 	mov	%r8b, (%rsi)
    795 	inc	%rsi
    796 2:
    797 	test	$0x2, %rsi
    798 	jz	4f
    799 	movzwq	(%rdi), %r8		/* move 2 bytes */
    800 	sub	$0x2, %rdx
    801 	add	$0x2, %rdi
    802 	mov	%r8w, (%rsi)
    803 	add	$0x2, %rsi
    804 4:
    805 	test	$0x4, %rsi
    806 	jz	L(aligned_loop)
    807 	mov	(%rdi), %r8d		/* move 4 bytes */
    808 	sub	$0x4, %rdx
    809 	add	$0x4, %rdi
    810 	mov	%r8d, (%rsi)
    811 	add	$0x4, %rsi
    812 
    812 
    813 	/*
    814 	 * Copy 64-bytes per loop
    815 	 */
        	/*
        	 * Each pass moves eight quadwords through %r8, %r10 and %rcx,
        	 * subtracts 0x40 from the count in %rdx and advances %rdi/%rsi
        	 * by 0x40.  The count and pointers are updated with lea, and
        	 * only mov/lea follow the cmp, so the flags from
        	 * "cmp $0x40, %rdx" are still intact at the closing jge.
        	 */
    816 	.p2align 4
    817 L(aligned_loop):
    818 	mov	(%rdi), %r8
    819 	mov	0x8(%rdi), %r10
    820 	lea	-0x40(%rdx), %rdx	/* count -= 64, flags untouched */
    821 	mov	%r8, (%rsi)
    822 	mov	%r10, 0x8(%rsi)
    823 	mov	0x10(%rdi), %rcx
    824 	mov	0x18(%rdi), %r8
    825 	mov	%rcx, 0x10(%rsi)
    826 	mov	%r8, 0x18(%rsi)
    827 
    828 	cmp	$0x40, %rdx		/* another full 64 bytes left? */
    829 	mov	0x20(%rdi), %r10
    830 	mov	0x28(%rdi), %rcx
    831 	mov	%r10, 0x20(%rsi)
    832 	mov	%rcx, 0x28(%rsi)
    833 	mov	0x30(%rdi), %r8
    834 	mov	0x38(%rdi), %r10
    835 	lea	0x40(%rdi), %rdi
    836 	mov	%r8, 0x30(%rsi)
    837 	mov	%r10, 0x38(%rsi)
    838 	lea	0x40(%rsi), %rsi
    839 	jge	L(aligned_loop)
    840 
    841 	/*
    842 	 * Copy remaining bytes (0-63)
    843 	 */
        	/*
        	 * Reached by fall-through from L(aligned_loop) and by a jump
        	 * from L(use_rep) (remainder 1-7).  %rdx holds the remaining
        	 * byte count; %rdi/%rsi are moved to the end of the data and the
        	 * L(fwdPxQx) table entry for %rdx supplies the target.
        	 */
    844 L(do_remainder):
    845 	leaq	L(fwdPxQx)(%rip), %r10
    846 	addq	%rdx, %rdi
    847 	addq	%rdx, %rsi
    848 	movslq	(%r10,%rdx,4), %rcx
    849 	leaq	(%rcx,%r10,1), %r10
    850 	jmpq	*%r10
    851 
    852 	/*
    853 	 * Use rep smovq. Copy remainder via unrolled code
    854 	 */
        	/*
        	 * bcopy receives (from, to) in (%rdi, %rsi), the reverse of what
        	 * the string move expects, so the two are exchanged around the
        	 * rep.  Afterwards both pointers have advanced past the
        	 * quadwords already copied and %rdx is reduced to count % 8.
        	 */
    855 	.p2align 4
    856 L(use_rep):
    857 	xchgq	%rdi, %rsi		/* %rsi = source, %rdi = destination */
    858 	movq	%rdx, %rcx		/* %rcx = count */
    859 	shrq	$3, %rcx		/* 8-byte word count */
    860 	rep
    861 	  smovq
    862 
    863 	xchgq	%rsi, %rdi		/* %rdi = src, %rsi = destination */
    864 	andq	$7, %rdx		/* remainder */
    865 	jnz	L(do_remainder)
    866 	ret
    867 #undef	L
    868 
    869 #ifdef DEBUG
    870 	/*
    871 	 * Setup frame on the run-time stack. The end of the input argument
    872 	 * area must be aligned on a 16 byte boundary. The stack pointer %rsp,
    873 	 * always points to the end of the latest allocated stack frame.
    874 	 * panic(const char *format, ...) is a varargs function. When a
    875 	 * function taking variable arguments is called, %rax must be set
    876 	 * to eight times the number of floating point parameters passed
    877 	 * to the function in SSE registers.
    878 	 */
        	/*
        	 * Shared DEBUG-only tail: the address checks in bcopy, kzero and
        	 * bzero jump here with %rdi already pointing at the panic
        	 * message.  There is no code after the call to panic.
        	 */
    879 call_panic:
    880 	pushq	%rbp			/* align stack properly */
    881 	movq	%rsp, %rbp
    882 	xorl	%eax, %eax		/* no variable arguments */
    883 	call	panic			/* %rdi = format string */
    884 #endif
    885 	SET_SIZE(bcopy_altentry)
    886 	SET_SIZE(bcopy)
    887 
    888 #elif defined(__i386)
    889 
    890 #define	ARG_FROM	4
    891 #define	ARG_TO		8
    892 #define	ARG_COUNT	12
    893 
    894 	ENTRY(bcopy)
        	/*
        	 * void bcopy(const void *from, void *to, size_t count)
        	 *
        	 * i386 version: no frame is built, arguments are read relative
        	 * to %esp through ARG_FROM/ARG_TO/ARG_COUNT.  %esi and %edi are
        	 * parked in %eax and %edx for the duration of the copy.
        	 */
    895 #ifdef DEBUG
        	/*
        	 * A zero count is always accepted.  Otherwise panic unless both
        	 * from and to are >= postbootkernelbase.
        	 */
    896 	movl	ARG_COUNT(%esp), %eax
    897 	orl	%eax, %eax
    898 	jz	1f
    899 	movl	postbootkernelbase, %eax
    900 	cmpl	%eax, ARG_FROM(%esp)
    901 	jb	0f
    902 	cmpl	%eax, ARG_TO(%esp)
    903 	jnb	1f
    904 0:	pushl	%ebp
    905 	movl	%esp, %ebp
    906 	pushl	$.bcopy_panic_msg
    907 	call	panic
    908 1:
    909 #endif
    910 do_copy:
    911 	movl	%esi, %eax		/* save registers */
    912 	movl	%edi, %edx
    913 	movl	ARG_COUNT(%esp), %ecx
    914 	movl	ARG_FROM(%esp), %esi
    915 	movl	ARG_TO(%esp), %edi
    916 
        	/* copy count / 4 dwords, then the remaining count % 4 bytes */
    917 	shrl	$2, %ecx		/* word count */
    918 	rep
    919 	  smovl
    920 	movl	ARG_COUNT(%esp), %ecx
    921 	andl	$3, %ecx		/* bytes left over */
    922 	rep
    923 	  smovb
    924 	movl	%eax, %esi		/* restore registers */
    925 	movl	%edx, %edi
    926 	ret
    927 	SET_SIZE(bcopy)
    928 
    929 #undef	ARG_COUNT
    930 #undef	ARG_FROM
    931 #undef	ARG_TO
    932 
    933 #endif	/* __i386 */
    934 #endif	/* __lint */
    935 
    936 
    937 /*
    938  * Zero a block of storage, returning an error code if we
    939  * take a kernel pagefault which cannot be resolved.
    940  * Returns errno value on pagefault error, 0 if all ok
    941  */
    942 
    943 #if defined(__lint)
    944 
    945 /* ARGSUSED */
        /*
         * Lint-only C stub: supplies the C prototype of kzero() and a trivial
         * body for lint.  The real implementation is the assembly in the
         * !__lint branch below.
         */
    946 int
    947 kzero(void *addr, size_t count)
    948 { return (0); }
    949 
    950 #else	/* __lint */
    951 
    952 #if defined(__amd64)
    953 
    954 	ENTRY(kzero)
        	/*
        	 * int kzero(void *addr, size_t count)
        	 *
        	 * amd64 register use:
        	 *	%rdi = addr, %rsi = count (handed unchanged to
        	 *	bzero_altentry)
        	 *	%rdx = scratch holding the lofault handler address
        	 *	%r9  = current thread pointer, %r11 = saved lofault
        	 *
        	 * No stack frame is created.  Returns 0 in %eax on success;
        	 * after an unresolved fault execution resumes at _kzeroerr with
        	 * errno in %rax.
        	 * NOTE(review): relies on bzero_altentry leaving %r9 and %r11
        	 * untouched - confirm against the full bzero body.
        	 */
    955 #ifdef DEBUG
        	/* panic unless addr is at or above postbootkernelbase */
    956         cmpq	postbootkernelbase(%rip), %rdi	/* %rdi = addr */
    957         jnb	0f
    958         leaq	.kzero_panic_msg(%rip), %rdi
    959 	jmp	call_panic		/* setup stack and call panic */
    960 0:
    961 #endif
    962 	/*
    963 	 * %rdx = address of the fault handler installed as lofault below
    964 	 */
    965 	leaq	_kzeroerr(%rip), %rdx
    966 
    967 	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread addr */
    968 	movq	T_LOFAULT(%r9), %r11	/* save the current lofault */
    969 	movq	%rdx, T_LOFAULT(%r9)	/* new lofault */
    970 	call	bzero_altentry
    971 	xorl	%eax, %eax		/* return 0 (success) */
    972 	movq	%r11, T_LOFAULT(%r9)	/* restore the original lofault */
    973 	ret
    974 	/*
    975 	 * A fault during bzero is indicated through an errno value
    976 	 * in %rax when we iretq to here.
    977 	 */
    978 _kzeroerr:
    979 	addq	$8, %rsp		/* pop bzero_altentry call ret addr */
    980 	movq	%r11, T_LOFAULT(%r9)	/* restore the original lofault */
    981 	ret
    982 	SET_SIZE(kzero)
    983 
    984 #elif defined(__i386)
    985 
    986 #define	ARG_ADDR	8
    987 #define	ARG_COUNT	12
    988 
    989 	ENTRY(kzero)
        	/*
        	 * int kzero(void *addr, size_t count)
        	 *
        	 * i386 version: arguments are read through ARG_ADDR/ARG_COUNT
        	 * relative to %ebp.
        	 *	%eax = lofault value, then the zero fill value and the
        	 *	       return value
        	 *	%edx = current thread pointer
        	 */
    990 #ifdef DEBUG
        	/*
        	 * Temporary frame so ARG_ADDR is valid for the check; it is
        	 * popped again at 0: before the real frame is built.
        	 * Panic unless addr is >= postbootkernelbase.
        	 */
    991 	pushl	%ebp
    992 	movl	%esp, %ebp
    993 	movl	postbootkernelbase, %eax
    994         cmpl	%eax, ARG_ADDR(%ebp)
    995         jnb	0f
    996         pushl   $.kzero_panic_msg
    997         call    panic
    998 0:	popl	%ebp
    999 #endif
   1000 	lea	_kzeroerr, %eax		/* kzeroerr is lofault value */
   1001 
   1002 	pushl	%ebp			/* save stack base */
   1003 	movl	%esp, %ebp		/* set new stack base */
   1004 	pushl	%edi			/* save %edi */
   1005 
   1006 	mov	%gs:CPU_THREAD, %edx
   1007 	movl	T_LOFAULT(%edx), %edi
   1008 	pushl	%edi			/* save the current lofault */
   1009 	movl	%eax, T_LOFAULT(%edx)	/* new lofault */
   1010 
   1011 	movl	ARG_COUNT(%ebp), %ecx	/* get size in bytes */
   1012 	movl	ARG_ADDR(%ebp), %edi	/* %edi <- address of bytes to clear */
   1013 	shrl	$2, %ecx		/* Count of double words to zero */
   1014 	xorl	%eax, %eax		/* sstol val */
   1015 	rep
   1016 	  sstol			/* %ecx contains words to clear (%eax=0) */
   1017 
   1018 	movl	ARG_COUNT(%ebp), %ecx	/* get size in bytes */
   1019 	andl	$3, %ecx		/* do mod 4 */
   1020 	rep
   1021 	  sstob			/* %ecx contains residual bytes to clear */
   1022 
        	/*
        	 * On the success path %eax is still 0 from the xorl above, which
        	 * is the return value when falling through into _kzeroerr.
        	 */
   1023 	/*
   1024 	 * A fault during kzero is indicated through an errno value
   1025 	 * in %eax when we iret to here.
   1026 	 */
   1027 _kzeroerr:
   1028 	popl	%edi			/* saved lofault */
   1029 	movl	%edi, T_LOFAULT(%edx)	/* restore the original lofault */
   1030 	popl	%edi			/* caller's %edi */
   1031 	popl	%ebp
   1032 	ret
   1033 	SET_SIZE(kzero)
   1034 
   1035 #undef	ARG_ADDR
   1036 #undef	ARG_COUNT
   1037 
   1038 #endif	/* __i386 */
   1039 #endif	/* __lint */
   1040 
   1041 /*
   1042  * Zero a block of storage.
   1043  */
   1044 
   1045 #if defined(__lint)
   1046 
   1047 /* ARGSUSED */
        /*
         * Lint-only C stub: supplies the C prototype of bzero() and an empty
         * body for lint.  The real implementation is the assembly in the
         * !__lint branch below.
         */
   1048 void
   1049 bzero(void *addr, size_t count)
   1050 {}
   1051 
   1052 #else	/* __lint */
   1053 
   1054 #if defined(__amd64)
   1055 
   1056 	ENTRY(bzero)			/* bzero(addr = %rdi, count = %rsi) */
   1057 #ifdef DEBUG
   1058 	cmpq	postbootkernelbase(%rip), %rdi	/* %rdi = addr */
   1059 	jnb	0f			/* addr >= postbootkernelbase: accepted */
   1060 	leaq	.bzero_panic_msg(%rip), %rdi
   1061 	jmp	call_panic		/* setup stack and call panic */
   1062 0:
   1063 #endif
   1064 	ALTENTRY(bzero_altentry)	/* alternate entry located after the DEBUG check */
   1065 do_zero:
   1066 #define	L(s) .bzero/**/s
   1067 	xorl	%eax, %eax		/* %rax = 0: the value every store below writes */
   1068 
   1069 	cmpq	$0x50, %rsi		/* 80 */
   1070 	jge	L(ck_align)		/* NOTE(review): signed compare on a size - confirm intent */
   1071 
   1072 	/*
   1073 	 * Performance data shows many callers are zeroing small buffers. So
   1074 	 * for best perf for these sizes unrolled code is used. Store zeros
   1075 	 * without worrying about alignment.
   1076 	 */
   1077 	leaq	L(setPxQx)(%rip), %r10	/* %r10 = base of the offset table */
   1078 	addq	%rsi, %rdi		/* %rdi = addr + count (one past the end) */
   1079 	movslq	(%r10,%rsi,4), %rcx	/* %rcx = table[count], sign-extended */
   1080 	leaq	(%rcx,%r10,1), %r10	/* table base + offset = target label */
   1081 	jmpq	*%r10			/* run the unrolled stores for this count */
   1082 
   1083 	.p2align 4			/* 80 entries indexed by count 0..79, each (label - table base) */
   1084 L(setPxQx):
   1085 	.int       L(P0Q0)-L(setPxQx)	/* 0 */
   1086 	.int       L(P1Q0)-L(setPxQx)
   1087 	.int       L(P2Q0)-L(setPxQx)
   1088 	.int       L(P3Q0)-L(setPxQx)
   1089 	.int       L(P4Q0)-L(setPxQx)
   1090 	.int       L(P5Q0)-L(setPxQx)
   1091 	.int       L(P6Q0)-L(setPxQx)
   1092 	.int       L(P7Q0)-L(setPxQx)
   1093 
   1094 	.int       L(P0Q1)-L(setPxQx)	/* 8 */
   1095 	.int       L(P1Q1)-L(setPxQx)
   1096 	.int       L(P2Q1)-L(setPxQx)
   1097 	.int       L(P3Q1)-L(setPxQx)
   1098 	.int       L(P4Q1)-L(setPxQx)
   1099 	.int       L(P5Q1)-L(setPxQx)
   1100 	.int       L(P6Q1)-L(setPxQx)
   1101 	.int       L(P7Q1)-L(setPxQx)
   1102 
   1103 	.int       L(P0Q2)-L(setPxQx)	/* 16 */
   1104 	.int       L(P1Q2)-L(setPxQx)
   1105 	.int       L(P2Q2)-L(setPxQx)
   1106 	.int       L(P3Q2)-L(setPxQx)
   1107 	.int       L(P4Q2)-L(setPxQx)
   1108 	.int       L(P5Q2)-L(setPxQx)
   1109 	.int       L(P6Q2)-L(setPxQx)
   1110 	.int       L(P7Q2)-L(setPxQx)
   1111 
   1112 	.int       L(P0Q3)-L(setPxQx)	/* 24 */
   1113 	.int       L(P1Q3)-L(setPxQx)
   1114 	.int       L(P2Q3)-L(setPxQx)
   1115 	.int       L(P3Q3)-L(setPxQx)
   1116 	.int       L(P4Q3)-L(setPxQx)
   1117 	.int       L(P5Q3)-L(setPxQx)
   1118 	.int       L(P6Q3)-L(setPxQx)
   1119 	.int       L(P7Q3)-L(setPxQx)
   1120 
   1121 	.int       L(P0Q4)-L(setPxQx)	/* 32 */
   1122 	.int       L(P1Q4)-L(setPxQx)
   1123 	.int       L(P2Q4)-L(setPxQx)
   1124 	.int       L(P3Q4)-L(setPxQx)
   1125 	.int       L(P4Q4)-L(setPxQx)
   1126 	.int       L(P5Q4)-L(setPxQx)
   1127 	.int       L(P6Q4)-L(setPxQx)
   1128 	.int       L(P7Q4)-L(setPxQx)
   1129 
   1130 	.int       L(P0Q5)-L(setPxQx)	/* 40 */
   1131 	.int       L(P1Q5)-L(setPxQx)
   1132 	.int       L(P2Q5)-L(setPxQx)
   1133 	.int       L(P3Q5)-L(setPxQx)
   1134 	.int       L(P4Q5)-L(setPxQx)
   1135 	.int       L(P5Q5)-L(setPxQx)
   1136 	.int       L(P6Q5)-L(setPxQx)
   1137 	.int       L(P7Q5)-L(setPxQx)
   1138 
   1139 	.int       L(P0Q6)-L(setPxQx)	/* 48 */
   1140 	.int       L(P1Q6)-L(setPxQx)
   1141 	.int       L(P2Q6)-L(setPxQx)
   1142 	.int       L(P3Q6)-L(setPxQx)
   1143 	.int       L(P4Q6)-L(setPxQx)
   1144 	.int       L(P5Q6)-L(setPxQx)
   1145 	.int       L(P6Q6)-L(setPxQx)
   1146 	.int       L(P7Q6)-L(setPxQx)
   1147 
   1148 	.int       L(P0Q7)-L(setPxQx)	/* 56 */
   1149 	.int       L(P1Q7)-L(setPxQx)
   1150 	.int       L(P2Q7)-L(setPxQx)
   1151 	.int       L(P3Q7)-L(setPxQx)
   1152 	.int       L(P4Q7)-L(setPxQx)
   1153 	.int       L(P5Q7)-L(setPxQx)
   1154 	.int       L(P6Q7)-L(setPxQx)
   1155 	.int       L(P7Q7)-L(setPxQx)
   1156 
   1157 	.int       L(P0Q8)-L(setPxQx)	/* 64 */
   1158 	.int       L(P1Q8)-L(setPxQx)
   1159 	.int       L(P2Q8)-L(setPxQx)
   1160 	.int       L(P3Q8)-L(setPxQx)
   1161 	.int       L(P4Q8)-L(setPxQx)
   1162 	.int       L(P5Q8)-L(setPxQx)
   1163 	.int       L(P6Q8)-L(setPxQx)
   1164 	.int       L(P7Q8)-L(setPxQx)
   1165 
   1166 	.int       L(P0Q9)-L(setPxQx)	/* 72 */
   1167 	.int       L(P1Q9)-L(setPxQx)
   1168 	.int       L(P2Q9)-L(setPxQx)
   1169 	.int       L(P3Q9)-L(setPxQx)
   1170 	.int       L(P4Q9)-L(setPxQx)
   1171 	.int       L(P5Q9)-L(setPxQx)
   1172 	.int       L(P6Q9)-L(setPxQx)
   1173 	.int       L(P7Q9)-L(setPxQx)	/* 79 */
   1174 
   1175 	.p2align 4			/* PnQm: zero m quadwords + n tail bytes, addressed back from end ptr %rdi */
   1176 L(P0Q9): mov    %rax, -0x48(%rdi)
   1177 L(P0Q8): mov    %rax, -0x40(%rdi)
   1178 L(P0Q7): mov    %rax, -0x38(%rdi)
   1179 L(P0Q6): mov    %rax, -0x30(%rdi)
   1180 L(P0Q5): mov    %rax, -0x28(%rdi)
   1181 L(P0Q4): mov    %rax, -0x20(%rdi)
   1182 L(P0Q3): mov    %rax, -0x18(%rdi)
   1183 L(P0Q2): mov    %rax, -0x10(%rdi)
   1184 L(P0Q1): mov    %rax, -0x8(%rdi)
   1185 L(P0Q0):
   1186 	 ret
   1187 
   1188 	.p2align 4			/* tail of 1 byte */
   1189 L(P1Q9): mov    %rax, -0x49(%rdi)
   1190 L(P1Q8): mov    %rax, -0x41(%rdi)
   1191 L(P1Q7): mov    %rax, -0x39(%rdi)
   1192 L(P1Q6): mov    %rax, -0x31(%rdi)
   1193 L(P1Q5): mov    %rax, -0x29(%rdi)
   1194 L(P1Q4): mov    %rax, -0x21(%rdi)
   1195 L(P1Q3): mov    %rax, -0x19(%rdi)
   1196 L(P1Q2): mov    %rax, -0x11(%rdi)
   1197 L(P1Q1): mov    %rax, -0x9(%rdi)
   1198 L(P1Q0): mov    %al, -0x1(%rdi)
   1199 	 ret
   1200 
   1201 	.p2align 4			/* tail of 2 bytes */
   1202 L(P2Q9): mov    %rax, -0x4a(%rdi)
   1203 L(P2Q8): mov    %rax, -0x42(%rdi)
   1204 L(P2Q7): mov    %rax, -0x3a(%rdi)
   1205 L(P2Q6): mov    %rax, -0x32(%rdi)
   1206 L(P2Q5): mov    %rax, -0x2a(%rdi)
   1207 L(P2Q4): mov    %rax, -0x22(%rdi)
   1208 L(P2Q3): mov    %rax, -0x1a(%rdi)
   1209 L(P2Q2): mov    %rax, -0x12(%rdi)
   1210 L(P2Q1): mov    %rax, -0xa(%rdi)
   1211 L(P2Q0): mov    %ax, -0x2(%rdi)
   1212 	 ret
   1213 
   1214 	.p2align 4			/* tail of 3 bytes: word + byte */
   1215 L(P3Q9): mov    %rax, -0x4b(%rdi)
   1216 L(P3Q8): mov    %rax, -0x43(%rdi)
   1217 L(P3Q7): mov    %rax, -0x3b(%rdi)
   1218 L(P3Q6): mov    %rax, -0x33(%rdi)
   1219 L(P3Q5): mov    %rax, -0x2b(%rdi)
   1220 L(P3Q4): mov    %rax, -0x23(%rdi)
   1221 L(P3Q3): mov    %rax, -0x1b(%rdi)
   1222 L(P3Q2): mov    %rax, -0x13(%rdi)
   1223 L(P3Q1): mov    %rax, -0xb(%rdi)
   1224 L(P3Q0): mov    %ax, -0x3(%rdi)
   1225 	 mov    %al, -0x1(%rdi)
   1226 	 ret
   1227 
   1228 	.p2align 4			/* tail of 4 bytes */
   1229 L(P4Q9): mov    %rax, -0x4c(%rdi)
   1230 L(P4Q8): mov    %rax, -0x44(%rdi)
   1231 L(P4Q7): mov    %rax, -0x3c(%rdi)
   1232 L(P4Q6): mov    %rax, -0x34(%rdi)
   1233 L(P4Q5): mov    %rax, -0x2c(%rdi)
   1234 L(P4Q4): mov    %rax, -0x24(%rdi)
   1235 L(P4Q3): mov    %rax, -0x1c(%rdi)
   1236 L(P4Q2): mov    %rax, -0x14(%rdi)
   1237 L(P4Q1): mov    %rax, -0xc(%rdi)
   1238 L(P4Q0): mov    %eax, -0x4(%rdi)
   1239 	 ret
   1240 
   1241 	.p2align 4			/* tail of 5 bytes: dword + byte */
   1242 L(P5Q9): mov    %rax, -0x4d(%rdi)
   1243 L(P5Q8): mov    %rax, -0x45(%rdi)
   1244 L(P5Q7): mov    %rax, -0x3d(%rdi)
   1245 L(P5Q6): mov    %rax, -0x35(%rdi)
   1246 L(P5Q5): mov    %rax, -0x2d(%rdi)
   1247 L(P5Q4): mov    %rax, -0x25(%rdi)
   1248 L(P5Q3): mov    %rax, -0x1d(%rdi)
   1249 L(P5Q2): mov    %rax, -0x15(%rdi)
   1250 L(P5Q1): mov    %rax, -0xd(%rdi)
   1251 L(P5Q0): mov    %eax, -0x5(%rdi)
   1252 	 mov    %al, -0x1(%rdi)
   1253 	 ret
   1254 
   1255 	.p2align 4			/* tail of 6 bytes: dword + word */
   1256 L(P6Q9): mov    %rax, -0x4e(%rdi)
   1257 L(P6Q8): mov    %rax, -0x46(%rdi)
   1258 L(P6Q7): mov    %rax, -0x3e(%rdi)
   1259 L(P6Q6): mov    %rax, -0x36(%rdi)
   1260 L(P6Q5): mov    %rax, -0x2e(%rdi)
   1261 L(P6Q4): mov    %rax, -0x26(%rdi)
   1262 L(P6Q3): mov    %rax, -0x1e(%rdi)
   1263 L(P6Q2): mov    %rax, -0x16(%rdi)
   1264 L(P6Q1): mov    %rax, -0xe(%rdi)
   1265 L(P6Q0): mov    %eax, -0x6(%rdi)
   1266 	 mov    %ax, -0x2(%rdi)
   1267 	 ret
   1268 
   1269 	.p2align 4			/* tail of 7 bytes: dword + word + byte */
   1270 L(P7Q9): mov    %rax, -0x4f(%rdi)
   1271 L(P7Q8): mov    %rax, -0x47(%rdi)
   1272 L(P7Q7): mov    %rax, -0x3f(%rdi)
   1273 L(P7Q6): mov    %rax, -0x37(%rdi)
   1274 L(P7Q5): mov    %rax, -0x2f(%rdi)
   1275 L(P7Q4): mov    %rax, -0x27(%rdi)
   1276 L(P7Q3): mov    %rax, -0x1f(%rdi)
   1277 L(P7Q2): mov    %rax, -0x17(%rdi)
   1278 L(P7Q1): mov    %rax, -0xf(%rdi)
   1279 L(P7Q0): mov    %eax, -0x7(%rdi)
   1280 	 mov    %ax, -0x3(%rdi)
   1281 	 mov    %al, -0x1(%rdi)
   1282 	 ret
   1283 
   1284 	/*
   1285 	 * Align to a 16-byte boundary. Avoids penalties from unaligned stores
   1286 	 * as well as from stores spanning cachelines. Note 16-byte alignment
   1287 	 * is better in case where rep sstoq is used.
   1288 	 */
   1289 	.p2align 4
   1290 L(ck_align):
   1291 	test	$0xf, %rdi		/* already 16-byte aligned? */
   1292 	jz	L(aligned_now)
   1293 	test	$1, %rdi		/* odd address: store one byte */
   1294 	jz	2f
   1295 	mov	%al, (%rdi)
   1296 	dec	%rsi			/* count -= 1 */
   1297 	lea	1(%rdi),%rdi		/* addr += 1 */
   1298 2:
   1299 	test	$2, %rdi		/* bit 1 set: store one word */
   1300 	jz	4f
   1301 	mov	%ax, (%rdi)
   1302 	sub	$2, %rsi
   1303 	lea	2(%rdi),%rdi
   1304 4:
   1305 	test	$4, %rdi		/* bit 2 set: store one doubleword */
   1306 	jz	8f
   1307 	mov	%eax, (%rdi)
   1308 	sub	$4, %rsi
   1309 	lea	4(%rdi),%rdi
   1310 8:
   1311 	test	$8, %rdi		/* bit 3 set: store one quadword */
   1312 	jz	L(aligned_now)
   1313 	mov	%rax, (%rdi)
   1314 	sub	$8, %rsi
   1315 	lea	8(%rdi),%rdi
   1316 
   1317 	/*
   1318 	 * For large sizes rep sstoq is fastest.
   1319 	 * Transition point determined experimentally as measured on
   1320 	 * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
   1321 	 */
   1322 L(aligned_now):
   1323 	cmp	$BZERO_USE_REP, %rsi	/* remaining count above the rep threshold? */
   1324 	jg	L(use_rep)		/* signed compare, as with the jge at entry */
   1325 
   1326 	/*
   1327 	 * zero 64-bytes per loop
   1328 	 */
   1329 	.p2align 4
   1330 L(bzero_loop):
   1331 	leaq	-0x40(%rsi), %rsi	/* count -= 64 */
   1332 	cmpq	$0x40, %rsi		/* at least 64 more bytes after this pass? */
   1333 	movq	%rax, (%rdi)
   1334 	movq	%rax, 0x8(%rdi)
   1335 	movq	%rax, 0x10(%rdi)
   1336 	movq	%rax, 0x18(%rdi)
   1337 	movq	%rax, 0x20(%rdi)
   1338 	movq	%rax, 0x28(%rdi)
   1339 	movq	%rax, 0x30(%rdi)
   1340 	movq	%rax, 0x38(%rdi)
   1341 	leaq	0x40(%rdi), %rdi	/* addr += 64 */
   1342 	jge	L(bzero_loop)		/* uses the cmpq flags; mov/lea leave them intact */
   1343 
   1344 	/*
   1345 	 * Clear any remaining bytes..
   1346 	 */
   1347 9:
   1348 	leaq	L(setPxQx)(%rip), %r10	/* same table dispatch as the small-size path */
   1349 	addq	%rsi, %rdi		/* %rdi = one past the last byte to clear */
   1350 	movslq	(%r10,%rsi,4), %rcx	/* %rcx = table[remaining count] */
   1351 	leaq	(%rcx,%r10,1), %r10
   1352 	jmpq	*%r10			/* the unrolled sequence ends in ret */
   1353 
   1354 	/*
   1355 	 * Use rep sstoq. Clear any remainder via unrolled code
   1356 	 */
   1357 	.p2align 4
   1358 L(use_rep):
   1359 	movq	%rsi, %rcx		/* get size in bytes */
   1360 	shrq	$3, %rcx		/* count of 8-byte words to zero */
   1361 	rep
   1362 	  sstoq				/* %rcx = words to clear (%rax=0) */
   1363 	andq	$7, %rsi		/* remaining bytes */
   1364 	jnz	9b			/* 1..7 bytes left: finish through the table */
   1365 	ret
   1366 #undef	L
   1367 	SET_SIZE(bzero_altentry)
   1368 	SET_SIZE(bzero)
   1369 
   1370 #elif defined(__i386)
   1371 
   1372 #define	ARG_ADDR	4
   1373 #define	ARG_COUNT	8
   1374 
   1375 	ENTRY(bzero)			/* bzero(addr, count): both arguments on the stack */
   1376 #ifdef DEBUG
   1377 	movl	postbootkernelbase, %eax
   1378 	cmpl	%eax, ARG_ADDR(%esp)	/* addr below postbootkernelbase? */
   1379 	jae	0f			/* no: carry on */
   1380 	pushl	%ebp
   1381 	movl	%esp, %ebp
   1382 	pushl	$.bzero_panic_msg
   1383 	call	panic
   1384 0:
   1385 #endif
   1386 do_zero:
   1387 	xorl	%eax, %eax		/* store value = 0 */
   1388 	movl	%edi, %edx		/* park the caller's %edi in %edx */
   1389 	movl	ARG_ADDR(%esp), %edi	/* %edi = destination */
   1390 	movl	ARG_COUNT(%esp), %ecx	/* %ecx = byte count */
   1391 	shrl	$2, %ecx		/* ... converted to doublewords */
   1392 	rep
   1393 	  sstol				/* clear count / 4 doublewords */
   1394 	movl	ARG_COUNT(%esp), %ecx
   1395 	andl	$3, %ecx		/* count % 4 leftover bytes */
   1396 	rep
   1397 	  sstob				/* clear them one byte at a time */
   1398 	movl	%edx, %edi		/* give the caller its %edi back */
   1399 	ret
   1400 	SET_SIZE(bzero)
   1401 
   1402 #undef	ARG_ADDR
   1403 #undef	ARG_COUNT
   1404 
   1405 #endif	/* __i386 */
   1406 #endif	/* __lint */
   1407 
   1408 /*
   1409  * Transfer data to and from user space -
   1410  * Note that these routines can cause faults
   1411  * It is assumed that the kernel has nothing at
   1412  * less than KERNELBASE in the virtual address space.
   1413  *
   1414  * Note that copyin(9F) and copyout(9F) are part of the
   1415  * DDI/DKI which specifies that they return '-1' on "errors."
   1416  *
   1417  * Sigh.
   1418  *
   1419  * So there's two extremely similar routines - xcopyin_nta() and
   1420  * xcopyout_nta() which return the errno that we've faithfully computed.
   1421  * This allows other callers (e.g. uiomove(9F)) to work correctly.
   1422  * Given that these are used pretty heavily, we expand the calling
   1423  * sequences inline for all flavours (rather than making wrappers).
   1424  */
   1425 
   1426 /*
   1427  * Copy user data to kernel space.
   1428  */
   1429 
   1430 #if defined(__lint)
   1431 
   1432 /* ARGSUSED */
   1433 int
   1434 copyin(const void *uaddr, void *kaddr, size_t count)
   1435 { return (0); }	/* stub compiled only under __lint; gives lint the C prototype */
   1436 
   1437 #else	/* lint */
   1438 
   1439 #if defined(__amd64)
   1440 
   1441 	ENTRY(copyin)			/* copyin(uaddr = %rdi, kaddr = %rsi, count = %rdx) */
   1442 	pushq	%rbp
   1443 	movq	%rsp, %rbp
   1444 	subq	$24, %rsp		/* three 8-byte slots for the saved arguments */
   1445 
   1446 	/*
   1447 	 * save args in case we trap and need to rerun as a copyop
   1448 	 */
   1449 	movq	%rdi, (%rsp)
   1450 	movq	%rsi, 0x8(%rsp)
   1451 	movq	%rdx, 0x10(%rsp)
   1452 
   1453 	movq	kernelbase(%rip), %rax
   1454 #ifdef DEBUG
   1455 	cmpq	%rax, %rsi		/* %rsi = kaddr */
   1456 	jnb	1f			/* kaddr >= kernelbase: accepted */
   1457 	leaq	.copyin_panic_msg(%rip), %rdi
   1458 	xorl	%eax, %eax
   1459 	call	panic
   1460 1:
   1461 #endif
   1462 	/*
   1463 	 * pass lofault value as 4th argument to do_copy_fault
   1464 	 */
   1465 	leaq	_copyin_err(%rip), %rcx
   1466 
   1467 	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread pointer, used at 3: below */
   1468 	cmpq	%rax, %rdi		/* test uaddr < kernelbase */
   1469 	jb	do_copy_fault
   1470 	jmp	3f			/* uaddr >= kernelbase: skip the direct copy */
   1471 
   1472 _copyin_err:				/* lofault target handed to do_copy_fault above */
   1473 	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
   1474 	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
   1475 3:
   1476 	movq	T_COPYOPS(%r9), %rax	/* %rax = thread's copyops vector, may be 0 */
   1477 	cmpq	$0, %rax
   1478 	jz	2f			/* none installed: fail with -1 */
   1479 	/*
   1480 	 * reload args for the copyop
   1481 	 */
   1482 	movq	(%rsp), %rdi
   1483 	movq	0x8(%rsp), %rsi
   1484 	movq	0x10(%rsp), %rdx
   1485 	leave
   1486 	jmp	*CP_COPYIN(%rax)	/* tail-jump: the copyop returns to our caller */
   1487 
   1488 2:	movl	$-1, %eax		/* error return value is -1, not an errno */
   1489 	leave
   1490 	ret
   1491 	SET_SIZE(copyin)
   1492 
   1493 #elif defined(__i386)
   1494 
   1495 #define	ARG_UADDR	4
   1496 #define	ARG_KADDR	8
   1497 
   1498 	ENTRY(copyin)			/* copyin(uaddr, kaddr, count): arguments on the stack */
   1499 	movl	kernelbase, %ecx
   1500 #ifdef DEBUG
   1501 	cmpl	%ecx, ARG_KADDR(%esp)
   1502 	jnb	1f			/* kaddr >= kernelbase: accepted */
   1503 	pushl	%ebp
   1504 	movl	%esp, %ebp
   1505 	pushl	$.copyin_panic_msg
   1506 	call	panic
   1507 1:
   1508 #endif
   1509 	lea	_copyin_err, %eax	/* presumably the lofault value do_copy_fault installs */
   1510 
   1511 	movl	%gs:CPU_THREAD, %edx	/* %edx = thread pointer, used at 3: below */
   1512 	cmpl	%ecx, ARG_UADDR(%esp)	/* test uaddr < kernelbase */
   1513 	jb	do_copy_fault
   1514 	jmp	3f			/* uaddr >= kernelbase: skip the direct copy */
   1515 
   1516 _copyin_err:				/* pops below presumably undo do_copy_fault's pushes */
   1517 	popl	%ecx
   1518 	popl	%edi
   1519 	movl	%ecx, T_LOFAULT(%edx)	/* restore original lofault */
   1520 	popl	%esi
   1521 	popl	%ebp
   1522 3:
   1523 	movl	T_COPYOPS(%edx), %eax	/* %eax = thread's copyops vector, may be 0 */
   1524 	cmpl	$0, %eax
   1525 	jz	2f			/* none installed: fail with -1 */
   1526 	jmp	*CP_COPYIN(%eax)	/* tail-jump; the stack arguments are still in place */
   1527 
   1528 2:	movl	$-1, %eax		/* error return value is -1, not an errno */
   1529 	ret
   1530 	SET_SIZE(copyin)
   1531 
   1532 #undef	ARG_UADDR
   1533 #undef	ARG_KADDR
   1534 
   1535 #endif	/* __i386 */
   1536 #endif	/* __lint */
   1537 
   1538 #if defined(__lint)
   1539 
   1540 /* ARGSUSED */
   1541 int
   1542 xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
   1543 { return (0); }	/* stub compiled only under __lint; gives lint the C prototype */
   1544 
   1545 #else	/* __lint */
   1546 
   1547 #if defined(__amd64)
   1548 
   1549 	ENTRY(xcopyin_nta)		/* args: uaddr %rdi, kaddr %rsi, count %rdx, copy_cached %rcx */
   1550 	pushq	%rbp
   1551 	movq	%rsp, %rbp
   1552 	subq	$24, %rsp		/* three 8-byte slots for the saved arguments */
   1553 
   1554 	/*
   1555 	 * save args in case we trap and need to rerun as a copyop
   1556 	 * %rcx is consumed in this routine so we don't need to save
   1557 	 * it.
   1558 	 */
   1559 	movq	%rdi, (%rsp)
   1560 	movq	%rsi, 0x8(%rsp)
   1561 	movq	%rdx, 0x10(%rsp)
   1562 
   1563 	movq	kernelbase(%rip), %rax
   1564 #ifdef DEBUG
   1565 	cmpq	%rax, %rsi		/* %rsi = kaddr */
   1566 	jnb	1f			/* kaddr >= kernelbase: accepted */
   1567 	leaq	.xcopyin_panic_msg(%rip), %rdi
   1568 	xorl	%eax, %eax
   1569 	call	panic
   1570 1:
   1571 #endif
   1572 	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread pointer, used at 3: below */
   1573 	cmpq	%rax, %rdi		/* test uaddr < kernelbase */
   1574 	jae	4f			/* no: EFAULT, then try the copyops */
   1575 	cmpq	$0, %rcx		/* No non-temporal access? */
   1576 	/*
   1577 	 * pass lofault value as 4th argument to do_copy_fault
   1578 	 */
   1579 	leaq	_xcopyin_err(%rip), %rcx	/* doesn't set rflags */
   1580 	jnz	do_copy_fault		/* use regular access */
   1581 	/*
   1582 	 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
   1583 	 */
   1584 	cmpq	$XCOPY_MIN_SIZE, %rdx
   1585 	jb	do_copy_fault		/* too small for the non-temporal path */
   1586 
   1587 	/*
   1588 	 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
   1589 	 * count is COUNT_ALIGN_SIZE aligned.
   1590 	 */
   1591 	movq	%rdi, %r10
   1592 	orq	%rsi, %r10		/* low bits of either address ... */
   1593 	andq	$NTA_ALIGN_MASK, %r10
   1594 	orq	%rdx, %r10		/* ... merged with the low bits of count */
   1595 	andq	$COUNT_ALIGN_MASK, %r10
   1596 	jnz	do_copy_fault		/* something misaligned: regular copy */
   1597 	leaq	_xcopyin_nta_err(%rip), %rcx	/* doesn't set rflags */
   1598 	jmp	do_copy_fault_nta	/* use non-temporal access */
   1599 
   1600 4:
   1601 	movl	$EFAULT, %eax		/* returned at 2: when no copyops are installed */
   1602 	jmp	3f
   1603 
   1604 	/*
   1605 	 * A fault during do_copy_fault or do_copy_fault_nta is
   1606 	 * indicated through an errno value in %rax and we iret from the
   1607 	 * trap handler to here.
   1608 	 */
   1609 _xcopyin_err:
   1610 	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
   1611 _xcopyin_nta_err:
   1612 	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
   1613 3:
   1614 	movq	T_COPYOPS(%r9), %r8	/* %r8, not %rax: %rax carries the errno */
   1615 	cmpq	$0, %r8
   1616 	jz	2f			/* no copyops: return the errno in %eax */
   1617 
   1618 	/*
   1619 	 * reload args for the copyop
   1620 	 */
   1621 	movq	(%rsp), %rdi
   1622 	movq	0x8(%rsp), %rsi
   1623 	movq	0x10(%rsp), %rdx
   1624 	leave
   1625 	jmp	*CP_XCOPYIN(%r8)	/* tail-jump: the copyop returns to our caller */
   1626 
   1627 2:	leave
   1628 	ret
   1629 	SET_SIZE(xcopyin_nta)
   1630 
   1631 #elif defined(__i386)
   1632 
   1633 #define	ARG_UADDR	4
   1634 #define	ARG_KADDR	8
   1635 #define	ARG_COUNT	12
   1636 #define	ARG_CACHED	16
   1637 
   1638 	.globl	use_sse_copy
   1639 
   1640 	ENTRY(xcopyin_nta)		/* stack args: uaddr, kaddr, count, copy_cached */
   1641 	movl	kernelbase, %ecx
   1642 	lea	_xcopyin_err, %eax	/* presumably the lofault value do_copy_fault* installs */
   1643 	movl	%gs:CPU_THREAD, %edx	/* %edx = thread pointer, used at 3: below */
   1644 	cmpl	%ecx, ARG_UADDR(%esp)	/* test uaddr < kernelbase */
   1645 	jae	4f			/* no: EFAULT, then try the copyops */
   1646 
   1647 	cmpl	$0, use_sse_copy	/* no sse support */
   1648 	jz	do_copy_fault
   1649 
   1650 	cmpl	$0, ARG_CACHED(%esp)	/* copy_cached hint set? */
   1651 	jnz	do_copy_fault
   1652 
   1653 	/*
   1654 	 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
   1655 	 */
   1656 	cmpl	$XCOPY_MIN_SIZE, ARG_COUNT(%esp)
   1657 	jb	do_copy_fault		/* too small for the non-temporal path */
   1658 
   1659 	/*
   1660 	 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
   1661 	 * count is COUNT_ALIGN_SIZE aligned.
   1662 	 */
   1663 	movl	ARG_UADDR(%esp), %ecx
   1664 	orl	ARG_KADDR(%esp), %ecx	/* low bits of either address ... */
   1665 	andl	$NTA_ALIGN_MASK, %ecx
   1666 	orl	ARG_COUNT(%esp), %ecx	/* ... merged with the low bits of count */
   1667 	andl	$COUNT_ALIGN_MASK, %ecx
   1668 	jnz	do_copy_fault		/* something misaligned: regular copy */
   1669 
   1670 	jmp	do_copy_fault_nta	/* use non-temporal access */
   1671 
   1672 4:
   1673 	movl	$EFAULT, %eax		/* returned at 2: when no copyops are installed */
   1674 	jmp	3f
   1675 
   1676 	/*
   1677 	 * A fault during do_copy_fault or do_copy_fault_nta is
   1678 	 * indicated through an errno value in %eax and we iret from the
   1679 	 * trap handler to here.
   1680 	 */
   1681 _xcopyin_err:
   1682 	popl	%ecx
   1683 	popl	%edi
   1684 	movl	%ecx, T_LOFAULT(%edx)	/* restore original lofault */
   1685 	popl	%esi
   1686 	popl	%ebp
   1687 3:
   1688 	cmpl	$0, T_COPYOPS(%edx)	/* tested in memory so %eax keeps the errno */
   1689 	jz	2f
   1690 	movl	T_COPYOPS(%edx), %eax
   1691 	jmp	*CP_XCOPYIN(%eax)	/* tail-jump; the stack arguments are still in place */
   1692 
   1693 2:	rep; 	ret	/* use 2 byte return instruction when branch target */
   1694 			/* AMD Software Optimization Guide - Section 6.2 */
   1695 	SET_SIZE(xcopyin_nta)
   1696 
   1697 #undef	ARG_UADDR
   1698 #undef	ARG_KADDR
   1699 #undef	ARG_COUNT
   1700 #undef	ARG_CACHED
   1701 
   1702 #endif	/* __i386 */
   1703 #endif	/* __lint */
   1704 
   1705 /*
   1706  * Copy kernel data to user space.
   1707  */
   1708 
   1709 #if defined(__lint)
   1710 
   1711 /* ARGSUSED */
   1712 int
   1713 copyout(const void *kaddr, void *uaddr, size_t count)
   1714 { return (0); }	/* stub compiled only under __lint; gives lint the C prototype */
   1715 
   1716 #else	/* __lint */
   1717 
   1718 #if defined(__amd64)
   1719 
   1720 	ENTRY(copyout)			/* copyout(kaddr = %rdi, uaddr = %rsi, count = %rdx) */
   1721 	pushq	%rbp
   1722 	movq	%rsp, %rbp
   1723 	subq	$24, %rsp		/* three 8-byte slots for the saved arguments */
   1724 
   1725 	/*
   1726 	 * save args in case we trap and need to rerun as a copyop
   1727 	 */
   1728 	movq	%rdi, (%rsp)
   1729 	movq	%rsi, 0x8(%rsp)
   1730 	movq	%rdx, 0x10(%rsp)
   1731 
   1732 	movq	kernelbase(%rip), %rax
   1733 #ifdef DEBUG
   1734 	cmpq	%rax, %rdi		/* %rdi = kaddr */
   1735 	jnb	1f			/* kaddr >= kernelbase: accepted */
   1736 	leaq	.copyout_panic_msg(%rip), %rdi
   1737 	xorl	%eax, %eax
   1738 	call	panic
   1739 1:
   1740 #endif
   1741 	/*
   1742 	 * pass lofault value as 4th argument to do_copy_fault
   1743 	 */
   1744 	leaq	_copyout_err(%rip), %rcx
   1745 
   1746 	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread pointer, used at 3: below */
   1747 	cmpq	%rax, %rsi		/* test uaddr < kernelbase */
   1748 	jb	do_copy_fault
   1749 	jmp	3f			/* uaddr >= kernelbase: skip the direct copy */
   1750 
   1751 _copyout_err:				/* lofault target handed to do_copy_fault above */
   1752 	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
   1753 	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
   1754 3:
   1755 	movq	T_COPYOPS(%r9), %rax	/* %rax = thread's copyops vector, may be 0 */
   1756 	cmpq	$0, %rax
   1757 	jz	2f			/* none installed: fail with -1 */
   1758 
   1759 	/*
   1760 	 * reload args for the copyop
   1761 	 */
   1762 	movq	(%rsp), %rdi
   1763 	movq	0x8(%rsp), %rsi
   1764 	movq	0x10(%rsp), %rdx
   1765 	leave
   1766 	jmp	*CP_COPYOUT(%rax)	/* tail-jump: the copyop returns to our caller */
   1767 
   1768 2:	movl	$-1, %eax		/* error return value is -1, not an errno */
   1769 	leave
   1770 	ret
   1771 	SET_SIZE(copyout)
   1772 
   1773 #elif defined(__i386)
   1774 
   1775 #define	ARG_KADDR	4
   1776 #define	ARG_UADDR	8
   1777 
   1778 	ENTRY(copyout)			/* copyout(kaddr, uaddr, count): arguments on the stack */
   1779 	movl	kernelbase, %ecx
   1780 #ifdef DEBUG
   1781 	cmpl	%ecx, ARG_KADDR(%esp)
   1782 	jnb	1f			/* kaddr >= kernelbase: accepted */
   1783 	pushl	%ebp
   1784 	movl	%esp, %ebp
   1785 	pushl	$.copyout_panic_msg
   1786 	call	panic
   1787 1:
   1788 #endif
   1789 	lea	_copyout_err, %eax	/* presumably the lofault value do_copy_fault installs */
   1790 	movl	%gs:CPU_THREAD, %edx	/* %edx = thread pointer, used at 3: below */
   1791 	cmpl	%ecx, ARG_UADDR(%esp)	/* test uaddr < kernelbase */
   1792 	jb	do_copy_fault
   1793 	jmp	3f			/* uaddr >= kernelbase: skip the direct copy */
   1794 
   1795 _copyout_err:				/* pops below presumably undo do_copy_fault's pushes */
   1796 	popl	%ecx
   1797 	popl	%edi
   1798 	movl	%ecx, T_LOFAULT(%edx)	/* restore original lofault */
   1799 	popl	%esi
   1800 	popl	%ebp
   1801 3:
   1802 	movl	T_COPYOPS(%edx), %eax	/* %eax = thread's copyops vector, may be 0 */
   1803 	cmpl	$0, %eax
   1804 	jz	2f			/* none installed: fail with -1 */
   1805 	jmp	*CP_COPYOUT(%eax)	/* tail-jump; the stack arguments are still in place */
   1806 
   1807 2:	movl	$-1, %eax		/* error return value is -1, not an errno */
   1808 	ret
   1809 	SET_SIZE(copyout)
   1810 
   1811 #undef	ARG_UADDR
   1812 #undef	ARG_KADDR
   1813 
   1814 #endif	/* __i386 */
   1815 #endif	/* __lint */
   1816 
   1817 #if defined(__lint)
   1818 
   1819 /* ARGSUSED */
   1820 int
   1821 xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
   1822 { return (0); }	/* stub compiled only under __lint; gives lint the C prototype */
   1823 
   1824 #else	/* __lint */
   1825 
   1826 #if defined(__amd64)
   1827 
   1828 	ENTRY(xcopyout_nta)		/* args: kaddr %rdi, uaddr %rsi, count %rdx, copy_cached %rcx */
   1829 	pushq	%rbp
   1830 	movq	%rsp, %rbp
   1831 	subq	$24, %rsp		/* three 8-byte slots for the saved arguments */
   1832 
   1833 	/*
   1834 	 * save args in case we trap and need to rerun as a copyop
   1835 	 */
   1836 	movq	%rdi, (%rsp)
   1837 	movq	%rsi, 0x8(%rsp)
   1838 	movq	%rdx, 0x10(%rsp)
   1839 
   1840 	movq	kernelbase(%rip), %rax
   1841 #ifdef DEBUG
   1842 	cmpq	%rax, %rdi		/* %rdi = kaddr */
   1843 	jnb	1f			/* kaddr >= kernelbase: accepted */
   1844 	leaq	.xcopyout_panic_msg(%rip), %rdi
   1845 	xorl	%eax, %eax
   1846 	call	panic
   1847 1:
   1848 #endif
   1849 	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread pointer, used at 3: below */
   1850 	cmpq	%rax, %rsi		/* test uaddr < kernelbase */
   1851 	jae	4f			/* no: EFAULT, then try the copyops */
   1852 
   1853 	cmpq	$0, %rcx		/* No non-temporal access? */
   1854 	/*
   1855 	 * pass lofault value as 4th argument to do_copy_fault
   1856 	 */
   1857 	leaq	_xcopyout_err(%rip), %rcx	/* lea leaves the cmpq flags intact */
   1858 	jnz	do_copy_fault		/* copy_cached != 0: regular access */
   1859 	/*
   1860 	 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
   1861 	 */
   1862 	cmpq	$XCOPY_MIN_SIZE, %rdx
   1863 	jb	do_copy_fault		/* too small for the non-temporal path */
   1864 
   1865 	/*
   1866 	 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
   1867 	 * count is COUNT_ALIGN_SIZE aligned.
   1868 	 */
   1869 	movq	%rdi, %r10
   1870 	orq	%rsi, %r10		/* low bits of either address ... */
   1871 	andq	$NTA_ALIGN_MASK, %r10
   1872 	orq	%rdx, %r10		/* ... merged with the low bits of count */
   1873 	andq	$COUNT_ALIGN_MASK, %r10
   1874 	jnz	do_copy_fault		/* something misaligned: regular copy */
   1875 	leaq	_xcopyout_nta_err(%rip), %rcx	/* lofault value for the nta variant */
   1876 	jmp	do_copy_fault_nta	/* use non-temporal access */
   1877 
   1878 4:
   1879 	movl	$EFAULT, %eax		/* returned at 2: when no copyops are installed */
   1880 	jmp	3f
   1881 
   1882 	/*
   1883 	 * A fault during do_copy_fault or do_copy_fault_nta is
   1884 	 * indicated through an errno value in %rax and we iret from the
   1885 	 * trap handler to here.
   1886 	 */
   1887 _xcopyout_err:
   1888 	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
   1889 _xcopyout_nta_err:
   1890 	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
   1891 3:
   1892 	movq	T_COPYOPS(%r9), %r8	/* %r8, not %rax: %rax carries the errno */
   1893 	cmpq	$0, %r8
   1894 	jz	2f			/* no copyops: return the errno in %eax */
   1895 
   1896 	/*
   1897 	 * reload args for the copyop
   1898 	 */
   1899 	movq	(%rsp), %rdi
   1900 	movq	0x8(%rsp), %rsi
   1901 	movq	0x10(%rsp), %rdx
   1902 	leave
   1903 	jmp	*CP_XCOPYOUT(%r8)	/* tail-jump: the copyop returns to our caller */
   1904 
   1905 2:	leave
   1906 	ret
   1907 	SET_SIZE(xcopyout_nta)
   1908 
   1909 #elif defined(__i386)
   1910 
   1911 #define	ARG_KADDR	4
   1912 #define	ARG_UADDR	8
   1913 #define	ARG_COUNT	12
   1914 #define	ARG_CACHED	16
   1915 
   1916 	ENTRY(xcopyout_nta)		/* stack args: kaddr, uaddr, count, copy_cached */
   1917 	movl	kernelbase, %ecx
   1918 	lea	_xcopyout_err, %eax	/* presumably the lofault value do_copy_fault* installs */
   1919 	movl	%gs:CPU_THREAD, %edx	/* %edx = thread pointer from here on */
   1920 	cmpl	%ecx, ARG_UADDR(%esp)	/* test uaddr < kernelbase */
   1921 	jae	4f			/* no: EFAULT, then try the copyops */
   1922 
   1923 	cmpl	$0, use_sse_copy	/* no sse support */
   1924 	jz	do_copy_fault
   1925 
   1926 	cmpl	$0, ARG_CACHED(%esp)	/* copy_cached hint set? */
   1927 	jnz	do_copy_fault
   1928 
   1929 	/*
   1930 	 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
   1931 	 */
   1932 	cmpl	$XCOPY_MIN_SIZE, ARG_COUNT(%esp)	/* count is a stack arg; %edx is the thread */
   1933 	jb	do_copy_fault		/* too small for the non-temporal path */
   1934 
   1935 	/*
   1936 	 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
   1937 	 * count is COUNT_ALIGN_SIZE aligned.
   1938 	 */
   1939 	movl	ARG_UADDR(%esp), %ecx
   1940 	orl	ARG_KADDR(%esp), %ecx	/* low bits of either address ... */
   1941 	andl	$NTA_ALIGN_MASK, %ecx
   1942 	orl	ARG_COUNT(%esp), %ecx	/* ... merged with the low bits of count */
   1943 	andl	$COUNT_ALIGN_MASK, %ecx
   1944 	jnz	do_copy_fault		/* something misaligned: regular copy */
   1945 	jmp	do_copy_fault_nta	/* use non-temporal access */
   1946 
   1947 4:
   1948 	movl	$EFAULT, %eax		/* returned at 2: when no copyops are installed */
   1949 	jmp	3f
   1950 
   1951 	/*
   1952 	 * A fault during do_copy_fault or do_copy_fault_nta is
   1953 	 * indicated through an errno value in %eax and we iret from the
   1954 	 * trap handler to here.
   1955 	 */
   1956 _xcopyout_err:
   1957 	/* restore the original lofault */
   1958 	popl	%ecx
   1959 	popl	%edi
   1960 	movl	%ecx, T_LOFAULT(%edx)	/* original lofault */
   1961 	popl	%esi
   1962 	popl	%ebp
   1963 3:
   1964 	cmpl	$0, T_COPYOPS(%edx)	/* tested in memory so %eax keeps the errno */
   1965 	jz	2f
   1966 	movl	T_COPYOPS(%edx), %eax
   1967 	jmp	*CP_XCOPYOUT(%eax)	/* tail-jump; the stack arguments are still in place */
   1968 
   1969 2:	rep;	ret	/* use 2 byte return instruction when branch target */
   1970 			/* AMD Software Optimization Guide - Section 6.2 */
   1971 	SET_SIZE(xcopyout_nta)
   1972 
   1973 #undef	ARG_UADDR
   1974 #undef	ARG_KADDR
   1975 #undef	ARG_COUNT
   1976 #undef	ARG_CACHED
   1977 
   1978 #endif	/* __i386 */
   1979 #endif	/* __lint */
   1980 
   1981 /*
   1982  * Copy a null terminated string from one point to another in
   1983  * the kernel address space.
   1984  */
   1985 
   1986 #if defined(__lint)
   1987 
   1988 /* ARGSUSED */
   1989 int
   1990 copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
   1991 { return (0); }	/* stub compiled only under __lint; gives lint the C prototype */
   1992 
   1993 #else	/* __lint */
   1994 
   1995 #if defined(__amd64)
   1996 
   1997 	ENTRY(copystr)			/* args: from %rdi, to %rsi, maxlength %rdx, lencopied %rcx */
   1998 	pushq	%rbp
   1999 	movq	%rsp, %rbp
   2000 #ifdef DEBUG
   2001 	movq	kernelbase(%rip), %rax
   2002 	cmpq	%rax, %rdi		/* source below kernelbase? */
   2003 	jb	0f			/* yes: panic */
   2004 	cmpq	%rax, %rsi		/* destination at or above kernelbase? */
   2005 	jae	1f			/* yes: both addresses pass */
   2006 0:	leaq	.copystr_panic_msg(%rip), %rdi
   2007 	xorl	%eax, %eax
   2008 	call	panic
   2009 1:
   2010 #endif
   2011 	movq	%gs:CPU_THREAD, %r9
   2012 	movq	T_LOFAULT(%r9), %r8	/* reuse the lofault already in force: */
   2013 					/* it is do_copystr's 5th argument */
   2014 do_copystr:
   2015 	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread pointer */
   2016 	movq    T_LOFAULT(%r9), %r11	/* %r11 = lofault to put back on exit */
   2017 	movq	%r8, T_LOFAULT(%r9)	/* install the one passed in %r8 */
   2018 
   2019 	movq	%rdx, %r8		/* %r8 = bytes we may still copy */
   2020 
   2021 	testq	%rdx, %rdx		/* zero-length buffer? */
   2022 	jz	copystr_enametoolong	/* then nothing fits */
   2023 
   2024 copystr_loop:
   2025 	decq	%r8			/* account for this byte */
   2026 	movb	(%rdi), %al		/* fetch from source ... */
   2027 	incq	%rdi
   2028 	movb	%al, (%rsi)		/* ... and store to destination */
   2029 	incq	%rsi
   2030 	testb	%al, %al		/* was it the terminator? */
   2031 	jz	copystr_null		/* yes: success */
   2032 	testq	%r8, %r8		/* room left? */
   2033 	jnz	copystr_loop		/* yes: next byte */
   2034 
   2035 copystr_enametoolong:
   2036 	movl	$ENAMETOOLONG, %eax	/* ran out of room before the NUL */
   2037 	jmp	copystr_out
   2038 
   2039 copystr_null:
   2040 	xorl	%eax, %eax		/* return 0 */
   2041 
   2042 copystr_out:
   2043 	testq	%rcx, %rcx		/* lencopied == NULL? */
   2044 	jz	copystr_done		/* caller does not want the length */
   2045 	subq	%r8, %rdx		/* maxlength - remaining = bytes copied */
   2046 	movq	%rdx, (%rcx)		/* *lencopied = that count */
   2047 
   2048 copystr_done:
   2049 	movq	%r11, T_LOFAULT(%r9)	/* put the saved lofault back */
   2050 	leave
   2051 	ret
   2052 	SET_SIZE(copystr)
   2053 
   2054 #elif defined(__i386)
   2055 
   2056 #define	ARG_FROM	8
   2057 #define	ARG_TO		12
   2058 #define	ARG_MAXLEN	16
   2059 #define	ARG_LENCOPIED	20
   2060 
   2061 	ENTRY(copystr)			/* stack args: from, to, maxlength, lencopied */
   2062 #ifdef DEBUG
   2063 	pushl	%ebp
   2064 	movl	%esp, %ebp
   2065 	movl	kernelbase, %eax
   2066 	cmpl	%eax, ARG_FROM(%esp)	/* source below kernelbase? */
   2067 	jb	0f			/* yes: panic */
   2068 	cmpl	%eax, ARG_TO(%esp)	/* destination at or above kernelbase? */
   2069 	jae	1f			/* yes: both addresses pass */
   2070 0:	pushl	$.copystr_panic_msg
   2071 	call	panic
   2072 1:	popl	%ebp
   2073 #endif
   2074 	/* hand do_copystr the lofault that is already in force */
   2075 	movl	%gs:CPU_THREAD, %eax
   2076 	movl	T_LOFAULT(%eax), %eax
   2077 do_copystr:
   2078 	pushl	%ebp			/* frame: arguments at ARG_*(%ebp) */
   2079 	movl	%esp, %ebp
   2080 	pushl	%ebx			/* preserved for the caller */
   2081 	pushl	%edi
   2082 
   2083 	movl	%gs:CPU_THREAD, %ebx
   2084 	movl	T_LOFAULT(%ebx), %edi
   2085 	pushl	%edi			/* previous lofault kept on the stack */
   2086 	movl	%eax, T_LOFAULT(%ebx)	/* install the one passed in %eax */
   2087 
   2088 	movl	ARG_MAXLEN(%ebp), %ecx	/* %ecx = bytes we may still copy */
   2089 	testl	%ecx, %ecx
   2090 	jz	copystr_enametoolong	/* zero-length buffer: nothing fits */
   2091 
   2092 	movl	ARG_FROM(%ebp), %ebx	/* %ebx = source cursor */
   2093 	movl	ARG_TO(%ebp), %edx	/* %edx = destination cursor */
   2094 
   2095 copystr_loop:
   2096 	decl	%ecx			/* account for this byte */
   2097 	movb	(%ebx), %al		/* fetch from source ... */
   2098 	incl	%ebx
   2099 	movb	%al, (%edx)		/* ... and store to destination */
   2100 	incl	%edx
   2101 	testb	%al, %al		/* was it the terminator? */
   2102 	jz	copystr_null		/* yes: success */
   2103 	testl	%ecx, %ecx		/* room left? */
   2104 	jnz	copystr_loop		/* yes: next byte */
   2105 
   2106 copystr_enametoolong:
   2107 	movl	$ENAMETOOLONG, %eax	/* ran out of room before the NUL */
   2108 	jmp	copystr_out
   2109 
   2110 copystr_null:
   2111 	xorl	%eax, %eax		/* return 0 */
   2112 
   2113 copystr_out:
   2114 	cmpl	$0, ARG_LENCOPIED(%ebp)	/* lencopied == NULL? */
   2115 	jz	copystr_done		/* caller does not want the length */
   2116 	movl	ARG_MAXLEN(%ebp), %edx
   2117 	subl	%ecx, %edx		/* maxlength - remaining = bytes copied */
   2118 	movl	ARG_LENCOPIED(%ebp), %ecx
   2119 	movl	%edx, (%ecx)		/* *lencopied = that count */
   2120 
   2121 copystr_done:
   2122 	popl	%edi			/* %edi = lofault saved on entry */
   2123 	movl	%gs:CPU_THREAD, %ebx
   2124 	movl	%edi, T_LOFAULT(%ebx)	/* put it back */
   2125 
   2126 	popl	%edi			/* unwind the saved registers */
   2127 	popl	%ebx
   2128 	popl	%ebp
   2129 	ret
   2130 	SET_SIZE(copystr)
   2131 
   2132 #undef	ARG_FROM
   2133 #undef	ARG_TO
   2134 #undef	ARG_MAXLEN
   2135 #undef	ARG_LENCOPIED
   2136 
   2137 #endif	/* __i386 */
   2138 #endif	/* __lint */
   2139 
   2140 /*
   2141  * Copy a null terminated string from the user address space into
   2142  * the kernel address space.
   2143  */
   2144 
   2145 #if defined(__lint)
   2146 
   2147 /* ARGSUSED */
   2148 int
   2149 copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
   2150     size_t *lencopied)
   2151 { return (0); }	/* stub compiled only under __lint; gives lint the C prototype */
   2152 
   2153 #else	/* __lint */
   2154 
   2155 #if defined(__amd64)
   2156 
   2157 	ENTRY(copyinstr)		/* args: uaddr %rdi, kaddr %rsi, maxlength %rdx, lencopied %rcx */
   2158 	pushq	%rbp
   2159 	movq	%rsp, %rbp
   2160 	subq	$32, %rsp		/* four 8-byte slots for the saved arguments */
   2161 
   2162 	/*
   2163 	 * save args in case we trap and need to rerun as a copyop
   2164 	 */
   2165 	movq	%rdi, (%rsp)
   2166 	movq	%rsi, 0x8(%rsp)
   2167 	movq	%rdx, 0x10(%rsp)
   2168 	movq	%rcx, 0x18(%rsp)
   2169 
   2170 	movq	kernelbase(%rip), %rax
   2171 #ifdef DEBUG
   2172 	cmpq	%rax, %rsi		/* %rsi = kaddr */
   2173 	jnb	1f			/* kaddr >= kernelbase: accepted */
   2174 	leaq	.copyinstr_panic_msg(%rip), %rdi
   2175 	xorl	%eax, %eax
   2176 	call	panic
   2177 1:
   2178 #endif
   2179 	/*
   2180 	 * pass lofault value as 5th argument to do_copystr
   2181 	 */
   2182 	leaq	_copyinstr_error(%rip), %r8
   2183 
   2184 	cmpq	%rax, %rdi		/* test uaddr < kernelbase */
   2185 	jb	do_copystr		/* its leave/ret unwinds the frame built above */
   2186 	movq	%gs:CPU_THREAD, %r9	/* do_copystr skipped, so load %r9 here for 3: */
   2187 	jmp	3f
   2188 
   2189 _copyinstr_error:			/* lofault target; %r9 and %r11 were set by do_copystr */
   2190 	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
   2191 3:
   2192 	movq	T_COPYOPS(%r9), %rax	/* %rax = thread's copyops vector, may be 0 */
   2193 	cmpq	$0, %rax
   2194 	jz	2f			/* none installed: return EFAULT */
   2195 
   2196 	/*
   2197 	 * reload args for the copyop
   2198 	 */
   2199 	movq	(%rsp), %rdi
   2200 	movq	0x8(%rsp), %rsi
   2201 	movq	0x10(%rsp), %rdx
   2202 	movq	0x18(%rsp), %rcx
   2203 	leave
   2204 	jmp	*CP_COPYINSTR(%rax)	/* tail-jump: the copyop returns to our caller */
   2205 
   2206 2:	movl	$EFAULT, %eax		/* return EFAULT */
   2207 	leave
   2208 	ret
   2209 	SET_SIZE(copyinstr)
   2210 
   2211 #elif defined(__i386)
   2212 
        /*
         * int copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
         *     size_t *lencopied)	-- i386, arguments on the stack
         *
         * When uaddr is below kernelbase the work is handed to do_copystr with
         * _copyinstr_error in %eax as the lofault handler.  When uaddr is not
         * below kernelbase, or _copyinstr_error is entered, the routine
         * tail-calls the CP_COPYINSTR copyop if T_COPYOPS is non-NULL and
         * returns EFAULT otherwise.  The stack arguments are never modified,
         * so the copyop sees the caller's original argument list.
         */
   2213 #define	ARG_UADDR	4
   2214 #define	ARG_KADDR	8
   2215 
   2216 	ENTRY(copyinstr)
   2217 	movl	kernelbase, %ecx
   2218 #ifdef DEBUG
   2219 	cmpl	%ecx, ARG_KADDR(%esp)
   2220 	jnb	1f
   2221 	pushl	%ebp
   2222 	movl	%esp, %ebp
   2223 	pushl	$.copyinstr_panic_msg
   2224 	call	panic
   2225 1:
   2226 #endif
   2227 	lea	_copyinstr_error, %eax
   2228 	cmpl	%ecx, ARG_UADDR(%esp)	/* test uaddr < kernelbase */
   2229 	jb	do_copystr
   2230 	movl	%gs:CPU_THREAD, %edx	/* not a user address: try copyops */
   2231 	jmp	3f
   2232 
        	/*
        	 * Unwinds with the same pop sequence as copystr_done: the saved
        	 * lofault first, then %edi, %ebx and %ebp.
        	 */
   2233 _copyinstr_error:
   2234 	popl	%edi
   2235 	movl	%gs:CPU_THREAD, %edx
   2236 	movl	%edi, T_LOFAULT(%edx)	/* original lofault */
   2237 
   2238 	popl	%edi
   2239 	popl	%ebx
   2240 	popl	%ebp
   2241 3:
   2242 	movl	T_COPYOPS(%edx), %eax
   2243 	testl	%eax, %eax		/* copyops vector installed? */
   2244 	jz	2f
   2245 	jmp	*CP_COPYINSTR(%eax)	/* tail call with the original stack args */
   2246 
   2247 2:	movl	$EFAULT, %eax		/* return EFAULT */
   2248 	ret
   2249 	SET_SIZE(copyinstr)
   2250 
   2251 #undef	ARG_UADDR
   2252 #undef	ARG_KADDR
   2253 
   2254 #endif	/* __i386 */
   2255 #endif	/* __lint */
   2256 
   2257 /*
   2258  * Copy a null terminated string from the kernel
   2259  * address space to the user address space.
   2260  */
   2261 
   2262 #if defined(__lint)
   2263 
        /*
         * Lint-only stand-in that gives lint the C signature of copyoutstr().
         * It ignores its arguments and returns 0, and is compiled only when
         * __lint is defined.
         */
   2264 /* ARGSUSED */
   2265 int
   2266 copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
   2267     size_t *lencopied)
   2268 { return (0); }
   2269 
   2270 #else	/* __lint */
   2271 
   2272 #if defined(__amd64)
   2273 
        /*
         * int copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
         *     size_t *lencopied)
         *
         * On entry: %rdi = kaddr, %rsi = uaddr, %rdx = maxlength,
         * %rcx = lencopied.
         *
         * The four arguments are spilled to a 32-byte frame so they can be
         * reloaded if the request has to be re-driven through the thread's
         * copyops vector.  When uaddr is below kernelbase the work is handed
         * to do_copystr, with _copyoutstr_error passed in %r8 as the lofault
         * handler.  When uaddr is not below kernelbase, or _copyoutstr_error
         * is entered, the routine tail-calls the CP_COPYOUTSTR copyop if
         * T_COPYOPS is non-NULL and returns EFAULT otherwise.
         */
   2274 	ENTRY(copyoutstr)
   2275 	pushq	%rbp
   2276 	movq	%rsp, %rbp
   2277 	subq	$32, %rsp
   2278 
   2279 	/*
   2280 	 * save args in case we trap and need to rerun as a copyop
   2281 	 */
   2282 	movq	%rdi, (%rsp)
   2283 	movq	%rsi, 0x8(%rsp)
   2284 	movq	%rdx, 0x10(%rsp)
   2285 	movq	%rcx, 0x18(%rsp)
   2286 
   2287 	movq	kernelbase(%rip), %rax
   2288 #ifdef DEBUG
   2289 	cmpq	%rax, %rdi		/* %rdi = kaddr */
   2290 	jnb	1f
   2291 	leaq	.copyoutstr_panic_msg(%rip), %rdi
   2292 	jmp	call_panic		/* setup stack and call panic */
   2293 1:
   2294 #endif
   2295 	/*
   2296 	 * pass lofault value as 5th argument to do_copystr
   2297 	 */
   2298 	leaq	_copyoutstr_error(%rip), %r8
   2299 
   2300 	cmpq	%rax, %rsi		/* test uaddr < kernelbase */
   2301 	jb	do_copystr
   2302 	movq	%gs:CPU_THREAD, %r9	/* not a user address: try copyops */
   2303 	jmp	3f
   2304 
        	/*
        	 * NOTE(review): this handler uses %r9 as the thread pointer and
        	 * %r11 as the saved lofault; presumably do_copystr (not visible
        	 * here) leaves them that way -- confirm.
        	 */
   2305 _copyoutstr_error:
   2306 	movq	%r11, T_LOFAULT(%r9)	/* restore the original lofault */
   2307 3:
   2308 	movq	T_COPYOPS(%r9), %rax
   2309 	testq	%rax, %rax		/* copyops vector installed? */
   2310 	jz	2f
   2311 
   2312 	/*
   2313 	 * reload args for the copyop
   2314 	 */
   2315 	movq	(%rsp), %rdi
   2316 	movq	0x8(%rsp), %rsi
   2317 	movq	0x10(%rsp), %rdx
   2318 	movq	0x18(%rsp), %rcx
   2319 	leave
   2320 	jmp	*CP_COPYOUTSTR(%rax)	/* tail call; the copyop returns to our caller */
   2321 
   2322 2:	movl	$EFAULT, %eax		/* return EFAULT */
   2323 	leave
   2324 	ret
   2325 	SET_SIZE(copyoutstr)
   2326 
   2327 #elif defined(__i386)
   2328 
        /*
         * int copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
         *     size_t *lencopied)	-- i386, arguments on the stack
         *
         * When uaddr is below kernelbase the work is handed to do_copystr with
         * _copyoutstr_error in %eax as the lofault handler.  When uaddr is not
         * below kernelbase, or _copyoutstr_error is entered, the routine
         * tail-calls the CP_COPYOUTSTR copyop if T_COPYOPS is non-NULL and
         * returns EFAULT otherwise.  The stack arguments are never modified,
         * so the copyop sees the caller's original argument list.
         */
   2329 #define	ARG_KADDR	4
   2330 #define	ARG_UADDR	8
   2331 
   2332 	ENTRY(copyoutstr)
   2333 	movl	kernelbase, %ecx
   2334 #ifdef DEBUG
   2335 	cmpl	%ecx, ARG_KADDR(%esp)
   2336 	jnb	1f
   2337 	pushl	%ebp
   2338 	movl	%esp, %ebp
   2339 	pushl	$.copyoutstr_panic_msg
   2340 	call	panic
   2341 1:
   2342 #endif
   2343 	lea	_copyoutstr_error, %eax
   2344 	cmpl	%ecx, ARG_UADDR(%esp)	/* test uaddr < kernelbase */
   2345 	jb	do_copystr
   2346 	movl	%gs:CPU_THREAD, %edx	/* not a user address: try copyops */
   2347 	jmp	3f
   2348 
        	/*
        	 * Unwinds with the same pop sequence as copystr_done: the saved
        	 * lofault first, then %edi, %ebx and %ebp.
        	 */
   2349 _copyoutstr_error:
   2350 	popl	%edi
   2351 	movl	%gs:CPU_THREAD, %edx
   2352 	movl	%edi, T_LOFAULT(%edx)	/* restore the original lofault */
   2353 
   2354 	popl	%edi
   2355 	popl	%ebx
   2356 	popl	%ebp
   2357 3:
   2358 	movl	T_COPYOPS(%edx), %eax
   2359 	testl	%eax, %eax		/* copyops vector installed? */
   2360 	jz	2f
   2361 	jmp	*CP_COPYOUTSTR(%eax)	/* tail call with the original stack args */
   2362 
   2363 2:	movl	$EFAULT, %eax		/* return EFAULT */
   2364 	ret
   2365 	SET_SIZE(copyoutstr)
   2366 
   2367 #undef	ARG_KADDR
   2368 #undef	ARG_UADDR
   2369 
   2370 #endif	/* __i386 */
   2371 #endif	/* __lint */
   2372 
   2373 /*
   2374  * Since all of the fuword() variants are so similar, we have a macro to spit
   2375  * them out.  This allows us to create DTrace-unobservable functions easily.
   2376  */
   2377 
   2378 #if defined(__lint)
   2379 
        /*
         * Lint-only stand-ins for the fuword*() family.  Each stub ignores its
         * arguments and returns 0; fuword64() is declared only for __amd64.
         */
   2380 #if defined(__amd64)
   2381 
   2382 /* ARGSUSED */
   2383 int
   2384 fuword64(const void *addr, uint64_t *dst)
   2385 { return (0); }
   2386 
   2387 #endif
   2388 
   2389 /* ARGSUSED */
   2390 int
   2391 fuword32(const void *addr, uint32_t *dst)
   2392 { return (0); }
   2393 
   2394 /* ARGSUSED */
   2395 int
   2396 fuword16(const void *addr, uint16_t *dst)
   2397 { return (0); }
   2398 
   2399 /* ARGSUSED */
   2400 int
   2401 fuword8(const void *addr, uint8_t *dst)
   2402 { return (0); }
   2403 
   2404 #else	/* __lint */
   2405 
   2406 #if defined(__amd64)
   2407 
   2408 /*
         * FUWORD(NAME, INSTR, REG, COPYOP) emits int NAME(const void *addr,
         * T *dst) with %rdi = addr and %rsi = dst:
         *
         *  - addr below kernelbase: install _flt_NAME as the thread's lofault
         *    handler, load through (%rdi) with INSTR into REG, clear
         *    T_LOFAULT, store REG to (%rsi) and return 0.
         *  - addr not below kernelbase, or the load faults (the fault path
         *    also clears T_LOFAULT): tail-call the COPYOP entry of the
         *    thread's copyops vector when T_COPYOPS is non-NULL, otherwise
         *    return -1.
         *
         * T_LOFAULT is set back to 0, not to a previously saved value.  The
         * handler address is formed RIP-relative, like the other lofault
         * handlers in this file.
         *
   2409  * (Note that we don't save and reload the arguments here
   2410  * because their values are not altered in the copy path)
   2411  */
   2412 
   2413 #define	FUWORD(NAME, INSTR, REG, COPYOP)	\
   2414 	ENTRY(NAME)				\
   2415 	movq	%gs:CPU_THREAD, %r9;		\
   2416 	cmpq	kernelbase(%rip), %rdi;		\
   2417 	jae	1f;				\
   2418 	leaq	_flt_/**/NAME(%rip), %rdx;	\
   2419 	movq	%rdx, T_LOFAULT(%r9);		\
   2420 	INSTR	(%rdi), REG;			\
   2421 	movq	$0, T_LOFAULT(%r9);		\
   2422 	INSTR	REG, (%rsi);			\
   2423 	xorl	%eax, %eax;			\
   2424 	ret;					\
   2425 _flt_/**/NAME:					\
   2426 	movq	$0, T_LOFAULT(%r9);		\
   2427 1:						\
   2428 	movq	T_COPYOPS(%r9), %rax;		\
   2429 	testq	%rax, %rax;			\
   2430 	jz	2f;				\
   2431 	jmp	*COPYOP(%rax);			\
   2432 2:						\
   2433 	movl	$-1, %eax;			\
   2434 	ret;					\
   2435 	SET_SIZE(NAME)
   2436 
   2437 	FUWORD(fuword64, movq, %rax, CP_FUWORD64)
   2438 	FUWORD(fuword32, movl, %eax, CP_FUWORD32)
   2439 	FUWORD(fuword16, movw, %ax, CP_FUWORD16)
   2440 	FUWORD(fuword8, movb, %al, CP_FUWORD8)
   2441 
   2442 #elif defined(__i386)
   2443 
        /*
         * FUWORD(NAME, INSTR, REG, COPYOP) emits int NAME(const void *addr,
         * T *dst) for i386, with addr at 4(%esp) and dst at 8(%esp):
         *
         *  - addr below kernelbase: install _flt_NAME as the thread's lofault
         *    handler, load through addr with INSTR into REG, clear T_LOFAULT,
         *    store REG through dst and return 0.
         *  - addr not below kernelbase, or the load faults (the fault path
         *    also clears T_LOFAULT): tail-call the COPYOP entry of the
         *    thread's copyops vector when T_COPYOPS is non-NULL, otherwise
         *    return -1.
         *
         * The stack arguments are left untouched, so the copyop receives the
         * caller's original argument list.
         */
   2444 #define	FUWORD(NAME, INSTR, REG, COPYOP)	\
   2445 	ENTRY(NAME)				\
   2446 	movl	%gs:CPU_THREAD, %ecx;		\
   2447 	movl	kernelbase, %eax;		\
   2448 	cmpl	%eax, 4(%esp);			\
   2449 	jae	1f;				\
   2450 	lea	_flt_/**/NAME, %edx;		\
   2451 	movl	%edx, T_LOFAULT(%ecx);		\
   2452 	movl	4(%esp), %eax;			\
   2453 	movl	8(%esp), %edx;			\
   2454 	INSTR	(%eax), REG;			\
   2455 	movl	$0, T_LOFAULT(%ecx);		\
   2456 	INSTR	REG, (%edx);			\
   2457 	xorl	%eax, %eax;			\
   2458 	ret;					\
   2459 _flt_/**/NAME:					\
   2460 	movl	$0, T_LOFAULT(%ecx);		\
   2461 1:						\
   2462 	movl	T_COPYOPS(%ecx), %eax;		\
   2463 	testl	%eax, %eax;			\
   2464 	jz	2f;				\
   2465 	jmp	*COPYOP(%eax);			\
   2466 2:						\
   2467 	movl	$-1, %eax;			\
   2468 	ret;					\
   2469 	SET_SIZE(NAME)
   2470 
   2471 	FUWORD(fuword32, movl, %eax, CP_FUWORD32)
   2472 	FUWORD(fuword16, movw, %ax, CP_FUWORD16)
   2473 	FUWORD(fuword8, movb, %al, CP_FUWORD8)
   2474 
   2475 #endif	/* __i386 */
   2476 
   2477 #undef	FUWORD
   2478 
   2479 #endif	/* __lint */
   2480 
   2481 /*
   2482  * Set user word.
   2483  */
   2484 
   2485 #if defined(__lint)
   2486 
        /*
         * Lint-only stand-ins for the suword*() family.  Each stub ignores its
         * arguments and returns 0; suword64() is declared only for __amd64.
         */
   2487 #if defined(__amd64)
   2488 
   2489 /* ARGSUSED */
   2490 int
   2491 suword64(void *addr, uint64_t value)
   2492 { return (0); }
   2493 
   2494 #endif
   2495 
   2496 /* ARGSUSED */
   2497 int
   2498 suword32(void *addr, uint32_t value)
   2499 { return (0); }
   2500 
   2501 /* ARGSUSED */
   2502 int
   2503 suword16(void *addr, uint16_t value)
   2504 { return (0); }
   2505 
   2506 /* ARGSUSED */
   2507 int
   2508 suword8(void *addr, uint8_t value)
   2509 { return (0); }
   2510 
   2511 #else	/* __lint */
   2512 
   2513 #if defined(__amd64)
   2514 
   2515 /*
         * SUWORD(NAME, INSTR, REG, COPYOP) emits int NAME(void *addr, T value)
         * with %rdi = addr and the value in REG (a sub-register of %rsi):
         *
         *  - addr below kernelbase: install _flt_NAME as the thread's lofault
         *    handler, store REG through (%rdi) with INSTR, clear T_LOFAULT and
         *    return 0.
         *  - addr not below kernelbase, or the store faults (the fault path
         *    also clears T_LOFAULT): tail-call the COPYOP entry of the
         *    thread's copyops vector when T_COPYOPS is non-NULL, otherwise
         *    return -1.
         *
         * T_LOFAULT is set back to 0, not to a previously saved value.  The
         * handler address is formed RIP-relative, like the other lofault
         * handlers in this file.
         *
   2516  * (Note that we don't save and reload the arguments here
   2517  * because their values are not altered in the copy path)
   2518  */
   2519 
   2520 #define	SUWORD(NAME, INSTR, REG, COPYOP)	\
   2521 	ENTRY(NAME)				\
   2522 	movq	%gs:CPU_THREAD, %r9;		\
   2523 	cmpq	kernelbase(%rip), %rdi;		\
   2524 	jae	1f;				\
   2525 	leaq	_flt_/**/NAME(%rip), %rdx;	\
   2526 	movq	%rdx, T_LOFAULT(%r9);		\
   2527 	INSTR	REG, (%rdi);			\
   2528 	movq	$0, T_LOFAULT(%r9);		\
   2529 	xorl	%eax, %eax;			\
   2530 	ret;					\
   2531 _flt_/**/NAME:					\
   2532 	movq	$0, T_LOFAULT(%r9);		\
   2533 1:						\
   2534 	movq	T_COPYOPS(%r9), %rax;		\
   2535 	testq	%rax, %rax;			\
   2536 	jz	3f;				\
   2537 	jmp	*COPYOP(%rax);			\
   2538 3:						\
   2539 	movl	$-1, %eax;			\
   2540 	ret;					\
   2541 	SET_SIZE(NAME)
   2542 
   2543 	SUWORD(suword64, movq, %rsi, CP_SUWORD64)
   2544 	SUWORD(suword32, movl, %esi, CP_SUWORD32)
   2545 	SUWORD(suword16, movw, %si, CP_SUWORD16)
   2546 	SUWORD(suword8, movb, %sil, CP_SUWORD8)
   2547 
   2548 #elif defined(__i386)
   2549 
        /*
         * SUWORD(NAME, INSTR, REG, COPYOP) emits int NAME(void *addr, T value)
         * for i386, with addr at 4(%esp) and value at 8(%esp).  REG must be a
         * sub-register of %edx, because the value is always loaded into %edx:
         *
         *  - addr below kernelbase: install _flt_NAME as the thread's lofault
         *    handler, store REG through addr with INSTR, clear T_LOFAULT and
         *    return 0.
         *  - addr not below kernelbase, or the store faults (the fault path
         *    also clears T_LOFAULT): tail-call the COPYOP entry of the
         *    thread's copyops vector (via %ecx) when T_COPYOPS is non-NULL,
         *    otherwise return -1.
         *
         * The stack arguments are left untouched, so the copyop receives the
         * caller's original argument list.
         */
   2550 #define	SUWORD(NAME, INSTR, REG, COPYOP)	\
   2551 	ENTRY(NAME)				\
   2552 	movl	%gs:CPU_THREAD, %ecx;		\
   2553 	movl	kernelbase, %eax;		\
   2554 	cmpl	%eax, 4(%esp);			\
   2555 	jae	1f;				\
   2556 	lea	_flt_/**/NAME, %edx;		\
   2557 	movl	%edx, T_LOFAULT(%ecx);		\
   2558 	movl	4(%esp), %eax;			\
   2559 	movl	8(%esp), %edx;			\
   2560 	INSTR	REG, (%eax);			\
   2561 	movl	$0, T_LOFAULT(%ecx);		\
   2562 	xorl	%eax, %eax;			\
   2563 	ret;					\
   2564 _flt_/**/NAME:					\
   2565 	movl	$0, T_LOFAULT(%ecx);		\
   2566 1:						\
   2567 	movl	T_COPYOPS(%ecx), %eax;		\
   2568 	testl	%eax, %eax;			\
   2569 	jz	3f;				\
   2570 	movl	COPYOP(%eax), %ecx;		\
   2571 	jmp	*%ecx;				\
   2572 3:						\
   2573 	movl	$-1, %eax;			\
   2574 	ret;					\
   2575 	SET_SIZE(NAME)
   2576 
   2577 	SUWORD(suword32, movl, %edx, CP_SUWORD32)
   2578 	SUWORD(suword16, movw, %dx, CP_SUWORD16)
   2579 	SUWORD(suword8, movb, %dl, CP_SUWORD8)
   2580 
   2581 #endif	/* __i386 */
   2582 
   2583 #undef	SUWORD
   2584 
   2585 #endif	/* __lint */
   2586 
   2587 #if defined(__lint)
   2588 
        /*
         * Lint-only stand-ins for the fuword*_noerr() family.  The stubs have
         * empty bodies and return nothing; fuword64_noerr() is declared only
         * for __amd64.
         */
   2589 #if defined(__amd64)
   2590 
   2591 /*ARGSUSED*/
   2592 void
   2593 fuword64_noerr(const void *addr, uint64_t *dst)
   2594 {}
   2595 
   2596 #endif
   2597 
   2598 /*ARGSUSED*/
   2599 void
   2600 fuword32_noerr(const void *addr, uint32_t *dst)
   2601 {}
   2602 
   2603 /*ARGSUSED*/
   2604 void
   2605 fuword8_noerr(const void *addr, uint8_t *dst)
   2606 {}
   2607 
   2608 /*ARGSUSED*/
   2609 void
   2610 fuword16_noerr(const void *addr, uint16_t *dst)
   2611 {}
   2612 
   2613 #else   /* __lint */
   2614 
   2615 #if defined(__amd64)
   2616 
        /*
         * FUWORD_NOERR(FNAME, MOVOP, TMPREG) emits void FNAME(const void *addr,
         * T *dst) with %rdi = addr and %rsi = dst.  An addr that is not below
         * kernelbase is replaced by kernelbase itself; the value is then
         * loaded through (%rdi) into TMPREG and stored to (%rsi).  No lofault
         * handler is installed and nothing is returned.
         */
   2617 #define	FUWORD_NOERR(FNAME, MOVOP, TMPREG)	\
   2618 	ENTRY(FNAME)				\
   2619 	cmpq	kernelbase(%rip), %rdi;		\
   2620 	cmovaeq	kernelbase(%rip), %rdi;		\
   2621 	MOVOP	(%rdi), TMPREG;			\
   2622 	MOVOP	TMPREG, (%rsi);			\
   2623 	ret;					\
   2624 	SET_SIZE(FNAME)
   2625 
   2626 	FUWORD_NOERR(fuword64_noerr, movq, %rax)
   2627 	FUWORD_NOERR(fuword32_noerr, movl, %eax)
   2628 	FUWORD_NOERR(fuword16_noerr, movw, %ax)
   2629 	FUWORD_NOERR(fuword8_noerr, movb, %al)
   2630 
   2631 #elif defined(__i386)
   2632 
        /*
         * FUWORD_NOERR(FNAME, MOVOP, TMPREG) emits void FNAME(const void *addr,
         * T *dst) for i386, with addr at 4(%esp) and dst at 8(%esp).  An addr
         * that is not below kernelbase is replaced by kernelbase itself; the
         * value is then loaded through %eax into TMPREG and stored through
         * %edx.  No lofault handler is installed and nothing is returned.
         */
   2633 #define	FUWORD_NOERR(FNAME, MOVOP, TMPREG)	\
   2634 	ENTRY(FNAME)				\
   2635 	movl	8(%esp), %edx;			\
   2636 	movl	4(%esp), %eax;			\
   2637 	cmpl	kernelbase, %eax;		\
   2638 	jb	2f;				\
   2639 	movl	kernelbase, %eax;		\
   2640 2:	MOVOP	(%eax), TMPREG;			\
   2641 	MOVOP	TMPREG, (%edx);			\
   2642 	ret;					\
   2643 	SET_SIZE(FNAME)
   2644 
   2645 	FUWORD_NOERR(fuword32_noerr, movl, %ecx)
   2646 	FUWORD_NOERR(fuword16_noerr, movw, %cx)
   2647 	FUWORD_NOERR(fuword8_noerr, movb, %cl)
   2648 
   2649 #endif	/* __i386 */
   2650 
   2651 #undef	FUWORD_NOERR
   2652 
   2653 #endif	/* __lint */
   2654 
   2655 #if defined(__lint)
   2656 
        /*
         * Lint-only stand-ins for the suword*_noerr() family.  The stubs have
         * empty bodies and return nothing; suword64_noerr() is declared only
         * for __amd64.
         */
   2657 #if defined(__amd64)
   2658 
   2659 /*ARGSUSED*/
   2660 void
   2661 suword64_noerr(void *addr, uint64_t value)
   2662 {}
   2663 
   2664 #endif
   2665 
   2666 /*ARGSUSED*/
   2667 void
   2668 suword32_noerr(void *addr, uint32_t value)
   2669 {}
   2670 
   2671 /*ARGSUSED*/
   2672 void
   2673 suword16_noerr(void *addr, uint16_t value)
   2674 {}
   2675 
   2676 /*ARGSUSED*/
   2677 void
   2678 suword8_noerr(void *addr, uint8_t value)
   2679 {}
   2680 
   2681 #else	/* __lint */
   2682 
   2683 #if defined(__amd64)
   2684 
        /*
         * SUWORD_NOERR(FNAME, MOVOP, VALREG) emits void FNAME(void *addr,
         * T value) with %rdi = addr and the value in VALREG (a sub-register of
         * %rsi).  An addr that is not below kernelbase is replaced by
         * kernelbase itself; VALREG is then stored through (%rdi).  No lofault
         * handler is installed and nothing is returned.
         */
   2685 #define	SUWORD_NOERR(FNAME, MOVOP, VALREG)	\
   2686 	ENTRY(FNAME)				\
   2687 	cmpq	kernelbase(%rip), %rdi;		\
   2688 	cmovaeq	kernelbase(%rip), %rdi;		\
   2689 	MOVOP	VALREG, (%rdi);			\
   2690 	ret;					\
   2691 	SET_SIZE(FNAME)
   2692 
   2693 	SUWORD_NOERR(suword64_noerr, movq, %rsi)
   2694 	SUWORD_NOERR(suword32_noerr, movl, %esi)
   2695 	SUWORD_NOERR(suword16_noerr, movw, %si)
   2696 	SUWORD_NOERR(suword8_noerr, movb, %sil)
   2697 
   2698 #elif defined(__i386)
   2699 
        /*
         * SUWORD_NOERR(FNAME, MOVOP, VALREG) emits void FNAME(void *addr,
         * T value) for i386, with addr at 4(%esp) and value at 8(%esp).
         * VALREG must be a sub-register of %edx, where the value is loaded.
         * An addr that is not below kernelbase is replaced by kernelbase
         * itself before the store.  No lofault handler is installed and
         * nothing is returned.
         */
   2700 #define	SUWORD_NOERR(FNAME, MOVOP, VALREG)	\
   2701 	ENTRY(FNAME)				\
   2702 	movl	8(%esp), %edx;			\
   2703 	movl	4(%esp), %eax;			\
   2704 	cmpl	kernelbase, %eax;		\
   2705 	jb	2f;				\
   2706 	movl	kernelbase, %eax;		\
   2707 2:						\
   2708 	MOVOP	VALREG, (%eax);			\
   2709 	ret;					\
   2710 	SET_SIZE(FNAME)
   2711 
   2712 	SUWORD_NOERR(suword32_noerr, movl, %edx)
   2713 	SUWORD_NOERR(suword16_noerr, movw, %dx)
   2714 	SUWORD_NOERR(suword8_noerr, movb, %dl)
   2715 
   2716 #endif	/* __i386 */
   2717 
   2718 #undef	SUWORD_NOERR
   2719 
   2720 #endif	/* __lint */
   2721 
   2722 
   2723 #if defined(__lint)
   2724 
        /*
         * Lint-only stand-ins for the byte and long variants.  The int stubs
         * return 0 and the _noerr stubs have empty bodies.  fulword() and
         * fulword_noerr() take a pointer to the destination (valuep);
         * sulword() and sulword_noerr() take the value itself, so their
         * parameter is named value.
         */
   2725 /*ARGSUSED*/
   2726 int
   2727 subyte(void *addr, uchar_t value)
   2728 { return (0); }
   2729 
   2730 /*ARGSUSED*/
   2731 void
   2732 subyte_noerr(void *addr, uchar_t value)
   2733 {}
   2734 
   2735 /*ARGSUSED*/
   2736 int
   2737 fulword(const void *addr, ulong_t *valuep)
   2738 { return (0); }
   2739 
   2740 /*ARGSUSED*/
   2741 void
   2742 fulword_noerr(const void *addr, ulong_t *valuep)
   2743 {}
   2744 
   2745 /*ARGSUSED*/
   2746 int
   2747 sulword(void *addr, ulong_t value)
   2748 { return (0); }
   2749 
   2750 /*ARGSUSED*/
   2751 void
   2752 sulword_noerr(void *addr, ulong_t value)
   2753 {}
   2754 
   2755 #else
   2756 
        /*
         * The byte and long entry points have no code of their own; each is a
         * weak symbol set equal to one of the sized routines.
         */
   2757 	.weak	subyte
   2758 	subyte=suword8
   2759 	.weak	subyte_noerr
   2760 	subyte_noerr=suword8_noerr
   2761 
   2762 #if defined(__amd64)
   2763 
        	/* on amd64 the long variants alias the 64-bit routines */
   2764 	.weak	fulword
   2765 	fulword=fuword64
   2766 	.weak	fulword_noerr
   2767 	fulword_noerr=fuword64_noerr
   2768 	.weak	sulword
   2769 	sulword=suword64
   2770 	.weak	sulword_noerr
   2771 	sulword_noerr=suword64_noerr
   2772 
   2773 #elif defined(__i386)
   2774 
        	/* on i386 the long variants alias the 32-bit routines */
   2775 	.weak	fulword
   2776 	fulword=fuword32
   2777 	.weak	fulword_noerr
   2778 	fulword_noerr=fuword32_noerr
   2779 	.weak	sulword
   2780 	sulword=suword32
   2781 	.weak	sulword_noerr
   2782 	sulword_noerr=suword32_noerr
   2783 
   2784 #endif /* __i386 */
   2785 
   2786 #endif /* __lint */
   2787 
   2788 #if defined(__lint)
   2789 
   2790 /*
         * Lint-only stand-ins for the _noerr copy routines and the user-space
         * helpers; every stub below has an empty body.
         *
   2791  * Copy a block of storage - must not overlap (from + len <= to).
   2792  * No fault handler installed (to be called under on_fault())
   2793  */
   2794 
   2795 /* ARGSUSED */
   2796 void
   2797 copyout_noerr(const void *kfrom, void *uto, size_t count)
   2798 {}
   2799 
   2800 /* ARGSUSED */
   2801 void
   2802 copyin_noerr(const void *ufrom, void *kto, size_t count)
   2803 {}
   2804 
   2805 /*
   2806  * Zero a block of storage in user space.
   2807  */
   2808 
   2809 /* ARGSUSED */
   2810 void
   2811 uzero(void *addr, size_t count)
   2812 {}
   2813 
   2814 /*
   2815  * Copy a block of storage in user space.
   2816  */
   2817 
   2818 /* ARGSUSED */
   2819 void
   2820 ucopy(const void *ufrom, void *uto, size_t ulength)
   2821 {}
   2822 
   2823 /*
   2824  * Copy a string in user space.
   2825  */
   2826 
   2827 /* ARGSUSED */
   2828 void
   2829 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied)
   2830 {}
   2831 
   2832 #else /* __lint */
   2833 
   2834 #if defined(__amd64)
   2835 
        /*
         * void copyin_noerr(const void *ufrom, void *kto, size_t count)
         *
         * On entry: %rdi = ufrom, %rsi = kto.  A ufrom that is not below
         * kernelbase is replaced by kernelbase itself, then control passes to
         * do_copy.  No lofault handler is installed here.  DEBUG kernels
         * panic if kto is below kernelbase.
         */
   2836 	ENTRY(copyin_noerr)
   2837 	movq	kernelbase(%rip), %rax
   2838 #ifdef DEBUG
   2839 	cmpq	%rax, %rsi		/* kto must not be below kernelbase */
   2840 	jnb	9f
   2841 	leaq	.cpyin_ne_pmsg(%rip), %rdi
   2842 	jmp	call_panic		/* setup stack and call panic */
   2843 9:
   2844 #endif
   2845 	/* ufrom = (ufrom < kernelbase) ? ufrom : kernelbase, to force a fault */
   2846 	cmpq	%rax, %rdi
   2847 	cmovaeq	%rax, %rdi
   2848 	jmp	do_copy
   2849 	SET_SIZE(copyin_noerr)
   2850 
        /*
         * void copyout_noerr(const void *kfrom, void *uto, size_t count)
         *
         * On entry: %rdi = kfrom, %rsi = uto.  A uto that is not below
         * kernelbase is replaced by kernelbase itself, then control passes to
         * do_copy.  No lofault handler is installed here.  DEBUG kernels
         * panic if kfrom is below kernelbase.
         */
   2851 	ENTRY(copyout_noerr)
   2852 	movq	kernelbase(%rip), %rax
   2853 #ifdef DEBUG
   2854 	cmpq	%rax, %rdi		/* kfrom must not be below kernelbase */
   2855 	jnb	9f
   2856 	leaq	.cpyout_ne_pmsg(%rip), %rdi
   2857 	jmp	call_panic		/* setup stack and call panic */
   2858 9:
   2859 #endif
   2860 	/* uto = (uto < kernelbase) ? uto : kernelbase, to force a fault */
   2861 	cmpq	%rax, %rsi
   2862 	cmovaeq	%rax, %rsi
   2863 	jmp	do_copy
   2864 	SET_SIZE(copyout_noerr)
   2865 
        /*
         * void uzero(void *addr, size_t count)
         *
         * On entry: %rdi = addr.  An addr that is not below kernelbase is
         * replaced by kernelbase itself, then control passes to do_zero.
         */
   2866 	ENTRY(uzero)
   2867 	movq	kernelbase(%rip), %rax
   2868 	/* addr = (addr < kernelbase) ? addr : kernelbase, to force a fault */
   2869 	cmpq	%rax, %rdi
   2870 	cmovaeq	%rax, %rdi
   2871 	jmp	do_zero
   2872 	SET_SIZE(uzero)
   2873 
        /*
         * void ucopy(const void *ufrom, void *uto, size_t ulength)
         *
         * On entry: %rdi = ufrom, %rsi = uto.  Either address that is not
         * below kernelbase is replaced by kernelbase itself, then control
         * passes to do_copy.  No lofault handler is installed here.
         */
   2874 	ENTRY(ucopy)
   2875 	movq	kernelbase(%rip), %rax
   2876 	cmpq	%rax, %rdi
   2877 	cmovaeq	%rax, %rdi	/* force fault at kernelbase */
   2878 	cmpq	%rax, %rsi
   2879 	cmovaeq	%rax, %rsi	/* force fault at kernelbase */
   2880 	jmp	do_copy
   2881 	SET_SIZE(ucopy)
   2882 
        /*
         * void ucopystr(const char *ufrom, char *uto, size_t umaxlength,
         *     size_t *lencopied)
         *
         * On entry: %rdi = ufrom, %rsi = uto.  Either address that is not
         * below kernelbase is replaced by kernelbase itself.  The thread's
         * current T_LOFAULT value is then passed to do_copystr in %r8, so no
         * new fault handler is introduced by this routine.
         */
   2883 	ENTRY(ucopystr)
   2884 	movq	kernelbase(%rip), %rax
   2885 	cmpq	%rax, %rdi
   2886 	cmovaeq	%rax, %rdi	/* force fault at kernelbase */
   2887 	cmpq	%rax, %rsi
   2888 	cmovaeq	%rax, %rsi	/* force fault at kernelbase */
   2889 	/* do_copystr expects lofault address in %r8 */
   2890 	movq	%gs:CPU_THREAD, %r8
   2891 	movq	T_LOFAULT(%r8), %r8	/* reuse the handler already in place */
   2892 	jmp	do_copystr
   2893 	SET_SIZE(ucopystr)
   2894 
   2895 #elif defined(__i386)
   2896 
        /*
         * void copyin_noerr(const void *ufrom, void *kto, size_t count)
         *
         * i386: ufrom is at 4(%esp) and kto at 8(%esp).  A ufrom that is not
         * below kernelbase is overwritten on the stack with kernelbase, then
         * control passes to do_copy.  No lofault handler is installed here.
         * DEBUG kernels panic if kto is below kernelbase.
         */
   2897 	ENTRY(copyin_noerr)
   2898 	movl	kernelbase, %eax
   2899 #ifdef DEBUG
   2900 	cmpl	%eax, 8(%esp)	/* 8(%esp) = kto */
   2901 	jae	1f
   2902 	pushl	$.cpyin_ne_pmsg
   2903 	call	panic
   2904 1:
   2905 #endif
   2906 	cmpl	%eax, 4(%esp)	/* ufrom < kernelbase */
   2907 	jb	do_copy
   2908 	movl	%eax, 4(%esp)	/* force fault at kernelbase */
   2909 	jmp	do_copy
   2910 	SET_SIZE(copyin_noerr)
   2911 
        /*
         * void copyout_noerr(const void *kfrom, void *uto, size_t count)
         *
         * i386: kfrom is at 4(%esp) and uto at 8(%esp).  A uto that is not
         * below kernelbase is overwritten on the stack with kernelbase, then
         * control passes to do_copy.  No lofault handler is installed here.
         * DEBUG kernels panic if kfrom is below kernelbase.
         */
   2912 	ENTRY(copyout_noerr)
   2913 	movl	kernelbase, %eax
   2914 #ifdef DEBUG
   2915 	cmpl	%eax, 4(%esp)	/* 4(%esp) = kfrom */
   2916 	jae	1f
   2917 	pushl	$.cpyout_ne_pmsg
   2918 	call	panic
   2919 1:
   2920 #endif
   2921 	cmpl	%eax, 8(%esp)	/* uto < kernelbase */
   2922 	jb	do_copy
   2923 	movl	%eax, 8(%esp)	/* force fault at kernelbase */
   2924 	jmp	do_copy
   2925 	SET_SIZE(copyout_noerr)
   2926 
        /*
         * void uzero(void *addr, size_t count)
         *
         * i386: addr is at 4(%esp).  An addr that is not below kernelbase is
         * overwritten on the stack with kernelbase, then control passes to
         * do_zero.
         */
   2927 	ENTRY(uzero)
   2928 	movl	kernelbase, %eax
   2929 	cmpl	%eax, 4(%esp)	/* addr < kernelbase */
   2930 	jb	do_zero
   2931 	movl	%eax, 4(%esp)	/* force fault at kernelbase */
   2932 	jmp	do_zero
   2933 	SET_SIZE(uzero)
   2934 
        /*
         * void ucopy(const void *ufrom, void *uto, size_t ulength)
         *
         * i386: ufrom is at 4(%esp) and uto at 8(%esp).  Either address that
         * is not below kernelbase is overwritten on the stack with kernelbase,
         * then control passes to do_copy.
         */
   2935 	ENTRY(ucopy)
   2936 	movl	kernelbase, %eax
   2937 	cmpl	%eax, 4(%esp)	/* ufrom < kernelbase */
   2938 	jb	1f
   2939 	movl	%eax, 4(%esp)	/* force fault at kernelbase */
   2940 1:
   2941 	cmpl	%eax, 8(%esp)	/* uto < kernelbase */
   2942 	jb	do_copy
   2943 	movl	%eax, 8(%esp)	/* force fault at kernelbase */
   2944 	jmp	do_copy
   2945 	SET_SIZE(ucopy)
   2946 
        /*
         * void ucopystr(const char *ufrom, char *uto, size_t umaxlength,
         *     size_t *lencopied)
         *
         * i386: ufrom is at 4(%esp) and uto at 8(%esp).  Either address that
         * is not below kernelbase is overwritten on the stack with kernelbase.
         * The thread's current T_LOFAULT value is then passed to do_copystr in
         * %eax, so no new fault handler is introduced by this routine.
         */
   2947 	ENTRY(ucopystr)
   2948 	movl	kernelbase, %eax
   2949 	cmpl	%eax, 4(%esp)	/* ufrom < kernelbase */
   2950 	jb	1f
   2951 	movl	%eax, 4(%esp)	/* force fault at kernelbase */
   2952 1:
   2953 	cmpl	%eax, 8(%esp)	/* uto < kernelbase */
   2954 	jb	2f
   2955 	movl	%eax, 8(%esp)	/* force fault at kernelbase */
   2956 2:
   2957 	/* do_copystr expects the lofault address in %eax */
   2958 	movl	%gs:CPU_THREAD, %eax
   2959 	movl	T_LOFAULT(%eax), %eax	/* reuse the handler already in place */
   2960 	jmp	do_copystr
   2961 	SET_SIZE(ucopystr)
   2962 
   2963 #endif	/* __i386 */
   2964 
   2965 #ifdef DEBUG
        /*
         * Panic message strings, assembled into .data and present only in
         * DEBUG builds.  The copyinstr, copyoutstr, copyin_noerr and
         * copyout_noerr strings are referenced by the DEBUG argument checks of
         * those routines; the remaining labels are presumably referenced by
         * routines earlier in this file.
         */
   2966 	.data
   2967 .kcopy_panic_msg:
   2968 	.string "kcopy: arguments below kernelbase"
   2969 .bcopy_panic_msg:
   2970 	.string "bcopy: arguments below kernelbase"
   2971 .kzero_panic_msg:
   2972         .string "kzero: arguments below kernelbase"
   2973 .bzero_panic_msg:
   2974 	.string	"bzero: arguments below kernelbase"
   2975 .copyin_panic_msg:
   2976 	.string "copyin: kaddr argument below kernelbase"
   2977 .xcopyin_panic_msg:
   2978 	.string	"xcopyin: kaddr argument below kernelbase"
   2979 .copyout_panic_msg:
   2980 	.string "copyout: kaddr argument below kernelbase"
   2981 .xcopyout_panic_msg:
   2982 	.string	"xcopyout: kaddr argument below kernelbase"
   2983 .copystr_panic_msg:
   2984 	.string	"copystr: arguments in user space"
   2985 .copyinstr_panic_msg:
   2986 	.string	"copyinstr: kaddr argument not in kernel address space"
   2987 .copyoutstr_panic_msg:
   2988 	.string	"copyoutstr: kaddr argument not in kernel address space"
   2989 .cpyin_ne_pmsg:
   2990 	.string "copyin_noerr: argument not in kernel address space"
   2991 .cpyout_ne_pmsg:
   2992 	.string "copyout_noerr: argument not in kernel address space"
   2993 #endif
   2994 
   2995 #endif	/* __lint */
   2996