Home | History | Annotate | Download | only in cpu
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 # ident	"%Z%%M%	%I%	%E% SMI"
     27 
     28 #include <sys/param.h>
     29 #include <sys/errno.h>
     30 #include <sys/asm_linkage.h>
     31 #include <sys/vtrace.h>
     32 #include <sys/machthread.h>
     33 #include <sys/clock.h>
     34 #include <sys/asi.h>
     35 #include <sys/fsr.h>
     36 #include <sys/privregs.h>
     37 
     38 #if !defined(lint)
     39 #include "assym.h"
     40 #endif	/* lint */
     41 
     42 
     43 /*
     44  * Less than or equal to this number of bytes we will always copy byte-for-byte
     45  */
     46 #define	SMALL_LIMIT	7
     47 
     48 /*
     49  * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault
     50  * handler was set
     51  */
     52 #define	LOFAULT_SET 2
     53 
     54 
     55 /*
     56  * Copy a block of storage, returning an error code if `from' or
     57  * `to' takes a kernel pagefault which cannot be resolved.
     58  * Returns errno value on pagefault error, 0 if all ok
     59  */
     60 
     61 
     62 
     63 #if defined(lint)
     64 
     65 /* ARGSUSED */
     66 int
     67 kcopy(const void *from, void *to, size_t count)
     68 { return(0); }
     69 
     70 #else	/* lint */
     71 
     72 	.seg	".text"
     73 	.align	4
     74 
     75 	ENTRY(kcopy)
     76 
     77 	save	%sp, -SA(MINFRAME), %sp		! new window: from/to/count now in %i0-%i2
     78 	set	.copyerr, %l7			! copyerr is lofault value
     79 	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
     80 	or	%o5, LOFAULT_SET, %o5		! tag it so .cpdone knows to restore it
     81 	membar	#Sync				! sync error barrier
     82 	b	.do_copy			! common code (shared with bcopy)
     83 	stn	%l7, [THREAD_REG + T_LOFAULT]	! delay slot, set t_lofault
     84 
     85 /*
     86  * We got here because of a fault during kcopy.
     87  * Errno value is in %g1.
     88  */
     89 .copyerr:
     90 	! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET
     91 	! into %o5 to indicate it has set t_lofault handler. Need to clear
     92 	! LOFAULT_SET flag before restoring the error handler.
     93 	andn	%o5, LOFAULT_SET, %o5
     94 	membar	#Sync			! sync error barrier
     95 	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
     96 	ret
     97 	restore	%g1, 0, %o0		! delay slot, return errno from %g1
     98 
     99 	SET_SIZE(kcopy)
    100 #endif	/* lint */
    101 
    102 
    103 /*
    104  * Copy a block of storage - must not overlap (from + len <= to).
    105  */
    106 #if defined(lint)
    107 
    108 /* ARGSUSED */
    109 void
    110 bcopy(const void *from, void *to, size_t count)
    111 {}
    112 
    113 #else	/* lint */
    114 
    115 	ENTRY(bcopy)
    116 
    117 	save	%sp, -SA(MINFRAME), %sp	! new window: from/to/count now in %i0-%i2
    118 	clr	%o5			! flag LOFAULT_SET is not set for bcopy
    119 
    120 .do_copy:
    121         mov     %i1, %g5                ! save dest addr start
    122 
    123         mov     %i2, %l6                ! save size
    124 
    125 	cmp	%i2, 12			! for small counts
    126 	blu	%ncc, .bytecp		! just copy bytes
    127 	  .empty
    128 
    129 	!
    130 	! use aligned transfers where possible
    131 	!
    132 	xor	%i0, %i1, %o4		! xor from and to address
    133 	btst	7, %o4			! if lower three bits zero
    134 	bz	.aldoubcp		! can align on double boundary
    135 	.empty	! assembler complaints about label
    136 
    137 	xor	%i0, %i1, %o4		! xor from and to address
    138 	btst	3, %o4			! if lower two bits zero
    139 	bz	.alwordcp		! can align on word boundary
    140 	btst	3, %i0			! delay slot, from address unaligned?
    141 	!
    142 	! use aligned reads and writes where possible
    143 	! this differs from wordcp in that it copes
    144 	! with odd alignment between source and destination
    145 	! using word reads and writes with the proper shifts
    146 	! in between to align transfers to and from memory
    147 	! i0 - src address, i1 - dest address, i2 - count
    148 	! i3, i4 - tmps used for generating complete word
    149 	! i5 (word to write)
    150 	! l0 size in bits of upper part of source word (US)
    151 	! l1 size in bits of lower part of source word (LS = 32 - US)
    152 	! l2 size in bits of upper part of destination word (UD)
    153 	! l3 size in bits of lower part of destination word (LD = 32 - UD)
    154 	! l4 number of bytes leftover after aligned transfers complete
    155 	! l5 the number 32
    156 	!
    157 	mov	32, %l5			! load an oft-needed constant
    158 	bz	.align_dst_only		! cc from btst 3, %i0 above: source is aligned
    159 	btst	3, %i1			! is destination address aligned?
    160 	clr	%i4			! clear registers used in either case
    161 	bz	.align_src_only
    162 	clr	%l0
    163 	!
    164 	! both source and destination addresses are unaligned
    165 	!
    166 1:					! align source
    167 	ldub	[%i0], %i3		! read a byte from source address
    168 	add	%i0, 1, %i0		! increment source address
    169 	or	%i4, %i3, %i4		! or in with previous bytes (if any)
    170 	btst	3, %i0			! is source aligned?
    171 	add	%l0, 8, %l0		! increment size of upper source (US)
    172 	bnz,a	1b
    173 	sll	%i4, 8, %i4		! make room for next byte
    174 
    175 	sub	%l5, %l0, %l1		! generate shift left count (LS)
    176 	sll	%i4, %l1, %i4		! prepare to get rest
    177 	ld	[%i0], %i3		! read a word
    178 	add	%i0, 4, %i0		! increment source address
    179 	srl	%i3, %l0, %i5		! upper src bits into lower dst bits
    180 	or	%i4, %i5, %i5		! merge
    181 	mov	24, %l3			! align destination
    182 1:
    183 	srl	%i5, %l3, %i4		! prepare to write a single byte
    184 	stb	%i4, [%i1]		! write a byte
    185 	add	%i1, 1, %i1		! increment destination address
    186 	sub	%i2, 1, %i2		! decrement count
    187 	btst	3, %i1			! is destination aligned?
    188 	bnz,a	1b
    189 	sub	%l3, 8, %l3		! delay slot, decrement shift count (LD)
    190 	sub	%l5, %l3, %l2		! generate shift left count (UD)
    191 	sll	%i5, %l2, %i5		! move leftover into upper bytes
    192 	cmp	%l2, %l0		! cmp # reqd to fill dst w old src left
    193 	bgu	%ncc, .more_needed	! need more to fill than we have
    194 	nop
    195 
    196 	sll	%i3, %l1, %i3		! clear upper used byte(s)
    197 	srl	%i3, %l1, %i3
    198 	! get the odd bytes between alignments
    199 	sub	%l0, %l2, %l0		! regenerate shift count
    200 	sub	%l5, %l0, %l1		! generate new shift left count (LS)
    201 	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
    202 	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
    203 	srl	%i3, %l0, %i4
    204 	or	%i5, %i4, %i5
    205 	st	%i5, [%i1]		! write a word
    206 	subcc	%i2, 4, %i2		! decrement count
    207 	bz	%ncc, .unalign_out
    208 	add	%i1, 4, %i1		! increment destination address
    209 
    210 	b	2f
    211 	sll	%i3, %l1, %i5		! get leftover into upper bits
    212 .more_needed:
    213 	sll	%i3, %l0, %i3		! save remaining byte(s)
    214 	srl	%i3, %l0, %i3
    215 	sub	%l2, %l0, %l1		! regenerate shift count
    216 	sub	%l5, %l1, %l0		! generate new shift left count
    217 	sll	%i3, %l1, %i4		! move to fill empty space
    218 	b	3f
    219 	or	%i5, %i4, %i5		! merge to complete word
    220 	!
    221 	! the source address is aligned and destination is not
    222 	!
    223 .align_dst_only:
    224 	ld	[%i0], %i4		! read a word
    225 	add	%i0, 4, %i0		! increment source address
    226 	mov	24, %l0			! initial shift alignment count
    227 1:
    228 	srl	%i4, %l0, %i3		! prepare to write a single byte
    229 	stb	%i3, [%i1]		! write a byte
    230 	add	%i1, 1, %i1		! increment destination address
    231 	sub	%i2, 1, %i2		! decrement count
    232 	btst	3, %i1			! is destination aligned?
    233 	bnz,a	1b
    234 	sub	%l0, 8, %l0		! delay slot, decrement shift count
    235 .xfer:
    236 	sub	%l5, %l0, %l1		! generate shift left count
    237 	sll	%i4, %l1, %i5		! get leftover
    238 3:
    239 	and	%i2, 3, %l4		! must do remaining bytes if count%4 > 0
    240 	andn	%i2, 3, %i2		! # of aligned bytes that can be moved
    241 2:
    242 	ld	[%i0], %i3		! read a source word
    243 	add	%i0, 4, %i0		! increment source address
    244 	srl	%i3, %l0, %i4		! upper src bits into lower dst bits
    245 	or	%i5, %i4, %i5		! merge with upper dest bits (leftover)
    246 	st	%i5, [%i1]		! write a destination word
    247 	subcc	%i2, 4, %i2		! decrement count
    248 	bz	%ncc, .unalign_out	! check if done
    249 	add	%i1, 4, %i1		! increment destination address
    250 	b	2b			! loop
    251 	sll	%i3, %l1, %i5		! get leftover
    252 .unalign_out:
    253 	tst	%l4			! any bytes leftover?
    254 	bz	%ncc, .cpdone
    255 	.empty				! allow next instruction in delay slot
    256 1:
    257 	sub	%l0, 8, %l0		! decrement shift
    258 	srl	%i3, %l0, %i4		! upper src byte into lower dst byte
    259 	stb	%i4, [%i1]		! write a byte
    260 	subcc	%l4, 1, %l4		! decrement count
    261 	bz	%ncc, .cpdone		! done?
    262 	add	%i1, 1, %i1		! increment destination
    263 	tst	%l0			! any more previously read bytes
    264 	bnz	%ncc, 1b		! we have leftover bytes
    265 	mov	%l4, %i2		! delay slot, mv cnt where dbytecp wants
    266 	b	.dbytecp		! let dbytecp do the rest
    267 	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
    268 	!
    269 	! the destination address is aligned and the source is not
    270 	!
    271 .align_src_only:
    272 	ldub	[%i0], %i3		! read a byte from source address
    273 	add	%i0, 1, %i0		! increment source address
    274 	or	%i4, %i3, %i4		! or in with previous bytes (if any)
    275 	btst	3, %i0			! is source aligned?
    276 	add	%l0, 8, %l0		! increment shift count (US)
    277 	bnz,a	.align_src_only
    278 	sll	%i4, 8, %i4		! make room for next byte
    279 	b,a	.xfer
    280 	!
    281 	! if from address unaligned for double-word moves,
    282 	! move bytes till it is, if count is < 56 it could take
    283 	! longer to align the thing than to do the transfer
    284 	! in word size chunks right away
    285 	!
    286 .aldoubcp:
    287 	cmp	%i2, 56			! if count < 56, use wordcp, it takes
    288 	blu,a	%ncc, .alwordcp		! longer to align doubles than words
    289 	mov	3, %o0			! mask for word alignment
    290 	call	.alignit		! copy bytes until aligned
    291 	mov	7, %o0			! mask for double alignment
    292 	!
    293 	! source and destination are now double-word aligned
    294 	! i3 has aligned count returned by alignit
    295 	!
    296 	and	%i2, 7, %i2		! unaligned leftover count
    297 	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
    298 5:
    299 	ldx	[%i0+%i1], %o4		! read from address
    300 	stx	%o4, [%i1]		! write at destination address
    301 	subcc	%i3, 8, %i3		! dec count
    302 	bgu	%ncc, 5b		! loop while aligned bytes remain
    303 	add	%i1, 8, %i1		! delay slot, inc to address
    304 	cmp	%i2, 4			! see if we can copy a word
    305 	blu	%ncc, .dbytecp		! if 3 or less bytes use bytecp
    306 	.empty
    307 	!
    308 	! for leftover bytes we fall into wordcp, if needed
    309 	!
    310 .wordcp:
    311 	and	%i2, 3, %i2		! unaligned leftover count
    312 5:
    313 	ld	[%i0+%i1], %o4		! read from address
    314 	st	%o4, [%i1]		! write at destination address
    315 	subcc	%i3, 4, %i3		! dec count
    316 	bgu	%ncc, 5b		! loop while aligned bytes remain
    317 	add	%i1, 4, %i1		! delay slot, inc to address
    318 	b,a	.dbytecp
    319 
    320 	! we come here to align copies on word boundaries
    321 .alwordcp:
    322 	call	.alignit		! go word-align it
    323 	mov	3, %o0			! bits that must be zero to be aligned
    324 	b	.wordcp
    325 	sub	%i0, %i1, %i0		! i0 gets the difference of src and dst
    326 
    327 	!
    328 	! byte copy, works with any alignment
    329 	!
    330 .bytecp:
    331 	b	.dbytecp
    332 	sub	%i0, %i1, %i0		! i0 gets difference of src and dst
    333 
    334 	!
    335 	! differenced byte copy, works with any alignment
    336 	! assumes dest in %i1 and (source - dest) in %i0
    337 	!
    338 1:
    339 	stb	%o4, [%i1]		! write to address
    340 	inc	%i1			! inc to address
    341 .dbytecp:
    342 	deccc	%i2			! dec count
    343 	bgeu,a	%ncc, 1b		! loop till done
    344 	ldub	[%i0+%i1], %o4		! read from address
    345 .cpdone:
    346 	membar	#Sync				! sync error barrier
    347 	! Restore t_lofault handler, if came here from kcopy().
    348 	tst	%o5
    349 	bz	%ncc, 1f
    350 	andn	%o5, LOFAULT_SET, %o5
    351 	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
    352 1:
    353         mov     %g5, %o0                ! copy dest address
    354         call    sync_icache
    355         mov     %l6, %o1                ! saved size
    356 	ret
    357 	restore %g0, 0, %o0		! return (0)
    358 
    359 /*
    360  * Common code used to align transfers on word and doubleword
    361  * boundaries.  Aligns source and destination and returns a count
    362  * of aligned bytes to transfer in %i3
    363  */
    364 1:
    365 	inc	%i0			! inc from
    366 	stb	%o4, [%i1]		! write a byte
    367 	inc	%i1			! inc to
    368 	dec	%i2			! dec count
    369 .alignit:
    370 	btst	%o0, %i0		! %o0 is bit mask to check for alignment
    371 	bnz,a	1b
    372 	ldub	[%i0], %o4		! read next byte
    373 
    374 	retl
    375 	andn	%i2, %o0, %i3		! return size of aligned bytes
    376 	SET_SIZE(bcopy)
    377 
    378 #endif	/* lint */
    379 
    380 /*
    381  * Block copy with possibly overlapped operands.
    382  */
    383 
    384 #if defined(lint)
    385 
    386 /*ARGSUSED*/
    387 void
    388 ovbcopy(const void *from, void *to, size_t count)
    389 {}
    390 
    391 #else	/* lint */
    392 
    393 	ENTRY(ovbcopy)
    394 	tst	%o2			! check count
    395 	bgu,a	%ncc, 1f		! taken if count != 0, else fall into retl
    396 	subcc	%o0, %o1, %o3		! difference of from and to address
    397 
    398 	retl				! count == 0: nothing to do
    399 	nop
    400 1:
    401 	bneg,a	%ncc, 2f
    402 	neg	%o3			! if < 0, make it positive
    403 2:	cmp	%o2, %o3		! cmp size and abs(from - to)
    404 	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
    405 	.empty				!   no overlap
    406 	cmp	%o0, %o1		! compare from and to addresses
    407 	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
    408 	nop
    409 	!
    410 	! Copy forwards.
    411 	!
    412 .ov_fwd:
    413 	ldub	[%o0], %o3		! read from address
    414 	inc	%o0			! inc from address
    415 	stb	%o3, [%o1]		! write to address
    416 	deccc	%o2			! dec count
    417 	bgu	%ncc, .ov_fwd		! loop till done
    418 	inc	%o1			! inc to address
    419 
    420 	retl				! return
    421 	nop
    422 	!
    423 	! Copy backwards.
    424 	!
    425 .ov_bkwd:
    426 	deccc	%o2			! dec count, %o2 is now index of byte to move
    427 	ldub	[%o0 + %o2], %o3	! get byte at end of src
    428 	bgu	%ncc, .ov_bkwd		! loop till done
    429 	stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst
    430 
    431 	retl				! return
    432 	nop
    433 	SET_SIZE(ovbcopy)
    434 
    435 #endif	/* lint */
    436 
    437 /*
    438  * hwblkpagecopy()
    439  *
    440  * Copies exactly one page.  This routine assumes the caller (ppcopy)
    441  * has already disabled kernel preemption and has checked
    442  * use_hw_bcopy.
    443  */
    444 #ifdef lint
    445 /*ARGSUSED*/
    446 void
    447 hwblkpagecopy(const void *src, void *dst)
    448 { }
    449 #else /* lint */
    450 	ENTRY(hwblkpagecopy)
    451 	save	%sp, -SA(MINFRAME), %sp
    452 
    453 	! %i0 - source address (arg)
    454 	! %i1 - destination address (arg)
    455 	! %i2 - length of region (not arg)
    456 
    457 	set	PAGESIZE, %i2
    458 	mov     %i1,    %o0     ! store destination address for flushing
    459 
    460 	/*
    461 	 * Copying exactly one page and PAGESIZE is a multiple of 0x80.
    462 	 */
    463 1:
    464 	ldx	[%i0+0x0], %l0		! load first 0x40 bytes of this 0x80 chunk
    465 	ldx	[%i0+0x8], %l1
    466 	ldx	[%i0+0x10], %l2
    467 	ldx	[%i0+0x18], %l3
    468 	ldx	[%i0+0x20], %l4
    469 	ldx	[%i0+0x28], %l5
    470 	ldx	[%i0+0x30], %l6
    471 	ldx	[%i0+0x38], %l7
    472 	stx	%l0, [%i1+0x0]		! store first 0x40 bytes
    473 	stx	%l1, [%i1+0x8]
    474 	stx	%l2, [%i1+0x10]
    475 	stx	%l3, [%i1+0x18]
    476 	stx	%l4, [%i1+0x20]
    477 	stx	%l5, [%i1+0x28]
    478 	stx	%l6, [%i1+0x30]
    479 	stx	%l7, [%i1+0x38]
    480 
    481 	ldx	[%i0+0x40], %l0		! load second 0x40 bytes
    482 	ldx	[%i0+0x48], %l1
    483 	ldx	[%i0+0x50], %l2
    484 	ldx	[%i0+0x58], %l3
    485 	ldx	[%i0+0x60], %l4
    486 	ldx	[%i0+0x68], %l5
    487 	ldx	[%i0+0x70], %l6
    488 	ldx	[%i0+0x78], %l7
    489 	stx	%l0, [%i1+0x40]		! store second 0x40 bytes
    490 	stx	%l1, [%i1+0x48]
    491 	stx	%l2, [%i1+0x50]
    492 	stx	%l3, [%i1+0x58]
    493 	stx	%l4, [%i1+0x60]
    494 	stx	%l5, [%i1+0x68]
    495 	stx	%l6, [%i1+0x70]
    496 	stx	%l7, [%i1+0x78]
    497 
    498 	add	%i0, 0x80, %i0		! advance source
    499 	subcc	%i2, 0x80, %i2		! 0x80 fewer bytes remaining
    500 	bgu,pt	%xcc, 1b		! loop until the whole page is copied
    501 	add	%i1, 0x80, %i1		! delay slot, advance destination
    502 
    503 	! %o0 contains the dest. address
    504 	set	PAGESIZE, %o1
    505 	call	sync_icache
    506 	nop
    507 
    508 	membar #Sync
    509 	ret
    510 	restore	%g0, 0, %o0
    511 	SET_SIZE(hwblkpagecopy)
    512 #endif	/* lint */
    513 
    514 
    515 /*
    516  * Transfer data to and from user space -
    517  * Note that these routines can cause faults
    518  * It is assumed that the kernel has nothing at
    519  * less than KERNELBASE in the virtual address space.
    520  *
    521  * Note that copyin(9F) and copyout(9F) are part of the
    522  * DDI/DKI which specifies that they return '-1' on "errors."
    523  *
    524  * Sigh.
    525  *
    526  * So there's two extremely similar routines - xcopyin() and xcopyout()
    527  * which return the errno that we've faithfully computed.  This
    528  * allows other callers (e.g. uiomove(9F)) to work correctly.
    529  * Given that these are used pretty heavily, we expand the calling
    530  * sequences inline for all flavours (rather than making wrappers).
    531  *
    532  * There are also stub routines for xcopyout_little and xcopyin_little,
    533  * which currently are intended to handle requests of <= 16 bytes from
    534  * do_unaligned. Future enhancement to make them handle 8k pages efficiently
    535  * is left as an exercise...
    536  */
    537 
    538 /*
    539  * Copy user data to kernel space (copyOP/xcopyOP/copyOP_noerr)
    540  *
    541  * General theory of operation:
    542  *
    543  * None of the copyops routines grab a window.
    544  *
    545  * Flow:
    546  *
    547  * If count == zero return zero.
    548  *
    549  * Store the previous lo_fault handler into %g6.
    550  * Place our secondary lofault handler into %g5.
    551  * Place the address of our fault handler into %o3.
    552  *
    553  * If count is less than or equal to SMALL_LIMIT (7) we
    554  * always do a byte for byte copy.
    555  *
    556  * If count is > SMALL_LIMIT, we check the alignment of the input
    557  * and output pointers.  We store -count in %o3, we store the number
    558  * of chunks (8, 4, 2 or 1 byte) operated on in our basic copy loop
    559  * in %o2. Following this we branch to the appropriate copy loop and
    560  * copy that many chunks.  Since we've been adding the chunk size
    561  * to %o3 each time through as well as decrementing %o2, we can tell
    562  * if any data is left to be copied by examining %o3. If that is
    563  * zero, we're done and can go home. If not, we figure out what the
    564  * largest chunk size left to be copied is and branch to that copy
    565  * loop unless there's only one byte left. We load that as we're
    566  * branching to code that stores it just before we return.
    567  *
    568  * Fault handlers are invoked if we reference memory that has no
    569  * current mapping.  All forms share the same copyio_fault handler.
    570  * This routine handles fixing up the stack and general housecleaning.
    571  * Each copy operation has a simple fault handler that is then called
    572  * to do the work specific to the individual operation.  The handlers
    573  * for copyOP and xcopyOP are found at the end of each individual function.
    574  * The handlers for xcopyOP_little are found at the end of xcopyin_little.
    575  * The handlers for copyOP_noerr are found at the end of copyin_noerr.
    576  */
    577 
    578 /*
    579  * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
    580  */
    581 
    582 #if defined(lint)
    583 
    584 /*ARGSUSED*/
    585 int
    586 copyout(const void *kaddr, void *uaddr, size_t count)
    587 { return (0); }
    588 
    589 #else	/* lint */
    590 
    591 /*
    592  * We save the arguments in the following registers in case of a fault:
    593  * 	kaddr - %g2
    594  * 	uaddr - %g3
    595  * 	count - %g4
    596  */
    597 #define	SAVE_SRC	%g2
    598 #define	SAVE_DST	%g3
    599 #define	SAVE_COUNT	%g4
    600 
    601 #define	REAL_LOFAULT		%g5
    602 #define	SAVED_LOFAULT		%g6
    603 
    604 /*
    605  * Generic copyio fault handler.  This is the first line of defense when a
    606  * fault occurs in (x)copyin/(x)copyout.  In order for this to function
    607  * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
    608  * This allows us to share common code for all the flavors of the copy
    609  * operations, including the _noerr versions.
    610  *
    611  * Note that this function will restore the original input parameters before
    612  * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
    613  * member of the t_copyop structure, if needed.
    614  */
    615 	ENTRY(copyio_fault)
    616 	membar	#Sync				! sync error barrier
    617 	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
    618 
    619 	mov	SAVE_SRC, %o0			! put the original arguments back
    620 	mov	SAVE_DST, %o1
    621 	jmp	REAL_LOFAULT			! jump to the operation-specific handler
    622 	  mov	SAVE_COUNT, %o2			! delay slot, last original argument
    623 	SET_SIZE(copyio_fault)
    624 
    625 	ENTRY(copyout)
    626 	sethi	%hi(.copyout_err), REAL_LOFAULT	! REAL_LOFAULT = .copyout_err
    627 	or	REAL_LOFAULT, %lo(.copyout_err), REAL_LOFAULT
    628 
    629 .do_copyout:
    630 	!
    631 	! Check the length and bail if zero.
    632 	!
    633 	tst	%o2
    634 	bnz,pt	%ncc, 1f
    635 	  nop
    636 	retl
    637 	  clr	%o0
    638 1:
    639 	sethi	%hi(copyio_fault), %o3
    640 	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT	! remember old t_lofault
    641 	or	%o3, %lo(copyio_fault), %o3
    642 	membar	#Sync
    643 	stn	%o3, [THREAD_REG + T_LOFAULT]	! t_lofault = copyio_fault
    644 
    645 	mov	%o0, SAVE_SRC		! keep original args for the fault handler
    646 	mov	%o1, SAVE_DST
    647 	mov	%o2, SAVE_COUNT
    648 
    649 	!
    650 	! Check to see if we're more than SMALL_LIMIT (7 bytes).
    651 	! Run in leaf mode, using the %o regs as our input regs.
    652 	!
    653 	subcc	%o2, SMALL_LIMIT, %o3
    654 	bgu,a,pt %ncc, .dco_ns
    655 	or	%o0, %o1, %o3		! delay slot (annulled if not taken), src | dst
    656 
    657 .dcobcp:
    658 	sub	%g0, %o2, %o3		! negate count
    659 	add	%o0, %o2, %o0		! make %o0 point at the end
    660 	add	%o1, %o2, %o1		! make %o1 point at the end
    661 	ba,pt	%ncc, .dcocl
    662 	ldub	[%o0 + %o3], %o4	! load first byte
    663 	!
    664 	! %o0 and %o1 point at the end and remain pointing at the end
    665 	! of their buffers. We pull things out by adding %o3 (which is
    666 	! the negation of the length) to the buffer end which gives us
    667 	! the current location in the buffers. By incrementing %o3 we walk
    668 	! through both buffers without having to bump each buffer's
    669 	! pointer. A very fast 4 instruction loop.
    670 	!
    671 	.align 16
    672 .dcocl:
    673 	stba	%o4, [%o1 + %o3]ASI_USER
    674 	inccc	%o3
    675 	bl,a,pt	%ncc, .dcocl
    676 	ldub	[%o0 + %o3], %o4
    677 	!
    678 	! We're done. Go home.
    679 	!
    680 	membar	#Sync
    681 	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]
    682 	retl
    683 	clr	%o0
    684 	!
    685 	! Try aligned copies from here.
    686 	!
    687 .dco_ns:
    688 	! %o0 = kernel addr (to be copied from)
    689 	! %o1 = user addr (to be copied to)
    690 	! %o2 = length
    691 	! %o3 = %o0 | %o1 (used for alignment checking)
    692 	! REAL_LOFAULT (%g5) is the operation-specific fault handler
    693 	! SAVED_LOFAULT (%g6) is the original t_lofault
    694 	!
    695 	! If either address is odd (bit 0 of %o3 set) we can only
    696 	! copy byte for byte, so bounce to the byte copy loop at
    697 	! .dcobcp. Otherwise go on to check for 8, 4 and 2 byte
    698 	! alignment.
    699 	!
    700 	btst	1, %o3
    701 	bz,pt	%icc, .dcoh8
    702 	btst	7, %o3
    703 
    704 	ba	.dcobcp
    705 	nop
    706 .dcoh8:
    707 	!
    708 	! 8 byte aligned?
    709 	!
    710 	bnz,a	%ncc, .dcoh4
    711 	btst	3, %o3
    712 .dcos8:
    713 	!
    714 	! Housekeeping for copy loops. Uses same idea as in the byte for
    715 	! byte copy loop above.
    716 	!
    717 	add	%o0, %o2, %o0
    718 	add	%o1, %o2, %o1
    719 	sub	%g0, %o2, %o3
    720 	ba,pt	%ncc, .dodebc
    721 	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
    722 	!
    723 	! 4 byte aligned?
    724 	!
    725 .dcoh4:
    726 	bnz,pn	%ncc, .dcoh2
    727 	nop
    728 .dcos4:
    729 	add	%o0, %o2, %o0
    730 	add	%o1, %o2, %o1
    731 	sub	%g0, %o2, %o3
    732 	ba,pt	%ncc, .dodfbc
    733 	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
    734 	!
    735 	! We must be 2 byte aligned. Off we go.
    736 	! The check for small copies (SMALL_LIMIT) was already done
    737 	! before .dco_ns; the delay slot at .dcoh4 is just a nop.
    738 	!
    739 .dcoh2:
    740 .dcos2:
    741 	add	%o0, %o2, %o0
    742 	add	%o1, %o2, %o1
    743 	sub	%g0, %o2, %o3
    744 	ba,pt	%ncc, .dodtbc
    745 	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
    746 
    747 .dodebc:
    748 	ldx	[%o0 + %o3], %o4
    749 	deccc	%o2
    750 	stxa	%o4, [%o1 + %o3]ASI_USER
    751 	bg,pt	%ncc, .dodebc		! loop on chunk count (cc from deccc)
    752 	addcc	%o3, 8, %o3		! delay slot, cc is tested after the loop
    753 	!
    754 	! End of copy loop. Check to see if we're done. Most
    755 	! eight byte aligned copies end here.
    756 	!
    757 	bz,pt	%ncc, .dcofh
    758 	nop
    759 	!
    760 	! Something is left - do it byte for byte.
    761 	!
    762 	ba,pt	%ncc, .dcocl
    763 	ldub	[%o0 + %o3], %o4	! load next byte
    764 	!
    765 	! Four byte copy loop. %o2 is the number of 4 byte chunks to copy.
    766 	!
    767 	.align 32
    768 .dodfbc:
    769 	lduw	[%o0 + %o3], %o4
    770 	deccc	%o2
    771 	sta	%o4, [%o1 + %o3]ASI_USER
    772 	bg,pt	%ncc, .dodfbc		! loop on chunk count (cc from deccc)
    773 	addcc	%o3, 4, %o3		! delay slot, cc is tested after the loop
    774 	!
    775 	! End of copy loop. Check to see if we're done. Most
    776 	! four byte aligned copies end here.
    777 	!
    778 	bz,pt	%ncc, .dcofh
    779 	nop
    780 	!
    781 	! Something is left. Do it byte for byte.
    782 	!
    783 	ba,pt	%ncc, .dcocl
    784 	ldub	[%o0 + %o3], %o4	! load next byte
    785 	!
    786 	! two byte aligned copy loop. %o2 is the number of 2 byte chunks to
    787 	! copy.
    788 	!
    789 	.align 32
    790 .dodtbc:
    791 	lduh	[%o0 + %o3], %o4
    792 	deccc	%o2
    793 	stha	%o4, [%o1 + %o3]ASI_USER
    794 	bg,pt	%ncc, .dodtbc		! loop on chunk count (cc from deccc)
    795 	addcc	%o3, 2, %o3		! delay slot, cc is tested after the loop
    796 	!
    797 	! End of copy loop. Anything left?
    798 	!
    799 	bz,pt	%ncc, .dcofh
    800 	nop
    801 	!
    802 	! Deal with the last byte
    803 	!
    804 	ldub	[%o0 + %o3], %o4
    805 	stba	%o4, [%o1 + %o3]ASI_USER
    806 .dcofh:
    807 	membar	#Sync
    808 	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
    809 	retl
    810 	clr	%o0
    811 
    812 .copyout_err:
    813 	ldn	[THREAD_REG + T_COPYOPS], %o4	! thread copyops pointer
    814 	brz	%o4, 2f				! none installed: plain failure
    815 	nop
    816 	ldn	[%o4 + CP_COPYOUT], %g2		! presumably the copyops copyout entry
    817 	jmp	%g2				! hand off, args already restored
    818 	nop
    819 2:
    820 	retl
    821 	mov	-1, %o0				! return -1 on error
    822 	SET_SIZE(copyout)
    823 
    824 #endif	/* lint */
    825 
    826 
    827 #ifdef	lint
    828 
    829 /*ARGSUSED*/
    830 int
    831 xcopyout(const void *kaddr, void *uaddr, size_t count)
    832 { return (0); }
    833 
    834 #else	/* lint */
    835 
    836 	ENTRY(xcopyout)
    837 	sethi	%hi(.xcopyout_err), REAL_LOFAULT	! REAL_LOFAULT = .xcopyout_err
    838 	b	.do_copyout			! share the copy code with copyout
    839 	  or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
    840 .xcopyout_err:
    841 	ldn	[THREAD_REG + T_COPYOPS], %o4	! thread copyops pointer
    842 	brz	%o4, 2f				! none installed: return the error
    843 	nop
    844 	ldn	[%o4 + CP_XCOPYOUT], %g2	! presumably the copyops xcopyout entry
    845 	jmp	%g2				! hand off to it
    846 	nop
    847 2:
    848 	retl
    849 	mov	%g1, %o0			! return %g1 (presumably the errno)
    850 	SET_SIZE(xcopyout)
    851 
    852 #endif	/* lint */
    853 
    854 #ifdef	lint
    855 
    856 /*ARGSUSED*/
    857 int
    858 xcopyout_little(const void *kaddr, void *uaddr, size_t count)
    859 { return (0); }
    860 
    861 #else	/* lint */
    862 
    863 	ENTRY(xcopyout_little)
    864 	sethi	%hi(.little_err), %o4
    865 	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old t_lofault
    866 	or	%o4, %lo(.little_err), %o4
    867 	membar	#Sync			! sync error barrier
    868 	stn	%o4, [THREAD_REG + T_LOFAULT]	! t_lofault = .little_err
    869 
    870 	subcc	%g0, %o2, %o3		! %o3 = -count, Z set if count is zero
    871 	add	%o0, %o2, %o0
    872 	bz,pn	%ncc, 2f		! check for zero bytes
    873 	sub	%o2, 1, %o4
    874 	add	%o0, %o4, %o0		! start w/last byte
    875 	add	%o1, %o2, %o1		! %o1 = end of destination
    876 	ldub	[%o0+%o3], %o4		! first load is the last source byte
    877 
    878 1:	stba	%o4, [%o1+%o3]ASI_AIUSL	! destination is written in ascending order
    879 	inccc	%o3			! carry is set only when %o3 reaches zero
    880 	sub	%o0, 2, %o0		! get next byte
    881 	bcc,a,pt %ncc, 1b		! loop until %o3 wraps to zero
    882 	  ldub	[%o0+%o3], %o4		! delay slot, source is read in descending order
    883 
    884 2:	membar	#Sync			! sync error barrier
    885 	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
    886 	retl
    887 	mov	%g0, %o0		! return (0)
    888 	SET_SIZE(xcopyout_little)
    889 
    890 #endif	/* lint */
    891 
    892 /*
    893  * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
    894  */
    895 
    896 #if defined(lint)
    897 
    898 /*ARGSUSED*/
    899 int
    900 copyin(const void *uaddr, void *kaddr, size_t count)
    901 { return (0); }
    902 
    903 #else	/* lint */
    904 	! copyin: copy %o2 bytes from user address %o0 to kernel address %o1.
    905 	ENTRY(copyin)
    906 	sethi	%hi(.copyin_err), REAL_LOFAULT	! REAL_LOFAULT = .copyin_err
    907 	or	REAL_LOFAULT, %lo(.copyin_err), REAL_LOFAULT
    908 	! .do_copyin is also branched to by xcopyin and copyin_noerr.
    909 .do_copyin:
    910 	!
    911 	! Check the length and bail if zero.
    912 	!
    913 	tst	%o2
    914 	bnz,pt	%ncc, 1f
    915 	  nop
    916 	retl				! count == 0: nothing to copy
    917 	  clr	%o0			! return (0)
    918 1:
    919 	sethi	%hi(copyio_fault), %o3
    920 	ldn	[THREAD_REG + T_LOFAULT], SAVED_LOFAULT	! remember old handler
    921 	or	%o3, %lo(copyio_fault), %o3	! copyio_fault is defined elsewhere
    922 	membar	#Sync
    923 	stn	%o3, [THREAD_REG + T_LOFAULT]	! install copyio_fault
    924 
    925 	mov	%o0, SAVE_SRC		! keep copies of the original arguments
    926 	mov	%o1, SAVE_DST		! (presumably for the fault path;
    927 	mov	%o2, SAVE_COUNT		! verify against copyio_fault)
    928 
    929 	!
    930 	! Check to see if we're more than SMALL_LIMIT.
    931 	!
    932 	subcc	%o2, SMALL_LIMIT, %o3
    933 	bgu,a,pt %ncc, .dci_ns
    934 	or	%o0, %o1, %o3		! (annulled if not taken) %o3 = src | dst
    935 
    936 .dcibcp:
    937 	sub	%g0, %o2, %o3		! setup for copy loop
    938 	add	%o0, %o2, %o0
    939 	add	%o1, %o2, %o1
    940 	ba,pt	%ncc, .dcicl
    941 	lduba	[%o0 + %o3]ASI_USER, %o4	! delay slot: fetch the first byte
    942 	!
    943 	! %o0 and %o1 point at the end and remain pointing at the end
    944 	! of their buffers. We pull things out by adding %o3 (which is
    945 	! the negation of the length) to the buffer end which gives us
    946 	! the current location in the buffers. By incrementing %o3 we walk
    947 	! through both buffers without having to bump each buffer's
    948 	! pointer. A very fast 4 instruction loop.
    949 	!
    950 	.align 16
    951 .dcicl:
    952 	stb	%o4, [%o1 + %o3]
    953 	inccc	%o3
    954 	bl,a,pt %ncc, .dcicl		! loop while %o3 is still negative
    955 	lduba	[%o0 + %o3]ASI_USER, %o4	! (annulled on exit) next byte
    956 	!
    957 	! We're done. Go home.
    958 	!
    959 	membar	#Sync
    960 	stn	SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
    961 	retl
    962 	clr	%o0			! return (0)
    963 	!
    964 	! Try aligned copies from here.
    965 	!
    966 .dci_ns:
    967 	!
    968 	! %o3 holds src | dst. If bit 0 is set, at least one of the
    969 	! two addresses is only byte aligned, so bounce to the byte
    970 	! for byte copy loop. Otherwise go on to test for 8, 4 or
    971 	! 2 byte alignment below.
    972 	!
    973 	btst	1, %o3
    974 	bz,a,pt	%icc, .dcih8
    975 	btst	7, %o3			! (annulled if not taken) 8 byte test
    976 	ba	.dcibcp
    977 	nop
    978 
    979 .dcih8:
    980 	!
    981 	! 8 byte aligned?
    982 	!
    983 	bnz,a	%ncc, .dcih4
    984 	btst	3, %o3			! (annulled if not taken) 4 byte test
    985 .dcis8:
    986 	!
    987 	! Housekeeping for copy loops. Uses same idea as in the byte for
    988 	! byte copy loop above.
    989 	!
    990 	add	%o0, %o2, %o0
    991 	add	%o1, %o2, %o1
    992 	sub	%g0, %o2, %o3
    993 	ba,pt	%ncc, .didebc
    994 	srl	%o2, 3, %o2		! Number of 8 byte chunks to copy
    995 	!
    996 	! 4 byte aligned?
    997 	!
    998 .dcih4:
    999 	bnz	%ncc, .dcih2
   1000 	nop
   1001 .dcis4:
   1002 	!
   1003 	! Housekeeping for copy loops. Uses same idea as in the byte
   1004 	! for byte copy loop above.
   1005 	!
   1006 	add	%o0, %o2, %o0
   1007 	add	%o1, %o2, %o1
   1008 	sub	%g0, %o2, %o3
   1009 	ba,pt	%ncc, .didfbc
   1010 	srl	%o2, 2, %o2		! Number of 4 byte chunks to copy
   1011 .dcih2:
   1012 .dcis2:
   1013 	add	%o0, %o2, %o0		! same housekeeping for the 2 byte loop
   1014 	add	%o1, %o2, %o1
   1015 	sub	%g0, %o2, %o3
   1016 	ba,pt	%ncc, .didtbc
   1017 	srl	%o2, 1, %o2		! Number of 2 byte chunks to copy
   1018 	! 8 byte copy loop. %o2 is number of 8 byte chunks to copy.
   1019 .didebc:
   1020 	ldxa	[%o0 + %o3]ASI_USER, %o4
   1021 	deccc	%o2
   1022 	stx	%o4, [%o1 + %o3]
   1023 	bg,pt	%ncc, .didebc		! tests the deccc result
   1024 	addcc	%o3, 8, %o3		! delay slot: its cc feed the bz below
   1025 	!
   1026 	! End of copy loop. Most 8 byte aligned copies end here.
   1027 	!
   1028 	bz,pt	%ncc, .dcifh
   1029 	nop
   1030 	!
   1031 	! Something is left. Do it byte for byte.
   1032 	!
   1033 	ba,pt	%ncc, .dcicl
   1034 	lduba	[%o0 + %o3]ASI_USER, %o4	! delay slot: fetch the first byte
   1035 	!
   1036 	! 4 byte copy loop. %o2 is number of 4 byte chunks to copy.
   1037 	!
   1038 	.align 32
   1039 .didfbc:
   1040 	lduwa	[%o0 + %o3]ASI_USER, %o4
   1041 	deccc	%o2
   1042 	st	%o4, [%o1 + %o3]
   1043 	bg,pt	%ncc, .didfbc		! tests the deccc result
   1044 	addcc	%o3, 4, %o3		! delay slot: its cc feed the bz below
   1045 	!
   1046 	! End of copy loop. Most 4 byte aligned copies end here.
   1047 	!
   1048 	bz,pt	%ncc, .dcifh
   1049 	nop
   1050 	!
   1051 	! Something is left. Do it byte for byte.
   1052 	!
   1053 	ba,pt	%ncc, .dcicl
   1054 	lduba	[%o0 + %o3]ASI_USER, %o4	! delay slot: fetch the first byte
   1055 	!
   1056 	! 2 byte aligned copy loop. %o2 is number of 2 byte chunks to
   1057 	! copy.
   1058 	!
   1059 	.align 32
   1060 .didtbc:
   1061 	lduha	[%o0 + %o3]ASI_USER, %o4
   1062 	deccc	%o2
   1063 	sth	%o4, [%o1 + %o3]
   1064 	bg,pt	%ncc, .didtbc		! tests the deccc result
   1065 	addcc	%o3, 2, %o3		! delay slot: its cc feed the bz below
   1066 	!
   1067 	! End of copy loop. Most 2 byte aligned copies end here.
   1068 	!
   1069 	bz,pt	%ncc, .dcifh
   1070 	nop
   1071 	!
   1072 	! Deal with the last byte
   1073 	!
   1074 	lduba	[%o0 + %o3]ASI_USER, %o4
   1075 	stb	%o4, [%o1 + %o3]
   1076 .dcifh:
   1077 	membar	#Sync
   1078 	stn     SAVED_LOFAULT, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
   1079 	retl
   1080 	clr	%o0			! return (0)
   1081 	! Reached via REAL_LOFAULT; presumably from copyio_fault - TODO confirm.
   1082 .copyin_err:
   1083 	ldn	[THREAD_REG + T_COPYOPS], %o4
   1084 	brz	%o4, 2f			! no copyops vector on this thread
   1085 	nop
   1086 	ldn	[%o4 + CP_COPYIN], %g2	! hand off to the copyops copyin entry
   1087 	jmp	%g2
   1088 	nop
   1089 2:
   1090 	retl
   1091 	mov	-1, %o0			! return (-1)
   1092 	SET_SIZE(copyin)
   1093 
   1094 #endif	/* lint */
   1095 
   1096 #ifdef	lint
   1097 
   1098 /*ARGSUSED*/
   1099 int
   1100 xcopyin(const void *uaddr, void *kaddr, size_t count)
   1101 { return (0); }
   1102 
   1103 #else	/* lint */
   1104 	! xcopyin: runs the copyin code at .do_copyin with .xcopyin_err as REAL_LOFAULT.
   1105 	ENTRY(xcopyin)
   1106 	sethi	%hi(.xcopyin_err), REAL_LOFAULT
   1107 	b	.do_copyin
   1108 	  or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT	! delay slot
   1109 .xcopyin_err:
   1110 	ldn	[THREAD_REG + T_COPYOPS], %o4
   1111 	brz	%o4, 2f			! no copyops vector on this thread
   1112 	nop
   1113 	ldn	[%o4 + CP_XCOPYIN], %g2	! hand off to the copyops xcopyin entry
   1114 	jmp	%g2
   1115 	nop
   1116 2:
   1117 	retl
   1118 	mov	%g1, %o0		! return %g1 (presumably the errno; confirm)
   1119 	SET_SIZE(xcopyin)
   1120 
   1121 #endif	/* lint */
   1122 
   1123 #ifdef	lint
   1124 
   1125 /*ARGSUSED*/
   1126 int
   1127 xcopyin_little(const void *uaddr, void *kaddr, size_t count)
   1128 { return (0); }
   1129 
   1130 #else	/* lint */
   1131 	! xcopyin_little: copy %o2 bytes from user %o0 to kernel %o1, reversing byte order.
   1132 	ENTRY(xcopyin_little)
   1133 	sethi	%hi(.little_err), %o4
   1134 	ldn	[THREAD_REG + T_LOFAULT], %o5	! %o5 = old t_lofault
   1135 	or	%o4, %lo(.little_err), %o4
   1136 	membar	#Sync				! sync error barrier
   1137 	stn	%o4, [THREAD_REG + T_LOFAULT]	! install .little_err
   1138 
   1139 	subcc	%g0, %o2, %o3		! %o3 = -count, cc tested by bz below
   1140 	add	%o0, %o2, %o0
   1141 	bz,pn	%ncc, 2f		! check for zero bytes
   1142 	sub	%o2, 1, %o4		! delay slot: %o4 = count - 1
   1143 	add	%o0, %o4, %o0		! start w/last byte
   1144 	add	%o1, %o2, %o1		! %o1 = end of destination buffer
   1145 	lduba	[%o0+%o3]ASI_AIUSL, %o4	! first load reads src[count - 1]
   1146 
   1147 1:	stb	%o4, [%o1+%o3]
   1148 	inccc	%o3			! carry is set once %o3 reaches zero
   1149 	sub	%o0, 2, %o0		! get next byte
   1150 	bcc,a,pt %ncc, 1b
   1151 	  lduba	[%o0+%o3]ASI_AIUSL, %o4
   1152 
   1153 2:	membar	#Sync				! sync error barrier
   1154 	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
   1155 	retl
   1156 	mov	%g0, %o0		! return (0)
   1157 	! Fault handler installed above: restore t_lofault and return %g1.
   1158 .little_err:
   1159 	membar	#Sync				! sync error barrier
   1160 	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
   1161 	retl
   1162 	mov	%g1, %o0		! return %g1 (presumably the errno; confirm)
   1163 	SET_SIZE(xcopyin_little)
   1164 
   1165 #endif	/* lint */
   1166 
   1167 
   1168 /*
   1169  * Copy a block of storage - must not overlap (from + len <= to).
   1170  * No fault handler installed (to be called under on_fault())
   1171  */
   1172 #if defined(lint)
   1173 
   1174 /* ARGSUSED */
   1175 void
   1176 copyin_noerr(const void *ufrom, void *kto, size_t count)
   1177 {}
   1178 
   1179 #else	/* lint */
   1180 	! copyin_noerr: runs the copyin code at .do_copyin with .copyio_noerr as REAL_LOFAULT.
   1181 	ENTRY(copyin_noerr)
   1182 	sethi	%hi(.copyio_noerr), REAL_LOFAULT
   1183 	b	.do_copyin
   1184 	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT	! delay slot
   1185 .copyio_noerr:
   1186 	jmp	SAVED_LOFAULT		! go to the t_lofault value saved on entry
   1187 	  nop
   1188 	SET_SIZE(copyin_noerr)
   1189 
   1190 #endif /* lint */
   1191 
   1192 /*
   1193  * Copy a block of storage - must not overlap (from + len <= to).
   1194  * No fault handler installed (to be called under on_fault())
   1195  */
   1196 
   1197 #if defined(lint)
   1198 
   1199 /* ARGSUSED */
   1200 void
   1201 copyout_noerr(const void *kfrom, void *uto, size_t count)
   1202 {}
   1203 
   1204 #else	/* lint */
   1205 	! copyout_noerr: branches to .do_copyout (defined elsewhere) with .copyio_noerr as REAL_LOFAULT.
   1206 	ENTRY(copyout_noerr)
   1207 	sethi	%hi(.copyio_noerr), REAL_LOFAULT
   1208 	b	.do_copyout
   1209 	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT	! delay slot
   1210 	SET_SIZE(copyout_noerr)
   1211 
   1212 #endif /* lint */
   1213 
   1214 #if defined(lint)
   1215 
   1216 int use_hw_bcopy = 1;
   1217 int use_hw_bzero = 1;
   1218 
   1219 #else /* !lint */
   1220 	! Two word-sized flags, both initialized to 1 as in the lint declarations.
   1221 	.align	4
   1222 	DGDEF(use_hw_bcopy)
   1223 	.word	1			! use_hw_bcopy = 1
   1224 	DGDEF(use_hw_bzero)
   1225 	.word	1			! use_hw_bzero = 1
   1226 	! DGDEF presumably switched sections; go back to .text for the code below.
   1227 	.align	64
   1228 	.section ".text"
   1229 #endif /* !lint */
   1230 
   1231 
   1232 /*
   1233  * hwblkclr - clears block-aligned, block-multiple-sized regions that are
   1234  * longer than 256 bytes in length. For the generic module we simply
   1235  * call bzero and return 1, which tells the caller that the pages in
   1236  * cache should be flushed to ensure integrity.
   1237  * Caller is responsible for ensuring use_hw_bzero is true and that
   1238  * kpreempt_disable() has been called.
   1239  */
   1240 #ifdef lint
   1241 /*ARGSUSED*/
   1242 int
   1243 hwblkclr(void *addr, size_t len)
   1244 {
   1245 	return(0);	/* NOTE(review): the assembly version returns 1 */
   1246 }
   1247 #else /* lint */
   1248 	! %i0 - start address
   1249 	! %i1 - length of region (multiple of 64)
   1250 	! Not a leaf routine: it calls bzero, so it takes a register window.
   1251 	ENTRY(hwblkclr)
   1252 	save	%sp, -SA(MINFRAME), %sp
   1253 
   1254 	! Simply call bzero and notify the caller that bzero was used
   1255 	mov	%i0, %o0		! bzero arg 0: start address
   1256 	call	bzero
   1257 	  mov	%i1, %o1		! delay slot, bzero arg 1: length
   1258 	ret
   1259 	restore	%g0, 1, %o0	! return (1) - did not use block operations
   1260 
   1261 	SET_SIZE(hwblkclr)
   1262 #endif	/* lint */
   1263 
   1264 #ifdef	lint
   1265 /* Copy 32 bytes of data from src to dst using physical addresses */
   1266 /*ARGSUSED*/
   1267 void
   1268 hw_pa_bcopy32(uint64_t src, uint64_t dst)
   1269 {}
   1270 #else	/*!lint */
   1271 
   1272 	/*
   1273 	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
   1274 	 * using physical addresses.
   1275 	 */
   1276 	ENTRY_NP(hw_pa_bcopy32)
   1277 	rdpr    %pstate, %g1		! %g1 = original %pstate
   1278 	andn    %g1, PSTATE_IE, %g2	! clear the PSTATE_IE bit
   1279 	wrpr    %g0, %g2, %pstate	! run the copy with PSTATE_IE clear
   1280 	! Load all four doublewords into %o2-%o5 before storing any of them.
   1281 	ldxa    [%o0]ASI_MEM, %o2
   1282 	add     %o0, 8, %o0
   1283 	ldxa    [%o0]ASI_MEM, %o3
   1284 	add     %o0, 8, %o0
   1285 	ldxa    [%o0]ASI_MEM, %o4
   1286 	add     %o0, 8, %o0
   1287 	ldxa    [%o0]ASI_MEM, %o5
   1288 	stxa    %o2, [%o1]ASI_MEM
   1289 	add     %o1, 8, %o1
   1290 	stxa    %o3, [%o1]ASI_MEM
   1291 	add     %o1, 8, %o1
   1292 	stxa    %o4, [%o1]ASI_MEM
   1293 	add     %o1, 8, %o1
   1294 	stxa    %o5, [%o1]ASI_MEM
   1295 
   1296 	membar	#Sync
   1297 	retl
   1298 	  wrpr    %g0, %g1, %pstate	! delay slot: restore original %pstate
   1299 	SET_SIZE(hw_pa_bcopy32)
   1300 #endif /* lint */
   1301 
   1302 /*
   1303  * Zero a block of storage.
   1304  *
   1305  * uzero is used by the kernel to zero a block in user address space.
   1306  */
   1307 
   1308 
   1309 #if defined(lint)
   1310 
   1311 /* ARGSUSED */
   1312 int
   1313 kzero(void *addr, size_t count)
   1314 { return(0); }
   1315 
   1316 /* ARGSUSED */
   1317 void
   1318 uzero(void *addr, size_t count)
   1319 {}
   1320 
   1321 #else	/* lint */
   1322 	! uzero: select ASI_USER in %asi, then run the shared zeroing code at .do_zero.
   1323 	ENTRY(uzero)
   1324 	!
   1325 	! Set a new lo_fault handler only if we came in with one
   1326 	! already specified.
   1327 	!
   1328 	wr	%g0, ASI_USER, %asi
   1329 	ldn	[THREAD_REG + T_LOFAULT], %o5	! %o5 = old t_lofault
   1330 	tst	%o5
   1331 	bz,pt	%ncc, .do_zero		! none set: leave t_lofault alone
   1332 	sethi	%hi(.zeroerr), %o2	! delay slot (not annulled)
   1333 	or	%o2, %lo(.zeroerr), %o2
   1334 	membar	#Sync
   1335 	ba,pt	%ncc, .do_zero
   1336 	stn	%o2, [THREAD_REG + T_LOFAULT]	! delay slot: install .zeroerr
   1337 	! kzero: select ASI_P in %asi, then run the shared zeroing code at .do_zero.
   1338 	ENTRY(kzero)
   1339 	!
   1340 	! Always set a lo_fault handler
   1341 	!
   1342 	wr	%g0, ASI_P, %asi
   1343 	ldn	[THREAD_REG + T_LOFAULT], %o5	! %o5 = old t_lofault
   1344 	sethi	%hi(.zeroerr), %o2
   1345 	or	%o5, LOFAULT_SET, %o5	! mark that we installed a handler
   1346 	or	%o2, %lo(.zeroerr), %o2
   1347 	membar	#Sync
   1348 	ba,pt	%ncc, .do_zero
   1349 	stn	%o2, [THREAD_REG + T_LOFAULT]	! delay slot: install .zeroerr
   1350 
   1351 /*
   1352  * We got here because of a fault during kzero or if
   1353  * uzero or bzero was called with t_lofault non-zero.
   1354  * Otherwise we've already run screaming from the room.
   1355  * Errno value is in %g1. Note that we're here iff
   1356  * we did set t_lofault.
   1357  */
   1358 .zeroerr:
   1359 	!
   1360 	! Undo asi register setting. Just set it to be the
   1361         ! kernel default without checking.
   1362 	!
   1363 	wr	%g0, ASI_P, %asi
   1364 
   1365 	!
   1366 	! We did set t_lofault. It may well have been zero coming in.
   1367 	!
   1368 1:
   1369 	tst	%o5
   1370 	membar #Sync
   1371 	bne,pn	%ncc, 3f		! tests the tst result above
   1372 	andncc	%o5, LOFAULT_SET, %o5	! delay slot: strip flag, cc used at 3
   1373 2:
   1374 	!
   1375 	! Old handler was zero. Just return the error.
   1376 	!
   1377 	retl				! return
   1378 	mov	%g1, %o0		! error code from %g1
   1379 3:
   1380 	!
   1381 	! We're here because %o5 was non-zero. It was non-zero
   1382 	! because either LOFAULT_SET was present, a previous fault
   1383 	! handler was present or both. In all cases we need to reset
   1384 	! T_LOFAULT to the value of %o5 after clearing LOFAULT_SET
   1385 	! before we either simply return the error or we invoke the
   1386 	! previously specified handler.
   1387 	!
   1388 	be	%ncc, 2b		! old handler was zero: return error
   1389 	stn	%o5, [THREAD_REG + T_LOFAULT]	! delay slot: restore t_lofault
   1390 	jmp	%o5			! goto real handler
   1391 	  nop
   1392 	SET_SIZE(kzero)
   1393 	SET_SIZE(uzero)
   1394 
   1395 #endif	/* lint */
   1396 
   1397 /*
   1398  * Zero a block of storage.
   1399  */
   1400 
   1401 #if defined(lint)
   1402 
   1403 /* ARGSUSED */
   1404 void
   1405 bzero(void *addr, size_t count)
   1406 {}
   1407 
   1408 #else	/* lint */
   1409 	! bzero: zero %o1 bytes starting at %o0, using ASI_P for the stores.
   1410 	ENTRY(bzero)
   1411 	wr	%g0, ASI_P, %asi
   1412 
   1413 	ldn	[THREAD_REG + T_LOFAULT], %o5	! save old vector
   1414 	tst	%o5
   1415 	bz,pt	%ncc, .do_zero		! none set: leave t_lofault alone
   1416 	sethi	%hi(.zeroerr), %o2	! delay slot (not annulled)
   1417 	or	%o2, %lo(.zeroerr), %o2
   1418 	membar	#Sync				! sync error barrier
   1419 	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector
   1420 	! Shared with uzero and kzero; every store below goes through %asi.
   1421 .do_zero:
   1422 	cmp	%o1, 7
   1423 	blu,pn	%ncc, .byteclr		! fewer than 7 bytes: byte loop only
   1424 	nop
   1425 
   1426 	cmp	%o1, 15
   1427 	blu,pn	%ncc, .wdalign		! 7 to 14 bytes: word sized clearing
   1428 	nop
   1429 
   1430 	andcc	%o0, 7, %o3		! is addr aligned on a 8 byte bound
   1431 	bz,pt	%ncc, .blkalign		! already double aligned
   1432 	sub	%o3, 8, %o3		! -(bytes till double aligned)
   1433 	add	%o1, %o3, %o1		! update o1 with new count
   1434 
   1435 1:
   1436 	stba	%g0, [%o0]%asi
   1437 	inccc	%o3
   1438 	bl,pt	%ncc, 1b		! loop until %o3 reaches zero
   1439 	inc	%o0			! delay slot: next byte
   1440 
   1441 	! Now address is double aligned
   1442 .blkalign:
   1443 	cmp	%o1, 0x80		! check if there are 128 bytes to set
   1444 	blu,pn	%ncc, .bzero_small
   1445 	mov	%o1, %o3		! delay slot: %o3 = count for .bzero_small
   1446 
   1447 	andcc	%o0, 0x3f, %o3		! is block aligned?
   1448 	bz,pt	%ncc, .bzero_blk
   1449 	sub	%o3, 0x40, %o3		! -(bytes till block aligned)
   1450 	add	%o1, %o3, %o1		! o1 is the remainder
   1451 
   1452 	! Clear -(%o3) bytes till block aligned
   1453 1:
   1454 	stxa	%g0, [%o0]%asi
   1455 	addcc	%o3, 8, %o3
   1456 	bl,pt	%ncc, 1b		! loop until %o3 reaches zero
   1457 	add	%o0, 8, %o0		! delay slot: next doubleword
   1458 
   1459 .bzero_blk:
   1460 	and	%o1, 0x3f, %o3		! calc bytes left after blk clear
   1461 	andn	%o1, 0x3f, %o4		! calc size of blocks in bytes
   1462 
   1463 	cmp	%o4, 0x100		! 256 bytes or more
   1464 	blu,pn	%ncc, 3f
   1465 	nop
   1466 	! 256 bytes per pass; offsets 0, 0x40, 0x80 and 0xc0 are stored first.
   1467 2:
   1468 	stxa	%g0, [%o0+0x0]%asi
   1469 	stxa	%g0, [%o0+0x40]%asi
   1470 	stxa	%g0, [%o0+0x80]%asi
   1471 	stxa	%g0, [%o0+0xc0]%asi
   1472 
   1473 	stxa	%g0, [%o0+0x8]%asi
   1474 	stxa	%g0, [%o0+0x10]%asi
   1475 	stxa	%g0, [%o0+0x18]%asi
   1476 	stxa	%g0, [%o0+0x20]%asi
   1477 	stxa	%g0, [%o0+0x28]%asi
   1478 	stxa	%g0, [%o0+0x30]%asi
   1479 	stxa	%g0, [%o0+0x38]%asi
   1480 
   1481 	stxa	%g0, [%o0+0x48]%asi
   1482 	stxa	%g0, [%o0+0x50]%asi
   1483 	stxa	%g0, [%o0+0x58]%asi
   1484 	stxa	%g0, [%o0+0x60]%asi
   1485 	stxa	%g0, [%o0+0x68]%asi
   1486 	stxa	%g0, [%o0+0x70]%asi
   1487 	stxa	%g0, [%o0+0x78]%asi
   1488 
   1489 	stxa	%g0, [%o0+0x88]%asi
   1490 	stxa	%g0, [%o0+0x90]%asi
   1491 	stxa	%g0, [%o0+0x98]%asi
   1492 	stxa	%g0, [%o0+0xa0]%asi
   1493 	stxa	%g0, [%o0+0xa8]%asi
   1494 	stxa	%g0, [%o0+0xb0]%asi
   1495 	stxa	%g0, [%o0+0xb8]%asi
   1496 
   1497 	stxa	%g0, [%o0+0xc8]%asi
   1498 	stxa	%g0, [%o0+0xd0]%asi
   1499 	stxa	%g0, [%o0+0xd8]%asi
   1500 	stxa	%g0, [%o0+0xe0]%asi
   1501 	stxa	%g0, [%o0+0xe8]%asi
   1502 	stxa	%g0, [%o0+0xf0]%asi
   1503 	stxa	%g0, [%o0+0xf8]%asi
   1504 
   1505 	sub	%o4, 0x100, %o4
   1506 	cmp	%o4, 0x100
   1507 	bgu,pt	%ncc, 2b		! repeat while more than 256 bytes remain
   1508 	add	%o0, 0x100, %o0		! delay slot: advance by 256 bytes
   1509 
   1510 3:
   1511 	! ... check if 64 bytes to set
   1512 	cmp	%o4, 0x40
   1513 	blu	%ncc, .bzero_blk_done
   1514 	nop
   1515 	! 64 bytes per pass; the loop branch below goes back to 3, not to 4.
   1516 4:
   1517 	stxa	%g0, [%o0+0x0]%asi
   1518 	stxa	%g0, [%o0+0x8]%asi
   1519 	stxa	%g0, [%o0+0x10]%asi
   1520 	stxa	%g0, [%o0+0x18]%asi
   1521 	stxa	%g0, [%o0+0x20]%asi
   1522 	stxa	%g0, [%o0+0x28]%asi
   1523 	stxa	%g0, [%o0+0x30]%asi
   1524 	stxa	%g0, [%o0+0x38]%asi
   1525 
   1526 	subcc	%o4, 0x40, %o4
   1527 	bgu,pt	%ncc, 3b
   1528 	add	%o0, 0x40, %o0		! delay slot: advance by 64 bytes
   1529 
   1530 .bzero_blk_done:
   1531 	membar	#Sync
   1532 
   1533 .bzero_small:
   1534 	! Set the remaining doubles
   1535 	subcc	%o3, 8, %o3		! Can we store any doubles?
   1536 	blu,pn	%ncc, .byteclr
   1537 	and	%o1, 7, %o1		! calc bytes left after doubles
   1538 
   1539 .dbclr:
   1540 	stxa	%g0, [%o0]%asi		! Clear the doubles
   1541 	subcc	%o3, 8, %o3
   1542 	bgeu,pt	%ncc, .dbclr
   1543 	add	%o0, 8, %o0		! delay slot: next doubleword
   1544 
   1545 	ba	.byteclr
   1546 	nop
   1547 
   1548 .wdalign:
   1549 	andcc	%o0, 3, %o3		! is addr aligned on a word boundary
   1550 	bz,pn	%ncc, .wdclr
   1551 	andn	%o1, 3, %o3		! create word sized count in %o3
   1552 
   1553 	dec	%o1			! decrement count
   1554 	stba	%g0, [%o0]%asi		! clear a byte
   1555 	ba	.wdalign
   1556 	inc	%o0			! next byte
   1557 
   1558 .wdclr:
   1559 	sta	%g0, [%o0]%asi		! 4-byte clearing loop
   1560 	subcc	%o3, 4, %o3
   1561 	bnz,pt	%ncc, .wdclr
   1562 	inc	4, %o0			! delay slot: next word
   1563 
   1564 	and	%o1, 3, %o1		! leftover count, if any
   1565 
   1566 .byteclr:
   1567 	! Set the leftover bytes
   1568 	brz	%o1, .bzero_exit
   1569 	nop
   1570 
   1571 7:
   1572 	deccc	%o1			! byte clearing loop
   1573 	stba	%g0, [%o0]%asi
   1574 	bgu,pt	%ncc, 7b
   1575 	inc	%o0			! delay slot: next byte
   1576 
   1577 .bzero_exit:
   1578 	!
   1579 	! We're just concerned with whether t_lofault was set
   1580 	! when we came in. We end up here from either kzero()
   1581 	! or bzero(). kzero() *always* sets a lofault handler.
   1582 	! It ors LOFAULT_SET into %o5 to indicate it has done
   1583 	! this even if the value of %o5 is otherwise zero.
   1584 	! bzero() sets a lofault handler *only* if one was
   1585 	! previously set. Accordingly we need to examine
   1586 	! %o5 and if it is non-zero be sure to clear LOFAULT_SET
   1587 	! before resetting the error handler.
   1588 	!
   1589 	tst	%o5
   1590 	bz	%ncc, 1f		! nothing was installed: skip restore
   1591 	andn	%o5, LOFAULT_SET, %o5	! delay slot: strip the flag bit
   1592 	membar	#Sync				! sync error barrier
   1593 	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
   1594 1:
   1595 	retl
   1596 	clr	%o0			! return (0)
   1597 
   1598 	SET_SIZE(bzero)
   1599 #endif	/* lint */
   1600