/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2009, Intel Corporation
 * All rights reserved.
 */

/*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
/*         All Rights Reserved						*/

/*       Copyright (c) 1987, 1988 Microsoft Corporation			*/
/*         All Rights Reserved						*/

#include <sys/errno.h>
#include <sys/asm_linkage.h>

#if defined(__lint)
#include <sys/types.h>
#include <sys/systm.h>
#else	/* __lint */
#include "assym.h"
#endif	/* __lint */

/*
 * Minimum byte counts checked before a non-temporal copy is attempted.
 */
#define	KCOPY_MIN_SIZE	128	/* Must be >= 16 bytes */
#define	XCOPY_MIN_SIZE	128	/* Must be >= 16 bytes */
/*
 * Non-temporal access (NTA) alignment requirement
 */
#define	NTA_ALIGN_SIZE	4	/* Must be at least 4-byte aligned */
#define	NTA_ALIGN_MASK	_CONST(NTA_ALIGN_SIZE-1)
#define	COUNT_ALIGN_SIZE	16	/* Must be at least 16-byte aligned */
#define	COUNT_ALIGN_MASK	_CONST(COUNT_ALIGN_SIZE-1)

/*
 * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
 * "rep smovq" for large sizes. Performance data shows that many calls to
 * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for
 * these small sizes unrolled code is used. For medium sizes loops writing
 * 64-bytes per loop are used. Transition points were determined experimentally.
 */
#define BZERO_USE_REP	(1024)
#define BCOPY_DFLT_REP	(128)
#define BCOPY_NHM_REP	(768)

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
72 * Returns errno value on pagefault error, 0 if all ok 73 */ 74 75 #if defined(__lint) 76 77 /* ARGSUSED */ 78 int 79 kcopy(const void *from, void *to, size_t count) 80 { return (0); } 81 82 #else /* __lint */ 83 84 .globl kernelbase 85 .globl postbootkernelbase 86 87 #if defined(__amd64) 88 89 ENTRY(kcopy) 90 pushq %rbp 91 movq %rsp, %rbp 92 #ifdef DEBUG 93 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 94 jb 0f 95 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 96 jnb 1f 97 0: leaq .kcopy_panic_msg(%rip), %rdi 98 xorl %eax, %eax 99 call panic 100 1: 101 #endif 102 /* 103 * pass lofault value as 4th argument to do_copy_fault 104 */ 105 leaq _kcopy_copyerr(%rip), %rcx 106 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 107 108 do_copy_fault: 109 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 110 movq %rcx, T_LOFAULT(%r9) /* new lofault */ 111 call bcopy_altentry 112 xorl %eax, %eax /* return 0 (success) */ 113 114 /* 115 * A fault during do_copy_fault is indicated through an errno value 116 * in %rax and we iretq from the trap handler to here. 
117 */ 118 _kcopy_copyerr: 119 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 120 leave 121 ret 122 SET_SIZE(kcopy) 123 124 #elif defined(__i386) 125 126 #define ARG_FROM 8 127 #define ARG_TO 12 128 #define ARG_COUNT 16 129 130 ENTRY(kcopy) 131 #ifdef DEBUG 132 pushl %ebp 133 movl %esp, %ebp 134 movl postbootkernelbase, %eax 135 cmpl %eax, ARG_FROM(%ebp) 136 jb 0f 137 cmpl %eax, ARG_TO(%ebp) 138 jnb 1f 139 0: pushl $.kcopy_panic_msg 140 call panic 141 1: popl %ebp 142 #endif 143 lea _kcopy_copyerr, %eax /* lofault value */ 144 movl %gs:CPU_THREAD, %edx 145 146 do_copy_fault: 147 pushl %ebp 148 movl %esp, %ebp /* setup stack frame */ 149 pushl %esi 150 pushl %edi /* save registers */ 151 152 movl T_LOFAULT(%edx), %edi 153 pushl %edi /* save the current lofault */ 154 movl %eax, T_LOFAULT(%edx) /* new lofault */ 155 156 movl ARG_COUNT(%ebp), %ecx 157 movl ARG_FROM(%ebp), %esi 158 movl ARG_TO(%ebp), %edi 159 shrl $2, %ecx /* word count */ 160 rep 161 smovl 162 movl ARG_COUNT(%ebp), %ecx 163 andl $3, %ecx /* bytes left over */ 164 rep 165 smovb 166 xorl %eax, %eax 167 168 /* 169 * A fault during do_copy_fault is indicated through an errno value 170 * in %eax and we iret from the trap handler to here. 171 */ 172 _kcopy_copyerr: 173 popl %ecx 174 popl %edi 175 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */ 176 popl %esi 177 popl %ebp 178 ret 179 SET_SIZE(kcopy) 180 181 #undef ARG_FROM 182 #undef ARG_TO 183 #undef ARG_COUNT 184 185 #endif /* __i386 */ 186 #endif /* __lint */ 187 188 #if defined(__lint) 189 190 /* 191 * Copy a block of storage. Similar to kcopy but uses non-temporal 192 * instructions. 193 */ 194 195 /* ARGSUSED */ 196 int 197 kcopy_nta(const void *from, void *to, size_t count, int copy_cached) 198 { return (0); } 199 200 #else /* __lint */ 201 202 #if defined(__amd64) 203 204 #define COPY_LOOP_INIT(src, dst, cnt) \ 205 addq cnt, src; \ 206 addq cnt, dst; \ 207 shrq $3, cnt; \ 208 neg cnt 209 210 /* Copy 16 bytes per loop. 
Uses %rax and %r8 */ 211 #define COPY_LOOP_BODY(src, dst, cnt) \ 212 prefetchnta 0x100(src, cnt, 8); \ 213 movq (src, cnt, 8), %rax; \ 214 movq 0x8(src, cnt, 8), %r8; \ 215 movnti %rax, (dst, cnt, 8); \ 216 movnti %r8, 0x8(dst, cnt, 8); \ 217 addq $2, cnt 218 219 ENTRY(kcopy_nta) 220 pushq %rbp 221 movq %rsp, %rbp 222 #ifdef DEBUG 223 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 224 jb 0f 225 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 226 jnb 1f 227 0: leaq .kcopy_panic_msg(%rip), %rdi 228 xorl %eax, %eax 229 call panic 230 1: 231 #endif 232 233 movq %gs:CPU_THREAD, %r9 234 cmpq $0, %rcx /* No non-temporal access? */ 235 /* 236 * pass lofault value as 4th argument to do_copy_fault 237 */ 238 leaq _kcopy_nta_copyerr(%rip), %rcx /* doesn't set rflags */ 239 jnz do_copy_fault /* use regular access */ 240 /* 241 * Make sure cnt is >= KCOPY_MIN_SIZE 242 */ 243 cmpq $KCOPY_MIN_SIZE, %rdx 244 jb do_copy_fault 245 246 /* 247 * Make sure src and dst are NTA_ALIGN_SIZE aligned, 248 * count is COUNT_ALIGN_SIZE aligned. 
249 */ 250 movq %rdi, %r10 251 orq %rsi, %r10 252 andq $NTA_ALIGN_MASK, %r10 253 orq %rdx, %r10 254 andq $COUNT_ALIGN_MASK, %r10 255 jnz do_copy_fault 256 257 ALTENTRY(do_copy_fault_nta) 258 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 259 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 260 movq %rcx, T_LOFAULT(%r9) /* new lofault */ 261 262 /* 263 * COPY_LOOP_BODY uses %rax and %r8 264 */ 265 COPY_LOOP_INIT(%rdi, %rsi, %rdx) 266 2: COPY_LOOP_BODY(%rdi, %rsi, %rdx) 267 jnz 2b 268 269 mfence 270 xorl %eax, %eax /* return 0 (success) */ 271 272 _kcopy_nta_copyerr: 273 movq %r11, T_LOFAULT(%r9) /* restore original lofault */ 274 leave 275 ret 276 SET_SIZE(do_copy_fault_nta) 277 SET_SIZE(kcopy_nta) 278 279 #elif defined(__i386) 280 281 #define ARG_FROM 8 282 #define ARG_TO 12 283 #define ARG_COUNT 16 284 285 #define COPY_LOOP_INIT(src, dst, cnt) \ 286 addl cnt, src; \ 287 addl cnt, dst; \ 288 shrl $3, cnt; \ 289 neg cnt 290 291 #define COPY_LOOP_BODY(src, dst, cnt) \ 292 prefetchnta 0x100(src, cnt, 8); \ 293 movl (src, cnt, 8), %esi; \ 294 movnti %esi, (dst, cnt, 8); \ 295 movl 0x4(src, cnt, 8), %esi; \ 296 movnti %esi, 0x4(dst, cnt, 8); \ 297 movl 0x8(src, cnt, 8), %esi; \ 298 movnti %esi, 0x8(dst, cnt, 8); \ 299 movl 0xc(src, cnt, 8), %esi; \ 300 movnti %esi, 0xc(dst, cnt, 8); \ 301 addl $2, cnt 302 303 /* 304 * kcopy_nta is not implemented for 32-bit as no performance 305 * improvement was shown. We simply jump directly to kcopy 306 * and discard the 4 arguments. 
307 */ 308 ENTRY(kcopy_nta) 309 jmp kcopy 310 311 lea _kcopy_nta_copyerr, %eax /* lofault value */ 312 ALTENTRY(do_copy_fault_nta) 313 pushl %ebp 314 movl %esp, %ebp /* setup stack frame */ 315 pushl %esi 316 pushl %edi 317 318 movl %gs:CPU_THREAD, %edx 319 movl T_LOFAULT(%edx), %edi 320 pushl %edi /* save the current lofault */ 321 movl %eax, T_LOFAULT(%edx) /* new lofault */ 322 323 /* COPY_LOOP_BODY needs to use %esi */ 324 movl ARG_COUNT(%ebp), %ecx 325 movl ARG_FROM(%ebp), %edi 326 movl ARG_TO(%ebp), %eax 327 COPY_LOOP_INIT(%edi, %eax, %ecx) 328 1: COPY_LOOP_BODY(%edi, %eax, %ecx) 329 jnz 1b 330 mfence 331 332 xorl %eax, %eax 333 _kcopy_nta_copyerr: 334 popl %ecx 335 popl %edi 336 movl %ecx, T_LOFAULT(%edx) /* restore the original lofault */ 337 popl %esi 338 leave 339 ret 340 SET_SIZE(do_copy_fault_nta) 341 SET_SIZE(kcopy_nta) 342 343 #undef ARG_FROM 344 #undef ARG_TO 345 #undef ARG_COUNT 346 347 #endif /* __i386 */ 348 #endif /* __lint */ 349 350 #if defined(__lint) 351 352 /* ARGSUSED */ 353 void 354 bcopy(const void *from, void *to, size_t count) 355 {} 356 357 #else /* __lint */ 358 359 #if defined(__amd64) 360 361 ENTRY(bcopy) 362 #ifdef DEBUG 363 orq %rdx, %rdx /* %rdx = count */ 364 jz 1f 365 cmpq postbootkernelbase(%rip), %rdi /* %rdi = from */ 366 jb 0f 367 cmpq postbootkernelbase(%rip), %rsi /* %rsi = to */ 368 jnb 1f 369 0: leaq .bcopy_panic_msg(%rip), %rdi 370 jmp call_panic /* setup stack and call panic */ 371 1: 372 #endif 373 /* 374 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault. 375 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy 376 * uses these registers in future they must be saved and restored. 377 */ 378 ALTENTRY(bcopy_altentry) 379 do_copy: 380 #define L(s) .bcopy/**/s 381 cmpq $0x50, %rdx /* 80 */ 382 jge bcopy_ck_size 383 384 /* 385 * Performance data shows many caller's copy small buffers. So for 386 * best perf for these sizes unrolled code is used. Store data without 387 * worrying about alignment. 
388 */ 389 leaq L(fwdPxQx)(%rip), %r10 390 addq %rdx, %rdi 391 addq %rdx, %rsi 392 movslq (%r10,%rdx,4), %rcx 393 leaq (%rcx,%r10,1), %r10 394 jmpq *%r10 395 396 .p2align 4 397 L(fwdPxQx): 398 .int L(P0Q0)-L(fwdPxQx) /* 0 */ 399 .int L(P1Q0)-L(fwdPxQx) 400 .int L(P2Q0)-L(fwdPxQx) 401 .int L(P3Q0)-L(fwdPxQx) 402 .int L(P4Q0)-L(fwdPxQx) 403 .int L(P5Q0)-L(fwdPxQx) 404 .int L(P6Q0)-L(fwdPxQx) 405 .int L(P7Q0)-L(fwdPxQx) 406 407 .int L(P0Q1)-L(fwdPxQx) /* 8 */ 408 .int L(P1Q1)-L(fwdPxQx) 409 .int L(P2Q1)-L(fwdPxQx) 410 .int L(P3Q1)-L(fwdPxQx) 411 .int L(P4Q1)-L(fwdPxQx) 412 .int L(P5Q1)-L(fwdPxQx) 413 .int L(P6Q1)-L(fwdPxQx) 414 .int L(P7Q1)-L(fwdPxQx) 415 416 .int L(P0Q2)-L(fwdPxQx) /* 16 */ 417 .int L(P1Q2)-L(fwdPxQx) 418 .int L(P2Q2)-L(fwdPxQx) 419 .int L(P3Q2)-L(fwdPxQx) 420 .int L(P4Q2)-L(fwdPxQx) 421 .int L(P5Q2)-L(fwdPxQx) 422 .int L(P6Q2)-L(fwdPxQx) 423 .int L(P7Q2)-L(fwdPxQx) 424 425 .int L(P0Q3)-L(fwdPxQx) /* 24 */ 426 .int L(P1Q3)-L(fwdPxQx) 427 .int L(P2Q3)-L(fwdPxQx) 428 .int L(P3Q3)-L(fwdPxQx) 429 .int L(P4Q3)-L(fwdPxQx) 430 .int L(P5Q3)-L(fwdPxQx) 431 .int L(P6Q3)-L(fwdPxQx) 432 .int L(P7Q3)-L(fwdPxQx) 433 434 .int L(P0Q4)-L(fwdPxQx) /* 32 */ 435 .int L(P1Q4)-L(fwdPxQx) 436 .int L(P2Q4)-L(fwdPxQx) 437 .int L(P3Q4)-L(fwdPxQx) 438 .int L(P4Q4)-L(fwdPxQx) 439 .int L(P5Q4)-L(fwdPxQx) 440 .int L(P6Q4)-L(fwdPxQx) 441 .int L(P7Q4)-L(fwdPxQx) 442 443 .int L(P0Q5)-L(fwdPxQx) /* 40 */ 444 .int L(P1Q5)-L(fwdPxQx) 445 .int L(P2Q5)-L(fwdPxQx) 446 .int L(P3Q5)-L(fwdPxQx) 447 .int L(P4Q5)-L(fwdPxQx) 448 .int L(P5Q5)-L(fwdPxQx) 449 .int L(P6Q5)-L(fwdPxQx) 450 .int L(P7Q5)-L(fwdPxQx) 451 452 .int L(P0Q6)-L(fwdPxQx) /* 48 */ 453 .int L(P1Q6)-L(fwdPxQx) 454 .int L(P2Q6)-L(fwdPxQx) 455 .int L(P3Q6)-L(fwdPxQx) 456 .int L(P4Q6)-L(fwdPxQx) 457 .int L(P5Q6)-L(fwdPxQx) 458 .int L(P6Q6)-L(fwdPxQx) 459 .int L(P7Q6)-L(fwdPxQx) 460 461 .int L(P0Q7)-L(fwdPxQx) /* 56 */ 462 .int L(P1Q7)-L(fwdPxQx) 463 .int L(P2Q7)-L(fwdPxQx) 464 .int L(P3Q7)-L(fwdPxQx) 465 .int L(P4Q7)-L(fwdPxQx) 466 
.int L(P5Q7)-L(fwdPxQx) 467 .int L(P6Q7)-L(fwdPxQx) 468 .int L(P7Q7)-L(fwdPxQx) 469 470 .int L(P0Q8)-L(fwdPxQx) /* 64 */ 471 .int L(P1Q8)-L(fwdPxQx) 472 .int L(P2Q8)-L(fwdPxQx) 473 .int L(P3Q8)-L(fwdPxQx) 474 .int L(P4Q8)-L(fwdPxQx) 475 .int L(P5Q8)-L(fwdPxQx) 476 .int L(P6Q8)-L(fwdPxQx) 477 .int L(P7Q8)-L(fwdPxQx) 478 479 .int L(P0Q9)-L(fwdPxQx) /* 72 */ 480 .int L(P1Q9)-L(fwdPxQx) 481 .int L(P2Q9)-L(fwdPxQx) 482 .int L(P3Q9)-L(fwdPxQx) 483 .int L(P4Q9)-L(fwdPxQx) 484 .int L(P5Q9)-L(fwdPxQx) 485 .int L(P6Q9)-L(fwdPxQx) 486 .int L(P7Q9)-L(fwdPxQx) /* 79 */ 487 488 .p2align 4 489 L(P0Q9): 490 mov -0x48(%rdi), %rcx 491 mov %rcx, -0x48(%rsi) 492 L(P0Q8): 493 mov -0x40(%rdi), %r10 494 mov %r10, -0x40(%rsi) 495 L(P0Q7): 496 mov -0x38(%rdi), %r8 497 mov %r8, -0x38(%rsi) 498 L(P0Q6): 499 mov -0x30(%rdi), %rcx 500 mov %rcx, -0x30(%rsi) 501 L(P0Q5): 502 mov -0x28(%rdi), %r10 503 mov %r10, -0x28(%rsi) 504 L(P0Q4): 505 mov -0x20(%rdi), %r8 506 mov %r8, -0x20(%rsi) 507 L(P0Q3): 508 mov -0x18(%rdi), %rcx 509 mov %rcx, -0x18(%rsi) 510 L(P0Q2): 511 mov -0x10(%rdi), %r10 512 mov %r10, -0x10(%rsi) 513 L(P0Q1): 514 mov -0x8(%rdi), %r8 515 mov %r8, -0x8(%rsi) 516 L(P0Q0): 517 ret 518 519 .p2align 4 520 L(P1Q9): 521 mov -0x49(%rdi), %r8 522 mov %r8, -0x49(%rsi) 523 L(P1Q8): 524 mov -0x41(%rdi), %rcx 525 mov %rcx, -0x41(%rsi) 526 L(P1Q7): 527 mov -0x39(%rdi), %r10 528 mov %r10, -0x39(%rsi) 529 L(P1Q6): 530 mov -0x31(%rdi), %r8 531 mov %r8, -0x31(%rsi) 532 L(P1Q5): 533 mov -0x29(%rdi), %rcx 534 mov %rcx, -0x29(%rsi) 535 L(P1Q4): 536 mov -0x21(%rdi), %r10 537 mov %r10, -0x21(%rsi) 538 L(P1Q3): 539 mov -0x19(%rdi), %r8 540 mov %r8, -0x19(%rsi) 541 L(P1Q2): 542 mov -0x11(%rdi), %rcx 543 mov %rcx, -0x11(%rsi) 544 L(P1Q1): 545 mov -0x9(%rdi), %r10 546 mov %r10, -0x9(%rsi) 547 L(P1Q0): 548 movzbq -0x1(%rdi), %r8 549 mov %r8b, -0x1(%rsi) 550 ret 551 552 .p2align 4 553 L(P2Q9): 554 mov -0x4a(%rdi), %r8 555 mov %r8, -0x4a(%rsi) 556 L(P2Q8): 557 mov -0x42(%rdi), %rcx 558 mov %rcx, -0x42(%rsi) 559 
L(P2Q7): 560 mov -0x3a(%rdi), %r10 561 mov %r10, -0x3a(%rsi) 562 L(P2Q6): 563 mov -0x32(%rdi), %r8 564 mov %r8, -0x32(%rsi) 565 L(P2Q5): 566 mov -0x2a(%rdi), %rcx 567 mov %rcx, -0x2a(%rsi) 568 L(P2Q4): 569 mov -0x22(%rdi), %r10 570 mov %r10, -0x22(%rsi) 571 L(P2Q3): 572 mov -0x1a(%rdi), %r8 573 mov %r8, -0x1a(%rsi) 574 L(P2Q2): 575 mov -0x12(%rdi), %rcx 576 mov %rcx, -0x12(%rsi) 577 L(P2Q1): 578 mov -0xa(%rdi), %r10 579 mov %r10, -0xa(%rsi) 580 L(P2Q0): 581 movzwq -0x2(%rdi), %r8 582 mov %r8w, -0x2(%rsi) 583 ret 584 585 .p2align 4 586 L(P3Q9): 587 mov -0x4b(%rdi), %r8 588 mov %r8, -0x4b(%rsi) 589 L(P3Q8): 590 mov -0x43(%rdi), %rcx 591 mov %rcx, -0x43(%rsi) 592 L(P3Q7): 593 mov -0x3b(%rdi), %r10 594 mov %r10, -0x3b(%rsi) 595 L(P3Q6): 596 mov -0x33(%rdi), %r8 597 mov %r8, -0x33(%rsi) 598 L(P3Q5): 599 mov -0x2b(%rdi), %rcx 600 mov %rcx, -0x2b(%rsi) 601 L(P3Q4): 602 mov -0x23(%rdi), %r10 603 mov %r10, -0x23(%rsi) 604 L(P3Q3): 605 mov -0x1b(%rdi), %r8 606 mov %r8, -0x1b(%rsi) 607 L(P3Q2): 608 mov -0x13(%rdi), %rcx 609 mov %rcx, -0x13(%rsi) 610 L(P3Q1): 611 mov -0xb(%rdi), %r10 612 mov %r10, -0xb(%rsi) 613 /* 614 * These trailing loads/stores have to do all their loads 1st, 615 * then do the stores. 
616 */ 617 L(P3Q0): 618 movzwq -0x3(%rdi), %r8 619 movzbq -0x1(%rdi), %r10 620 mov %r8w, -0x3(%rsi) 621 mov %r10b, -0x1(%rsi) 622 ret 623 624 .p2align 4 625 L(P4Q9): 626 mov -0x4c(%rdi), %r8 627 mov %r8, -0x4c(%rsi) 628 L(P4Q8): 629 mov -0x44(%rdi), %rcx 630 mov %rcx, -0x44(%rsi) 631 L(P4Q7): 632 mov -0x3c(%rdi), %r10 633 mov %r10, -0x3c(%rsi) 634 L(P4Q6): 635 mov -0x34(%rdi), %r8 636 mov %r8, -0x34(%rsi) 637 L(P4Q5): 638 mov -0x2c(%rdi), %rcx 639 mov %rcx, -0x2c(%rsi) 640 L(P4Q4): 641 mov -0x24(%rdi), %r10 642 mov %r10, -0x24(%rsi) 643 L(P4Q3): 644 mov -0x1c(%rdi), %r8 645 mov %r8, -0x1c(%rsi) 646 L(P4Q2): 647 mov -0x14(%rdi), %rcx 648 mov %rcx, -0x14(%rsi) 649 L(P4Q1): 650 mov -0xc(%rdi), %r10 651 mov %r10, -0xc(%rsi) 652 L(P4Q0): 653 mov -0x4(%rdi), %r8d 654 mov %r8d, -0x4(%rsi) 655 ret 656 657 .p2align 4 658 L(P5Q9): 659 mov -0x4d(%rdi), %r8 660 mov %r8, -0x4d(%rsi) 661 L(P5Q8): 662 mov -0x45(%rdi), %rcx 663 mov %rcx, -0x45(%rsi) 664 L(P5Q7): 665 mov -0x3d(%rdi), %r10 666 mov %r10, -0x3d(%rsi) 667 L(P5Q6): 668 mov -0x35(%rdi), %r8 669 mov %r8, -0x35(%rsi) 670 L(P5Q5): 671 mov -0x2d(%rdi), %rcx 672 mov %rcx, -0x2d(%rsi) 673 L(P5Q4): 674 mov -0x25(%rdi), %r10 675 mov %r10, -0x25(%rsi) 676 L(P5Q3): 677 mov -0x1d(%rdi), %r8 678 mov %r8, -0x1d(%rsi) 679 L(P5Q2): 680 mov -0x15(%rdi), %rcx 681 mov %rcx, -0x15(%rsi) 682 L(P5Q1): 683 mov -0xd(%rdi), %r10 684 mov %r10, -0xd(%rsi) 685 L(P5Q0): 686 mov -0x5(%rdi), %r8d 687 movzbq -0x1(%rdi), %r10 688 mov %r8d, -0x5(%rsi) 689 mov %r10b, -0x1(%rsi) 690 ret 691 692 .p2align 4 693 L(P6Q9): 694 mov -0x4e(%rdi), %r8 695 mov %r8, -0x4e(%rsi) 696 L(P6Q8): 697 mov -0x46(%rdi), %rcx 698 mov %rcx, -0x46(%rsi) 699 L(P6Q7): 700 mov -0x3e(%rdi), %r10 701 mov %r10, -0x3e(%rsi) 702 L(P6Q6): 703 mov -0x36(%rdi), %r8 704 mov %r8, -0x36(%rsi) 705 L(P6Q5): 706 mov -0x2e(%rdi), %rcx 707 mov %rcx, -0x2e(%rsi) 708 L(P6Q4): 709 mov -0x26(%rdi), %r10 710 mov %r10, -0x26(%rsi) 711 L(P6Q3): 712 mov -0x1e(%rdi), %r8 713 mov %r8, -0x1e(%rsi) 714 
L(P6Q2): 715 mov -0x16(%rdi), %rcx 716 mov %rcx, -0x16(%rsi) 717 L(P6Q1): 718 mov -0xe(%rdi), %r10 719 mov %r10, -0xe(%rsi) 720 L(P6Q0): 721 mov -0x6(%rdi), %r8d 722 movzwq -0x2(%rdi), %r10 723 mov %r8d, -0x6(%rsi) 724 mov %r10w, -0x2(%rsi) 725 ret 726 727 .p2align 4 728 L(P7Q9): 729 mov -0x4f(%rdi), %r8 730 mov %r8, -0x4f(%rsi) 731 L(P7Q8): 732 mov -0x47(%rdi), %rcx 733 mov %rcx, -0x47(%rsi) 734 L(P7Q7): 735 mov -0x3f(%rdi), %r10 736 mov %r10, -0x3f(%rsi) 737 L(P7Q6): 738 mov -0x37(%rdi), %r8 739 mov %r8, -0x37(%rsi) 740 L(P7Q5): 741 mov -0x2f(%rdi), %rcx 742 mov %rcx, -0x2f(%rsi) 743 L(P7Q4): 744 mov -0x27(%rdi), %r10 745 mov %r10, -0x27(%rsi) 746 L(P7Q3): 747 mov -0x1f(%rdi), %r8 748 mov %r8, -0x1f(%rsi) 749 L(P7Q2): 750 mov -0x17(%rdi), %rcx 751 mov %rcx, -0x17(%rsi) 752 L(P7Q1): 753 mov -0xf(%rdi), %r10 754 mov %r10, -0xf(%rsi) 755 L(P7Q0): 756 mov -0x7(%rdi), %r8d 757 movzwq -0x3(%rdi), %r10 758 movzbq -0x1(%rdi), %rcx 759 mov %r8d, -0x7(%rsi) 760 mov %r10w, -0x3(%rsi) 761 mov %cl, -0x1(%rsi) 762 ret 763 764 /* 765 * For large sizes rep smovq is fastest. 766 * Transition point determined experimentally as measured on 767 * Intel Xeon processors (incl. Nehalem and previous generations) and 768 * AMD Opteron. The transition value is patched at boot time to avoid 769 * memory reference hit. 770 */ 771 .globl bcopy_patch_start 772 bcopy_patch_start: 773 cmpq $BCOPY_NHM_REP, %rdx 774 .globl bcopy_patch_end 775 bcopy_patch_end: 776 777 .p2align 4 778 .globl bcopy_ck_size 779 bcopy_ck_size: 780 cmpq $BCOPY_DFLT_REP, %rdx 781 jge L(use_rep) 782 783 /* 784 * Align to a 8-byte boundary. Avoids penalties from unaligned stores 785 * as well as from stores spanning cachelines. 
786 */ 787 test $0x7, %rsi 788 jz L(aligned_loop) 789 test $0x1, %rsi 790 jz 2f 791 movzbq (%rdi), %r8 792 dec %rdx 793 inc %rdi 794 mov %r8b, (%rsi) 795 inc %rsi 796 2: 797 test $0x2, %rsi 798 jz 4f 799 movzwq (%rdi), %r8 800 sub $0x2, %rdx 801 add $0x2, %rdi 802 mov %r8w, (%rsi) 803 add $0x2, %rsi 804 4: 805 test $0x4, %rsi 806 jz L(aligned_loop) 807 mov (%rdi), %r8d 808 sub $0x4, %rdx 809 add $0x4, %rdi 810 mov %r8d, (%rsi) 811 add $0x4, %rsi 812 813 /* 814 * Copy 64-bytes per loop 815 */ 816 .p2align 4 817 L(aligned_loop): 818 mov (%rdi), %r8 819 mov 0x8(%rdi), %r10 820 lea -0x40(%rdx), %rdx 821 mov %r8, (%rsi) 822 mov %r10, 0x8(%rsi) 823 mov 0x10(%rdi), %rcx 824 mov 0x18(%rdi), %r8 825 mov %rcx, 0x10(%rsi) 826 mov %r8, 0x18(%rsi) 827 828 cmp $0x40, %rdx 829 mov 0x20(%rdi), %r10 830 mov 0x28(%rdi), %rcx 831 mov %r10, 0x20(%rsi) 832 mov %rcx, 0x28(%rsi) 833 mov 0x30(%rdi), %r8 834 mov 0x38(%rdi), %r10 835 lea 0x40(%rdi), %rdi 836 mov %r8, 0x30(%rsi) 837 mov %r10, 0x38(%rsi) 838 lea 0x40(%rsi), %rsi 839 jge L(aligned_loop) 840 841 /* 842 * Copy remaining bytes (0-63) 843 */ 844 L(do_remainder): 845 leaq L(fwdPxQx)(%rip), %r10 846 addq %rdx, %rdi 847 addq %rdx, %rsi 848 movslq (%r10,%rdx,4), %rcx 849 leaq (%rcx,%r10,1), %r10 850 jmpq *%r10 851 852 /* 853 * Use rep smovq. Clear remainder via unrolled code 854 */ 855 .p2align 4 856 L(use_rep): 857 xchgq %rdi, %rsi /* %rsi = source, %rdi = destination */ 858 movq %rdx, %rcx /* %rcx = count */ 859 shrq $3, %rcx /* 8-byte word count */ 860 rep 861 smovq 862 863 xchgq %rsi, %rdi /* %rdi = src, %rsi = destination */ 864 andq $7, %rdx /* remainder */ 865 jnz L(do_remainder) 866 ret 867 #undef L 868 869 #ifdef DEBUG 870 /* 871 * Setup frame on the run-time stack. The end of the input argument 872 * area must be aligned on a 16 byte boundary. The stack pointer %rsp, 873 * always points to the end of the latest allocated stack frame. 874 * panic(const char *format, ...) is a varargs function. 
When a 875 * function taking variable arguments is called, %rax must be set 876 * to eight times the number of floating point parameters passed 877 * to the function in SSE registers. 878 */ 879 call_panic: 880 pushq %rbp /* align stack properly */ 881 movq %rsp, %rbp 882 xorl %eax, %eax /* no variable arguments */ 883 call panic /* %rdi = format string */ 884 #endif 885 SET_SIZE(bcopy_altentry) 886 SET_SIZE(bcopy) 887 888 #elif defined(__i386) 889 890 #define ARG_FROM 4 891 #define ARG_TO 8 892 #define ARG_COUNT 12 893 894 ENTRY(bcopy) 895 #ifdef DEBUG 896 movl ARG_COUNT(%esp), %eax 897 orl %eax, %eax 898 jz 1f 899 movl postbootkernelbase, %eax 900 cmpl %eax, ARG_FROM(%esp) 901 jb 0f 902 cmpl %eax, ARG_TO(%esp) 903 jnb 1f 904 0: pushl %ebp 905 movl %esp, %ebp 906 pushl $.bcopy_panic_msg 907 call panic 908 1: 909 #endif 910 do_copy: 911 movl %esi, %eax /* save registers */ 912 movl %edi, %edx 913 movl ARG_COUNT(%esp), %ecx 914 movl ARG_FROM(%esp), %esi 915 movl ARG_TO(%esp), %edi 916 917 shrl $2, %ecx /* word count */ 918 rep 919 smovl 920 movl ARG_COUNT(%esp), %ecx 921 andl $3, %ecx /* bytes left over */ 922 rep 923 smovb 924 movl %eax, %esi /* restore registers */ 925 movl %edx, %edi 926 ret 927 SET_SIZE(bcopy) 928 929 #undef ARG_COUNT 930 #undef ARG_FROM 931 #undef ARG_TO 932 933 #endif /* __i386 */ 934 #endif /* __lint */ 935 936 937 /* 938 * Zero a block of storage, returning an error code if we 939 * take a kernel pagefault which cannot be resolved. 
940 * Returns errno value on pagefault error, 0 if all ok 941 */ 942 943 #if defined(__lint) 944 945 /* ARGSUSED */ 946 int 947 kzero(void *addr, size_t count) 948 { return (0); } 949 950 #else /* __lint */ 951 952 #if defined(__amd64) 953 954 ENTRY(kzero) 955 #ifdef DEBUG 956 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ 957 jnb 0f 958 leaq .kzero_panic_msg(%rip), %rdi 959 jmp call_panic /* setup stack and call panic */ 960 0: 961 #endif 962 /* 963 * pass lofault value as 3rd argument for fault return 964 */ 965 leaq _kzeroerr(%rip), %rdx 966 967 movq %gs:CPU_THREAD, %r9 /* %r9 = thread addr */ 968 movq T_LOFAULT(%r9), %r11 /* save the current lofault */ 969 movq %rdx, T_LOFAULT(%r9) /* new lofault */ 970 call bzero_altentry 971 xorl %eax, %eax 972 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 973 ret 974 /* 975 * A fault during bzero is indicated through an errno value 976 * in %rax when we iretq to here. 977 */ 978 _kzeroerr: 979 addq $8, %rsp /* pop bzero_altentry call ret addr */ 980 movq %r11, T_LOFAULT(%r9) /* restore the original lofault */ 981 ret 982 SET_SIZE(kzero) 983 984 #elif defined(__i386) 985 986 #define ARG_ADDR 8 987 #define ARG_COUNT 12 988 989 ENTRY(kzero) 990 #ifdef DEBUG 991 pushl %ebp 992 movl %esp, %ebp 993 movl postbootkernelbase, %eax 994 cmpl %eax, ARG_ADDR(%ebp) 995 jnb 0f 996 pushl $.kzero_panic_msg 997 call panic 998 0: popl %ebp 999 #endif 1000 lea _kzeroerr, %eax /* kzeroerr is lofault value */ 1001 1002 pushl %ebp /* save stack base */ 1003 movl %esp, %ebp /* set new stack base */ 1004 pushl %edi /* save %edi */ 1005 1006 mov %gs:CPU_THREAD, %edx 1007 movl T_LOFAULT(%edx), %edi 1008 pushl %edi /* save the current lofault */ 1009 movl %eax, T_LOFAULT(%edx) /* new lofault */ 1010 1011 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */ 1012 movl ARG_ADDR(%ebp), %edi /* %edi <- address of bytes to clear */ 1013 shrl $2, %ecx /* Count of double words to zero */ 1014 xorl %eax, %eax /* sstol val */ 1015 rep 1016 
sstol /* %ecx contains words to clear (%eax=0) */ 1017 1018 movl ARG_COUNT(%ebp), %ecx /* get size in bytes */ 1019 andl $3, %ecx /* do mod 4 */ 1020 rep 1021 sstob /* %ecx contains residual bytes to clear */ 1022 1023 /* 1024 * A fault during kzero is indicated through an errno value 1025 * in %eax when we iret to here. 1026 */ 1027 _kzeroerr: 1028 popl %edi 1029 movl %edi, T_LOFAULT(%edx) /* restore the original lofault */ 1030 popl %edi 1031 popl %ebp 1032 ret 1033 SET_SIZE(kzero) 1034 1035 #undef ARG_ADDR 1036 #undef ARG_COUNT 1037 1038 #endif /* __i386 */ 1039 #endif /* __lint */ 1040 1041 /* 1042 * Zero a block of storage. 1043 */ 1044 1045 #if defined(__lint) 1046 1047 /* ARGSUSED */ 1048 void 1049 bzero(void *addr, size_t count) 1050 {} 1051 1052 #else /* __lint */ 1053 1054 #if defined(__amd64) 1055 1056 ENTRY(bzero) 1057 #ifdef DEBUG 1058 cmpq postbootkernelbase(%rip), %rdi /* %rdi = addr */ 1059 jnb 0f 1060 leaq .bzero_panic_msg(%rip), %rdi 1061 jmp call_panic /* setup stack and call panic */ 1062 0: 1063 #endif 1064 ALTENTRY(bzero_altentry) 1065 do_zero: 1066 #define L(s) .bzero/**/s 1067 xorl %eax, %eax 1068 1069 cmpq $0x50, %rsi /* 80 */ 1070 jge L(ck_align) 1071 1072 /* 1073 * Performance data shows many caller's are zeroing small buffers. So 1074 * for best perf for these sizes unrolled code is used. Store zeros 1075 * without worrying about alignment. 
1076 */ 1077 leaq L(setPxQx)(%rip), %r10 1078 addq %rsi, %rdi 1079 movslq (%r10,%rsi,4), %rcx 1080 leaq (%rcx,%r10,1), %r10 1081 jmpq *%r10 1082 1083 .p2align 4 1084 L(setPxQx): 1085 .int L(P0Q0)-L(setPxQx) /* 0 */ 1086 .int L(P1Q0)-L(setPxQx) 1087 .int L(P2Q0)-L(setPxQx) 1088 .int L(P3Q0)-L(setPxQx) 1089 .int L(P4Q0)-L(setPxQx) 1090 .int L(P5Q0)-L(setPxQx) 1091 .int L(P6Q0)-L(setPxQx) 1092 .int L(P7Q0)-L(setPxQx) 1093 1094 .int L(P0Q1)-L(setPxQx) /* 8 */ 1095 .int L(P1Q1)-L(setPxQx) 1096 .int L(P2Q1)-L(setPxQx) 1097 .int L(P3Q1)-L(setPxQx) 1098 .int L(P4Q1)-L(setPxQx) 1099 .int L(P5Q1)-L(setPxQx) 1100 .int L(P6Q1)-L(setPxQx) 1101 .int L(P7Q1)-L(setPxQx) 1102 1103 .int L(P0Q2)-L(setPxQx) /* 16 */ 1104 .int L(P1Q2)-L(setPxQx) 1105 .int L(P2Q2)-L(setPxQx) 1106 .int L(P3Q2)-L(setPxQx) 1107 .int L(P4Q2)-L(setPxQx) 1108 .int L(P5Q2)-L(setPxQx) 1109 .int L(P6Q2)-L(setPxQx) 1110 .int L(P7Q2)-L(setPxQx) 1111 1112 .int L(P0Q3)-L(setPxQx) /* 24 */ 1113 .int L(P1Q3)-L(setPxQx) 1114 .int L(P2Q3)-L(setPxQx) 1115 .int L(P3Q3)-L(setPxQx) 1116 .int L(P4Q3)-L(setPxQx) 1117 .int L(P5Q3)-L(setPxQx) 1118 .int L(P6Q3)-L(setPxQx) 1119 .int L(P7Q3)-L(setPxQx) 1120 1121 .int L(P0Q4)-L(setPxQx) /* 32 */ 1122 .int L(P1Q4)-L(setPxQx) 1123 .int L(P2Q4)-L(setPxQx) 1124 .int L(P3Q4)-L(setPxQx) 1125 .int L(P4Q4)-L(setPxQx) 1126 .int L(P5Q4)-L(setPxQx) 1127 .int L(P6Q4)-L(setPxQx) 1128 .int L(P7Q4)-L(setPxQx) 1129 1130 .int L(P0Q5)-L(setPxQx) /* 40 */ 1131 .int L(P1Q5)-L(setPxQx) 1132 .int L(P2Q5)-L(setPxQx) 1133 .int L(P3Q5)-L(setPxQx) 1134 .int L(P4Q5)-L(setPxQx) 1135 .int L(P5Q5)-L(setPxQx) 1136 .int L(P6Q5)-L(setPxQx) 1137 .int L(P7Q5)-L(setPxQx) 1138 1139 .int L(P0Q6)-L(setPxQx) /* 48 */ 1140 .int L(P1Q6)-L(setPxQx) 1141 .int L(P2Q6)-L(setPxQx) 1142 .int L(P3Q6)-L(setPxQx) 1143 .int L(P4Q6)-L(setPxQx) 1144 .int L(P5Q6)-L(setPxQx) 1145 .int L(P6Q6)-L(setPxQx) 1146 .int L(P7Q6)-L(setPxQx) 1147 1148 .int L(P0Q7)-L(setPxQx) /* 56 */ 1149 .int L(P1Q7)-L(setPxQx) 1150 .int L(P2Q7)-L(setPxQx) 1151 
.int L(P3Q7)-L(setPxQx) 1152 .int L(P4Q7)-L(setPxQx) 1153 .int L(P5Q7)-L(setPxQx) 1154 .int L(P6Q7)-L(setPxQx) 1155 .int L(P7Q7)-L(setPxQx) 1156 1157 .int L(P0Q8)-L(setPxQx) /* 64 */ 1158 .int L(P1Q8)-L(setPxQx) 1159 .int L(P2Q8)-L(setPxQx) 1160 .int L(P3Q8)-L(setPxQx) 1161 .int L(P4Q8)-L(setPxQx) 1162 .int L(P5Q8)-L(setPxQx) 1163 .int L(P6Q8)-L(setPxQx) 1164 .int L(P7Q8)-L(setPxQx) 1165 1166 .int L(P0Q9)-L(setPxQx) /* 72 */ 1167 .int L(P1Q9)-L(setPxQx) 1168 .int L(P2Q9)-L(setPxQx) 1169 .int L(P3Q9)-L(setPxQx) 1170 .int L(P4Q9)-L(setPxQx) 1171 .int L(P5Q9)-L(setPxQx) 1172 .int L(P6Q9)-L(setPxQx) 1173 .int L(P7Q9)-L(setPxQx) /* 79 */ 1174 1175 .p2align 4 1176 L(P0Q9): mov %rax, -0x48(%rdi) 1177 L(P0Q8): mov %rax, -0x40(%rdi) 1178 L(P0Q7): mov %rax, -0x38(%rdi) 1179 L(P0Q6): mov %rax, -0x30(%rdi) 1180 L(P0Q5): mov %rax, -0x28(%rdi) 1181 L(P0Q4): mov %rax, -0x20(%rdi) 1182 L(P0Q3): mov %rax, -0x18(%rdi) 1183 L(P0Q2): mov %rax, -0x10(%rdi) 1184 L(P0Q1): mov %rax, -0x8(%rdi) 1185 L(P0Q0): 1186 ret 1187 1188 .p2align 4 1189 L(P1Q9): mov %rax, -0x49(%rdi) 1190 L(P1Q8): mov %rax, -0x41(%rdi) 1191 L(P1Q7): mov %rax, -0x39(%rdi) 1192 L(P1Q6): mov %rax, -0x31(%rdi) 1193 L(P1Q5): mov %rax, -0x29(%rdi) 1194 L(P1Q4): mov %rax, -0x21(%rdi) 1195 L(P1Q3): mov %rax, -0x19(%rdi) 1196 L(P1Q2): mov %rax, -0x11(%rdi) 1197 L(P1Q1): mov %rax, -0x9(%rdi) 1198 L(P1Q0): mov %al, -0x1(%rdi) 1199 ret 1200 1201 .p2align 4 1202 L(P2Q9): mov %rax, -0x4a(%rdi) 1203 L(P2Q8): mov %rax, -0x42(%rdi) 1204 L(P2Q7): mov %rax, -0x3a(%rdi) 1205 L(P2Q6): mov %rax, -0x32(%rdi) 1206 L(P2Q5): mov %rax, -0x2a(%rdi) 1207 L(P2Q4): mov %rax, -0x22(%rdi) 1208 L(P2Q3): mov %rax, -0x1a(%rdi) 1209 L(P2Q2): mov %rax, -0x12(%rdi) 1210 L(P2Q1): mov %rax, -0xa(%rdi) 1211 L(P2Q0): mov %ax, -0x2(%rdi) 1212 ret 1213 1214 .p2align 4 1215 L(P3Q9): mov %rax, -0x4b(%rdi) 1216 L(P3Q8): mov %rax, -0x43(%rdi) 1217 L(P3Q7): mov %rax, -0x3b(%rdi) 1218 L(P3Q6): mov %rax, -0x33(%rdi) 1219 L(P3Q5): mov %rax, -0x2b(%rdi) 1220 L(P3Q4): 
mov %rax, -0x23(%rdi) 1221 L(P3Q3): mov %rax, -0x1b(%rdi) 1222 L(P3Q2): mov %rax, -0x13(%rdi) 1223 L(P3Q1): mov %rax, -0xb(%rdi) 1224 L(P3Q0): mov %ax, -0x3(%rdi) 1225 mov %al, -0x1(%rdi) 1226 ret 1227 1228 .p2align 4 1229 L(P4Q9): mov %rax, -0x4c(%rdi) 1230 L(P4Q8): mov %rax, -0x44(%rdi) 1231 L(P4Q7): mov %rax, -0x3c(%rdi) 1232 L(P4Q6): mov %rax, -0x34(%rdi) 1233 L(P4Q5): mov %rax, -0x2c(%rdi) 1234 L(P4Q4): mov %rax, -0x24(%rdi) 1235 L(P4Q3): mov %rax, -0x1c(%rdi) 1236 L(P4Q2): mov %rax, -0x14(%rdi) 1237 L(P4Q1): mov %rax, -0xc(%rdi) 1238 L(P4Q0): mov %eax, -0x4(%rdi) 1239 ret 1240 1241 .p2align 4 1242 L(P5Q9): mov %rax, -0x4d(%rdi) 1243 L(P5Q8): mov %rax, -0x45(%rdi) 1244 L(P5Q7): mov %rax, -0x3d(%rdi) 1245 L(P5Q6): mov %rax, -0x35(%rdi) 1246 L(P5Q5): mov %rax, -0x2d(%rdi) 1247 L(P5Q4): mov %rax, -0x25(%rdi) 1248 L(P5Q3): mov %rax, -0x1d(%rdi) 1249 L(P5Q2): mov %rax, -0x15(%rdi) 1250 L(P5Q1): mov %rax, -0xd(%rdi) 1251 L(P5Q0): mov %eax, -0x5(%rdi) 1252 mov %al, -0x1(%rdi) 1253 ret 1254 1255 .p2align 4 1256 L(P6Q9): mov %rax, -0x4e(%rdi) 1257 L(P6Q8): mov %rax, -0x46(%rdi) 1258 L(P6Q7): mov %rax, -0x3e(%rdi) 1259 L(P6Q6): mov %rax, -0x36(%rdi) 1260 L(P6Q5): mov %rax, -0x2e(%rdi) 1261 L(P6Q4): mov %rax, -0x26(%rdi) 1262 L(P6Q3): mov %rax, -0x1e(%rdi) 1263 L(P6Q2): mov %rax, -0x16(%rdi) 1264 L(P6Q1): mov %rax, -0xe(%rdi) 1265 L(P6Q0): mov %eax, -0x6(%rdi) 1266 mov %ax, -0x2(%rdi) 1267 ret 1268 1269 .p2align 4 1270 L(P7Q9): mov %rax, -0x4f(%rdi) 1271 L(P7Q8): mov %rax, -0x47(%rdi) 1272 L(P7Q7): mov %rax, -0x3f(%rdi) 1273 L(P7Q6): mov %rax, -0x37(%rdi) 1274 L(P7Q5): mov %rax, -0x2f(%rdi) 1275 L(P7Q4): mov %rax, -0x27(%rdi) 1276 L(P7Q3): mov %rax, -0x1f(%rdi) 1277 L(P7Q2): mov %rax, -0x17(%rdi) 1278 L(P7Q1): mov %rax, -0xf(%rdi) 1279 L(P7Q0): mov %eax, -0x7(%rdi) 1280 mov %ax, -0x3(%rdi) 1281 mov %al, -0x1(%rdi) 1282 ret 1283 1284 /* 1285 * Align to a 16-byte boundary. Avoids penalties from unaligned stores 1286 * as well as from stores spanning cachelines. 
Note that 16-byte alignment
	 * also pays off when the rep sstoq path is used.
	 *
	 * Register roles in the stages below, as established by the code:
	 *	%rdi = next destination byte
	 *	%rsi = bytes still to clear (unsigned)
	 *	%rax = fill pattern (zero, per the note on the sstoq below)
	 */
	.p2align 4
L(ck_align):
	test	$0xf, %rdi		/* already 16-byte aligned? */
	jz	L(aligned_now)
	test	$1, %rdi
	jz	2f
	mov	%al, (%rdi)		/* 1 byte -> 2-byte aligned */
	dec	%rsi
	lea	1(%rdi),%rdi
2:
	test	$2, %rdi
	jz	4f
	mov	%ax, (%rdi)		/* 2 bytes -> 4-byte aligned */
	sub	$2, %rsi
	lea	2(%rdi),%rdi
4:
	test	$4, %rdi
	jz	8f
	mov	%eax, (%rdi)		/* 4 bytes -> 8-byte aligned */
	sub	$4, %rsi
	lea	4(%rdi),%rdi
8:
	test	$8, %rdi
	jz	L(aligned_now)
	mov	%rax, (%rdi)		/* 8 bytes -> 16-byte aligned */
	sub	$8, %rsi
	lea	8(%rdi),%rdi

	/*
	 * For large sizes rep sstoq is fastest.
	 * Transition point determined experimentally as measured on
	 * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
	 *
	 * %rsi is a byte count, i.e. an unsigned quantity, so the
	 * unsigned condition codes (ja/jae) are used for both size tests.
	 */
L(aligned_now):
	cmp	$BZERO_USE_REP, %rsi
	ja	L(use_rep)

	/*
	 * zero 64-bytes per loop
	 *
	 * The count is pre-decremented and compared before the stores;
	 * neither the mov nor the lea instructions modify the flags, so
	 * the jae at the bottom still sees the result of the cmpq.
	 * NOTE(review): this assumes at least 64 bytes remain on entry;
	 * the caller's size check is outside this excerpt - confirm.
	 */
	.p2align 4
L(bzero_loop):
	leaq	-0x40(%rsi), %rsi
	cmpq	$0x40, %rsi
	movq	%rax, (%rdi)
	movq	%rax, 0x8(%rdi)
	movq	%rax, 0x10(%rdi)
	movq	%rax, 0x18(%rdi)
	movq	%rax, 0x20(%rdi)
	movq	%rax, 0x28(%rdi)
	movq	%rax, 0x30(%rdi)
	movq	%rax, 0x38(%rdi)
	leaq	0x40(%rdi), %rdi
	jae	L(bzero_loop)

	/*
	 * Clear any remaining bytes..
	 *
	 * Dispatch through the L(setPxQx) table: it is indexed by the
	 * remaining byte count and holds 32-bit offsets relative to the
	 * table itself.  %rdi is first advanced to the end of the buffer.
	 */
9:
	leaq	L(setPxQx)(%rip), %r10
	addq	%rsi, %rdi
	movslq	(%r10,%rsi,4), %rcx
	leaq	(%rcx,%r10,1), %r10
	jmpq	*%r10

	/*
	 * Use rep sstoq. Clear any remainder via unrolled code
	 */
	.p2align 4
L(use_rep):
	movq	%rsi, %rcx		/* get size in bytes */
	shrq	$3, %rcx		/* count of 8-byte words to zero */
	rep
	  sstoq				/* %rcx = words to clear (%rax=0) */
	andq	$7, %rsi		/* remaining bytes */
	jnz	9b
	ret
#undef	L
	SET_SIZE(bzero_altentry)
	SET_SIZE(bzero)

#elif defined(__i386)

#define	ARG_ADDR	4
#define	ARG_COUNT	8

	/*
	 * bzero(addr, count), i386 flavour: both arguments are read
	 * from the stack.  Clears count/4 longwords with rep sstol and
	 * the remaining count%4 bytes with rep sstob.
	 */
	ENTRY(bzero)
#ifdef DEBUG
	movl	postbootkernelbase, %eax
	cmpl	%eax, ARG_ADDR(%esp)
	jnb	0f
	pushl	%ebp
	movl	%esp, %ebp
	pushl	$.bzero_panic_msg
	call	panic
0:
#endif
do_zero:
	movl	%edi, %edx		/* preserve %edi across the stores */
	movl	ARG_COUNT(%esp), %ecx
	movl	ARG_ADDR(%esp), %edi
	shrl	$2, %ecx		/* count of 4-byte words to zero */
	xorl	%eax, %eax
	rep
	  sstol
	movl	ARG_COUNT(%esp), %ecx
	andl	$3, %ecx		/* remaining bytes */
	rep
	  sstob
	movl	%edx, %edi		/* restore %edi */
	ret
	SET_SIZE(bzero)

#undef	ARG_ADDR
#undef	ARG_COUNT

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there's two extremely similar routines - xcopyin_nta() and
 * xcopyout_nta() which return the errno that we've faithfully computed.
 * This allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 */

/*
 * Copy user data to kernel space.
*/

#if defined(__lint)

/* ARGSUSED */
int
copyin(const void *uaddr, void *kaddr, size_t count)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int copyin(const void *uaddr, void *kaddr, size_t count)
	 *
	 * In:	%rdi = uaddr, %rsi = kaddr, %rdx = count
	 * Out:	%eax = -1 when the copy cannot be done and the thread has
	 *	no copyops vector; otherwise whatever do_copy_fault or the
	 *	copyop returns.
	 */
	ENTRY(copyin)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$24, %rsp

	/*
	 * Keep a copy of the three arguments on the stack so that they
	 * can be handed to a copyop if the copy traps.
	 */
	movq	%rdx, 0x10(%rsp)
	movq	%rsi, 0x8(%rsp)
	movq	%rdi, (%rsp)

	movq	kernelbase(%rip), %rax
#ifdef DEBUG
	cmpq	%rax, %rsi		/* kaddr must not be below kernelbase */
	jnb	1f
	leaq	.copyin_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif
	movq	%gs:CPU_THREAD, %r9
	/*
	 * do_copy_fault takes the lofault handler as its 4th argument
	 */
	leaq	_copyin_err(%rip), %rcx
	cmpq	%rax, %rdi		/* uaddr below kernelbase? */
	jb	do_copy_fault
	jmp	3f

_copyin_err:
	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
3:
	movq	T_COPYOPS(%r9), %rax
	testq	%rax, %rax		/* any copyops installed? */
	jz	2f
	/*
	 * hand the saved arguments to the copyop
	 */
	movq	(%rsp), %rdi
	movq	0x8(%rsp), %rsi
	movq	0x10(%rsp), %rdx
	leave
	jmp	*CP_COPYIN(%rax)

2:	movl	$-1, %eax
	leave
	ret
	SET_SIZE(copyin)

#elif defined(__i386)

#define	ARG_UADDR	4
#define	ARG_KADDR	8

	/*
	 * i386 copyin: arguments stay on the stack; the lofault handler
	 * is passed to do_copy_fault in %eax and the thread pointer is
	 * kept in %edx.
	 */
	ENTRY(copyin)
	movl	kernelbase, %ecx
#ifdef DEBUG
	cmpl	%ecx, ARG_KADDR(%esp)
	jnb	1f
	pushl	%ebp
	movl	%esp, %ebp
	pushl	$.copyin_panic_msg
	call	panic
1:
#endif
	lea	_copyin_err, %eax

	movl	%gs:CPU_THREAD, %edx
	cmpl	%ecx, ARG_UADDR(%esp)	/* uaddr below kernelbase? */
	jb	do_copy_fault
	jmp	3f

_copyin_err:
	popl	%ecx
	movl	%ecx, T_LOFAULT(%edx)	/* restore original lofault */
	popl	%edi
	popl	%esi
	popl	%ebp
3:
	movl	T_COPYOPS(%edx), %eax
	testl	%eax, %eax		/* any copyops installed? */
	jz	2f
	jmp	*CP_COPYIN(%eax)

2:	movl	$-1, %eax
	ret
	SET_SIZE(copyin)

#undef	ARG_UADDR
#undef	ARG_KADDR

#endif	/* __i386 */
#endif	/* __lint */

#if defined(__lint)

/* ARGSUSED */
int
xcopyin_nta(const void *uaddr, void *kaddr, size_t count, int copy_cached)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int xcopyin_nta(const void *uaddr, void *kaddr, size_t count,
	 *     int copy_cached)
	 *
	 * In:	%rdi = uaddr, %rsi = kaddr, %rdx = count, %rcx = copy_cached
	 * Out:	%eax = EFAULT when uaddr is not below kernelbase and no
	 *	copyops vector exists; otherwise the value left in %rax by
	 *	the copy path or returned by the copyop.
	 */
	ENTRY(xcopyin_nta)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$24, %rsp

	/*
	 * Keep a copy of the arguments for a possible copyop rerun.
	 * %rcx is consumed by this routine, so it is not saved.
	 */
	movq	%rdx, 0x10(%rsp)
	movq	%rsi, 0x8(%rsp)
	movq	%rdi, (%rsp)

	movq	kernelbase(%rip), %rax
#ifdef DEBUG
	cmpq	%rax, %rsi		/* kaddr must not be below kernelbase */
	jnb	1f
	leaq	.xcopyin_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif
	movq	%gs:CPU_THREAD, %r9
	cmpq	%rax, %rdi		/* uaddr below kernelbase? */
	jae	4f
	testq	%rcx, %rcx		/* copy_cached hint set? */
	/*
	 * do_copy_fault takes the lofault handler as its 4th argument;
	 * leaq leaves the flags from the test above untouched.
	 */
	leaq	_xcopyin_err(%rip), %rcx
	jnz	do_copy_fault		/* hint set: use regular access */
	/*
	 * Non-temporal access is only used for counts of at least
	 * XCOPY_MIN_SIZE bytes
	 */
	cmpq	$XCOPY_MIN_SIZE, %rdx
	jb	do_copy_fault

	/*
	 * ... and only when src and dst are NTA_ALIGN_SIZE aligned and
	 * the count is COUNT_ALIGN_SIZE aligned.
	 */
	movq	%rsi, %r10
	orq	%rdi, %r10
	andq	$NTA_ALIGN_MASK, %r10
	orq	%rdx, %r10
	andq	$COUNT_ALIGN_MASK, %r10
	jnz	do_copy_fault
	leaq	_xcopyin_nta_err(%rip), %rcx	/* leaves rflags alone */
	jmp	do_copy_fault_nta	/* use non-temporal access */

4:
	movl	$EFAULT, %eax
	jmp	3f

	/*
	 * A fault during do_copy_fault or do_copy_fault_nta is
	 * indicated through an errno value in %rax and we iret from the
	 * trap handler to here.
	 */
_xcopyin_err:
	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
_xcopyin_nta_err:
	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
3:
	movq	T_COPYOPS(%r9), %r8	/* %r8, so the errno in %rax survives */
	testq	%r8, %r8
	jz	2f

	/*
	 * hand the saved arguments to the copyop
	 */
	movq	(%rsp), %rdi
	movq	0x8(%rsp), %rsi
	movq	0x10(%rsp), %rdx
	leave
	jmp	*CP_XCOPYIN(%r8)

2:	leave
	ret
	SET_SIZE(xcopyin_nta)

#elif defined(__i386)

#define	ARG_UADDR	4
#define	ARG_KADDR	8
#define	ARG_COUNT	12
#define	ARG_CACHED	16

	.globl	use_sse_copy

	/*
	 * i386 xcopyin_nta: arguments stay on the stack; the lofault
	 * handler is passed in %eax and the thread pointer in %edx.
	 */
	ENTRY(xcopyin_nta)
	movl	kernelbase, %ecx
	lea	_xcopyin_err, %eax
	movl	%gs:CPU_THREAD, %edx
	cmpl	%ecx, ARG_UADDR(%esp)	/* uaddr below kernelbase? */
	jae	4f

	cmpl	$0, use_sse_copy	/* no sse support */
	jz	do_copy_fault

	cmpl	$0, ARG_CACHED(%esp)	/* copy_cached hint set? */
	jnz	do_copy_fault

	/*
	 * Non-temporal access needs at least XCOPY_MIN_SIZE bytes
	 */
	cmpl	$XCOPY_MIN_SIZE, ARG_COUNT(%esp)
	jb	do_copy_fault

	/*
	 * ... with src and dst NTA_ALIGN_SIZE aligned and the count
	 * COUNT_ALIGN_SIZE aligned.
	 */
	movl	ARG_KADDR(%esp), %ecx
	orl	ARG_UADDR(%esp), %ecx
	andl	$NTA_ALIGN_MASK, %ecx
	orl	ARG_COUNT(%esp), %ecx
	andl	$COUNT_ALIGN_MASK, %ecx
	jnz	do_copy_fault

	jmp	do_copy_fault_nta	/* use non-temporal access */

4:
	movl	$EFAULT, %eax
	jmp	3f

	/*
	 * A fault during do_copy_fault or do_copy_fault_nta is
	 * indicated through an errno value in %eax and we iret from the
	 * trap handler to here.
	 */
_xcopyin_err:
	popl	%ecx
	movl	%ecx, T_LOFAULT(%edx)	/* restore original lofault */
	popl	%edi
	popl	%esi
	popl	%ebp
3:
	/* test in memory first: %eax still carries the return value */
	cmpl	$0, T_COPYOPS(%edx)
	jz	2f
	movl	T_COPYOPS(%edx), %eax
	jmp	*CP_XCOPYIN(%eax)

2:	rep; 	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(xcopyin_nta)

#undef	ARG_UADDR
#undef	ARG_KADDR
#undef	ARG_COUNT
#undef	ARG_CACHED

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Copy kernel data to user space.
*/

#if defined(__lint)

/* ARGSUSED */
int
copyout(const void *kaddr, void *uaddr, size_t count)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int copyout(const void *kaddr, void *uaddr, size_t count)
	 *
	 * In:	%rdi = kaddr, %rsi = uaddr, %rdx = count
	 * Out:	%eax = -1 when the copy cannot be done and the thread has
	 *	no copyops vector; otherwise whatever do_copy_fault or the
	 *	copyop returns.
	 */
	ENTRY(copyout)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$24, %rsp

	/*
	 * save args in case we trap and need to rerun as a copyop
	 */
	movq	%rdi, (%rsp)
	movq	%rsi, 0x8(%rsp)
	movq	%rdx, 0x10(%rsp)

	movq	kernelbase(%rip), %rax
#ifdef DEBUG
	cmpq	%rax, %rdi		/* %rdi = kaddr */
	jnb	1f
	leaq	.copyout_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif
	/*
	 * pass lofault value as 4th argument to do_copy_fault
	 */
	leaq	_copyout_err(%rip), %rcx

	movq	%gs:CPU_THREAD, %r9
	cmpq	%rax, %rsi		/* test uaddr < kernelbase */
	jb	do_copy_fault
	jmp	3f

_copyout_err:
	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
3:
	movq	T_COPYOPS(%r9), %rax
	cmpq	$0, %rax
	jz	2f

	/*
	 * reload args for the copyop
	 */
	movq	(%rsp), %rdi
	movq	0x8(%rsp), %rsi
	movq	0x10(%rsp), %rdx
	leave
	jmp	*CP_COPYOUT(%rax)

2:	movl	$-1, %eax
	leave
	ret
	SET_SIZE(copyout)

#elif defined(__i386)

#define	ARG_KADDR	4
#define	ARG_UADDR	8

	/*
	 * i386 copyout: arguments stay on the stack; the lofault handler
	 * is passed to do_copy_fault in %eax, the thread pointer in %edx.
	 */
	ENTRY(copyout)
	movl	kernelbase, %ecx
#ifdef DEBUG
	cmpl	%ecx, ARG_KADDR(%esp)
	jnb	1f
	pushl	%ebp
	movl	%esp, %ebp
	pushl	$.copyout_panic_msg
	call	panic
1:
#endif
	lea	_copyout_err, %eax
	movl	%gs:CPU_THREAD, %edx
	cmpl	%ecx, ARG_UADDR(%esp)	/* test uaddr < kernelbase */
	jb	do_copy_fault
	jmp	3f

_copyout_err:
	popl	%ecx
	popl	%edi
	movl	%ecx, T_LOFAULT(%edx)	/* restore original lofault */
	popl	%esi
	popl	%ebp
3:
	movl	T_COPYOPS(%edx), %eax
	cmpl	$0, %eax
	jz	2f
	jmp	*CP_COPYOUT(%eax)

2:	movl	$-1, %eax
	ret
	SET_SIZE(copyout)

#undef	ARG_UADDR
#undef	ARG_KADDR

#endif	/* __i386 */
#endif	/* __lint */

#if defined(__lint)

/* ARGSUSED */
int
xcopyout_nta(const void *kaddr, void *uaddr, size_t count, int copy_cached)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int xcopyout_nta(const void *kaddr, void *uaddr, size_t count,
	 *     int copy_cached)
	 *
	 * In:	%rdi = kaddr, %rsi = uaddr, %rdx = count, %rcx = copy_cached
	 * Out:	%eax = EFAULT when uaddr is not below kernelbase and no
	 *	copyops vector exists; otherwise the value left in %rax by
	 *	the copy path or returned by the copyop.
	 */
	ENTRY(xcopyout_nta)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$24, %rsp

	/*
	 * save args in case we trap and need to rerun as a copyop
	 */
	movq	%rdi, (%rsp)
	movq	%rsi, 0x8(%rsp)
	movq	%rdx, 0x10(%rsp)

	movq	kernelbase(%rip), %rax
#ifdef DEBUG
	cmpq	%rax, %rdi		/* %rdi = kaddr */
	jnb	1f
	leaq	.xcopyout_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif
	movq	%gs:CPU_THREAD, %r9
	cmpq	%rax, %rsi		/* test uaddr < kernelbase */
	jae	4f

	cmpq	$0, %rcx		/* No non-temporal access? */
	/*
	 * pass lofault value as 4th argument to do_copy_fault
	 * (leaq does not modify rflags, so the jnz below still tests
	 * the copy_cached comparison)
	 */
	leaq	_xcopyout_err(%rip), %rcx
	jnz	do_copy_fault
	/*
	 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
	 */
	cmpq	$XCOPY_MIN_SIZE, %rdx
	jb	do_copy_fault

	/*
	 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
	 * count is COUNT_ALIGN_SIZE aligned.
	 */
	movq	%rdi, %r10
	orq	%rsi, %r10
	andq	$NTA_ALIGN_MASK, %r10
	orq	%rdx, %r10
	andq	$COUNT_ALIGN_MASK, %r10
	jnz	do_copy_fault
	leaq	_xcopyout_nta_err(%rip), %rcx
	jmp	do_copy_fault_nta

4:
	movl	$EFAULT, %eax
	jmp	3f

	/*
	 * A fault during do_copy_fault or do_copy_fault_nta is
	 * indicated through an errno value in %rax and we iret from the
	 * trap handler to here.
	 */
_xcopyout_err:
	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
_xcopyout_nta_err:
	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
3:
	movq	T_COPYOPS(%r9), %r8	/* %r8 keeps the errno in %rax intact */
	cmpq	$0, %r8
	jz	2f

	/*
	 * reload args for the copyop
	 */
	movq	(%rsp), %rdi
	movq	0x8(%rsp), %rsi
	movq	0x10(%rsp), %rdx
	leave
	jmp	*CP_XCOPYOUT(%r8)

2:	leave
	ret
	SET_SIZE(xcopyout_nta)

#elif defined(__i386)

#define	ARG_KADDR	4
#define	ARG_UADDR	8
#define	ARG_COUNT	12
#define	ARG_CACHED	16

	/*
	 * i386 xcopyout_nta: arguments stay on the stack; the lofault
	 * handler is passed in %eax and the thread pointer in %edx.
	 */
	ENTRY(xcopyout_nta)
	movl	kernelbase, %ecx
	lea	_xcopyout_err, %eax
	movl	%gs:CPU_THREAD, %edx
	cmpl	%ecx, ARG_UADDR(%esp)	/* test uaddr < kernelbase */
	jae	4f

	cmpl	$0, use_sse_copy	/* no sse support */
	jz	do_copy_fault

	cmpl	$0, ARG_CACHED(%esp)	/* copy_cached hint set? */
	jnz	do_copy_fault

	/*
	 * Make sure cnt is >= XCOPY_MIN_SIZE bytes.
	 * The count is a stack argument here; %edx holds the thread
	 * pointer loaded above and must not be used for this test.
	 */
	cmpl	$XCOPY_MIN_SIZE, ARG_COUNT(%esp)
	jb	do_copy_fault

	/*
	 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
	 * count is COUNT_ALIGN_SIZE aligned.
	 */
	movl	ARG_UADDR(%esp), %ecx
	orl	ARG_KADDR(%esp), %ecx
	andl	$NTA_ALIGN_MASK, %ecx
	orl	ARG_COUNT(%esp), %ecx
	andl	$COUNT_ALIGN_MASK, %ecx
	jnz	do_copy_fault
	jmp	do_copy_fault_nta	/* use non-temporal access */

4:
	movl	$EFAULT, %eax
	jmp	3f

	/*
	 * A fault during do_copy_fault or do_copy_fault_nta is
	 * indicated through an errno value in %eax and we iret from the
	 * trap handler to here.
	 */
_xcopyout_err:
	/* restore the original lofault */
	popl	%ecx
	popl	%edi
	movl	%ecx, T_LOFAULT(%edx)	/* original lofault */
	popl	%esi
	popl	%ebp
3:
	/* test in memory first: %eax still carries the return value */
	cmpl	$0, T_COPYOPS(%edx)
	jz	2f
	movl	T_COPYOPS(%edx), %eax
	jmp	*CP_XCOPYOUT(%eax)

2:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(xcopyout_nta)

#undef	ARG_UADDR
#undef	ARG_KADDR
#undef	ARG_COUNT
#undef	ARG_CACHED

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Copy a null terminated string from one point to another in
 * the kernel address space.
 */

#if defined(__lint)

/* ARGSUSED */
int
copystr(const char *from, char *to, size_t maxlength, size_t *lencopied)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int copystr(const char *from, char *to, size_t maxlength,
	 *     size_t *lencopied)
	 *
	 * In:	%rdi = from, %rsi = to, %rdx = maxlength, %rcx = lencopied
	 * Out:	%eax = 0 when a null byte was copied, ENAMETOOLONG when
	 *	maxlength bytes were consumed without finding one (or
	 *	maxlength == 0).  If lencopied is non-NULL it receives the
	 *	number of bytes copied, terminator included.
	 *
	 * do_copystr is the shared entry used by copyinstr/copyoutstr;
	 * it expects the lofault handler to install in %r8.
	 */
	ENTRY(copystr)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	movq	kernelbase(%rip), %rax
	cmpq	%rax, %rdi		/* %rdi = from */
	jb	0f
	cmpq	%rax, %rsi		/* %rsi = to */
	jnb	1f
0:	leaq	.copystr_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif
	movq	%gs:CPU_THREAD, %r9
	movq	T_LOFAULT(%r9), %r8	/* pass current lofault value as */
					/* 5th argument to do_copystr */
do_copystr:
	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread addr */
	movq	T_LOFAULT(%r9), %r11	/* save the current lofault */
	movq	%r8, T_LOFAULT(%r9)	/* new lofault */

	movq	%rdx, %r8		/* save maxlength */

	cmpq	$0, %rdx		/* %rdx = maxlength */
	je	copystr_enametoolong	/* maxlength == 0 */

copystr_loop:
	decq	%r8			/* %r8 = bytes still allowed */
	movb	(%rdi), %al
	incq	%rdi
	movb	%al, (%rsi)
	incq	%rsi
	cmpb	$0, %al
	je	copystr_null		/* null char */
	cmpq	$0, %r8
	jne	copystr_loop

copystr_enametoolong:
	movl	$ENAMETOOLONG, %eax
	jmp	copystr_out

copystr_null:
	xorl	%eax, %eax		/* no error */

copystr_out:
	cmpq	$0, %rcx		/* want length? */
	je	copystr_done		/* no */
	subq	%r8, %rdx		/* compute length and store it */
	movq	%rdx, (%rcx)

copystr_done:
	movq	%r11, T_LOFAULT(%r9)	/* restore the original lofault */
	leave
	ret
	SET_SIZE(copystr)

#elif defined(__i386)

#define	ARG_FROM	8
#define	ARG_TO		12
#define	ARG_MAXLEN	16
#define	ARG_LENCOPIED	20

	/*
	 * i386 copystr.  The ARG_* offsets are relative to %ebp once the
	 * frame is set up at do_copystr; the DEBUG check pushes %ebp
	 * first so that the same offsets also work relative to %esp.
	 * do_copystr expects the lofault handler to install in %eax.
	 */
	ENTRY(copystr)
#ifdef DEBUG
	pushl	%ebp
	movl	%esp, %ebp
	movl	kernelbase, %eax
	cmpl	%eax, ARG_FROM(%esp)
	jb	0f
	cmpl	%eax, ARG_TO(%esp)
	jnb	1f
0:	pushl	$.copystr_panic_msg
	call	panic
1:	popl	%ebp
#endif
	/* get the current lofault address */
	movl	%gs:CPU_THREAD, %eax
	movl	T_LOFAULT(%eax), %eax
do_copystr:
	pushl	%ebp			/* setup stack frame */
	movl	%esp, %ebp
	pushl	%ebx			/* save registers */
	pushl	%edi

	movl	%gs:CPU_THREAD, %ebx
	movl	T_LOFAULT(%ebx), %edi
	pushl	%edi			/* save the current lofault */
	movl	%eax, T_LOFAULT(%ebx)	/* new lofault */

	movl	ARG_MAXLEN(%ebp), %ecx
	cmpl	$0, %ecx
	je	copystr_enametoolong	/* maxlength == 0 */

	movl	ARG_FROM(%ebp), %ebx	/* source address */
	movl	ARG_TO(%ebp), %edx	/* destination address */

copystr_loop:
	decl	%ecx			/* %ecx = bytes still allowed */
	movb	(%ebx), %al
	incl	%ebx
	movb	%al, (%edx)
	incl	%edx
	cmpb	$0, %al
	je	copystr_null		/* null char */
	cmpl	$0, %ecx
	jne	copystr_loop

copystr_enametoolong:
	movl	$ENAMETOOLONG, %eax
	jmp	copystr_out

copystr_null:
	xorl	%eax, %eax		/* no error */

copystr_out:
	cmpl	$0, ARG_LENCOPIED(%ebp)	/* want length? */
	je	copystr_done		/* no */
	movl	ARG_MAXLEN(%ebp), %edx
	subl	%ecx, %edx		/* compute length and store it */
	movl	ARG_LENCOPIED(%ebp), %ecx
	movl	%edx, (%ecx)

copystr_done:
	popl	%edi
	movl	%gs:CPU_THREAD, %ebx
	movl	%edi, T_LOFAULT(%ebx)	/* restore the original lofault */

	popl	%edi
	popl	%ebx
	popl	%ebp
	ret
	SET_SIZE(copystr)

#undef	ARG_FROM
#undef	ARG_TO
#undef	ARG_MAXLEN
#undef	ARG_LENCOPIED

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Copy a null terminated string from the user address space into
 * the kernel address space.
 */

#if defined(__lint)

/* ARGSUSED */
int
copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
    size_t *lencopied)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int copyinstr(const char *uaddr, char *kaddr, size_t maxlength,
	 *     size_t *lencopied)
	 *
	 * In:	%rdi = uaddr, %rsi = kaddr, %rdx = maxlength,
	 *	%rcx = lencopied
	 * Out:	%eax = EFAULT when the copy cannot be done and the thread
	 *	has no copyops vector; otherwise whatever do_copystr or
	 *	the copyop returns.
	 */
	ENTRY(copyinstr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$32, %rsp

	/*
	 * save args in case we trap and need to rerun as a copyop
	 */
	movq	%rdi, (%rsp)
	movq	%rsi, 0x8(%rsp)
	movq	%rdx, 0x10(%rsp)
	movq	%rcx, 0x18(%rsp)

	movq	kernelbase(%rip), %rax
#ifdef DEBUG
	cmpq	%rax, %rsi		/* %rsi = kaddr */
	jnb	1f
	leaq	.copyinstr_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif
	/*
	 * pass lofault value as 5th argument to do_copystr
	 */
	leaq	_copyinstr_error(%rip), %r8

	cmpq	%rax, %rdi		/* test uaddr < kernelbase */
	jb	do_copystr
	movq	%gs:CPU_THREAD, %r9
	jmp	3f

_copyinstr_error:
	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
3:
	movq	T_COPYOPS(%r9), %rax
	cmpq	$0, %rax
	jz	2f

	/*
	 * reload args for the copyop
	 */
	movq	(%rsp), %rdi
	movq	0x8(%rsp), %rsi
	movq	0x10(%rsp), %rdx
	movq	0x18(%rsp), %rcx
	leave
	jmp	*CP_COPYINSTR(%rax)

2:	movl	$EFAULT, %eax		/* return EFAULT */
	leave
	ret
	SET_SIZE(copyinstr)

#elif defined(__i386)

#define	ARG_UADDR	4
#define	ARG_KADDR	8

	/*
	 * i386 copyinstr: arguments stay on the stack; the lofault
	 * handler is passed to do_copystr in %eax.
	 */
	ENTRY(copyinstr)
	movl	kernelbase, %ecx
#ifdef DEBUG
	cmpl	%ecx, ARG_KADDR(%esp)
	jnb	1f
	pushl	%ebp
	movl	%esp, %ebp
	pushl	$.copyinstr_panic_msg
	call	panic
1:
#endif
	lea	_copyinstr_error, %eax
	cmpl	%ecx, ARG_UADDR(%esp)	/* test uaddr < kernelbase */
	jb	do_copystr
	movl	%gs:CPU_THREAD, %edx
	jmp	3f

_copyinstr_error:
	/* unwind what do_copystr pushed: lofault, %edi, %ebx, %ebp */
	popl	%edi
	movl	%gs:CPU_THREAD, %edx
	movl	%edi, T_LOFAULT(%edx)	/* original lofault */

	popl	%edi
	popl	%ebx
	popl	%ebp
3:
	movl	T_COPYOPS(%edx), %eax
	cmpl	$0, %eax
	jz	2f
	jmp	*CP_COPYINSTR(%eax)

2:	movl	$EFAULT, %eax		/* return EFAULT */
	ret
	SET_SIZE(copyinstr)

#undef	ARG_UADDR
#undef	ARG_KADDR

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Copy a null terminated string from the kernel
 * address space to the user address space.
*/

#if defined(__lint)

/* ARGSUSED */
int
copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
    size_t *lencopied)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int copyoutstr(const char *kaddr, char *uaddr, size_t maxlength,
	 *     size_t *lencopied)
	 *
	 * In:	%rdi = kaddr, %rsi = uaddr, %rdx = maxlength,
	 *	%rcx = lencopied
	 * Out:	%eax = EFAULT when the copy cannot be done and the thread
	 *	has no copyops vector; otherwise whatever do_copystr or
	 *	the copyop returns.
	 */
	ENTRY(copyoutstr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$32, %rsp

	/*
	 * Keep a copy of all four arguments for a possible copyop rerun
	 */
	movq	%rcx, 0x18(%rsp)
	movq	%rdx, 0x10(%rsp)
	movq	%rsi, 0x8(%rsp)
	movq	%rdi, (%rsp)

	movq	kernelbase(%rip), %rax
#ifdef DEBUG
	cmpq	%rax, %rdi		/* kaddr must not be below kernelbase */
	jnb	1f
	leaq	.copyoutstr_panic_msg(%rip), %rdi
	jmp	call_panic		/* setup stack and call panic */
1:
#endif
	/*
	 * do_copystr takes the lofault handler as its 5th argument
	 */
	leaq	_copyoutstr_error(%rip), %r8

	cmpq	%rax, %rsi		/* uaddr below kernelbase? */
	jb	do_copystr
	movq	%gs:CPU_THREAD, %r9
	jmp	3f

_copyoutstr_error:
	movq	%r11, T_LOFAULT(%r9)	/* restore the original lofault */
3:
	movq	T_COPYOPS(%r9), %rax
	testq	%rax, %rax		/* any copyops installed? */
	jz	2f

	/*
	 * hand the saved arguments to the copyop
	 */
	movq	(%rsp), %rdi
	movq	0x8(%rsp), %rsi
	movq	0x10(%rsp), %rdx
	movq	0x18(%rsp), %rcx
	leave
	jmp	*CP_COPYOUTSTR(%rax)

2:	movl	$EFAULT, %eax		/* return EFAULT */
	leave
	ret
	SET_SIZE(copyoutstr)

#elif defined(__i386)

#define	ARG_KADDR	4
#define	ARG_UADDR	8

	/*
	 * i386 copyoutstr: arguments stay on the stack; the lofault
	 * handler is passed to do_copystr in %eax.
	 */
	ENTRY(copyoutstr)
	movl	kernelbase, %ecx
#ifdef DEBUG
	cmpl	%ecx, ARG_KADDR(%esp)
	jnb	1f
	pushl	%ebp
	movl	%esp, %ebp
	pushl	$.copyoutstr_panic_msg
	call	panic
1:
#endif
	lea	_copyoutstr_error, %eax
	cmpl	%ecx, ARG_UADDR(%esp)	/* uaddr below kernelbase? */
	jb	do_copystr
	movl	%gs:CPU_THREAD, %edx
	jmp	3f

_copyoutstr_error:
	popl	%edi
	movl	%gs:CPU_THREAD, %edx
	movl	%edi, T_LOFAULT(%edx)	/* restore the original lofault */

	popl	%edi
	popl	%ebx
	popl	%ebp
3:
	movl	T_COPYOPS(%edx), %eax
	testl	%eax, %eax		/* any copyops installed? */
	jz	2f
	jmp	*CP_COPYOUTSTR(%eax)

2:	movl	$EFAULT, %eax		/* return EFAULT */
	ret
	SET_SIZE(copyoutstr)

#undef	ARG_KADDR
#undef	ARG_UADDR

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Since all of the fuword() variants are so similar, we have a macro to spit
 * them out.  This allows us to create DTrace-unobservable functions easily.
 */

#if defined(__lint)

#if defined(__amd64)

/* ARGSUSED */
int
fuword64(const void *addr, uint64_t *dst)
{
	return (0);
}

#endif

/* ARGSUSED */
int
fuword32(const void *addr, uint32_t *dst)
{
	return (0);
}

/* ARGSUSED */
int
fuword16(const void *addr, uint16_t *dst)
{
	return (0);
}

/* ARGSUSED */
int
fuword8(const void *addr, uint8_t *dst)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

/*
 * FUWORD(NAME, INSTR, REG, COPYOP) emits a function NAME(addr, dst)
 * that loads one datum from addr (%rdi) with INSTR into REG under a
 * private lofault handler and stores it to dst (%rsi).  It returns 0
 * on success; if addr is not below kernelbase, or the load faults, it
 * tail-calls the thread's COPYOP entry when copyops are installed and
 * returns -1 otherwise.
 *
 * (The arguments are not saved and reloaded here because their
 * values are not altered in the copy path)
 */

#define	FUWORD(NAME, INSTR, REG, COPYOP)	\
	ENTRY(NAME)				\
	movq	%gs:CPU_THREAD, %r9;		\
	cmpq	kernelbase(%rip), %rdi;		\
	jae	1f;				\
	leaq	_flt_/**/NAME, %rdx;		\
	movq	%rdx, T_LOFAULT(%r9);		\
	INSTR	(%rdi), REG;			\
	movq	$0, T_LOFAULT(%r9);		\
	INSTR	REG, (%rsi);			\
	xorl	%eax, %eax;			\
	ret;					\
_flt_/**/NAME:					\
	movq	$0, T_LOFAULT(%r9);		\
1:						\
	movq	T_COPYOPS(%r9), %rax;		\
	testq	%rax, %rax;			\
	jz	2f;				\
	jmp	*COPYOP(%rax);			\
2:						\
	movl	$-1, %eax;			\
	ret;					\
	SET_SIZE(NAME)

	FUWORD(fuword64, movq, %rax, CP_FUWORD64)
	FUWORD(fuword32, movl, %eax, CP_FUWORD32)
	FUWORD(fuword16, movw, %ax, CP_FUWORD16)
	FUWORD(fuword8, movb, %al, CP_FUWORD8)

#elif defined(__i386)

/*
 * i386 flavour of FUWORD: addr is at 4(%esp) and dst at 8(%esp); the
 * thread pointer is kept in %ecx.
 */

#define	FUWORD(NAME, INSTR, REG, COPYOP)	\
	ENTRY(NAME)				\
	movl	%gs:CPU_THREAD, %ecx;		\
	movl	kernelbase, %eax;		\
	cmpl	%eax, 4(%esp);			\
	jae	1f;				\
	lea	_flt_/**/NAME, %edx;		\
	movl	%edx, T_LOFAULT(%ecx);		\
	movl	8(%esp), %edx;			\
	movl	4(%esp), %eax;			\
	INSTR	(%eax), REG;			\
	movl	$0, T_LOFAULT(%ecx);		\
	INSTR	REG, (%edx);			\
	xorl	%eax, %eax;			\
	ret;					\
_flt_/**/NAME:					\
	movl	$0, T_LOFAULT(%ecx);		\
1:						\
	movl	T_COPYOPS(%ecx), %eax;		\
	testl	%eax, %eax;			\
	jz	2f;				\
	jmp	*COPYOP(%eax);			\
2:						\
	movl	$-1, %eax;			\
	ret;					\
	SET_SIZE(NAME)

	FUWORD(fuword32, movl, %eax, CP_FUWORD32)
	FUWORD(fuword16, movw, %ax, CP_FUWORD16)
	FUWORD(fuword8, movb, %al, CP_FUWORD8)

#endif	/* __i386 */

#undef	FUWORD

#endif	/* __lint */

/*
 * Set user word.
 */

#if defined(__lint)

#if defined(__amd64)

/* ARGSUSED */
int
suword64(void *addr, uint64_t value)
{
	return (0);
}

#endif

/* ARGSUSED */
int
suword32(void *addr, uint32_t value)
{
	return (0);
}

/* ARGSUSED */
int
suword16(void *addr, uint16_t value)
{
	return (0);
}

/* ARGSUSED */
int
suword8(void *addr, uint8_t value)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

/*
 * SUWORD(NAME, INSTR, REG, COPYOP) emits a function NAME(addr, value)
 * that stores REG (the value argument) to addr (%rdi) with INSTR under
 * a private lofault handler.  It returns 0 on success; if addr is not
 * below kernelbase, or the store faults, it tail-calls the thread's
 * COPYOP entry when copyops are installed and returns -1 otherwise.
 *
 * (The arguments are not saved and reloaded here because their
 * values are not altered in the copy path)
 */

#define	SUWORD(NAME, INSTR, REG, COPYOP)	\
	ENTRY(NAME)				\
	movq	%gs:CPU_THREAD, %r9;		\
	cmpq	kernelbase(%rip), %rdi;		\
	jae	1f;				\
	leaq	_flt_/**/NAME, %rdx;		\
	movq	%rdx, T_LOFAULT(%r9);		\
	INSTR	REG, (%rdi);			\
	movq	$0, T_LOFAULT(%r9);		\
	xorl	%eax, %eax;			\
	ret;					\
_flt_/**/NAME:					\
	movq	$0, T_LOFAULT(%r9);		\
1:						\
	movq	T_COPYOPS(%r9), %rax;		\
	testq	%rax, %rax;			\
	jz	3f;				\
	jmp	*COPYOP(%rax);			\
3:						\
	movl	$-1, %eax;			\
	ret;					\
	SET_SIZE(NAME)

	SUWORD(suword64, movq, %rsi, CP_SUWORD64)
	SUWORD(suword32, movl, %esi, CP_SUWORD32)
	SUWORD(suword16, movw, %si, CP_SUWORD16)
	SUWORD(suword8, movb, %sil, CP_SUWORD8)

#elif defined(__i386)

/*
 * i386 flavour of SUWORD: addr is at 4(%esp) and value at 8(%esp); the
 * thread pointer is kept in %ecx.
 */

#define	SUWORD(NAME, INSTR, REG, COPYOP)	\
	ENTRY(NAME)				\
	movl	%gs:CPU_THREAD, %ecx;		\
	movl	kernelbase, %eax;		\
	cmpl	%eax, 4(%esp);			\
	jae	1f;				\
	lea	_flt_/**/NAME, %edx;		\
	movl	%edx, T_LOFAULT(%ecx);		\
	movl	8(%esp), %edx;			\
	movl	4(%esp), %eax;			\
	INSTR	REG, (%eax);			\
	movl	$0, T_LOFAULT(%ecx);		\
	xorl	%eax, %eax;			\
	ret;					\
_flt_/**/NAME:					\
	movl	$0, T_LOFAULT(%ecx);		\
1:						\
	movl	T_COPYOPS(%ecx), %eax;		\
	testl	%eax, %eax;			\
	jz	3f;				\
	movl	COPYOP(%eax), %ecx;		\
	jmp	*%ecx;				\
3:						\
	movl	$-1, %eax;			\
	ret;					\
	SET_SIZE(NAME)

	SUWORD(suword32, movl, %edx, CP_SUWORD32)
	SUWORD(suword16, movw, %dx, CP_SUWORD16)
	SUWORD(suword8, movb, %dl, CP_SUWORD8)

#endif	/* __i386 */

#undef	SUWORD

#endif	/* __lint */

#if defined(__lint)

#if defined(__amd64)

/*ARGSUSED*/
void
fuword64_noerr(const void *addr, uint64_t *dst)
{
}

#endif

/*ARGSUSED*/
void
fuword32_noerr(const void *addr, uint32_t *dst)
{
}

/*ARGSUSED*/
void
fuword8_noerr(const void *addr, uint8_t *dst)
{
}

/*ARGSUSED*/
void
fuword16_noerr(const void *addr, uint16_t *dst)
{
}

#else   /* __lint */

#if defined(__amd64)

/*
 * FUWORD_NOERR(NAME, INSTR, REG) emits NAME(addr, dst): no lofault
 * handler is installed here.  An addr that is not below kernelbase is
 * replaced by kernelbase itself before the load.
 */

#define	FUWORD_NOERR(NAME, INSTR, REG)		\
	ENTRY(NAME)				\
	cmpq	kernelbase(%rip), %rdi;		\
	cmovnbq	kernelbase(%rip), %rdi;		\
	INSTR	(%rdi), REG;			\
	INSTR	REG, (%rsi);			\
	ret;					\
	SET_SIZE(NAME)

	FUWORD_NOERR(fuword64_noerr, movq, %rax)
	FUWORD_NOERR(fuword32_noerr, movl, %eax)
	FUWORD_NOERR(fuword16_noerr, movw, %ax)
	FUWORD_NOERR(fuword8_noerr, movb, %al)

#elif defined(__i386)

/*
 * i386 flavour of FUWORD_NOERR: addr at 4(%esp), dst at 8(%esp).
 */

#define	FUWORD_NOERR(NAME, INSTR, REG)		\
	ENTRY(NAME)				\
	movl	4(%esp), %eax;			\
	cmpl	kernelbase, %eax;		\
	jb	1f;				\
	movl	kernelbase, %eax;		\
1:						\
	movl	8(%esp), %edx;			\
	INSTR	(%eax), REG;			\
	INSTR	REG, (%edx);			\
	ret;					\
	SET_SIZE(NAME)

	FUWORD_NOERR(fuword32_noerr, movl, %ecx)
	FUWORD_NOERR(fuword16_noerr, movw, %cx)
	FUWORD_NOERR(fuword8_noerr, movb, %cl)

#endif	/* __i386 */

#undef	FUWORD_NOERR

#endif	/* __lint */

#if defined(__lint)

#if defined(__amd64)

/*ARGSUSED*/
void
suword64_noerr(void *addr, uint64_t value)
{
}

#endif

/*ARGSUSED*/
void
suword32_noerr(void *addr, uint32_t value)
{
}

/*ARGSUSED*/
void
suword16_noerr(void *addr, uint16_t value)
{
}

/*ARGSUSED*/
void
suword8_noerr(void *addr, uint8_t value)
{
}

#else	/* __lint */

#if defined(__amd64)

/*
 * SUWORD_NOERR(NAME, INSTR, REG) emits NAME(addr, value): no lofault
 * handler is installed here.  An addr that is not below kernelbase is
 * replaced by kernelbase itself before the store.
 */

#define	SUWORD_NOERR(NAME, INSTR, REG)		\
	ENTRY(NAME)				\
	cmpq	kernelbase(%rip), %rdi;		\
	cmovnbq	kernelbase(%rip), %rdi;		\
	INSTR	REG, (%rdi);			\
	ret;					\
	SET_SIZE(NAME)

	SUWORD_NOERR(suword64_noerr, movq, %rsi)
	SUWORD_NOERR(suword32_noerr, movl, %esi)
	SUWORD_NOERR(suword16_noerr, movw, %si)
	SUWORD_NOERR(suword8_noerr, movb, %sil)

#elif defined(__i386)

/*
 * i386 flavour of SUWORD_NOERR: addr at 4(%esp), value at 8(%esp).
 */

#define	SUWORD_NOERR(NAME, INSTR, REG)		\
	ENTRY(NAME)				\
	movl	4(%esp), %eax;			\
	cmpl	kernelbase, %eax;		\
	jb	1f;				\
	movl	kernelbase, %eax;		\
1:						\
	movl	8(%esp), %edx;			\
	INSTR	REG, (%eax);			\
	ret;					\
	SET_SIZE(NAME)

	SUWORD_NOERR(suword32_noerr, movl, %edx)
	SUWORD_NOERR(suword16_noerr, movw, %dx)
	SUWORD_NOERR(suword8_noerr, movb, %dl)

#endif	/* __i386 */

#undef	SUWORD_NOERR

#endif	/* __lint */


#if defined(__lint)

/*ARGSUSED*/
int
subyte(void *addr, uchar_t value)
{
	return (0);
}

/*ARGSUSED*/
void
subyte_noerr(void *addr, uchar_t value)
{
}

/*ARGSUSED*/
int
fulword(const void *addr, ulong_t *valuep)
{
	return (0);
}

/*ARGSUSED*/
void
fulword_noerr(const void *addr, ulong_t *valuep)
{
}

/*ARGSUSED*/
int
sulword(void *addr, ulong_t valuep)
{
	return (0);
}

/*ARGSUSED*/
void
sulword_noerr(void *addr, ulong_t valuep)
{
}

#else

	/*
	 * The byte and long-word entry points are weak aliases of the
	 * fixed-width routines generated above.
	 */
	.weak	subyte
	subyte=suword8
	.weak	subyte_noerr
	subyte_noerr=suword8_noerr

#if defined(__amd64)

	/* ulong_t entry points alias the 64-bit routines */
	.weak	fulword
	fulword=fuword64
	.weak	fulword_noerr
	fulword_noerr=fuword64_noerr
	.weak	sulword
	sulword=suword64
	.weak	sulword_noerr
	sulword_noerr=suword64_noerr

#elif defined(__i386)

	/* ulong_t entry points alias the 32-bit routines */
	.weak	fulword
	fulword=fuword32
	.weak	fulword_noerr
	fulword_noerr=fuword32_noerr
	.weak	sulword
	sulword=suword32
	.weak	sulword_noerr
	sulword_noerr=suword32_noerr

#endif /* __i386 */

#endif /* __lint */

#if defined(__lint)

/*
 * Copy a block of storage - must not overlap (from + len <= to).
2792 * No fault handler installed (to be called under on_fault()) 2793 */ 2794 2795 /* ARGSUSED */ 2796 void 2797 copyout_noerr(const void *kfrom, void *uto, size_t count) 2798 {} 2799 2800 /* ARGSUSED */ 2801 void 2802 copyin_noerr(const void *ufrom, void *kto, size_t count) 2803 {} 2804 2805 /* 2806 * Zero a block of storage in user space 2807 */ 2808 2809 /* ARGSUSED */ 2810 void 2811 uzero(void *addr, size_t count) 2812 {} 2813 2814 /* 2815 * copy a block of storage in user space 2816 */ 2817 2818 /* ARGSUSED */ 2819 void 2820 ucopy(const void *ufrom, void *uto, size_t ulength) 2821 {} 2822 2823 /* 2824 * copy a string in user space 2825 */ 2826 2827 /* ARGSUSED */ 2828 void 2829 ucopystr(const char *ufrom, char *uto, size_t umaxlength, size_t *lencopied) 2830 {} 2831 2832 #else /* __lint */ 2833 2834 #if defined(__amd64) 2835 2836 ENTRY(copyin_noerr) 2837 movq kernelbase(%rip), %rax 2838 #ifdef DEBUG 2839 cmpq %rax, %rsi /* %rsi = kto */ 2840 jae 1f 2841 leaq .cpyin_ne_pmsg(%rip), %rdi 2842 jmp call_panic /* setup stack and call panic */ 2843 1: 2844 #endif 2845 cmpq %rax, %rdi /* ufrom < kernelbase */ 2846 jb do_copy 2847 movq %rax, %rdi /* force fault at kernelbase */ 2848 jmp do_copy 2849 SET_SIZE(copyin_noerr) 2850 2851 ENTRY(copyout_noerr) 2852 movq kernelbase(%rip), %rax 2853 #ifdef DEBUG 2854 cmpq %rax, %rdi /* %rdi = kfrom */ 2855 jae 1f 2856 leaq .cpyout_ne_pmsg(%rip), %rdi 2857 jmp call_panic /* setup stack and call panic */ 2858 1: 2859 #endif 2860 cmpq %rax, %rsi /* uto < kernelbase */ 2861 jb do_copy 2862 movq %rax, %rsi /* force fault at kernelbase */ 2863 jmp do_copy 2864 SET_SIZE(copyout_noerr) 2865 2866 ENTRY(uzero) 2867 movq kernelbase(%rip), %rax 2868 cmpq %rax, %rdi 2869 jb do_zero 2870 movq %rax, %rdi /* force fault at kernelbase */ 2871 jmp do_zero 2872 SET_SIZE(uzero) 2873 2874 ENTRY(ucopy) 2875 movq kernelbase(%rip), %rax 2876 cmpq %rax, %rdi 2877 cmovaeq %rax, %rdi /* force fault at kernelbase */ 2878 cmpq %rax, %rsi 2879 cmovaeq %rax, 
%rsi /* force fault at kernelbase */ 2880 jmp do_copy 2881 SET_SIZE(ucopy) 2882 2883 ENTRY(ucopystr) 2884 movq kernelbase(%rip), %rax 2885 cmpq %rax, %rdi 2886 cmovaeq %rax, %rdi /* force fault at kernelbase */ 2887 cmpq %rax, %rsi 2888 cmovaeq %rax, %rsi /* force fault at kernelbase */ 2889 /* do_copystr expects lofault address in %r8 */ 2890 movq %gs:CPU_THREAD, %r8 2891 movq T_LOFAULT(%r8), %r8 2892 jmp do_copystr 2893 SET_SIZE(ucopystr) 2894 2895 #elif defined(__i386) 2896 2897 ENTRY(copyin_noerr) 2898 movl kernelbase, %eax 2899 #ifdef DEBUG 2900 cmpl %eax, 8(%esp) 2901 jae 1f 2902 pushl $.cpyin_ne_pmsg 2903 call panic 2904 1: 2905 #endif 2906 cmpl %eax, 4(%esp) 2907 jb do_copy 2908 movl %eax, 4(%esp) /* force fault at kernelbase */ 2909 jmp do_copy 2910 SET_SIZE(copyin_noerr) 2911 2912 ENTRY(copyout_noerr) 2913 movl kernelbase, %eax 2914 #ifdef DEBUG 2915 cmpl %eax, 4(%esp) 2916 jae 1f 2917 pushl $.cpyout_ne_pmsg 2918 call panic 2919 1: 2920 #endif 2921 cmpl %eax, 8(%esp) 2922 jb do_copy 2923 movl %eax, 8(%esp) /* force fault at kernelbase */ 2924 jmp do_copy 2925 SET_SIZE(copyout_noerr) 2926 2927 ENTRY(uzero) 2928 movl kernelbase, %eax 2929 cmpl %eax, 4(%esp) 2930 jb do_zero 2931 movl %eax, 4(%esp) /* force fault at kernelbase */ 2932 jmp do_zero 2933 SET_SIZE(uzero) 2934 2935 ENTRY(ucopy) 2936 movl kernelbase, %eax 2937 cmpl %eax, 4(%esp) 2938 jb 1f 2939 movl %eax, 4(%esp) /* force fault at kernelbase */ 2940 1: 2941 cmpl %eax, 8(%esp) 2942 jb do_copy 2943 movl %eax, 8(%esp) /* force fault at kernelbase */ 2944 jmp do_copy 2945 SET_SIZE(ucopy) 2946 2947 ENTRY(ucopystr) 2948 movl kernelbase, %eax 2949 cmpl %eax, 4(%esp) 2950 jb 1f 2951 movl %eax, 4(%esp) /* force fault at kernelbase */ 2952 1: 2953 cmpl %eax, 8(%esp) 2954 jb 2f 2955 movl %eax, 8(%esp) /* force fault at kernelbase */ 2956 2: 2957 /* do_copystr expects the lofault address in %eax */ 2958 movl %gs:CPU_THREAD, %eax 2959 movl T_LOFAULT(%eax), %eax 2960 jmp do_copystr 2961 SET_SIZE(ucopystr) 2962 
#endif	/* __i386 */

#ifdef DEBUG
/*
 * Panic message strings, assembled only in DEBUG builds and placed in
 * the .data section.
 *
 * .cpyin_ne_pmsg and .cpyout_ne_pmsg are the messages used by the DEBUG
 * argument checks in copyin_noerr and copyout_noerr.  The remaining
 * labels are referenced from parts of the file not shown here --
 * presumably by the DEBUG checks of the routines each message names;
 * verify against those routines before renaming or removing any label.
 */
	.data
.kcopy_panic_msg:
	.string "kcopy: arguments below kernelbase"
.bcopy_panic_msg:
	.string "bcopy: arguments below kernelbase"
.kzero_panic_msg:
	.string "kzero: arguments below kernelbase"
.bzero_panic_msg:
	.string "bzero: arguments below kernelbase"
.copyin_panic_msg:
	.string "copyin: kaddr argument below kernelbase"
.xcopyin_panic_msg:
	.string	"xcopyin: kaddr argument below kernelbase"
.copyout_panic_msg:
	.string "copyout: kaddr argument below kernelbase"
.xcopyout_panic_msg:
	.string	"xcopyout: kaddr argument below kernelbase"
.copystr_panic_msg:
	.string	"copystr: arguments in user space"
.copyinstr_panic_msg:
	.string	"copyinstr: kaddr argument not in kernel address space"
.copyoutstr_panic_msg:
	.string	"copyoutstr: kaddr argument not in kernel address space"
.cpyin_ne_pmsg:
	.string "copyin_noerr: argument not in kernel address space"
.cpyout_ne_pmsg:
	.string "copyout_noerr: argument not in kernel address space"
#endif

#endif	/* __lint */