Home | History | Annotate | Download | only in syscall
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/t_lock.h>
     29 #include <sys/param.h>
     30 #include <sys/systm.h>
     31 #include <sys/buf.h>
     32 #include <sys/conf.h>
     33 #include <sys/cred.h>
     34 #include <sys/kmem.h>
     35 #include <sys/sysmacros.h>
     36 #include <sys/vfs.h>
     37 #include <sys/vnode.h>
     38 #include <sys/debug.h>
     39 #include <sys/errno.h>
     40 #include <sys/time.h>
     41 #include <sys/file.h>
     42 #include <sys/open.h>
     43 #include <sys/user.h>
     44 #include <sys/termios.h>
     45 #include <sys/stream.h>
     46 #include <sys/strsubr.h>
     47 #include <sys/sunddi.h>
     48 #include <sys/esunddi.h>
     49 #include <sys/flock.h>
     50 #include <sys/modctl.h>
     51 #include <sys/cmn_err.h>
     52 #include <sys/vmsystm.h>
     53 
     54 #include <sys/socket.h>
     55 #include <sys/socketvar.h>
     56 #include <fs/sockfs/sockcommon.h>
     57 #include <fs/sockfs/socktpi.h>
     58 
     59 #include <netinet/in.h>
     60 #include <sys/sendfile.h>
     61 #include <sys/un.h>
     62 #include <sys/tihdr.h>
     63 #include <sys/atomic.h>
     64 
     65 #include <inet/common.h>
     66 #include <inet/ip.h>
     67 #include <inet/ip6.h>
     68 #include <inet/tcp.h>
     69 
     70 extern int sosendfile64(file_t *, file_t *, const struct ksendfilevec64 *,
     71 		ssize32_t *);
     72 extern int nl7c_sendfilev(struct sonode *, u_offset_t *, struct sendfilevec *,
     73 		int, ssize_t *);
     74 extern int snf_segmap(file_t *, vnode_t *, u_offset_t, u_offset_t, ssize_t *,
     75 		boolean_t);
     76 extern sotpi_info_t *sotpi_sototpi(struct sonode *);
     77 
     78 #define	SEND_MAX_CHUNK	16
     79 
     80 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
     81 /*
     82  * 64 bit offsets for 32 bit applications only running either on
     83  * 64 bit kernel or 32 bit kernel. For 32 bit apps, we can't transfer
     84  * more than 2GB of data.
     85  */
     86 int
     87 sendvec_chunk64(file_t *fp, u_offset_t *fileoff, struct ksendfilevec64 *sfv,
     88     int copy_cnt, ssize32_t *count)
     89 {
     90 	struct vnode *vp;
     91 	ushort_t fflag;
     92 	int ioflag;
     93 	size32_t cnt;
     94 	ssize32_t sfv_len;
     95 	ssize32_t tmpcount;
     96 	u_offset_t sfv_off;
     97 	struct uio auio;
     98 	struct iovec aiov;
     99 	int i, error;
    100 
    101 	fflag = fp->f_flag;
    102 	vp = fp->f_vnode;
    103 	for (i = 0; i < copy_cnt; i++) {
    104 
    105 		if (ISSIG(curthread, JUSTLOOKING))
    106 			return (EINTR);
    107 
    108 		/*
    109 		 * Do similar checks as "write" as we are writing
    110 		 * sfv_len bytes into "vp".
    111 		 */
    112 		sfv_len = (ssize32_t)sfv->sfv_len;
    113 
    114 		if (sfv_len == 0) {
    115 			sfv++;
    116 			continue;
    117 		}
    118 
    119 		if (sfv_len < 0)
    120 			return (EINVAL);
    121 
    122 		if (vp->v_type == VREG) {
    123 			if (*fileoff >= curproc->p_fsz_ctl) {
    124 				mutex_enter(&curproc->p_lock);
    125 				(void) rctl_action(
    126 				    rctlproc_legacy[RLIMIT_FSIZE],
    127 				    curproc->p_rctls, curproc, RCA_SAFE);
    128 				mutex_exit(&curproc->p_lock);
    129 				return (EFBIG);
    130 			}
    131 
    132 			if (*fileoff >= OFFSET_MAX(fp))
    133 				return (EFBIG);
    134 
    135 			if (*fileoff + sfv_len > OFFSET_MAX(fp))
    136 				return (EINVAL);
    137 		}
    138 
    139 		tmpcount = *count + sfv_len;
    140 		if (tmpcount < 0)
    141 			return (EINVAL);
    142 
    143 		sfv_off = sfv->sfv_off;
    144 
    145 		auio.uio_extflg = UIO_COPY_DEFAULT;
    146 		if (sfv->sfv_fd == SFV_FD_SELF) {
    147 			aiov.iov_len = sfv_len;
    148 			aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
    149 			auio.uio_loffset = *fileoff;
    150 			auio.uio_iovcnt = 1;
    151 			auio.uio_resid = sfv_len;
    152 			auio.uio_iov = &aiov;
    153 			auio.uio_segflg = UIO_USERSPACE;
    154 			auio.uio_llimit = curproc->p_fsz_ctl;
    155 			auio.uio_fmode = fflag;
    156 			ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
    157 			while (sfv_len > 0) {
    158 				error = VOP_WRITE(vp, &auio, ioflag,
    159 				    fp->f_cred, NULL);
    160 				cnt = sfv_len - auio.uio_resid;
    161 				sfv_len -= cnt;
    162 				ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
    163 				if (vp->v_type == VREG)
    164 					*fileoff += cnt;
    165 				*count += cnt;
    166 				if (error != 0)
    167 					return (error);
    168 			}
    169 		} else {
    170 			file_t	*ffp;
    171 			vnode_t	*readvp;
    172 			size_t	size;
    173 			caddr_t	ptr;
    174 
    175 			if ((ffp = getf(sfv->sfv_fd)) == NULL)
    176 				return (EBADF);
    177 
    178 			if ((ffp->f_flag & FREAD) == 0) {
    179 				releasef(sfv->sfv_fd);
    180 				return (EBADF);
    181 			}
    182 
    183 			readvp = ffp->f_vnode;
    184 			if (readvp->v_type != VREG) {
    185 				releasef(sfv->sfv_fd);
    186 				return (EINVAL);
    187 			}
    188 
    189 			/*
    190 			 * No point reading and writing to same vp,
    191 			 * as long as both are regular files. readvp is not
    192 			 * locked; but since we got it from an open file the
    193 			 * contents will be valid during the time of access.
    194 			 */
    195 			if (vn_compare(vp, readvp)) {
    196 				releasef(sfv->sfv_fd);
    197 				return (EINVAL);
    198 			}
    199 
    200 			/*
    201 			 * Optimize the regular file over
    202 			 * the socket case.
    203 			 */
    204 			if (vp->v_type == VSOCK) {
    205 				error = sosendfile64(fp, ffp, sfv,
    206 				    (ssize32_t *)&cnt);
    207 				*count += cnt;
    208 				if (error)
    209 					return (error);
    210 				sfv++;
    211 				continue;
    212 			}
    213 
    214 			/*
    215 			 * Note: we assume readvp != vp. "vp" is already
    216 			 * locked, and "readvp" must not be.
    217 			 */
    218 			if (readvp < vp) {
    219 				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
    220 				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
    221 				    NULL);
    222 				(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
    223 			} else {
    224 				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
    225 				    NULL);
    226 			}
    227 
    228 			/*
    229 			 * Same checks as in pread64.
    230 			 */
    231 			if (sfv_off > MAXOFFSET_T) {
    232 				VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
    233 				releasef(sfv->sfv_fd);
    234 				return (EINVAL);
    235 			}
    236 
    237 			if (sfv_off + sfv_len > MAXOFFSET_T)
    238 				sfv_len = (ssize32_t)(MAXOFFSET_T - sfv_off);
    239 
    240 			/* Find the native blocksize to transfer data */
    241 			size = MIN(vp->v_vfsp->vfs_bsize,
    242 			    readvp->v_vfsp->vfs_bsize);
    243 			size = sfv_len < size ? sfv_len : size;
    244 			ptr = kmem_alloc(size, KM_NOSLEEP);
    245 			if (ptr == NULL) {
    246 				VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
    247 				releasef(sfv->sfv_fd);
    248 				return (ENOMEM);
    249 			}
    250 
    251 			while (sfv_len > 0) {
    252 				size_t	iov_len;
    253 
    254 				iov_len = MIN(size, sfv_len);
    255 				aiov.iov_base = ptr;
    256 				aiov.iov_len = iov_len;
    257 				auio.uio_loffset = sfv_off;
    258 				auio.uio_iov = &aiov;
    259 				auio.uio_iovcnt = 1;
    260 				auio.uio_resid = iov_len;
    261 				auio.uio_segflg = UIO_SYSSPACE;
    262 				auio.uio_llimit = MAXOFFSET_T;
    263 				auio.uio_fmode = ffp->f_flag;
    264 				ioflag = auio.uio_fmode &
    265 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
    266 
    267 				/*
    268 				 * If read sync is not asked for,
    269 				 * filter sync flags
    270 				 */
    271 				if ((ioflag & FRSYNC) == 0)
    272 					ioflag &= ~(FSYNC|FDSYNC);
    273 				error = VOP_READ(readvp, &auio, ioflag,
    274 				    fp->f_cred, NULL);
    275 				if (error) {
    276 					kmem_free(ptr, size);
    277 					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
    278 					    NULL);
    279 					releasef(sfv->sfv_fd);
    280 					return (error);
    281 				}
    282 
    283 				/*
    284 				 * Check how must data was really read.
    285 				 * Decrement the 'len' and increment the
    286 				 * 'off' appropriately.
    287 				 */
    288 				cnt = iov_len - auio.uio_resid;
    289 				if (cnt == 0) {
    290 					/*
    291 					 * If we were reading a pipe (currently
    292 					 * not implemented), we may now lose
    293 					 * data.
    294 					 */
    295 					kmem_free(ptr, size);
    296 					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
    297 					    NULL);
    298 					releasef(sfv->sfv_fd);
    299 					return (EINVAL);
    300 				}
    301 				sfv_len -= cnt;
    302 				sfv_off += cnt;
    303 
    304 				aiov.iov_base = ptr;
    305 				aiov.iov_len = cnt;
    306 				auio.uio_loffset = *fileoff;
    307 				auio.uio_iov = &aiov;
    308 				auio.uio_iovcnt = 1;
    309 				auio.uio_resid = cnt;
    310 				auio.uio_segflg = UIO_SYSSPACE;
    311 				auio.uio_llimit = curproc->p_fsz_ctl;
    312 				auio.uio_fmode = fflag;
    313 				ioflag = auio.uio_fmode &
    314 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
    315 				error = VOP_WRITE(vp, &auio, ioflag,
    316 				    fp->f_cred, NULL);
    317 
    318 				/*
    319 				 * Check how much data was written. Increment
    320 				 * the 'len' and decrement the 'off' if all
    321 				 * the data was not written.
    322 				 */
    323 				cnt -= auio.uio_resid;
    324 				sfv_len += auio.uio_resid;
    325 				sfv_off -= auio.uio_resid;
    326 				ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;
    327 				if (vp->v_type == VREG)
    328 					*fileoff += cnt;
    329 				*count += cnt;
    330 				if (error != 0) {
    331 					kmem_free(ptr, size);
    332 					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
    333 					    NULL);
    334 					releasef(sfv->sfv_fd);
    335 					return (error);
    336 				}
    337 			}
    338 			VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
    339 			releasef(sfv->sfv_fd);
    340 			kmem_free(ptr, size);
    341 		}
    342 		sfv++;
    343 	}
    344 	return (0);
    345 }
    346 
    347 ssize32_t
    348 sendvec64(file_t *fp, const struct ksendfilevec64 *vec, int sfvcnt,
    349 	size32_t *xferred, int fildes)
    350 {
    351 	u_offset_t		fileoff;
    352 	int			copy_cnt;
    353 	const struct ksendfilevec64 *copy_vec;
    354 	struct ksendfilevec64 sfv[SEND_MAX_CHUNK];
    355 	struct vnode *vp;
    356 	int error;
    357 	ssize32_t count = 0;
    358 
    359 	vp = fp->f_vnode;
    360 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
    361 
    362 	copy_vec = vec;
    363 	fileoff = fp->f_offset;
    364 
    365 	do {
    366 		copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
    367 		if (copyin(copy_vec, sfv, copy_cnt *
    368 		    sizeof (struct ksendfilevec64))) {
    369 			error = EFAULT;
    370 			break;
    371 		}
    372 
    373 		error = sendvec_chunk64(fp, &fileoff, sfv, copy_cnt, &count);
    374 		if (error != 0)
    375 			break;
    376 
    377 		copy_vec += copy_cnt;
    378 		sfvcnt -= copy_cnt;
    379 	} while (sfvcnt > 0);
    380 
    381 	if (vp->v_type == VREG)
    382 		fp->f_offset += count;
    383 
    384 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
    385 	if (copyout(&count, xferred, sizeof (count)))
    386 		error = EFAULT;
    387 	releasef(fildes);
    388 	if (error != 0)
    389 		return (set_errno(error));
    390 	return (count);
    391 }
    392 #endif
    393 
    394 int
    395 sendvec_small_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
    396     int copy_cnt, ssize_t total_size, int maxblk, ssize_t *count)
    397 {
    398 	struct vnode *vp;
    399 	struct uio auio;
    400 	struct iovec aiov;
    401 	ushort_t fflag;
    402 	int ioflag;
    403 	int i, error;
    404 	size_t cnt;
    405 	ssize_t sfv_len;
    406 	u_offset_t sfv_off;
    407 #ifdef _SYSCALL32_IMPL
    408 	model_t model = get_udatamodel();
    409 	u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
    410 	    MAXOFF32_T : MAXOFFSET_T;
    411 #else
    412 	const u_offset_t maxoff = MAXOFF32_T;
    413 #endif
    414 	mblk_t *dmp = NULL;
    415 	int wroff;
    416 	int buf_left = 0;
    417 	size_t	iov_len;
    418 	mblk_t  *head, *tmp;
    419 	size_t  size = total_size;
    420 	size_t  extra;
    421 	int tail_len;
    422 	struct nmsghdr msg;
    423 
    424 	fflag = fp->f_flag;
    425 	vp = fp->f_vnode;
    426 
    427 	ASSERT(vp->v_type == VSOCK);
    428 	ASSERT(maxblk > 0);
    429 
    430 	/* If nothing to send, return */
    431 	if (total_size == 0)
    432 		return (0);
    433 
    434 	if (vp->v_stream != NULL) {
    435 		wroff = (int)vp->v_stream->sd_wroff;
    436 		tail_len = (int)vp->v_stream->sd_tail;
    437 	} else {
    438 		struct sonode *so;
    439 
    440 		so = VTOSO(vp);
    441 		wroff = so->so_proto_props.sopp_wroff;
    442 		tail_len = so->so_proto_props.sopp_tail;
    443 	}
    444 
    445 	extra = wroff + tail_len;
    446 
    447 	buf_left = MIN(total_size, maxblk);
    448 	head = dmp = allocb(buf_left + extra, BPRI_HI);
    449 	if (head == NULL)
    450 		return (ENOMEM);
    451 	head->b_wptr = head->b_rptr = head->b_rptr + wroff;
    452 	bzero(&msg, sizeof (msg));
    453 
    454 	auio.uio_extflg = UIO_COPY_DEFAULT;
    455 	for (i = 0; i < copy_cnt; i++) {
    456 		if (ISSIG(curthread, JUSTLOOKING)) {
    457 			freemsg(head);
    458 			return (EINTR);
    459 		}
    460 
    461 		/*
    462 		 * Do similar checks as "write" as we are writing
    463 		 * sfv_len bytes into "vp".
    464 		 */
    465 		sfv_len = (ssize_t)sfv->sfv_len;
    466 
    467 		if (sfv_len == 0) {
    468 			sfv++;
    469 			continue;
    470 		}
    471 
    472 		/* Check for overflow */
    473 #ifdef _SYSCALL32_IMPL
    474 		if (model == DATAMODEL_ILP32) {
    475 			if (((ssize32_t)(*count + sfv_len)) < 0) {
    476 				freemsg(head);
    477 				return (EINVAL);
    478 			}
    479 		} else
    480 #endif
    481 		if ((*count + sfv_len) < 0) {
    482 			freemsg(head);
    483 			return (EINVAL);
    484 		}
    485 
    486 		sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
    487 
    488 		if (sfv->sfv_fd == SFV_FD_SELF) {
    489 			while (sfv_len > 0) {
    490 				if (buf_left == 0) {
    491 					tmp = dmp;
    492 					buf_left = MIN(total_size, maxblk);
    493 					iov_len = MIN(buf_left, sfv_len);
    494 					dmp = allocb(buf_left + extra, BPRI_HI);
    495 					if (dmp == NULL) {
    496 						freemsg(head);
    497 						return (ENOMEM);
    498 					}
    499 					dmp->b_wptr = dmp->b_rptr =
    500 					    dmp->b_rptr + wroff;
    501 					tmp->b_cont = dmp;
    502 				} else {
    503 					iov_len = MIN(buf_left, sfv_len);
    504 				}
    505 
    506 				aiov.iov_len = iov_len;
    507 				aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
    508 				auio.uio_loffset = *fileoff;
    509 				auio.uio_iovcnt = 1;
    510 				auio.uio_resid = iov_len;
    511 				auio.uio_iov = &aiov;
    512 				auio.uio_segflg = UIO_USERSPACE;
    513 				auio.uio_llimit = curproc->p_fsz_ctl;
    514 				auio.uio_fmode = fflag;
    515 
    516 				buf_left -= iov_len;
    517 				total_size -= iov_len;
    518 				sfv_len -= iov_len;
    519 				sfv_off += iov_len;
    520 
    521 				error = uiomove((caddr_t)dmp->b_wptr,
    522 				    iov_len, UIO_WRITE, &auio);
    523 				if (error != 0) {
    524 					freemsg(head);
    525 					return (error);
    526 				}
    527 				dmp->b_wptr += iov_len;
    528 			}
    529 		} else {
    530 			file_t	*ffp;
    531 			vnode_t	*readvp;
    532 
    533 			if ((ffp = getf(sfv->sfv_fd)) == NULL) {
    534 				freemsg(head);
    535 				return (EBADF);
    536 			}
    537 
    538 			if ((ffp->f_flag & FREAD) == 0) {
    539 				releasef(sfv->sfv_fd);
    540 				freemsg(head);
    541 				return (EACCES);
    542 			}
    543 
    544 			readvp = ffp->f_vnode;
    545 			if (readvp->v_type != VREG) {
    546 				releasef(sfv->sfv_fd);
    547 				freemsg(head);
    548 				return (EINVAL);
    549 			}
    550 
    551 			/*
    552 			 * No point reading and writing to same vp,
    553 			 * as long as both are regular files. readvp is not
    554 			 * locked; but since we got it from an open file the
    555 			 * contents will be valid during the time of access.
    556 			 */
    557 
    558 			if (vn_compare(vp, readvp)) {
    559 				releasef(sfv->sfv_fd);
    560 				freemsg(head);
    561 				return (EINVAL);
    562 			}
    563 
    564 			/*
    565 			 * Note: we assume readvp != vp. "vp" is already
    566 			 * locked, and "readvp" must not be.
    567 			 */
    568 
    569 			if (readvp < vp) {
    570 				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
    571 				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
    572 				    NULL);
    573 				(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
    574 			} else {
    575 				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
    576 				    NULL);
    577 			}
    578 
    579 			/* Same checks as in pread */
    580 			if (sfv_off > maxoff) {
    581 				VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
    582 				releasef(sfv->sfv_fd);
    583 				freemsg(head);
    584 				return (EINVAL);
    585 			}
    586 			if (sfv_off + sfv_len > maxoff) {
    587 				total_size -= (sfv_off + sfv_len - maxoff);
    588 				sfv_len = (ssize_t)((offset_t)maxoff -
    589 				    sfv_off);
    590 			}
    591 
    592 			while (sfv_len > 0) {
    593 				if (buf_left == 0) {
    594 					tmp = dmp;
    595 					buf_left = MIN(total_size, maxblk);
    596 					iov_len = MIN(buf_left, sfv_len);
    597 					dmp = allocb(buf_left + extra, BPRI_HI);
    598 					if (dmp == NULL) {
    599 						VOP_RWUNLOCK(readvp,
    600 						    V_WRITELOCK_FALSE, NULL);
    601 						releasef(sfv->sfv_fd);
    602 						freemsg(head);
    603 						return (ENOMEM);
    604 					}
    605 					dmp->b_wptr = dmp->b_rptr =
    606 					    dmp->b_rptr + wroff;
    607 					tmp->b_cont = dmp;
    608 				} else {
    609 					iov_len = MIN(buf_left, sfv_len);
    610 				}
    611 				aiov.iov_base = (caddr_t)dmp->b_wptr;
    612 				aiov.iov_len = iov_len;
    613 				auio.uio_loffset = sfv_off;
    614 				auio.uio_iov = &aiov;
    615 				auio.uio_iovcnt = 1;
    616 				auio.uio_resid = iov_len;
    617 				auio.uio_segflg = UIO_SYSSPACE;
    618 				auio.uio_llimit = MAXOFFSET_T;
    619 				auio.uio_fmode = ffp->f_flag;
    620 				ioflag = auio.uio_fmode &
    621 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
    622 
    623 				/*
    624 				 * If read sync is not asked for,
    625 				 * filter sync flags
    626 				 */
    627 				if ((ioflag & FRSYNC) == 0)
    628 					ioflag &= ~(FSYNC|FDSYNC);
    629 				error = VOP_READ(readvp, &auio, ioflag,
    630 				    fp->f_cred, NULL);
    631 				if (error != 0) {
    632 					/*
    633 					 * If we were reading a pipe (currently
    634 					 * not implemented), we may now loose
    635 					 * data.
    636 					 */
    637 					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
    638 					    NULL);
    639 					releasef(sfv->sfv_fd);
    640 					freemsg(head);
    641 					return (error);
    642 				}
    643 
    644 				/*
    645 				 * Check how much data was really read.
    646 				 * Decrement the 'len' and increment the
    647 				 * 'off' appropriately.
    648 				 */
    649 				cnt = iov_len - auio.uio_resid;
    650 				if (cnt == 0) {
    651 					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
    652 					    NULL);
    653 					releasef(sfv->sfv_fd);
    654 					freemsg(head);
    655 					return (EINVAL);
    656 				}
    657 				sfv_len -= cnt;
    658 				sfv_off += cnt;
    659 				total_size -= cnt;
    660 				buf_left -= cnt;
    661 
    662 				dmp->b_wptr += cnt;
    663 			}
    664 			VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
    665 			releasef(sfv->sfv_fd);
    666 		}
    667 		sfv++;
    668 	}
    669 
    670 	ASSERT(total_size == 0);
    671 	error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &head);
    672 	if (error != 0) {
    673 		if (head != NULL)
    674 			freemsg(head);
    675 		return (error);
    676 	}
    677 	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)size;
    678 	*count += size;
    679 
    680 	return (0);
    681 }
    682 
    683 
    684 int
    685 sendvec_chunk(file_t *fp, u_offset_t *fileoff, struct sendfilevec *sfv,
    686     int copy_cnt, ssize_t *count)
    687 {
    688 	struct vnode *vp;
    689 	struct uio auio;
    690 	struct iovec aiov;
    691 	ushort_t fflag;
    692 	int ioflag;
    693 	int i, error;
    694 	size_t cnt;
    695 	ssize_t sfv_len;
    696 	u_offset_t sfv_off;
    697 #ifdef _SYSCALL32_IMPL
    698 	model_t model = get_udatamodel();
    699 	u_offset_t maxoff = (model == DATAMODEL_ILP32) ?
    700 	    MAXOFF32_T : MAXOFFSET_T;
    701 #else
    702 	const u_offset_t maxoff = MAXOFF32_T;
    703 #endif
    704 	mblk_t	*dmp = NULL;
    705 	char	*buf = NULL;
    706 	size_t  extra;
    707 	int maxblk, wroff, tail_len;
    708 	struct sonode *so;
    709 	stdata_t *stp;
    710 	struct nmsghdr msg;
    711 
    712 	fflag = fp->f_flag;
    713 	vp = fp->f_vnode;
    714 
    715 	if (vp->v_type == VSOCK) {
    716 		so = VTOSO(vp);
    717 		if (vp->v_stream != NULL) {
    718 			stp = vp->v_stream;
    719 			wroff = (int)stp->sd_wroff;
    720 			tail_len = (int)stp->sd_tail;
    721 			maxblk = (int)stp->sd_maxblk;
    722 		} else {
    723 			stp = NULL;
    724 			wroff = so->so_proto_props.sopp_wroff;
    725 			tail_len = so->so_proto_props.sopp_tail;
    726 			maxblk = so->so_proto_props.sopp_maxblk;
    727 		}
    728 		extra = wroff + tail_len;
    729 	}
    730 
    731 	bzero(&msg, sizeof (msg));
    732 	auio.uio_extflg = UIO_COPY_DEFAULT;
    733 	for (i = 0; i < copy_cnt; i++) {
    734 		if (ISSIG(curthread, JUSTLOOKING))
    735 			return (EINTR);
    736 
    737 		/*
    738 		 * Do similar checks as "write" as we are writing
    739 		 * sfv_len bytes into "vp".
    740 		 */
    741 		sfv_len = (ssize_t)sfv->sfv_len;
    742 
    743 		if (sfv_len == 0) {
    744 			sfv++;
    745 			continue;
    746 		}
    747 
    748 		if (vp->v_type == VREG) {
    749 			if (*fileoff >= curproc->p_fsz_ctl) {
    750 				mutex_enter(&curproc->p_lock);
    751 				(void) rctl_action(
    752 				    rctlproc_legacy[RLIMIT_FSIZE],
    753 				    curproc->p_rctls, curproc, RCA_SAFE);
    754 				mutex_exit(&curproc->p_lock);
    755 
    756 				return (EFBIG);
    757 			}
    758 
    759 			if (*fileoff >= maxoff)
    760 				return (EFBIG);
    761 
    762 			if (*fileoff + sfv_len > maxoff)
    763 				return (EINVAL);
    764 		}
    765 
    766 		/* Check for overflow */
    767 #ifdef _SYSCALL32_IMPL
    768 		if (model == DATAMODEL_ILP32) {
    769 			if (((ssize32_t)(*count + sfv_len)) < 0)
    770 				return (EINVAL);
    771 		} else
    772 #endif
    773 		if ((*count + sfv_len) < 0)
    774 			return (EINVAL);
    775 
    776 		sfv_off = (u_offset_t)(ulong_t)sfv->sfv_off;
    777 
    778 		if (sfv->sfv_fd == SFV_FD_SELF) {
    779 			if (vp->v_type == VSOCK) {
    780 				while (sfv_len > 0) {
    781 					size_t iov_len;
    782 
    783 					iov_len = sfv_len;
    784 					if (!SOCK_IS_NONSTR(so) &&
    785 					    SOTOTPI(so)->sti_kssl_ctx != NULL)
    786 						iov_len = MIN(iov_len, maxblk);
    787 
    788 					aiov.iov_len = iov_len;
    789 					aiov.iov_base =
    790 					    (caddr_t)(uintptr_t)sfv_off;
    791 
    792 					auio.uio_iov = &aiov;
    793 					auio.uio_iovcnt = 1;
    794 					auio.uio_loffset = *fileoff;
    795 					auio.uio_segflg = UIO_USERSPACE;
    796 					auio.uio_fmode = fflag;
    797 					auio.uio_llimit = curproc->p_fsz_ctl;
    798 					auio.uio_resid = iov_len;
    799 
    800 					dmp = allocb(iov_len + extra, BPRI_HI);
    801 					if (dmp == NULL)
    802 						return (ENOMEM);
    803 					dmp->b_wptr = dmp->b_rptr =
    804 					    dmp->b_rptr + wroff;
    805 					error = uiomove((caddr_t)dmp->b_wptr,
    806 					    iov_len, UIO_WRITE, &auio);
    807 					if (error != 0) {
    808 						freeb(dmp);
    809 						return (error);
    810 					}
    811 					dmp->b_wptr += iov_len;
    812 					error = socket_sendmblk(VTOSO(vp),
    813 					    &msg, fflag, CRED(), &dmp);
    814 
    815 					if (error != 0) {
    816 						if (dmp != NULL)
    817 							freeb(dmp);
    818 						return (error);
    819 					}
    820 					ttolwp(curthread)->lwp_ru.ioch +=
    821 					    (ulong_t)iov_len;
    822 					*count += iov_len;
    823 					sfv_len -= iov_len;
    824 					sfv_off += iov_len;
    825 				}
    826 			} else {
    827 				aiov.iov_len = sfv_len;
    828 				aiov.iov_base = (caddr_t)(uintptr_t)sfv_off;
    829 
    830 				auio.uio_iov = &aiov;
    831 				auio.uio_iovcnt = 1;
    832 				auio.uio_loffset = *fileoff;
    833 				auio.uio_segflg = UIO_USERSPACE;
    834 				auio.uio_fmode = fflag;
    835 				auio.uio_llimit = curproc->p_fsz_ctl;
    836 				auio.uio_resid = sfv_len;
    837 
    838 				ioflag = auio.uio_fmode &
    839 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
    840 				while (sfv_len > 0) {
    841 					error = VOP_WRITE(vp, &auio, ioflag,
    842 					    fp->f_cred, NULL);
    843 					cnt = sfv_len - auio.uio_resid;
    844 					sfv_len -= cnt;
    845 					ttolwp(curthread)->lwp_ru.ioch +=
    846 					    (ulong_t)cnt;
    847 					*fileoff += cnt;
    848 					*count += cnt;
    849 					if (error != 0)
    850 						return (error);
    851 				}
    852 			}
    853 		} else {
    854 			int segmapit = 0;
    855 			file_t	*ffp;
    856 			vnode_t	*readvp;
    857 			struct vnode *realvp;
    858 			size_t	size;
    859 			caddr_t	ptr;
    860 
    861 			if ((ffp = getf(sfv->sfv_fd)) == NULL)
    862 				return (EBADF);
    863 
    864 			if ((ffp->f_flag & FREAD) == 0) {
    865 				releasef(sfv->sfv_fd);
    866 				return (EBADF);
    867 			}
    868 
    869 			readvp = ffp->f_vnode;
    870 			if (VOP_REALVP(readvp, &realvp, NULL) == 0)
    871 				readvp = realvp;
    872 			if (readvp->v_type != VREG) {
    873 				releasef(sfv->sfv_fd);
    874 				return (EINVAL);
    875 			}
    876 
    877 			/*
    878 			 * No point reading and writing to same vp,
    879 			 * as long as both are regular files. readvp is not
    880 			 * locked; but since we got it from an open file the
    881 			 * contents will be valid during the time of access.
    882 			 */
    883 			if (vn_compare(vp, readvp)) {
    884 				releasef(sfv->sfv_fd);
    885 				return (EINVAL);
    886 			}
    887 
    888 			/*
    889 			 * Note: we assume readvp != vp. "vp" is already
    890 			 * locked, and "readvp" must not be.
    891 			 */
    892 			if (readvp < vp) {
    893 				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
    894 				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
    895 				    NULL);
    896 				(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
    897 			} else {
    898 				(void) VOP_RWLOCK(readvp, V_WRITELOCK_FALSE,
    899 				    NULL);
    900 			}
    901 
    902 			/* Same checks as in pread */
    903 			if (sfv_off > maxoff) {
    904 				VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
    905 				releasef(sfv->sfv_fd);
    906 				return (EINVAL);
    907 			}
    908 			if (sfv_off + sfv_len > maxoff) {
    909 				sfv_len = (ssize_t)((offset_t)maxoff -
    910 				    sfv_off);
    911 			}
    912 			/* Find the native blocksize to transfer data */
    913 			size = MIN(vp->v_vfsp->vfs_bsize,
    914 			    readvp->v_vfsp->vfs_bsize);
    915 			size = sfv_len < size ? sfv_len : size;
    916 
    917 			if (vp->v_type != VSOCK) {
    918 				segmapit = 0;
    919 				buf = kmem_alloc(size, KM_NOSLEEP);
    920 				if (buf == NULL) {
    921 					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
    922 					    NULL);
    923 					releasef(sfv->sfv_fd);
    924 					return (ENOMEM);
    925 				}
    926 			} else {
    927 				uint_t	copyflag;
    928 
    929 				copyflag = stp != NULL ? stp->sd_copyflag :
    930 				    so->so_proto_props.sopp_zcopyflag;
    931 				/*
    932 				 * For sockets acting as an SSL proxy, we
    933 				 * need to adjust the size to the maximum
    934 				 * SSL record size set in the stream head.
    935 				 */
    936 				if (!SOCK_IS_NONSTR(so) &&
    937 				    _SOTOTPI(so)->sti_kssl_ctx != NULL)
    938 					size = MIN(size, maxblk);
    939 
    940 				if (vn_has_flocks(readvp) ||
    941 				    readvp->v_flag & VNOMAP ||
    942 				    copyflag & STZCVMUNSAFE) {
    943 					segmapit = 0;
    944 				} else if (copyflag & STZCVMSAFE) {
    945 					segmapit = 1;
    946 				} else {
    947 					int on = 1;
    948 					if (socket_setsockopt(VTOSO(vp),
    949 					    SOL_SOCKET, SO_SND_COPYAVOID,
    950 					    &on, sizeof (on), CRED()) == 0)
    951 					segmapit = 1;
    952 				}
    953 			}
    954 
    955 			if (segmapit) {
    956 				boolean_t nowait;
    957 
    958 				nowait = (sfv->sfv_flag & SFV_NOWAIT) != 0;
    959 				error = snf_segmap(fp, readvp, sfv_off,
    960 				    (u_offset_t)sfv_len, (ssize_t *)&cnt,
    961 				    nowait);
    962 				releasef(sfv->sfv_fd);
    963 				*count += cnt;
    964 				if (error)
    965 					return (error);
    966 				sfv++;
    967 				continue;
    968 			}
    969 
    970 			while (sfv_len > 0) {
    971 				size_t	iov_len;
    972 
    973 				iov_len = MIN(size, sfv_len);
    974 
    975 				if (vp->v_type == VSOCK) {
    976 					dmp = allocb(iov_len + extra, BPRI_HI);
    977 					if (dmp == NULL) {
    978 						VOP_RWUNLOCK(readvp,
    979 						    V_WRITELOCK_FALSE, NULL);
    980 						releasef(sfv->sfv_fd);
    981 						return (ENOMEM);
    982 					}
    983 					dmp->b_wptr = dmp->b_rptr =
    984 					    dmp->b_rptr + wroff;
    985 					ptr = (caddr_t)dmp->b_rptr;
    986 				} else {
    987 					ptr = buf;
    988 				}
    989 
    990 				aiov.iov_base = ptr;
    991 				aiov.iov_len = iov_len;
    992 				auio.uio_loffset = sfv_off;
    993 				auio.uio_iov = &aiov;
    994 				auio.uio_iovcnt = 1;
    995 				auio.uio_resid = iov_len;
    996 				auio.uio_segflg = UIO_SYSSPACE;
    997 				auio.uio_llimit = MAXOFFSET_T;
    998 				auio.uio_fmode = ffp->f_flag;
    999 				ioflag = auio.uio_fmode &
   1000 				    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
   1001 
   1002 				/*
   1003 				 * If read sync is not asked for,
   1004 				 * filter sync flags
   1005 				 */
   1006 				if ((ioflag & FRSYNC) == 0)
   1007 					ioflag &= ~(FSYNC|FDSYNC);
   1008 				error = VOP_READ(readvp, &auio, ioflag,
   1009 				    fp->f_cred, NULL);
   1010 				if (error != 0) {
   1011 					/*
   1012 					 * If we were reading a pipe (currently
   1013 					 * not implemented), we may now lose
   1014 					 * data.
   1015 					 */
   1016 					if (vp->v_type == VSOCK)
   1017 						freeb(dmp);
   1018 					else
   1019 						kmem_free(buf, size);
   1020 					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
   1021 					    NULL);
   1022 					releasef(sfv->sfv_fd);
   1023 					return (error);
   1024 				}
   1025 
   1026 				/*
   1027 				 * Check how much data was really read.
   1028 				 * Decrement the 'len' and increment the
   1029 				 * 'off' appropriately.
   1030 				 */
   1031 				cnt = iov_len - auio.uio_resid;
   1032 				if (cnt == 0) {
   1033 					if (vp->v_type == VSOCK)
   1034 						freeb(dmp);
   1035 					else
   1036 						kmem_free(buf, size);
   1037 					VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE,
   1038 					    NULL);
   1039 					releasef(sfv->sfv_fd);
   1040 					return (EINVAL);
   1041 				}
   1042 				sfv_len -= cnt;
   1043 				sfv_off += cnt;
   1044 
   1045 				if (vp->v_type == VSOCK) {
   1046 					dmp->b_wptr = dmp->b_rptr + cnt;
   1047 
   1048 					error = socket_sendmblk(VTOSO(vp),
   1049 					    &msg, fflag, CRED(), &dmp);
   1050 
   1051 					if (error != 0) {
   1052 						if (dmp != NULL)
   1053 							freeb(dmp);
   1054 						VOP_RWUNLOCK(readvp,
   1055 						    V_WRITELOCK_FALSE, NULL);
   1056 						releasef(sfv->sfv_fd);
   1057 						return (error);
   1058 					}
   1059 
   1060 					ttolwp(curthread)->lwp_ru.ioch +=
   1061 					    (ulong_t)cnt;
   1062 					*count += cnt;
   1063 				} else {
   1064 
   1065 					aiov.iov_base = ptr;
   1066 					aiov.iov_len = cnt;
   1067 					auio.uio_loffset = *fileoff;
   1068 					auio.uio_resid = cnt;
   1069 					auio.uio_iov = &aiov;
   1070 					auio.uio_iovcnt = 1;
   1071 					auio.uio_segflg = UIO_SYSSPACE;
   1072 					auio.uio_llimit = curproc->p_fsz_ctl;
   1073 					auio.uio_fmode = fflag;
   1074 					ioflag = auio.uio_fmode &
   1075 					    (FAPPEND|FSYNC|FDSYNC|FRSYNC);
   1076 					error = VOP_WRITE(vp, &auio, ioflag,
   1077 					    fp->f_cred, NULL);
   1078 
   1079 					/*
   1080 					 * Check how much data was written.
   1081 					 * Increment the 'len' and decrement the
   1082 					 * 'off' if all the data was not
   1083 					 * written.
   1084 					 */
   1085 					cnt -= auio.uio_resid;
   1086 					sfv_len += auio.uio_resid;
   1087 					sfv_off -= auio.uio_resid;
   1088 					ttolwp(curthread)->lwp_ru.ioch +=
   1089 					    (ulong_t)cnt;
   1090 					*fileoff += cnt;
   1091 					*count += cnt;
   1092 					if (error != 0) {
   1093 						kmem_free(buf, size);
   1094 						VOP_RWUNLOCK(readvp,
   1095 						    V_WRITELOCK_FALSE, NULL);
   1096 						releasef(sfv->sfv_fd);
   1097 						return (error);
   1098 					}
   1099 				}
   1100 			}
   1101 			if (buf) {
   1102 				kmem_free(buf, size);
   1103 				buf = NULL;
   1104 			}
   1105 			VOP_RWUNLOCK(readvp, V_WRITELOCK_FALSE, NULL);
   1106 			releasef(sfv->sfv_fd);
   1107 		}
   1108 		sfv++;
   1109 	}
   1110 	return (0);
   1111 }
   1112 
   1113 ssize_t
   1114 sendfilev(int opcode, int fildes, const struct sendfilevec *vec, int sfvcnt,
   1115     size_t *xferred)
   1116 {
   1117 	int error = 0;
   1118 	int first_vector_error = 0;
   1119 	file_t *fp;
   1120 	struct vnode *vp;
   1121 	struct sonode *so;
   1122 	u_offset_t fileoff;
   1123 	int copy_cnt;
   1124 	const struct sendfilevec *copy_vec;
   1125 	struct sendfilevec sfv[SEND_MAX_CHUNK];
   1126 	ssize_t count = 0;
   1127 #ifdef _SYSCALL32_IMPL
   1128 	struct ksendfilevec32 sfv32[SEND_MAX_CHUNK];
   1129 #endif
   1130 	ssize_t total_size;
   1131 	int i;
   1132 	boolean_t is_sock = B_FALSE;
   1133 	int maxblk = 0;
   1134 
   1135 	if (sfvcnt <= 0)
   1136 		return (set_errno(EINVAL));
   1137 
   1138 	if ((fp = getf(fildes)) == NULL)
   1139 		return (set_errno(EBADF));
   1140 
   1141 	if (((fp->f_flag) & FWRITE) == 0) {
   1142 		error = EBADF;
   1143 		goto err;
   1144 	}
   1145 
   1146 	fileoff = fp->f_offset;
   1147 	vp = fp->f_vnode;
   1148 
   1149 	switch (vp->v_type) {
   1150 	case VSOCK:
   1151 		so = VTOSO(vp);
   1152 		is_sock = B_TRUE;
   1153 		if (SOCK_IS_NONSTR(so)) {
   1154 			maxblk = so->so_proto_props.sopp_maxblk;
   1155 		} else {
   1156 			maxblk = (int)vp->v_stream->sd_maxblk;
   1157 		}
   1158 		break;
   1159 	case VREG:
   1160 		break;
   1161 	default:
   1162 		error = EINVAL;
   1163 		goto err;
   1164 	}
   1165 
   1166 	switch (opcode) {
   1167 	case SENDFILEV :
   1168 		break;
   1169 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
   1170 	case SENDFILEV64 :
   1171 		return (sendvec64(fp, (struct ksendfilevec64 *)vec, sfvcnt,
   1172 		    (size32_t *)xferred, fildes));
   1173 #endif
   1174 	default :
   1175 		error = ENOSYS;
   1176 		break;
   1177 	}
   1178 
   1179 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
   1180 	copy_vec = vec;
   1181 
   1182 	do {
   1183 		total_size = 0;
   1184 		copy_cnt = MIN(sfvcnt, SEND_MAX_CHUNK);
   1185 #ifdef _SYSCALL32_IMPL
   1186 		/* 32-bit callers need to have their iovec expanded. */
   1187 		if (get_udatamodel() == DATAMODEL_ILP32) {
   1188 			if (copyin(copy_vec, sfv32,
   1189 			    copy_cnt * sizeof (ksendfilevec32_t))) {
   1190 				error = EFAULT;
   1191 				break;
   1192 			}
   1193 
   1194 			for (i = 0; i < copy_cnt; i++) {
   1195 				sfv[i].sfv_fd = sfv32[i].sfv_fd;
   1196 				sfv[i].sfv_off =
   1197 				    (off_t)(uint32_t)sfv32[i].sfv_off;
   1198 				sfv[i].sfv_len = (size_t)sfv32[i].sfv_len;
   1199 				total_size += sfv[i].sfv_len;
   1200 				sfv[i].sfv_flag = sfv32[i].sfv_flag;
   1201 				/*
   1202 				 * Individual elements of the vector must not
   1203 				 * wrap or overflow, as later math is signed.
   1204 				 * Equally total_size needs to be checked after
   1205 				 * each vector is added in, to be sure that
   1206 				 * rogue values haven't overflowed the counter.
   1207 				 */
   1208 				if (((ssize32_t)sfv[i].sfv_len < 0) ||
   1209 				    ((ssize32_t)total_size < 0)) {
   1210 					/*
   1211 					 * Truncate the vector to send data
   1212 					 * described by elements before the
   1213 					 * error.
   1214 					 */
   1215 					copy_cnt = i;
   1216 					first_vector_error = EINVAL;
   1217 					/* total_size can't be trusted */
   1218 					if ((ssize32_t)total_size < 0)
   1219 						error = EINVAL;
   1220 					break;
   1221 				}
   1222 			}
   1223 			/* Nothing to do, process errors */
   1224 			if (copy_cnt == 0)
   1225 				break;
   1226 
   1227 		} else {
   1228 #endif
   1229 			if (copyin(copy_vec, sfv,
   1230 			    copy_cnt * sizeof (sendfilevec_t))) {
   1231 				error = EFAULT;
   1232 				break;
   1233 			}
   1234 
   1235 			for (i = 0; i < copy_cnt; i++) {
   1236 				total_size += sfv[i].sfv_len;
   1237 				/*
   1238 				 * Individual elements of the vector must not
   1239 				 * wrap or overflow, as later math is signed.
   1240 				 * Equally total_size needs to be checked after
   1241 				 * each vector is added in, to be sure that
   1242 				 * rogue values haven't overflowed the counter.
   1243 				 */
   1244 				if (((ssize_t)sfv[i].sfv_len < 0) ||
   1245 				    (total_size < 0)) {
   1246 					/*
   1247 					 * Truncate the vector to send data
   1248 					 * described by elements before the
   1249 					 * error.
   1250 					 */
   1251 					copy_cnt = i;
   1252 					first_vector_error = EINVAL;
   1253 					/* total_size can't be trusted */
   1254 					if (total_size < 0)
   1255 						error = EINVAL;
   1256 					break;
   1257 				}
   1258 			}
   1259 			/* Nothing to do, process errors */
   1260 			if (copy_cnt == 0)
   1261 				break;
   1262 #ifdef _SYSCALL32_IMPL
   1263 		}
   1264 #endif
   1265 
   1266 		/*
   1267 		 * The task between deciding to use sendvec_small_chunk
   1268 		 * and sendvec_chunk is dependant on multiple things:
   1269 		 *
   1270 		 * i) latency is important for smaller files. So if the
   1271 		 * data is smaller than 'tcp_slow_start_initial' times
   1272 		 * maxblk, then use sendvec_small_chunk which creates
   1273 		 * maxblk size mblks and chains them together and sends
   1274 		 * them to TCP in one shot. It also leaves 'wroff' size
   1275 		 * space for the headers in each mblk.
   1276 		 *
   1277 		 * ii) for total size bigger than 'tcp_slow_start_initial'
   1278 		 * time maxblk, its probably real file data which is
   1279 		 * dominating. So its better to use sendvec_chunk because
   1280 		 * performance goes to dog if we don't do pagesize reads.
   1281 		 * sendvec_chunk will do pagesize reads and write them
   1282 		 * in pagesize mblks to TCP.
   1283 		 *
   1284 		 * Side Notes: A write to file has not been optimized.
   1285 		 * Future zero copy code will plugin into sendvec_chunk
   1286 		 * only because doing zero copy for files smaller then
   1287 		 * pagesize is useless.
   1288 		 *
   1289 		 * Note, if socket has NL7C enabled then call NL7C's
   1290 		 * senfilev() function to consume the sfv[].
   1291 		 */
   1292 		if (is_sock) {
   1293 			if (!SOCK_IS_NONSTR(so) &&
   1294 			    _SOTOTPI(so)->sti_nl7c_flags != 0) {
   1295 				error = nl7c_sendfilev(so, &fileoff,
   1296 				    sfv, copy_cnt, &count);
   1297 			} else if ((total_size <= (4 * maxblk)) &&
   1298 			    error == 0) {
   1299 				error = sendvec_small_chunk(fp,
   1300 				    &fileoff, sfv, copy_cnt,
   1301 				    total_size, maxblk, &count);
   1302 			} else {
   1303 				error = sendvec_chunk(fp, &fileoff,
   1304 				    sfv, copy_cnt, &count);
   1305 			}
   1306 		} else {
   1307 			ASSERT(vp->v_type == VREG);
   1308 			error = sendvec_chunk(fp, &fileoff, sfv, copy_cnt,
   1309 			    &count);
   1310 		}
   1311 
   1312 
   1313 #ifdef _SYSCALL32_IMPL
   1314 	if (get_udatamodel() == DATAMODEL_ILP32)
   1315 		copy_vec = (const struct sendfilevec *)((char *)copy_vec +
   1316 		    (copy_cnt * sizeof (ksendfilevec32_t)));
   1317 	else
   1318 #endif
   1319 		copy_vec += copy_cnt;
   1320 		sfvcnt -= copy_cnt;
   1321 
   1322 	/* Process all vector members up to first error */
   1323 	} while ((sfvcnt > 0) && first_vector_error == 0 && error == 0);
   1324 
   1325 	if (vp->v_type == VREG)
   1326 		fp->f_offset += count;
   1327 
   1328 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
   1329 
   1330 #ifdef _SYSCALL32_IMPL
   1331 	if (get_udatamodel() == DATAMODEL_ILP32) {
   1332 		ssize32_t count32 = (ssize32_t)count;
   1333 		if (copyout(&count32, xferred, sizeof (count32)))
   1334 			error = EFAULT;
   1335 		releasef(fildes);
   1336 		if (error != 0)
   1337 			return (set_errno(error));
   1338 		if (first_vector_error != 0)
   1339 			return (set_errno(first_vector_error));
   1340 		return (count32);
   1341 	}
   1342 #endif
   1343 	if (copyout(&count, xferred, sizeof (count)))
   1344 		error = EFAULT;
   1345 	releasef(fildes);
   1346 	if (error != 0)
   1347 		return (set_errno(error));
   1348 	if (first_vector_error != 0)
   1349 		return (set_errno(first_vector_error));
   1350 	return (count);
   1351 err:
   1352 	ASSERT(error != 0);
   1353 	releasef(fildes);
   1354 	return (set_errno(error));
   1355 }
   1356