sendmmsg() Expected Behavior?

Hei Chan structurechart at yahoo.com
Thu Oct 23 22:43:07 EDT 2014


Hi,

I am on CentOS 6.5 with kernel version kernel-2.6.32-431.el6.

I was trying to find the implementation of recvmmsg(), and hopefully, I am not looking at the wrong source ~/rpmbuild/BUILD/kernel-2.6.32-431.el6/linux-2.6.32-431.el6.x86_64/net/socket.c. If I am, please kindly point me to the right source.

/*
 * __sys_recvmmsg - receive up to @vlen messages on socket @fd in one call.
 *
 * @fd:      file descriptor, resolved to a socket via sockfd_lookup_light().
 * @mmsg:    user-space array of mmsghdr entries; it is walked as an array of
 *           compat_mmsghdr instead when MSG_CMSG_COMPAT is set in @flags.
 *           The result of each successful __sys_recvmsg() call is stored in
 *           the matching entry's msg_len.
 * @vlen:    maximum number of entries to fill.
 * @flags:   passed through unchanged to every __sys_recvmsg() call.
 * @timeout: optional (may be NULL). When non-NULL it is overwritten after
 *           each received datagram with end_time minus the current time,
 *           and clamped to {0, 0} once that difference goes negative.
 *
 * Returns the number of datagrams received when that number is non-zero or
 * when no error is pending; otherwise a negative error code (-EINVAL when
 * poll_select_set_timeout() rejects @timeout, or the value from
 * sockfd_lookup_light() / sock_error() / __sys_recvmsg() / put_user()).
 *
 * Note on timeouts: the remaining-time check sits at the bottom of the loop
 * body, after ++datagrams, so it is evaluated only once a datagram has
 * already been received. If the deadline has passed by then (which is what
 * a {0, 0} timeout presumably produces -- confirm against
 * poll_select_set_timeout()), the loop exits after the first datagram even
 * if more are queued.
 */
int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
                   unsigned int flags, struct timespec *timeout)
{
        int fput_needed, err, datagrams;
        struct socket *sock;
        struct mmsghdr __user *entry;
        struct compat_mmsghdr __user *compat_entry;
        struct msghdr msg_sys;
        struct timespec end_time;

        /*
         * Fill in end_time from the caller's timeout; judging by the
         * timespec_sub() below, end_time is the deadline that the current
         * time is later subtracted from.
         */
        if (timeout &&
            poll_select_set_timeout(&end_time, timeout->tv_sec,
                                    timeout->tv_nsec))
                return -EINVAL;

        datagrams = 0;

        sock = sockfd_lookup_light(fd, &err, &fput_needed);
        if (!sock)
                return err;

        /* Bail out early if sock_error() reports an error for this socket. */
        err = sock_error(sock->sk);
        if (err)
                goto out_put;

        /* Two views of the same user array; only one is advanced per call. */
        entry = mmsg;
        compat_entry = (struct compat_mmsghdr __user *)mmsg;

        while (datagrams < vlen) {
                /*
                 * No need to ask LSM for more than the first datagram.
                 * (datagrams is passed as the last argument of
                 * __sys_recvmsg(); presumably a non-zero value is what
                 * suppresses the LSM check -- confirm in __sys_recvmsg().)
                 */
                if (MSG_CMSG_COMPAT & flags) {
                        err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
                                            &msg_sys, flags, datagrams);
                        if (err < 0)
                                break;
                        /* Non-negative result is stored as this entry's msg_len. */
                        err = __put_user(err, &compat_entry->msg_len);
                        ++compat_entry;
                } else {
                        err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
                                            &msg_sys, flags, datagrams);
                        if (err < 0)
                                break;
                        /* Non-negative result is stored as this entry's msg_len. */
                        err = put_user(err, &entry->msg_len);
                        ++entry;
                }

                /* Storing msg_len failed: stop without counting this datagram. */
                if (err)
                        break;
                ++datagrams;

                if (timeout) {
                        /*
                         * Reuse *timeout as scratch space: load the current
                         * time into it, then replace it with the time left
                         * until end_time.
                         */
                        ktime_get_ts(timeout);
                        *timeout = timespec_sub(end_time, *timeout);
                        if (timeout->tv_sec < 0) {
                                /* Deadline already passed: report zero remaining. */
                                timeout->tv_sec = timeout->tv_nsec = 0;
                                break;
                        }

                        /* Timeout, return less than vlen datagrams */
                        if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
                                break;
                }

                /* Out of band data, return right away */
                if (msg_sys.msg_flags & MSG_OOB)
                        break;
        }

out_put:
        fput_light(sock->file, fput_needed);

        /* err == 0 here means every completed iteration succeeded. */
        if (err == 0)
                return datagrams;

        if (datagrams != 0) {
                /*
                 * We may return less entries than requested (vlen) if the
                 * sock is non block and there aren't enough datagrams...
                 */
                if (err != -EAGAIN) {
                        /*
                         * ... or  if recvmsg returns an error after we
                         * received some datagrams, where we record the
                         * error to return on the next call or if the
                         * app asks about it using getsockopt(SO_ERROR).
                         *
                         * NOTE(review): this write to sock->sk happens
                         * after fput_light() above has dropped the file
                         * reference -- confirm that sock cannot have been
                         * freed by this point.
                         */
                        sock->sk->sk_err = -err;
                }

                return datagrams;
        }

        /* Nothing was received: hand the error straight back. */
        return err;
}

Let's say there are 2 packets at the socket, and I try:

timespec t = {0, 0};
recvmmsg(fd, mmsg, vlen, 0, &t);

Then, because of these lines:

                    if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
                            break;

recvmmsg() will just return 1 packet instead of 2. Am I right?

This seems to defeat one of the main purposes of recvmmsg() -- reducing the number of system calls when there are packets queued.

Maybe I am missing something?

I created a small test case that waits for an epoll callback, sleeps for 2-5 seconds, and then calls recvmmsg() with a zero timeout (a non-NULL timespec set to {0, 0}, just like above).

I then found that recvmmsg() doesn't return all of the queued packets in a single call.



More information about the Kernelnewbies mailing list