sim, kern: support FUTEX_CMP_REQUEUE

This patch adds support for the FUTEX_REQUEUE and FUTEX_CMP_REQUEUE
operations. Below is the description of FUTEX_CMP_REQUEUE from the Linux
man page:

futex syscall: int futex(int *uaddr, int futex_op, int val,
                         const struct timespec *timeout,
                         int *uaddr2, int val3);

This operation first checks whether the location uaddr still contains
the value val3.  If not, the operation fails with the error EAGAIN.
Otherwise, the operation wakes up a maximum of val waiters that are
waiting on the futex at uaddr.  If there are more than val waiters, then
the remaining waiters are removed from the wait queue of the source
futex at uaddr and added to the wait queue of the target futex at
uaddr2.  The val2 argument (passed in place of timeout) specifies an
upper limit on the number of waiters that are requeued to the futex at
uaddr2.

Reference: http://man7.org/linux/man-pages/man2/futex.2.html

Change-Id: I6d2ebd19a935b656d19d8342f7ab450c0d2031f4
Reviewed-on: https://gem5-review.googlesource.com/c/9629
Reviewed-by: Brandon Potter <Brandon.Potter@amd.com>
Maintainer: Brandon Potter <Brandon.Potter@amd.com>
diff --git a/src/kern/linux/linux.hh b/src/kern/linux/linux.hh
index 2da5968..4ed3902 100644
--- a/src/kern/linux/linux.hh
+++ b/src/kern/linux/linux.hh
@@ -241,6 +241,8 @@
     // For futex system call
     static const unsigned TGT_FUTEX_WAIT                = 0;
     static const unsigned TGT_FUTEX_WAKE                = 1;
+    static const unsigned TGT_FUTEX_REQUEUE             = 3;
+    static const unsigned TGT_FUTEX_CMP_REQUEUE         = 4;
     static const unsigned TGT_FUTEX_WAIT_BITSET         = 9;
     static const unsigned TGT_FUTEX_WAKE_BITSET         = 10;
     static const unsigned TGT_EAGAIN                    = 11;
diff --git a/src/sim/futex_map.hh b/src/sim/futex_map.hh
index 6f1f7a2..3d34109 100644
--- a/src/sim/futex_map.hh
+++ b/src/sim/futex_map.hh
@@ -221,6 +221,71 @@
 
         return woken_up;
     }
+
+    /**
+     * Wake up to count waiters blocked on the futex at addr1. Waiters
+     * still queued after that are removed from the wait queue of addr1
+     * and appended to the wait queue of the futex at addr2; the number
+     * of waiters moved is capped by count2 (misused timeout parameter).
+     *
+     * @param addr1 address of the source futex
+     * @param tgid thread group id, combined with an address into a key
+     * @param count maximum number of waiters to wake
+     * @param count2 maximum number of waiters to requeue
+     * @param addr2 address of the target futex
+     * @return the number of waiters that were woken or requeued
+     */
+    int
+    requeue(Addr addr1, uint64_t tgid, int count, int count2, Addr addr2)
+    {
+        FutexKey key1(addr1, tgid);
+        auto it1 = find(key1);
+
+        if (it1 == end())
+            return 0;
+
+        int woken_up = 0;
+        auto &waiterList1 = it1->second;
+
+        while (!waiterList1.empty() && woken_up < count) {
+            waiterList1.front().tc->activate();
+            waiterList1.pop_front();
+            woken_up++;
+        }
+
+        WaiterList tmpList;
+        int requeued = 0;
+
+        while (!waiterList1.empty() && requeued < count2) {
+            tmpList.push_back(waiterList1.front());
+            waiterList1.pop_front();
+            requeued++;
+        }
+
+        // Drop the drained source entry first. Doing it before the
+        // insert below keeps it1 from being used after an insertion,
+        // which may invalidate iterators if the map is hash-based.
+        // waiterList1 must not be touched past this point.
+        if (waiterList1.empty())
+            erase(it1);
+
+        // Only look at the target queue when something was moved; with
+        // nothing to requeue and no queue at addr2, find() yields end(),
+        // which must not be dereferenced.
+        if (requeued > 0) {
+            FutexKey key2(addr2, tgid);
+            auto it2 = find(key2);
+
+            if (it2 == end()) {
+                insert({key2, tmpList});
+            } else {
+                it2->second.insert(it2->second.end(),
+                                   tmpList.begin(), tmpList.end());
+            }
+        }
+
+        return woken_up + requeued;
+    }
 };
 
 #endif // __FUTEX_MAP_HH__
diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh
index 295598c..0b7585c 100644
--- a/src/sim/syscall_emul.hh
+++ b/src/sim/syscall_emul.hh
@@ -441,8 +441,21 @@
         return futex_map.wakeup(uaddr, process->tgid(), val);
     } else if (OS::TGT_FUTEX_WAKE_BITSET == op) {
         return futex_map.wakeup_bitset(uaddr, process->tgid(), val3);
-    }
+    } else if (OS::TGT_FUTEX_REQUEUE == op ||
+               OS::TGT_FUTEX_CMP_REQUEUE == op) {
 
+        // Read the futex word at uaddr; timeout doubles as the requeue cap.
+        BufferArg buf(uaddr, sizeof(int));
+        buf.copyIn(tc->getMemProxy());
+        int mem_val = *(int*)buf.bufferPtr();
+        /*
+         * Only CMP_REQUEUE compares: it proceeds only if the futex at
+         * uaddr still holds val3. Plain REQUEUE skips the comparison.
+         */
+        if (OS::TGT_FUTEX_CMP_REQUEUE == op && val3 != mem_val)
+            return -OS::TGT_EWOULDBLOCK;
+        return futex_map.requeue(uaddr, process->tgid(), val, timeout, uaddr2);
+    }
     warn("futex: op %d not implemented; ignoring.", op);
     return -ENOSYS;
 }