sim,kern: support FUTEX_WAIT_BITSET and FUTEX_WAKE_BITSET ops

This patch adds support for two operations of the futex system call:
FUTEX_WAIT_BITSET and FUTEX_WAKE_BITSET. The two operations are used to
selectively wake up only some of the threads waiting on a futex variable.

Each thread waiting on a futex variable is associated with a bitset. When
another thread issues a bitset wake-up on that variable, only the waiters
whose bitset shares at least one set bit with the waker's bitset are woken.

Change-Id: I2300e53b144d8fae226423fa2efb0238c1d93ef9
Reviewed-on: https://gem5-review.googlesource.com/c/9621
Reviewed-by: Brandon Potter <Brandon.Potter@amd.com>
Maintainer: Brandon Potter <Brandon.Potter@amd.com>
diff --git a/src/kern/linux/linux.hh b/src/kern/linux/linux.hh
index e559e05..2da5968 100644
--- a/src/kern/linux/linux.hh
+++ b/src/kern/linux/linux.hh
@@ -239,11 +239,14 @@
     static std::string cpuOnline(Process *process, ThreadContext *tc);
 
     // For futex system call
-    static const unsigned TGT_FUTEX_WAIT  = 0;
-    static const unsigned TGT_FUTEX_WAKE  = 1;
-    static const unsigned TGT_EAGAIN      = 11;
-    static const unsigned TGT_EWOULDBLOCK = TGT_EAGAIN;
-    static const unsigned TGT_FUTEX_PRIVATE_FLAG = 128;
+    static const unsigned TGT_FUTEX_WAIT                = 0;
+    static const unsigned TGT_FUTEX_WAKE                = 1;
+    static const unsigned TGT_FUTEX_WAIT_BITSET         = 9;
+    static const unsigned TGT_FUTEX_WAKE_BITSET         = 10;
+    static const unsigned TGT_EAGAIN                    = 11;
+    static const unsigned TGT_EWOULDBLOCK               = TGT_EAGAIN;
+    static const unsigned TGT_FUTEX_PRIVATE_FLAG        = 128;
+    static const unsigned TGT_FUTEX_CLOCK_REALTIME_FLAG = 256;
 
     // for *at syscalls
     static const int TGT_AT_FDCWD   = -100;
diff --git a/src/sim/futex_map.hh b/src/sim/futex_map.hh
index 998f8d4..6f1f7a2 100644
--- a/src/sim/futex_map.hh
+++ b/src/sim/futex_map.hh
@@ -79,12 +79,46 @@
     };
 }
 
-typedef std::list<ThreadContext *> ThreadContextList;
+/**
+ * WaiterState holds the per-waiter record kept in a futex wait list:
+ * a pointer to the waiting thread's context and its associated bitmask.
+ */
+class WaiterState {
+  public:
+    ThreadContext* tc; // context of the waiting thread
+    int bitmask;       // mask tested by checkMask() on a bitset wake-up
+
+    /**
+     * Constructor for waiters that supply their own bitmask
+     */
+    WaiterState(ThreadContext* _tc, int _bitmask)
+      : tc(_tc), bitmask(_bitmask)
+    { }
+
+    /**
+     * Constructor for waiters without a bitset: bitmask is 0xffffffff
+     */
+    WaiterState(ThreadContext* _tc)
+      : tc(_tc), bitmask(0xffffffff)
+    { }
+
+    /**
+     * Returns true if the bit-wise AND of wakeup_bitmask (supplied by
+     * the waking thread) and this waiter's own bitmask is non-zero
+     */
+    bool
+    checkMask(int wakeup_bitmask) const
+    {
+        return bitmask & wakeup_bitmask; // non-zero converts to true
+    }
+};
+
+typedef std::list<WaiterState> WaiterList;
 
 /**
  * FutexMap class holds a map of all futexes used in the system
  */
-class FutexMap : public std::unordered_map<FutexKey, ThreadContextList>
+class FutexMap : public std::unordered_map<FutexKey, WaiterList>
 {
   public:
     /** Inserts a futex into the map with one waiting TC */
@@ -95,10 +129,10 @@
         auto it = find(key);
 
         if (it == end()) {
-            ThreadContextList tcList {tc};
-            insert({key, tcList});
+            WaiterList waiterList {WaiterState(tc)};
+            insert({key, waiterList});
         } else {
-            it->second.push_back(tc);
+            it->second.push_back(WaiterState(tc));
         }
 
         /** Suspend the thread context */
@@ -116,20 +150,77 @@
             return 0;
 
         int woken_up = 0;
-        auto &tcList = it->second;
+        auto &waiterList = it->second;
 
-        while (!tcList.empty() && woken_up < count) {
-            tcList.front()->activate();
-            tcList.pop_front();
+        while (!waiterList.empty() && woken_up < count) {
+            waiterList.front().tc->activate();
+            waiterList.pop_front();
             woken_up++;
         }
 
-        if (tcList.empty())
+        if (waiterList.empty())
             erase(it);
 
         return woken_up;
     }
 
+    /**
+     * Registers tc as a waiter on the futex keyed by (addr, tgid), tagged
+     * with the given bitmask, and then suspends the thread context
+     */
+    void
+    suspend_bitset(Addr addr, uint64_t tgid, ThreadContext *tc,
+                   int bitmask)
+    {
+        FutexKey key(addr, tgid);
+        auto it = find(key);
+
+        if (it == end()) { // first waiter on this futex: create its list
+            WaiterList waiterList {WaiterState(tc, bitmask)};
+            insert({key, waiterList});
+        } else { // futex already has waiters: append to the back
+            it->second.push_back(WaiterState(tc, bitmask));
+        }
+
+        /** Suspend the thread context */
+        tc->suspend();
+    }
+
+    /**
+     * Activates and removes every waiter on (addr, tgid) whose bitmask
+     * shares a set bit with the given bitmask; returns how many were woken
+     */
+    int
+    wakeup_bitset(Addr addr, uint64_t tgid, int bitmask)
+    {
+        FutexKey key(addr, tgid);
+        auto it = find(key);
+
+        if (it == end()) // no waiter list exists for this futex
+            return 0;
+
+        int woken_up = 0;
+
+        auto &waiterList = it->second;
+        auto iter = waiterList.begin();
+
+        while (iter != waiterList.end()) {
+            WaiterState& waiter = *iter;
+
+            if (waiter.checkMask(bitmask)) {
+                waiter.tc->activate();
+                iter = waiterList.erase(iter); // erase() yields the next
+                woken_up++;
+            } else {
+                ++iter; // mask does not match: leave this waiter queued
+            }
+        }
+
+        if (waiterList.empty()) // drop the map entry once nobody waits
+            erase(it);
+
+        return woken_up;
+    }
 };
 
 #endif // __FUTEX_MAP_HH__
diff --git a/src/sim/syscall_emul.hh b/src/sim/syscall_emul.hh
index 5ba487c..dcd6b5d 100644
--- a/src/sim/syscall_emul.hh
+++ b/src/sim/syscall_emul.hh
@@ -403,16 +403,20 @@
     Addr uaddr = process->getSyscallArg(tc, index);
     int op = process->getSyscallArg(tc, index);
     int val = process->getSyscallArg(tc, index);
+    int timeout M5_VAR_USED = process->getSyscallArg(tc, index);
+    Addr uaddr2 M5_VAR_USED = process->getSyscallArg(tc, index);
+    int val3 = process->getSyscallArg(tc, index);
 
     /*
      * Unsupported option that does not affect the correctness of the
      * application. This is a performance optimization utilized by Linux.
      */
     op &= ~OS::TGT_FUTEX_PRIVATE_FLAG;
+    op &= ~OS::TGT_FUTEX_CLOCK_REALTIME_FLAG;
 
     FutexMap &futex_map = tc->getSystemPtr()->futexMap;
 
-    if (OS::TGT_FUTEX_WAIT == op) {
+    if (OS::TGT_FUTEX_WAIT == op || OS::TGT_FUTEX_WAIT_BITSET == op) {
         // Ensure futex system call accessed atomically.
         BufferArg buf(uaddr, sizeof(int));
         buf.copyIn(tc->getMemProxy());
@@ -426,11 +430,17 @@
         if (val != mem_val)
             return -OS::TGT_EWOULDBLOCK;
 
-        futex_map.suspend(uaddr, process->tgid(), tc);
+        if (OS::TGT_FUTEX_WAIT == op) { // plain wait: default bitmask
+            futex_map.suspend(uaddr, process->tgid(), tc);
+        } else { // FUTEX_WAIT_BITSET: val3 carries the waiter's bitset
+            futex_map.suspend_bitset(uaddr, process->tgid(), tc, val3);
+        }
 
         return 0;
     } else if (OS::TGT_FUTEX_WAKE == op) {
         return futex_map.wakeup(uaddr, process->tgid(), val);
+    } else if (OS::TGT_FUTEX_WAKE_BITSET == op) {
+        return futex_map.wakeup_bitset(uaddr, process->tgid(), val3);
     }
 
     warn("futex: op %d not implemented; ignoring.", op);