Skip to content

Commit 5b682ab

Browse files
committed
Refactor the tests to ensure they fail if we remove remote node updating from ncEffectRegister
The NC does in fact notify remote peers when one of their processes is registered against a name locally. This should ensure that the remote's ncEffectDied triggers a message to the registry hosting node. Since there are some potential situations where that might fail, we still maintain monitors for remote registered processes, but they are not an essential function afaict. The original test case has been removed, since if we deliberately break the NC code to trigger failure, the test code will hang indefinitely (and chew cpu resources heavily), which isn't helpful or informative on CI. The replacement test case does make use of delays and timeouts, however this has been done in a structured fashion, breaking gracefully if we trigger failure, and passing relatively quickly on the happy path. It also seems to behave consistently.
1 parent 02494f2 commit 5b682ab

File tree

1 file changed

+59
-106
lines changed
  • distributed-process-tests/src/Control/Distributed/Process/Tests

1 file changed

+59
-106
lines changed

distributed-process-tests/src/Control/Distributed/Process/Tests/CH.hs

Lines changed: 59 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,113 +1481,66 @@ testExitRemote TestTransport{..} = do
14811481

14821482
testRegistryMonitoring :: TestTransport -> Assertion
14831483
testRegistryMonitoring TestTransport{..} = do
1484-
localNode <- newLocalNode testTransport initRemoteTable
1485-
remoteNode <- newLocalNode testTransport initRemoteTable
1486-
return ()
1484+
node1 <- newLocalNode testTransport initRemoteTable
1485+
node2 <- newLocalNode testTransport initRemoteTable
1486+
waitH <- newEmptyMVar
14871487

1488-
-- Local process. Test if local process will be removed from
1489-
-- registry when it dies.
1490-
box <- newEmptyMVar
1491-
runProcess localNode $ do
1492-
pid <- spawnLocal $ do
1493-
expect
1494-
register "test" pid
1495-
tpid <- whereis "test"
1496-
if tpid == Just pid
1497-
then do _ <- monitor pid
1498-
send pid ()
1499-
ProcessMonitorNotification{} <- expect
1500-
tpid1 <- whereis "test"
1501-
liftIO $ putMVar box (Nothing == tpid1)
1502-
else liftIO $ putMVar box False
1503-
1504-
takeMVar box >>= assertBool "expected local process to not be registered"
1505-
return ()
1506-
1507-
-- Remote process. Test if remote process entry is removed
1508-
-- from registry when process dies.
1509-
remote1 <- testRemote remoteNode
1510-
runProcess localNode $
1511-
let waitpoll = do
1512-
w <- whereis "test" :: Process (Maybe ProcessId)
1513-
forM_ w (const waitpoll)
1514-
in do register "test" remote1
1515-
-- [race-condition] invalid test if we force exit before waitpoll runs
1516-
registered <- whereis "test"
1517-
registered `shouldBe` (equalTo (Just remote1))
1518-
send remote1 ()
1519-
waitpoll
1520-
return ()
1521-
return ()
1522-
1523-
-- Many labels. Test if all labels associated with process
1524-
-- are removed from registry when it dies.
1525-
remote2 <- testRemote remoteNode
1526-
runProcess localNode $
1527-
let waitpoll = do
1528-
w1 <- whereis "test-3" :: Process (Maybe ProcessId)
1529-
w2 <- whereis "test-4" :: Process (Maybe ProcessId)
1530-
forM_ (w1 <|> w2) (const waitpoll)
1531-
in do register "test-3" remote2
1532-
register "test-4" remote2
1533-
send remote2 ()
1534-
waitpoll
1535-
return ()
1536-
1537-
{- XXX: waiting including patch for nsend for remote process
1538-
remote3 <- testRemote remoteNode
1539-
remote4 <- testRemote remoteNode
1540-
-- test many labels
1541-
runProcess localNode $ do
1542-
register "test-3" remote3
1543-
reregister "test-3" remote4
1544-
send remote3 ()
1545-
liftIO $ threadDelay 50000 -- XXX: racy
1546-
monitor remote4
1547-
nsend "test-3" ()
1548-
ProcessMonitorNotification{} <- expect
1549-
return ()
1550-
-}
1551-
1552-
-- Test registerRemoteAsync properties. Add a local process to
1553-
-- remote registry and checks that it is removed
1554-
-- when the process dies.
1555-
remote5 <- testRemote remoteNode
1556-
runProcess localNode $ do
1557-
registerRemoteAsync (localNodeId remoteNode) "test" remote5
1558-
receiveWait [
1559-
match (\(RegisterReply _ True _) -> return ())
1560-
] >>= send remote5
1561-
let waitpoll = do
1562-
whereisRemoteAsync (localNodeId remoteNode) "test"
1563-
receiveWait [
1564-
match (\(WhereIsReply _ mr) -> forM_ mr (const waitpoll))
1565-
]
1566-
waitpoll
1488+
let nid = localNodeId node2
1489+
pid <- forkProcess node1 $ do
1490+
getSelfPid >>= runUntilRegistered nid
1491+
liftIO $ takeMVar waitH
15671492

1568-
-- Add remote process to remote registry and checks if
1569-
-- entry is removed then process is dead.
1570-
remote6 <- testRemote localNode
1571-
runProcess localNode $ do
1572-
registerRemoteAsync (localNodeId remoteNode) "test" remote6
1573-
receiveWait [
1574-
match (\(RegisterReply _ True _) -> return ())
1575-
] >>= send remote6
1576-
let waitpoll = do
1577-
whereisRemoteAsync (localNodeId remoteNode) "test"
1578-
receiveWait [
1579-
match (\(WhereIsReply _ mr) -> forM_ mr (const waitpoll))
1580-
]
1581-
waitpoll
1582-
where
1583-
testRemote node = do
1584-
-- test many labels
1585-
pidBox <- newEmptyMVar
1586-
forkProcess node $ do
1587-
us <- getSelfPid
1588-
liftIO $ putMVar pidBox us
1589-
expect :: Process ()
1590-
takeMVar pidBox
1493+
runProcess node2 $ do
1494+
register regName pid
1495+
res <- whereis regName
1496+
us <- getSelfPid
1497+
liftIO $ do
1498+
putMVar waitH ()
1499+
assertBool "expected (Just pid)" $ res == (Just pid)
1500+
1501+
-- This delay isn't essential!
1502+
-- The test case passes perfectly fine without it (feel free to comment out
1503+
-- and see), however waiting a few seconds here, makes it much more likely
1504+
-- that in delayUntilMaybeUnregistered we will hit the match case right
1505+
-- away, and thus not be subjected to a 20 second delay. The value of 4
1506+
-- seconds appears to work optimally on osx and across several linux distros
1507+
-- running in virtual machines (which is essentially what we do in CI)
1508+
receiveTimeout 4000000 [ matchAny return ]
1509+
return ()
1510+
1511+
-- This delay doesn't serve much purpose in the happy path, however if some
1512+
-- future patch breaks the cooperative behaviour of node controllers viz
1513+
-- remote process registration and notification taking place via ncEffectDied,
1514+
-- there would be the possibility of a race in the test case should we attempt
1515+
-- to evaluate `whereis regName` on node2 right away. In case the name is still
1516+
-- erroneously registered, observing the 20 second delay (or lack of), could at
1517+
-- least give a hint that something is wrong, and we give up our time slice
1518+
-- so that there's a higher change the registrations have been cleaned up
1519+
-- in either case.
1520+
runProcess node2 $ delayUntilMaybeUnregistered nid pid
1521+
1522+
regHere <- newEmptyMVar
1523+
runProcess node2 $ whereis regName >>= liftIO . putMVar regHere
1524+
res <- takeMVar regHere
1525+
assertBool "expected Nothing, but process still registered" (res == Nothing)
1526+
1527+
where
1528+
runUntilRegistered nid us = do
1529+
whereisRemoteAsync nid regName
1530+
receiveWait [
1531+
matchIf (\(WhereIsReply n (Just p)) -> n == regName && p == us)
1532+
(const $ return ())
1533+
]
1534+
1535+
delayUntilMaybeUnregistered nid p = do
1536+
whereisRemoteAsync nid regName
1537+
receiveTimeout 20000000 {- 20 sec delay -} [
1538+
matchIf (\(WhereIsReply n p) -> n == regName && p == Nothing)
1539+
(const $ return ())
1540+
]
1541+
return ()
1542+
1543+
regName = "testRegisterRemote"
15911544

15921545
testUnsafeSend :: TestTransport -> Assertion
15931546
testUnsafeSend TestTransport{..} = do
@@ -1790,7 +1743,7 @@ tests testtrans = return [
17901743
, testCase "MaskRestoreScope" (testMaskRestoreScope testtrans)
17911744
, testCase "ExitLocal" (testExitLocal testtrans)
17921745
, testCase "ExitRemote" (testExitRemote testtrans)
1793-
, testCase "TestRegistryMonitor" (testRegistryMonitoring testtrans)
1746+
, testCase "RegistryMonitoring" (testRegistryMonitoring testtrans)
17941747
, testCase "TextCallLocal" (testCallLocal testtrans)
17951748
-- Unsafe Primitives
17961749
, testCase "TestUnsafeSend" (testUnsafeSend testtrans)

0 commit comments

Comments
 (0)