Catch any exceptions in the pusher loop. Use a lower timeout for pushers so we...

Catch any exceptions in the pusher loop. Use a lower timeout for pushers so we can see if they're actually still running.

Catch any exceptions in the pusher loop. Use a lower timeout for pushers so we...
b8690dd8 · David Baker · da84946d · b8690dd8
Commit b8690dd8 authored 9 years ago by David Baker
--- a/synapse/push/__init__.py
+++ b/synapse/push/__init__.py
@@ -24,6 +24,7 @@ import baserules
 import logging
 import simplejson as json
 import re
+import random
 logger = logging.getLogger(__name__)
@@ -256,134 +257,154 @@ class Pusher(object):
            logger.info("Pusher %s for user %s starting from token %s",
                        self.pushkey, self.user_name, self.last_token)
+        wait = 0
        while self.alive:
-            from_tok = StreamToken.from_string(self.last_token)
+            try:
-            config = PaginationConfig(from_token=from_tok, limit='1')
+                if wait > 0:
-            chunk = yield self.evStreamHandler.get_stream(
+                    yield synapse.util.async.sleep(wait)
-                self.user_name, config,
+                yield self.get_and_dispatch()
-                timeout=100*365*24*60*60*1000, affect_presence=False
+                wait = 0
-            )
+            except:
+                if wait == 0:
+                    wait = 1
+                else:
+                    wait = min(wait * 2, 1800)
+                logger.exception(
+                    "Exception in pusher loop for pushkey %s. Pausing for %ds",
+                    self.pushkey, wait
+                )
-            # limiting to 1 may get 1 event plus 1 presence event, so
+    @defer.inlineCallbacks
-            # pick out the actual event
+    def get_and_dispatch(self):
-            single_event = None
+        from_tok = StreamToken.from_string(self.last_token)
-            for c in chunk['chunk']:
+        config = PaginationConfig(from_token=from_tok, limit='1')
-                if 'event_id' in c:  # Hmmm...
+        timeout = (300 + random.randint(-60, 60)) * 1000
-                    single_event = c
+        chunk = yield self.evStreamHandler.get_stream(
-                    break
+            self.user_name, config,
-            if not single_event:
+            timeout=timeout, affect_presence=False
-                self.last_token = chunk['end']
+        )
-                continue
-            if not self.alive:
+        # limiting to 1 may get 1 event plus 1 presence event, so
-                continue
+        # pick out the actual event
+        single_event = None
+        for c in chunk['chunk']:
+            if 'event_id' in c:  # Hmmm...
+                single_event = c
+                break
+        if not single_event:
+            self.last_token = chunk['end']
+            logger.debug("Event stream timeout for pushkey %s", self.pushkey)
+            return
-            processed = False
+        if not self.alive:
-            actions = yield self._actions_for_event(single_event)
+            return
-            tweaks = _tweaks_for_actions(actions)
-            if len(actions) == 0:
+        processed = False
-                logger.warn("Empty actions! Using default action.")
+        actions = yield self._actions_for_event(single_event)
-                actions = Pusher.DEFAULT_ACTIONS
+        tweaks = _tweaks_for_actions(actions)
-            if 'notify' not in actions and 'dont_notify' not in actions:
+        if len(actions) == 0:
-                logger.warn("Neither notify nor dont_notify in actions: adding default")
+            logger.warn("Empty actions! Using default action.")
-                actions.extend(Pusher.DEFAULT_ACTIONS)
+            actions = Pusher.DEFAULT_ACTIONS
-            if 'dont_notify' in actions:
+        if 'notify' not in actions and 'dont_notify' not in actions:
-                logger.debug(
+            logger.warn("Neither notify nor dont_notify in actions: adding default")
-                    "%s for %s: dont_notify",
+            actions.extend(Pusher.DEFAULT_ACTIONS)
-                    single_event['event_id'], self.user_name
-                )
+        if 'dont_notify' in actions:
+            logger.debug(
+                "%s for %s: dont_notify",
+                single_event['event_id'], self.user_name
+            )
+            processed = True
+        else:
+            rejected = yield self.dispatch_push(single_event, tweaks)
+            self.has_unread = True
+            if isinstance(rejected, list) or isinstance(rejected, tuple):
                processed = True
-            else:
+                for pk in rejected:
-                rejected = yield self.dispatch_push(single_event, tweaks)
+                    if pk != self.pushkey:
-                self.has_unread = True
+                        # for sanity, we only remove the pushkey if it
-                if isinstance(rejected, list) or isinstance(rejected, tuple):
+                        # was the one we actually sent...
-                    processed = True
+                        logger.warn(
-                    for pk in rejected:
+                            ("Ignoring rejected pushkey %s because we"
-                        if pk != self.pushkey:
+                             " didn't send it"), pk
-                            # for sanity, we only remove the pushkey if it
+                        )
-                            # was the one we actually sent...
+                    else:
-                            logger.warn(
+                        logger.info(
-                                ("Ignoring rejected pushkey %s because we"
+                            "Pushkey %s was rejected: removing",
-                                 " didn't send it"), pk
+                            pk
-                            )
+                        )
-                        else:
+                        yield self.hs.get_pusherpool().remove_pusher(
-                            logger.info(
+                            self.app_id, pk, self.user_name
-                                "Pushkey %s was rejected: removing",
+                        )
-                                pk
-                            )
+        if not self.alive:
-                            yield self.hs.get_pusherpool().remove_pusher(
+            return
-                                self.app_id, pk, self.user_name
-                            )
+        if processed:
+            self.backoff_delay = Pusher.INITIAL_BACKOFF
-            if not self.alive:
+            self.last_token = chunk['end']
-                continue
+            self.store.update_pusher_last_token_and_success(
+                self.app_id,
+                self.pushkey,
+                self.user_name,
+                self.last_token,
+                self.clock.time_msec()
+            )
+            if self.failing_since:
+                self.failing_since = None
+                self.store.update_pusher_failing_since(
+                    self.app_id,
+                    self.pushkey,
+                    self.user_name,
+                    self.failing_since)
+        else:
+            if not self.failing_since:
+                self.failing_since = self.clock.time_msec()
+                self.store.update_pusher_failing_since(
+                    self.app_id,
+                    self.pushkey,
+                    self.user_name,
+                    self.failing_since
+                )
-            if processed:
+            if (self.failing_since and
+               self.failing_since <
+               self.clock.time_msec() - Pusher.GIVE_UP_AFTER):
+                # we really only give up so that if the URL gets
+                # fixed, we don't suddenly deliver a load
+                # of old notifications.
+                logger.warn("Giving up on a notification to user %s, "
+                            "pushkey %s",
+                            self.user_name, self.pushkey)
                self.backoff_delay = Pusher.INITIAL_BACKOFF
                self.last_token = chunk['end']
-                self.store.update_pusher_last_token_and_success(
+                self.store.update_pusher_last_token(
+                    self.app_id,
+                    self.pushkey,
+                    self.user_name,
+                    self.last_token
+                )
+                self.failing_since = None
+                self.store.update_pusher_failing_since(
                    self.app_id,
                    self.pushkey,
                    self.user_name,
-                    self.last_token,
+                    self.failing_since
-                    self.clock.time_msec()
                )
-                if self.failing_since:
-                    self.failing_since = None
-                    self.store.update_pusher_failing_since(
-                        self.app_id,
-                        self.pushkey,
-                        self.user_name,
-                        self.failing_since)
            else:
-                if not self.failing_since:
+                logger.warn("Failed to dispatch push for user %s "
-                    self.failing_since = self.clock.time_msec()
+                            "(failing for %dms)."
-                    self.store.update_pusher_failing_since(
+                            "Trying again in %dms",
-                        self.app_id,
+                            self.user_name,
-                        self.pushkey,
+                            self.clock.time_msec() - self.failing_since,
-                        self.user_name,
+                            self.backoff_delay)
-                        self.failing_since
+                yield synapse.util.async.sleep(self.backoff_delay / 1000.0)
-                    )
+                self.backoff_delay *= 2
+                if self.backoff_delay > Pusher.MAX_BACKOFF:
-                if (self.failing_since and
+                    self.backoff_delay = Pusher.MAX_BACKOFF
-                   self.failing_since <
-                   self.clock.time_msec() - Pusher.GIVE_UP_AFTER):
-                    # we really only give up so that if the URL gets
-                    # fixed, we don't suddenly deliver a load
-                    # of old notifications.
-                    logger.warn("Giving up on a notification to user %s, "
-                                "pushkey %s",
-                                self.user_name, self.pushkey)
-                    self.backoff_delay = Pusher.INITIAL_BACKOFF
-                    self.last_token = chunk['end']
-                    self.store.update_pusher_last_token(
-                        self.app_id,
-                        self.pushkey,
-                        self.user_name,
-                        self.last_token
-                    )
-                    self.failing_since = None
-                    self.store.update_pusher_failing_since(
-                        self.app_id,
-                        self.pushkey,
-                        self.user_name,
-                        self.failing_since
-                    )
-                else:
-                    logger.warn("Failed to dispatch push for user %s "
-                                "(failing for %dms)."
-                                "Trying again in %dms",
-                                self.user_name,
-                                self.clock.time_msec() - self.failing_since,
-                                self.backoff_delay)
-                    yield synapse.util.async.sleep(self.backoff_delay / 1000.0)
-                    self.backoff_delay *= 2
-                    if self.backoff_delay > Pusher.MAX_BACKOFF:
-                        self.backoff_delay = Pusher.MAX_BACKOFF
    def stop(self):
        self.alive = False