diff --git a/changelog.d/6291.misc b/changelog.d/6291.misc
new file mode 100644
index 0000000000000000000000000000000000000000..7b1bb4b6794fa341dc92a8a0664e0b7b1f74319c
--- /dev/null
+++ b/changelog.d/6291.misc
@@ -0,0 +1 @@
+Change cache descriptors to always return deferreds.
diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py
index 7659eaeb42346a53a5e35ac64aae4a0111d2bb68..b60a604474bd7134da8d003f3d4779d601383a33 100644
--- a/synapse/util/async_helpers.py
+++ b/synapse/util/async_helpers.py
@@ -86,11 +86,12 @@ class ObservableDeferred(object):
 
         deferred.addCallbacks(callback, errback)
 
-    def observe(self):
+    def observe(self) -> defer.Deferred:
         """Observe the underlying deferred.
 
-        Can return either a deferred if the underlying deferred is still pending
-        (or has failed), or the actual value. Callers may need to use maybeDeferred.
+        This returns a brand new deferred that is resolved when the underlying
+        deferred is resolved. Interacting with the returned deferred does not
+        effect the underdlying deferred.
         """
         if not self._result:
             d = defer.Deferred()
@@ -105,7 +106,7 @@ class ObservableDeferred(object):
             return d
         else:
             success, res = self._result
-            return res if success else defer.fail(res)
+            return defer.succeed(res) if success else defer.fail(res)
 
     def observers(self):
         return self._observers
diff --git a/tests/storage/test__base.py b/tests/storage/test__base.py
index dd49a1452418d9375438d8a56e9ee83c99cf1ba2..9b81b536f546dedfa6bf5a19b2f7f2de60c641cd 100644
--- a/tests/storage/test__base.py
+++ b/tests/storage/test__base.py
@@ -197,7 +197,7 @@ class CacheDecoratorTestCase(unittest.TestCase):
 
         a.func.prefill(("foo",), ObservableDeferred(d))
 
-        self.assertEquals(a.func("foo"), d.result)
+        self.assertEquals(a.func("foo").result, d.result)
         self.assertEquals(callcount[0], 0)
 
     @defer.inlineCallbacks
diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py
index f907903511d09c4fdba67812ed61bc7d0df8aefb..39e360fe24004cc6c4de5f2c5cbae9e16e497a37 100644
--- a/tests/util/caches/test_descriptors.py
+++ b/tests/util/caches/test_descriptors.py
@@ -310,14 +310,14 @@ class DescriptorTestCase(unittest.TestCase):
 
         obj.mock.return_value = ["spam", "eggs"]
         r = obj.fn(1, 2)
-        self.assertEqual(r, ["spam", "eggs"])
+        self.assertEqual(r.result, ["spam", "eggs"])
         obj.mock.assert_called_once_with(1, 2)
         obj.mock.reset_mock()
 
         # a call with different params should call the mock again
         obj.mock.return_value = ["chips"]
         r = obj.fn(1, 3)
-        self.assertEqual(r, ["chips"])
+        self.assertEqual(r.result, ["chips"])
         obj.mock.assert_called_once_with(1, 3)
         obj.mock.reset_mock()