From 24c16fc3494ce91ba97a06f5d42cdea1c4c38c93 Mon Sep 17 00:00:00 2001
From: Marcin Bachry <hegel666@gmail.com>
Date: Wed, 14 Dec 2016 22:38:18 +0100
Subject: [PATCH] Fix crash in URL preview when title/heading tag has no text

Signed-off-by: Marcin Bachry <hegel666@gmail.com>
---
 synapse/rest/media/v1/preview_url_resource.py |  5 +-
 tests/test_preview.py                         | 50 +++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index 6a5a57102f..99760d622f 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -381,7 +381,10 @@ def _calc_og(tree, media_uri):
     if 'og:title' not in og:
         # do some basic spidering of the HTML
         title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]")
-        og['og:title'] = title[0].text.strip() if title else None
+        if title and title[0].text is not None:
+            og['og:title'] = title[0].text.strip()
+        else:
+            og['og:title'] = None
 
     if 'og:image' not in og:
         # TODO: extract a favicon failing all else
diff --git a/tests/test_preview.py b/tests/test_preview.py
index ffa52e5dd4..5bd36c74aa 100644
--- a/tests/test_preview.py
+++ b/tests/test_preview.py
@@ -215,3 +215,53 @@ class PreviewUrlTestCase(unittest.TestCase):
             u"og:title": u"Foo",
             u"og:description": u"Some text."
         })
+
+    def test_missing_title(self):
+        html = u"""
+        <html>
+        <body>
+        Some text.
+        </body>
+        </html>
+        """
+        # No <title> or <h1>-<h3> present, so og:title is expected to be None.
+        og = decode_and_calc_og(html, "http://example.com/test.html")
+
+        self.assertEqual(og, {
+            u"og:title": None,
+            u"og:description": u"Some text."
+        })
+
+    def test_h1_as_title(self):
+        html = u"""
+        <html>
+        <meta property="og:description" content="Some text."/>
+        <body>
+        <h1>Title</h1>
+        </body>
+        </html>
+        """
+        # No <title> element: the <h1> text is expected to become og:title.
+        og = decode_and_calc_og(html, "http://example.com/test.html")
+
+        self.assertEqual(og, {
+            u"og:title": u"Title",
+            u"og:description": u"Some text."
+        })
+
+    def test_missing_title_and_broken_h1(self):
+        html = u"""
+        <html>
+        <body>
+        <h1><a href="foo"/></h1>
+        Some text.
+        </body>
+        </html>
+        """
+        # Regression case: the <h1> holds only a child element, no text of its own.
+        og = decode_and_calc_og(html, "http://example.com/test.html")
+
+        self.assertEqual(og, {
+            u"og:title": None,
+            u"og:description": u"Some text."
+        })
-- 
GitLab