Skip to content

Commit

Permalink
Web.fetch: handle representative h-card URL with and without trailing…
Browse files Browse the repository at this point in the history
… slash
  • Loading branch information
snarfed committed Oct 25, 2024
1 parent 6e59248 commit 05b6e33
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
13 changes: 13 additions & 0 deletions tests/test_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -2598,6 +2598,19 @@ def test_verify_both_work(self, mock_get, _):
],
})

def test_verify_representative_hcard_url_without_trailing_slash(self, mock_get, _):
    """Verify a homepage whose h-card u-url omits the trailing slash.

    The mocked homepage response is created with url ``https://user.com/``
    while the h-card's ``u-url`` is ``https://user.com`` (no slash). The
    expected actor passed to ``_test_verify`` carries the slash-less URL.
    NOTE(review): the two ``True`` arguments presumably mean both verification
    checks are expected to succeed; confirm against ``_test_verify``.
    """
    homepage_html = """
<html><body class="h-card">
<a class="u-url" href="https://user.com"></a>
</body></html>"""
    homepage = requests_response(homepage_html, url='https://user.com/')

    # First mocked GET returns FULL_REDIR, second returns the homepage.
    mock_get.side_effect = [FULL_REDIR, homepage]

    expected_actor = {
        'objectType': 'person',
        'url': 'https://user.com',
    }
    self._test_verify(True, True, expected_actor)

def test_verify_www_redirect(self, mock_get, _):
www_user = self.make_user('www.user.com', cls=Web)

Expand Down
9 changes: 6 additions & 3 deletions web.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ def fetch(cls, obj, gateway=False, check_backlink=False,
else None)
if metaformats is None:
# default to only for homepages
metaformats = urlparse(url).path in ('', '/')
metaformats = is_homepage

try:
parsed = util.fetch_mf2(url, gateway=gateway, metaformats=metaformats,
Expand All @@ -515,9 +515,12 @@ def fetch(cls, obj, gateway=False, check_backlink=False,
# find mf2 item
if is_homepage:
logger.info(f"{url} is user's web url")
entry = mf2util.representative_hcard(parsed, parsed['url'])
parsed_url = (parsed['url'] or '').rstrip('/')
# try both with and without trailing slash
entry = (mf2util.representative_hcard(parsed, parsed_url)
or mf2util.representative_hcard(parsed, parsed_url + '/'))
if not entry:
error(f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {parsed['url']}")
error(f"Couldn't find a representative h-card (http://microformats.org/wiki/representative-h-card-parsing) on {parsed['url']}")
logger.info(f'Found representative h-card')
else:
entry = mf2util.find_first_entry(parsed, ['h-entry'])
Expand Down

0 comments on commit 05b6e33

Please sign in to comment.