diff --git a/src/Package/Updater.php b/src/Package/Updater.php index 867614cdc..04f54f4b7 100644 --- a/src/Package/Updater.php +++ b/src/Package/Updater.php @@ -14,6 +14,8 @@ use App\Entity\Dependent; use App\Entity\PackageFreezeReason; +use App\HtmlSanitizer\ReadmeImageSanitizer; +use App\HtmlSanitizer\ReadmeLinkSanitizer; use App\Util\HttpDownloaderOptionsFactory; use cebe\markdown\GithubMarkdown; use Composer\Package\AliasPackage; @@ -39,7 +41,6 @@ use Doctrine\DBAL\Connection; use App\Service\VersionCache; use Composer\Package\CompletePackageInterface; -use DOMElement; use Symfony\Component\HtmlSanitizer\HtmlSanitizer; use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; use Symfony\Component\Mailer\MailerInterface; @@ -758,6 +759,9 @@ private function prepareReadme(string $readme, ?string $host = null, ?string $ow ->allowAttribute('align', ['th', 'td', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']) ->allowAttribute('class', '*') ->allowLinkSchemes(['https', 'http', 'mailto']) + ->forceAttribute('a', 'rel', 'nofollow noindex noopener external ugc') + ->withAttributeSanitizer(new ReadmeLinkSanitizer($host, $owner.'/'.$repo, $basePath)) + ->withAttributeSanitizer(new ReadmeImageSanitizer($host, $owner.'/'.$repo, $basePath)) ->allowRelativeLinks() ->allowRelativeMedias() ->withMaxInputLength(10_000_000); @@ -765,69 +769,9 @@ private function prepareReadme(string $readme, ?string $host = null, ?string $ow $sanitizer = new HtmlSanitizer($config); $readme = $sanitizer->sanitizeFor('body', $readme); - libxml_use_internal_errors(true); - $dom = new \DOMDocument(); - $dom->loadHTML('' . $readme); - - // Links can not be trusted, mark them nofollow and convert relative to absolute links - $links = $dom->getElementsByTagName('a'); - /** @var DOMElement $link */ - foreach ($links as $link) { - $link->setAttribute('rel', 'nofollow noindex noopener external ugc'); - if ('#' === substr($link->getAttribute('href'), 0, 1)) { - $link->setAttribute('href', '#user-content-'.substr($link->getAttribute('href'), 1)); - } elseif ('mailto:' === substr($link->getAttribute('href'), 0, 7)) { - // do nothing - } elseif ($host === 'github.com' && !str_contains($link->getAttribute('href'), '//')) { - $link->setAttribute( - 'href', - 'https://github.com/'.$owner.'/'.$repo.'/blob/HEAD/'.$basePath.$link->getAttribute('href') - ); - } elseif ($host === 'gitlab.com' && !str_contains($link->getAttribute('href'), '//')) { - $link->setAttribute( - 'href', - 'https://gitlab.com/'.$owner.'/'.$repo.'/-/blob/HEAD/'.$basePath.$link->getAttribute('href') - ); - } - if ($link->getAttribute('target') !== '' && $link->getAttribute('target') !== '_blank') { - $link->setAttribute('target', '_blank'); - } - } - - // embed images of selected hosts by converting relative links to accessible URLs - if (in_array($host, ['github.com', 'gitlab.com', 'bitbucket.org'], true)) { - $images = $dom->getElementsByTagName('img'); - /** @var DOMElement $img */ - foreach ($images as $img) { - if (!str_contains($img->getAttribute('src'), '//')) { - $imgSrc = match ($host) { - 'github.com' => 'https://raw.github.com/'.$owner.'/'.$repo.'/HEAD/'.$basePath.$img->getAttribute('src'), - 'gitlab.com' => 'https://gitlab.com/'.$owner.'/'.$repo.'/-/raw/HEAD/'.$basePath.$img->getAttribute('src'), - 'bitbucket.org' => 'https://bitbucket.org/'.$owner.'/'.$repo.'/raw/HEAD/'.$basePath.$img->getAttribute('src'), - }; - $img->setAttribute('src', $imgSrc); - } - } - } - // remove first page element if it's a

or

, because it's usually // the project name or the `README` string which we don't need - $first = $dom->getElementsByTagName('body')->item(0); - if ($first) { - $first = $first->childNodes->item(0); - } - - if ($first && ('h1' === $first->nodeName || 'h2' === $first->nodeName)) { - $first->parentNode?->removeChild($first); - } - - $readme = $dom->saveHTML(); - Assert::string($readme); - $readme = substr($readme, strpos($readme, '') + 6); - $readme = substr($readme, 0, strrpos($readme, '') ?: PHP_INT_MAX); - - libxml_use_internal_errors(false); - libxml_clear_errors(); + $readme = Preg::replace('{^<(h[12])>.*}', '', $readme); return str_replace("\r\n", "\n", $readme); } diff --git a/tests/Package/UpdaterTest.php b/tests/Package/UpdaterTest.php index c0e6789cf..799898671 100644 --- a/tests/Package/UpdaterTest.php +++ b/tests/Package/UpdaterTest.php @@ -205,8 +205,8 @@ public function testUnderstandsDifferentFileNames(): void public function testReadmeParsing(): void { $readme = <<<'SOURCE' -

Fork CMS

-

Build Status +

PROJECT NAME

Fork CMS

+

Build Status Latest Stable Version License Code Coverage @@ -218,7 +218,7 @@ public function testReadmeParsing(): void

  • Run composer create-project forkcms/forkcms . in your document root.
  • Browse to your website
  • Follow the steps on-screen
  • -
  • Have fun!
  • +
  • Have fun!
  • Dependencies

    Remark: If you are using GIT instead of composer create-project or the zip-file from http://www.fork-cms.com, you @@ -259,6 +259,7 @@ public function testReadmeParsing(): void

    Community

    Join our Slack channel Join our Slack Channel!

    The Fork CMS team

    +
    SOURCE; @@ -267,20 +268,20 @@ public function testReadmeParsing(): void $readme = $reflMethod->invoke($this->updater, $readme, 'github.com', 'foo', 'bar'); self::assertSame(<<<'EXPECTED' -

    Fork CMS

    -

    Build Status -Latest Stable Version -License -Code Coverage -Documentation Status -huntr.dev | the place to protect open source

    +

    Fork CMS

    +

    Build Status +Latest Stable Version +License +Code Coverage +Documentation Status +huntr.dev | the place to protect open source

    Installation

      -
    1. ⚠️ Test Emoji Make sure you have composer installed.
    2. +
    3. ⚠️ Test Emoji Make sure you have composer installed.
    4. Run composer create-project forkcms/forkcms . in your document root.
    5. Browse to your website
    6. Follow the steps on-screen
    7. -
    8. Have fun!
    9. +
    10. Have fun!

    Dependencies

    Remark: If you are using GIT instead of composer create-project or the zip-file from http://www.fork-cms.com, you @@ -289,13 +290,13 @@ public function testReadmeParsing(): void

    composer install -o
     

    Security

    -

    If you discover any security-related issues, please email core@fork-cms.com instead of using the issue tracker. +

    If you discover any security-related issues, please email core@fork-cms.com instead of using the issue tracker. HTML is allowed in translations because you sometimes need it. Any reports regarding this will not be accepted as a security issue. Owners of a website can narrow down who can add/edit translation strings using the group permissions.

    Bugs

    If you encounter any bugs, please create an issue on Github. -If you're stuck or would like to discuss Fork CMS: Join our Slack channel Join our Slack Channel!

    +If you're stuck or would like to discuss Fork CMS: Join our Slack channel Join our Slack Channel!

    Running the tests

    -

    We use phpunit as a test framework. It's installed when using composer install. +

    We use phpunit as a test framework. It's installed when using composer install. To be able to run them, make sure you have a database with the same credentials as your normal database and with the name suffixed with _test.

    Because we support multiple php versions it gave some issues. Therefore we use the bridge from symfony.

    @@ -303,12 +304,12 @@ public function testReadmeParsing(): void
    composer test
     

    Running only the unit, functional, or the installer tests

    -
     composer test -- --testsuite=functional
    - composer test -- --testsuite=unit
    - composer test -- --testsuite=installer
    +
     composer test -- --testsuite=functional
    + composer test -- --testsuite=unit
    + composer test -- --testsuite=installer
     

    If you want to run all the tests except the ones from the installer use

    -
    composer test -- --exclude-group=installer
    +
    composer test -- --exclude-group=installer
     

    Styling the backend

    The backend uses Bootstrap in combination with Sass. To make changes, you should make @@ -317,8 +318,9 @@ public function testReadmeParsing(): void

    We use yarn to install our dependencies. For now we have a gulp-script that moves everything to the correct directories. So if you change the dependencies, make sure you run gulp build.

    Community

    -

    Join our Slack channel Join our Slack Channel!

    +

    Join our Slack channel Join our Slack Channel!

    The Fork CMS team

    + EXPECTED