<?php
/**
 * External Links
 *
 * This file is part of Grav External Links plugin.
 *
 * Dual licensed under the MIT or GPL Version 3 licenses, see LICENSE.
 * http://benjamin-regler.de/license/
 */

namespace Grav\Plugin;

use Grav\Common\Utils;
use Grav\Common\Grav;

/**
 * External Links
 *
 * Helper class to add small icons to external and mailto links, informing
 * users the link will take them to a new site or open their email client.
 */
class ExternalLinks
{
    /**
     * Maximum number of chained page redirects followed by isExternalUrl()
     * before giving up (protects against redirect cycles).
     */
    const MAX_REDIRECTS = 10;

    /**
     * Current nesting depth of redirect resolution in isExternalUrl().
     *
     * @var int
     */
    private $redirectDepth = 0;

    /** -------------
     * Public methods
     * --------------
     */

    /**
     * Process contents i.e. apply the filter to every <a> element found
     * in the content.
     *
     * @param  string       $content The content to render.
     * @param  array|object $options Options to be passed to the renderer;
     *                               either a (nested or dot-keyed) array or
     *                               an object exposing get($name, $default).
     * @param  null|Page    $page    Null or an instance of \Grav\Common\Page.
     *
     * @return string                The rendered contents.
     */
    public function render($content, $options = [], $page = null)
    {
        // The "s" modifier lets "." span newlines, so anchors whose body is
        // wrapped over several lines are processed as well.
        $rendered = preg_replace_callback(
            '~<a(?:\s[^>]*)?>.*?</a>~is',
            function ($match) use ($options, $page) {
                return $this->processLink($match[0], $options, $page);
            },
            $content
        );

        // preg_replace_callback() yields null on a PCRE error (e.g. backtrack
        // limit reached); never replace the page content with null.
        return ($rendered === null) ? $content : $rendered;
    }

    /** -------------------------------
     * Private/protected helper methods
     * --------------------------------
     */

    /**
     * Decorate a single <a>...</a> fragment.
     *
     * @param  string       $html    The HTML of one anchor element.
     * @param  array|object $options Renderer options (see render()).
     * @param  null|Page    $page    Null or an instance of \Grav\Common\Page.
     *
     * @return string                The processed HTML, or $html unchanged
     *                               when the link must not be touched.
     */
    protected function processLink($html, $options, $page = null)
    {
        // Load PHP built-in DOMDocument class
        if (($dom = $this->loadDOMDocument($html)) === null) {
            return $html;
        }

        // Check that there is really a link tag
        $links = $dom->getElementsByTagName('a');
        if ($links->length == 0) {
            return $html;
        }
        $a = $links->item(0);

        // Process links with non-empty href attribute only
        $href = $a->getAttribute('href');
        if (strlen($href) == 0) {
            return $html;
        }

        // Get the classes of the <a> element
        $classes = $this->splitTokens($a->getAttribute('class'));

        // Exclude links with specific class from processing
        $exclude = (array) $this->getOption($options, 'exclude.classes', []);
        if ($exclude && array_intersect($exclude, $classes)) {
            return $html;
        }

        // Get domains to be seen as internal
        $domains = (array) $this->getOption($options, 'exclude.domains', []);

        if (stripos($href, 'mailto:') === 0) {
            // This is a mailto link (URI schemes are case-insensitive)
            $classes[] = 'mailto';
        } elseif ($url = $this->isExternalUrl($href, $domains, $page)) {
            // The link is external
            $classes[] = 'external-link';
            $a->setAttribute('href', $url);

            // Add target attribute, e.g. target="_blank"
            $target = $this->getOption($options, 'target');
            if ($target) {
                $a->setAttribute('target', $target);
            }

            // Build rel: keep existing tokens, optionally add "nofollow" and
            // always add "noopener noreferrer"
            $rel = $this->splitTokens($a->getAttribute('rel'));
            if ($this->getOption($options, 'no_follow')) {
                $rel[] = 'nofollow';
            }
            $rel[] = 'noopener';
            $rel[] = 'noreferrer';
            $a->setAttribute('rel', implode(' ', array_unique($rel)));

            // Classify the link by the images it contains
            $imgs = $a->getElementsByTagName('img');
            if ($imgs->length > 1) {
                // Multiple child images
                $classes[] = 'images';
            } elseif ($imgs->length == 1) {
                // Exactly one image: small ones (<= 32px) count as icons
                list($width, $height) = $this->getImageSize($imgs->item(0));
                $size = max($width, $height);
                $classes[] = ((0 < $size) && ($size <= 32)) ? 'icon' : 'image';
            } else {
                // No child images
                $classes[] = 'no-image';
            }

            // Add title (aka alert text)
            if ($this->getOption($options, 'title')) {
                $language = Grav::instance()['language'];
                $message = $language->translate(['PLUGINS.EXTERNAL_LINKS.TITLE_MESSAGE']);

                // Do not overwrite an existing title; store the message in
                // a data attribute instead
                $key = $a->hasAttribute('title') ? 'data-title' : 'title';
                $a->setAttribute($key, $message);
            }
        }

        // Set class attribute
        if (count($classes) && ($this->getOption($options, 'mode') === 'active')) {
            $a->setAttribute('class', implode(' ', array_unique($classes)));
        }

        // Save DOM document back to HTML representation
        return $this->saveDOMDocument($dom);
    }

    /**
     * Read an option from either an options object or a plain array.
     *
     * Objects are queried through their get($name, $default) method; arrays
     * are looked up by the literal key first and then by walking the
     * dot-separated path (e.g. "exclude.classes").
     *
     * @param  array|object $options The options container.
     * @param  string       $name    Dot-notated option name.
     * @param  mixed        $default Value returned when the option is unset.
     *
     * @return mixed                 The option value or $default.
     */
    protected function getOption($options, $name, $default = null)
    {
        if (is_object($options) && is_callable([$options, 'get'])) {
            return $options->get($name, $default);
        }

        if (!is_array($options) && !($options instanceof \ArrayAccess)) {
            return $default;
        }

        if (isset($options[$name])) {
            return $options[$name];
        }

        $value = $options;
        foreach (explode('.', $name) as $segment) {
            $traversable = is_array($value) || ($value instanceof \ArrayAccess);
            if (!$traversable || !isset($value[$segment])) {
                return $default;
            }
            $value = $value[$segment];
        }

        return $value;
    }

    /**
     * Split a whitespace separated token list (class, rel, ...) into an
     * array of non-empty tokens.
     *
     * @param  string $value The raw attribute value.
     *
     * @return array         List of tokens in their original order.
     */
    protected function splitTokens($value)
    {
        $tokens = preg_split('/\s+/', (string) $value, -1, PREG_SPLIT_NO_EMPTY);

        return is_array($tokens) ? $tokens : [];
    }

    /**
     * Test if a URL is external
     *
     * @param  string    $url     The URL to test.
     * @param  array     $domains An array of domains to be seen as internal;
     *                            "*" acts as a wildcard.
     * @param  null|Page $page    Null or an instance of \Grav\Common\Page.
     *
     * @return mixed              Returns the (possibly normalized) URL as a
     *                            string if it is external, false otherwise.
     */
    protected function isExternalUrl($url, $domains = [], $page = null)
    {
        static $allowed_protocols;
        static $patterns = [];

        $grav = Grav::instance();

        /** @var Config $config */
        $config = $grav['config'];

        /** @var Page $page */
        $page = $page ?: $grav['page'];

        // Statically store allowed protocols
        if (!isset($allowed_protocols)) {
            $schemes = (array) $config->get('plugins.external_links.links.schemes', ['http', 'https']);
            $allowed_protocols = array_flip(array_map('strtolower', $schemes));
        }

        // Build (and cache per domain list) the PCRE pattern of internal
        // domains. $domains itself is left untouched so it can be handed to
        // the recursive call below without being quoted twice.
        $internal = array_merge((array) $domains, [$grav['base_url_absolute']]);
        $cacheKey = implode("\n", $internal);
        if (!array_key_exists($cacheKey, $patterns)) {
            $quoted = [];
            foreach ($internal as $domain) {
                $domain = (string) $domain;
                // An empty alternative would match every URL
                if ($domain !== '') {
                    $quoted[] = preg_quote($domain, '#');
                }
            }

            // After quoting, a wildcard "*" reads "\*"; turn it into ".*?"
            $patterns[$cacheKey] = $quoted
                ? '#(' . str_replace('\*', '.*?', implode('|', $quoted)) . ')#i'
                : false;
        }
        $pattern = $patterns[$cacheKey];

        $external = false;

        // URLs matching an internal domain are never external
        if ($pattern !== false && preg_match($pattern, $url)) {
            return $external;
        }

        // Check if URL is external by extracting colon position
        $colonpos = strpos($url, ':');
        if ($colonpos !== false && $colonpos > 0) {
            // We found a colon, possibly a protocol. Verify.
            $protocol = strtolower(substr($url, 0, $colonpos));
            if (isset($allowed_protocols[$protocol])) {
                // The protocol turns out be an allowed protocol
                $external = $url;
            }

            return $external;
        }

        if ($config->get('plugins.external_links.links.www')) {
            // Remove possible path duplicate (current route prepended to URL)
            $href = $url;
            if ($page) {
                $route = $grav['base_url'] . $page->route();
                if (Utils::startsWith($url, $route)) {
                    $href = ltrim(mb_substr($url, mb_strlen($route)), '/');
                }
            }

            // We found an url without protocol, but with starting 'www' (sub-)domain
            if (Utils::startsWith($url, 'www.')) {
                $external = 'http://' . $url;
            } elseif (Utils::startsWith($href, 'www.')) {
                $external = 'http://' . $href;
            }
        }

        if ($config->get('plugins.external_links.links.redirects')
            && $this->redirectDepth < self::MAX_REDIRECTS
        ) {
            $targetPage = $grav['pages']->find($url);
            if ($targetPage && $targetPage->redirect()) {
                // Follow the redirect, guarding against redirect cycles
                $this->redirectDepth++;
                try {
                    $external = $this->isExternalUrl($targetPage->redirect(), $domains, $page);
                } finally {
                    $this->redirectDepth--;
                }
            }
        }

        // The URL string if a valid protocol, a leading 'www.' or an external
        // redirect target was found; false otherwise
        return $external;
    }

    /**
     * Determine the size of an image
     *
     * Precedence (lowest to highest): size read from the image file,
     * width/height attributes, width/height declared in the style attribute.
     *
     * @param  DOMNode $imgNode The image already parsed as a DOMNode
     * @param  integer $limit   Load first $limit KB of remote image
     *
     * @return array            Return the dimension of the image of the
     *                          format array(width, height); further elements
     *                          reported by getimagesize() are kept if present
     */
    protected function getImageSize($imgNode, $limit = 32)
    {
        // Hold units (assume standard font with 16px base pixel size)
        // Calculations based on pixels
        $units = [
            'px' => 1,            /* base unit: pixel */
            'pt' => 16 / 12,      /* 12 point = 16 pixel = 1/72 inch */
            'pc' => 16,           /* 1 pica = 16 pixel = 12 points */
            'in' => 96,           /* 1 inch = 96 pixel = 2.54 centimeters */
            'mm' => 96 / 25.4,    /* 1 millimeter = 96 pixel / 1 inch [mm] */
            'cm' => 96 / 2.54,    /* 1 centimeter = 96 pixel / 1 inch [cm] */
            'm' => 96 / 0.0254,   /* 1 meter = 96 pixel / 1 inch [m] */
            'ex' => 7,            /* 1 ex = 7 pixel */
            'em' => 16,           /* 1 em = 16 pixel */
            'rem' => 16,          /* 1 rem = 16 pixel */
            '%' => 16 / 100,      /* 100 percent = 16 pixel (not reachable via
                                     the style parser: it only accepts
                                     alphabetic units) */
        ];

        // Fallback dimensions when nothing can be determined
        $size = [0, 0];

        // Determine image dimensions based on "src" attribute
        $src = $imgNode->hasAttribute('src') ? $imgNode->getAttribute('src') : '';
        if ($src !== '') {
            // Simple check if the URL is internal i.e. check if path exists
            $root = isset($_SERVER['DOCUMENT_ROOT']) ? $_SERVER['DOCUMENT_ROOT'] : '';
            $path = $root . $src;
            if (realpath($path) && is_file($path)) {
                $info = @getimagesize($path);
            } else {
                // The URL is external; try to load it (default: 32 KB)
                $info = $this->getRemoteImageSize($src, $limit * 1024);
            }

            // getimagesize() reports failure as false; keep the fallback then
            if (is_array($info) && isset($info[0], $info[1])) {
                $size = $info;
            }
        }

        // Width and height attributes override the size of the image file
        $width = (float) ($imgNode->hasAttribute('width') ? $imgNode->getAttribute('width') : $size[0]);
        $height = (float) ($imgNode->hasAttribute('height') ? $imgNode->getAttribute('height') : $size[1]);

        // Width and height from the style attribute override everything else
        if ($imgNode->hasAttribute('style')) {
            $style = $imgNode->getAttribute('style');

            $styleWidth = $this->getStyleLength($style, 'width', $units);
            if ($styleWidth !== null) {
                $width = $styleWidth;
            }

            $styleHeight = $this->getStyleLength($style, 'height', $units);
            if ($styleHeight !== null) {
                $height = $styleHeight;
            }
        }

        // Update width and height
        $size[0] = $width;
        $size[1] = $height;

        // Return image dimensions
        return $size;
    }

    /**
     * Extract a length (in pixels) for a CSS property from an inline style.
     *
     * Only the exact property is matched ("width" does not match
     * "max-width"), decimal values are supported and the unit is looked up
     * case-insensitively. Values with an unknown unit are returned
     * unconverted.
     *
     * @param  string $style    The content of a style attribute.
     * @param  string $property The CSS property name, e.g. "width".
     * @param  array  $units    Map of lowercase unit => pixels per unit.
     *
     * @return float|null       The length in pixels or null if the property
     *                          is not declared with a "<number><unit>" value.
     */
    protected function getStyleLength($style, $property, array $units)
    {
        $regex = '~(?<![\w-])' . preg_quote($property, '~') . '\s*:\s*(\d*\.?\d+)([a-z]+)~i';
        if (!preg_match($regex, $style, $matches)) {
            return null;
        }

        $value = (float) $matches[1];
        $unit = strtolower($matches[2]);

        return isset($units[$unit]) ? $value * $units[$unit] : $value;
    }

    /**
     * Get the size of a remote image
     *
     * Downloads (at most the first $limit bytes of) the image into a
     * temporary file using either the URL stream wrappers or cURL and
     * inspects it with getimagesize().
     *
     * @param  string  $uri   The URI of the remote image
     * @param  integer $limit Load first $limit bytes of remote image;
     *                        values <= 0 load the whole image
     *
     * @return mixed          Returns an array with up to 7 elements (see
     *                        getimagesize()), array(0, 0, ...) if nothing
     *                        was downloaded, or false on failure
     */
    protected function getRemoteImageSize($uri, $limit = -1)
    {
        // Create temporary file to store data from $uri
        $tmp_name = tempnam(sys_get_temp_dir(), uniqid('ris'));
        if ($tmp_name === false) {
            return false;
        }

        $info = [0, 0, 'width="0" height="0"'];

        try {
            if (ini_get('allow_url_fopen')) {
                // Use stream copy
                $options = [];
                if ($limit > 0) {
                    // Request the first $limit bytes only (range is inclusive)
                    $options['http']['header'] = ['Range: bytes=0-' . ($limit - 1)];
                }

                $context = stream_context_create($options);
                @copy($uri, $tmp_name, $context);
            } elseif (function_exists('curl_version')) {
                // Use Curl
                $tmp = fopen($tmp_name, 'wb');
                if ($tmp === false) {
                    return false;
                }

                $curl = curl_init();
                curl_setopt_array($curl, [
                    CURLOPT_HEADER => false,         // Don't return headers
                    CURLOPT_FOLLOWLOCATION => true,  // Follow redirects
                    CURLOPT_AUTOREFERER => true,     // Set referrer on redirect
                    CURLOPT_CONNECTTIMEOUT => 120,   // Timeout on connect
                    CURLOPT_TIMEOUT => 120,          // Timeout on response
                    CURLOPT_MAXREDIRS => 10,         // Stop after 10 redirects
                    CURLOPT_ENCODING => '',          // Handle all encodings
                    CURLOPT_FILE => $tmp,            // Curl file
                    CURLOPT_URL => $uri,             // URI
                ]);

                if ($limit > 0) {
                    // Request the first $limit bytes only (range is inclusive)
                    curl_setopt($curl, CURLOPT_RANGE, '0-' . ($limit - 1));

                    // Servers may ignore the range: monitor the progress in
                    // small steps and abort once more data is received
                    curl_setopt($curl, CURLOPT_BUFFERSIZE, 512);
                    curl_setopt($curl, CURLOPT_NOPROGRESS, false);
                    curl_setopt($curl, CURLOPT_PROGRESSFUNCTION, function () use ($limit) {
                        // Since PHP 5.5 the cURL handle is passed as first
                        // argument, followed by (download total, downloaded,
                        // upload total, uploaded)
                        $args = func_get_args();
                        if (count($args) > 4) {
                            array_shift($args);
                        }
                        $downloaded = isset($args[1]) ? $args[1] : 0;

                        // Returning non-zero breaks the connection!
                        return ($downloaded > $limit) ? 1 : 0;
                    });
                }

                // Execute Curl
                curl_exec($curl);
                curl_close($curl);
                fclose($tmp);
            }

            // Retrieve image information (bypass a possibly stale stat cache)
            clearstatcache(true, $tmp_name);
            if (filesize($tmp_name) > 0) {
                $info = @getimagesize($tmp_name);
            }
        } finally {
            // Delete temporary file, whatever happened above
            if (is_file($tmp_name)) {
                unlink($tmp_name);
            }
        }

        return $info;
    }

    /**
     * Load contents into PHP built-in DOMDocument object
     *
     * Two Really good resources to handle DOMDocument with HTML(5)
     * correctly.
     *
     * @see http://stackoverflow.com/questions/3577641/how-do-you-parse-and-process-html-xml-in-php
     * @see http://stackoverflow.com/questions/7997936/how-do-you-format-dom-structures-in-php
     *
     * @param  string $content The content to be loaded into the
     *                         DOMDocument object
     *
     * @return DOMDocument|null DOMDocument object of content or null if
     *                          the content is empty or could not be parsed
     */
    protected function loadDOMDocument($content)
    {
        // DOMDocument::loadHTML() rejects empty input
        if (trim((string) $content) === '') {
            return null;
        }

        // Collect libxml errors internally instead of emitting warnings for
        // invalid HTML; the previous setting is restored afterwards
        $previous = libxml_use_internal_errors(true);

        // Parse content using PHP built-in DOMDocument class
        $document = new \DOMDocument('1.0', 'UTF-8');

        // Encode all non-ASCII characters as numeric entities so the HTML
        // parser does not misinterpret UTF-8 input
        $content = mb_encode_numericentity($content, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');

        $loaded = $document->loadHTML($content);

        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        // Do nothing, if DOM is empty
        if (!$loaded || is_null($document->documentElement)) {
            return null;
        }

        return $document;
    }

    /**
     * Save contents of PHP built-in DOMDocument object as HTML5
     *
     * @param  DOMDocument $document DOMDocument object with nodes
     *
     * @return string                The outputted DOM document as HTML(5)
     *                               compliant string (children of <body>);
     *                               empty if the document has no <body>
     */
    protected function saveDOMDocument($document)
    {
        // Pretty print output
        $document->preserveWhiteSpace = false;
        $document->formatOutput = true;

        // Transform DOM document to valid HTML(5)
        $content = '';

        $body = $document->getElementsByTagName('body')->item(0);
        if ($body === null) {
            return $content;
        }

        foreach ($body->childNodes as $node) {
            // Expand empty tags (e.g. <br/> to <br></br>)
            if (($html = $document->saveXML($node, LIBXML_NOEMPTYTAG)) !== false) {
                $content .= $html;
            }
        }

        // Fix formatting for self-closing tags in HTML5 and removing
        // encapsulated (uncommented) CDATA blocks in <script> and
        // <style> tags
        $regex = [
            '~' . preg_quote('<![CDATA[', '~') . '~' => '',
            '~' . preg_quote(']]>', '~') . '~' => '',
            '~></(?:area|base(?:font)?|br|col|command|embed|frame|hr|img|input|keygen|link|meta|param|source|track|wbr)>~' => ' />',
        ];

        // Make XML HTML5 compliant
        $content = preg_replace(array_keys($regex), $regex, $content);

        return $content;
    }
}