diff --git a/src/plugins/irc-events/link.js b/src/plugins/irc-events/link.js index 9029716e..6d0bede6 100644 --- a/src/plugins/irc-events/link.js +++ b/src/plugins/irc-events/link.js @@ -2,8 +2,7 @@ const cheerio = require("cheerio"); const request = require("request"); -const url = require("url"); -const URI = require("urijs"); +const URL = require("url").URL; const mime = require("mime-types"); const Helper = require("../../helper"); const cleanIrcMessage = require("../../../client/js/libs/handlebars/ircmessageparser/cleanIrcMessage"); @@ -32,26 +31,36 @@ module.exports = function(client, chan, msg) { // Remove all IRC formatting characters before searching for links const cleanText = cleanIrcMessage(msg.text); - // We will only try to prefetch http(s) links - const links = findLinks(cleanText).filter((w) => isValidLink(w.link)); + msg.previews = findLinks(cleanText).reduce((cleanLinks, link) => { + const url = normalizeURL(link.link); - if (links.length === 0) { - return; - } + // If the URL is invalid and cannot be normalized, don't fetch it + if (url === null) { + return cleanLinks; + } - msg.previews = Array.from(new Set( // Remove duplicate links - links.map((link) => link.link) - )).map((link) => ({ - type: "loading", - head: "", - body: "", - thumb: "", - link: link, - shown: true, - })).slice(0, 5); // Only preview the first 5 URLs in message to avoid abuse + // If there are too many urls in this message, only fetch first X valid links + if (cleanLinks.length > 4) { + return cleanLinks; + } - msg.previews.forEach((preview) => { - fetch(normalizeURL(preview.link), { + // Do not fetch duplicate links twice + if (cleanLinks.some((l) => l.link === link.link)) { + return cleanLinks; + } + + const preview = { + type: "loading", + head: "", + body: "", + thumb: "", + link: link.link, // Send original matched link to the client + shown: true, + }; + + cleanLinks.push(preview); + + fetch(url, { accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", language: client.language, }, function(res, err) { @@ -68,7 +77,9 @@ module.exports = function(client, chan, msg) { parse(msg, preview, res, client); }); - }); + + return cleanLinks; + }, []); }; function parseHtml(preview, res, client) { @@ -93,18 +104,14 @@ function parseHtml(preview, res, client) { || $('link[rel="image_src"]').attr("href") || ""; + // Make sure thumbnail is a valid and absolute url if (preview.thumb.length) { - preview.thumb = url.resolve(preview.link, preview.thumb); - } - - // Make sure thumbnail is a valid url - if (!isValidLink(preview.thumb)) { - preview.thumb = ""; + preview.thumb = normalizeURL(preview.thumb, preview.link) || ""; } // Verify that thumbnail pic exists and is under allowed size if (preview.thumb.length) { - fetch(normalizeURL(preview.thumb), {language: client.language}, (resThumb) => { + fetch(preview.thumb, {language: client.language}, (resThumb) => { if (resThumb === null || !(/^image\/.+/.test(resThumb.type)) || resThumb.size > (Helper.config.prefetchMaxImageSize * 1024)) { @@ -134,16 +141,19 @@ function parseHtmlMedia($, preview, res, client) { if (mediaTypeRegex.test(mimeType)) { // If we match a clean video or audio tag, parse that as a preview instead - const mediaUrl = $($(`meta[property="og:${type}"]`).get(i)).attr("content"); + let mediaUrl = $($(`meta[property="og:${type}"]`).get(i)).attr("content"); // Make sure media is a valid url - if (!mediaUrl.startsWith("https://")) { + mediaUrl = normalizeURL(mediaUrl, preview.link, true); + + // Make sure media is a valid url + if (!mediaUrl) { return; } foundMedia = true; - fetch(normalizeURL(mediaUrl), { + fetch(mediaUrl, { accept: type === "video" ? "video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5" : "audio/webm, audio/ogg, audio/wav, audio/*;q=0.9, application/ogg;q=0.7, video/*;q=0.6; */*;q=0.5", @@ -360,27 +370,31 @@ function fetch(uri, headers, cb) { }); } -function normalizeURL(header) { - return URI(header).normalize().toString(); -} - -function isValidLink(link) { +function normalizeURL(link, baseLink, disallowHttp = false) { try { - const uri = URI(link); - const protocol = uri.protocol(); + const url = new URL(link, baseLink); // Only fetch http and https links - if (protocol !== "http" && protocol !== "https") { - return false; + if (url.protocol !== "http:" && url.protocol !== "https:") { + return null; + } + + if (disallowHttp && url.protocol === "http:") { + return null; } // Do not fetch links without hostname or ones that contain authorization - if (!uri.hostname() || uri.username() || uri.password()) { - return false; + if (!url.hostname || url.username || url.password) { + return null; } + + // Drop hash from the url, if any + url.hash = ""; + + return url.toString(); } catch (e) { - return false; + // if an exception was thrown, the url is not valid } - return true; + return null; } diff --git a/test/plugins/link.js b/test/plugins/link.js index 82278f41..121f7048 100644 --- a/test/plugins/link.js +++ b/test/plugins/link.js @@ -385,6 +385,28 @@ describe("Link plugin", function() { }); }); + it("should de-duplicate links", function(done) { + const message = this.irc.createMessage({ + text: "//localhost:9002 http://localhost:9002 http://localhost:9002", + }); + + link(this.irc, this.network.channels[0], message); + + expect(message.previews).to.deep.equal([{ + type: "loading", + head: "", + body: "", + thumb: "", + link: "http://localhost:9002", + shown: true, + }]); + + this.irc.once("msg:preview", function(data) { + expect(data.preview.link).to.equal("http://localhost:9002"); + done(); + }); + }); + it("should not try to fetch links with wrong protocol", function() { const message = this.irc.createMessage({ text: "ssh://example.com ftp://example.com irc://example.com http:////////example.com",