From 58ec2768ec794fc29c8a56ac0433fb49952a2b42 Mon Sep 17 00:00:00 2001 From: Pavel Djundik Date: Thu, 26 Apr 2018 19:03:33 +0300 Subject: [PATCH 1/6] Use linkify-it --- .../handlebars/ircmessageparser/findLinks.js | 67 +++------ package.json | 2 + .../handlebars/ircmessageparser/findLinks.js | 138 ++++++++++++++++++ test/client/js/libs/handlebars/parse.js | 2 +- yarn.lock | 14 ++ 5 files changed, 172 insertions(+), 51 deletions(-) diff --git a/client/js/libs/handlebars/ircmessageparser/findLinks.js b/client/js/libs/handlebars/ircmessageparser/findLinks.js index 48726dc3..2c162f50 100644 --- a/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -1,13 +1,12 @@ "use strict"; -const URI = require("urijs"); +const linkify = require("linkify-it")() + .tlds(require("tlds")) + .tlds("onion", true); -// Known schemes to detect in a text. If a text contains `foo...bar://foo.com`, -// the parsed scheme should be `foo...bar` but if it contains -// `foo...http://foo.com`, we assume the scheme to extract will be `http`. +// Known schemes to detect in text const commonSchemes = [ - "http", "https", - "ftp", "sftp", + "sftp", "smb", "file", "irc", "ircs", "svn", "git", @@ -15,54 +14,22 @@ const commonSchemes = [ "svn+ssh", "ssh", ]; +for (const schema of commonSchemes) { + linkify.add(schema + ":", "http:"); +} + function findLinks(text) { - const result = []; + const matches = linkify.match(text); - // URI.withinString() identifies URIs within text, e.g. to translate them to - // -Tags. - // See https://medialize.github.io/URI.js/docs.html#static-withinString - // In our case, we store each URI encountered in a result array. - try { - URI.withinString(text, function(url, start, end) { - let parsedScheme; - - try { - // Extract the scheme of the URL detected, if there is one - parsedScheme = URI(url).scheme().toLowerCase(); - } catch (e) { - // URI may throw an exception for malformed urls, - // as to why withinString finds these in the first place is a mystery - return; - } - - // Check if the scheme of the detected URL matches a common one above. - // In a URL like `foo..http://example.com`, the scheme would be `foo..http`, - // so we need to clean up the end of the scheme and filter out the rest. - const matchedScheme = commonSchemes.find((scheme) => parsedScheme.endsWith(scheme)); - - // A known scheme was found, extract the unknown part from the URL - if (matchedScheme) { - const prefix = parsedScheme.length - matchedScheme.length; - start += prefix; - url = url.slice(prefix); - } - - // The URL matched but does not start with a scheme (`www.foo.com`), add it - if (!parsedScheme.length) { - url = "http://" + url; - } - - result.push({ - start: start, - end: end, - link: url, - }); - }); - } catch (e) { - // withinString is wrapped in a try/catch due to https://github.com/medialize/URI.js/issues/359 + if (!matches) { + return []; } - return result; + return matches.map((url) => ({ + start: url.index, + end: url.lastIndex, + link: url.url, + })); } module.exports = findLinks; diff --git a/package.json b/package.json index ecd06af5..accbc349 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "express": "4.16.3", "fs-extra": "6.0.1", "irc-framework": "2.11.0", + "linkify-it": "2.0.3", "lodash": "4.17.10", "mime-types": "2.1.18", "moment": "2.22.1", @@ -58,6 +59,7 @@ "spdy": "3.4.7", "sqlite3": "4.0.0", "thelounge-ldapjs-non-maintained-fork": "1.0.2", + "tlds": "1.203.1", "ua-parser-js": "0.7.18", "urijs": "1.19.1", "uuid": "3.2.1", diff --git a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js index 7f8ab4e2..90113e6b 100644 --- a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -104,6 +104,144 @@ describe("findLinks", () => { expect(actual).to.deep.equal(expected); }); + it("should find domains without www. but valid tld", () => { + const input = "google.com google.lv google.museum"; + const expected = [{ + link: "http://google.com", + start: 0, + end: 10, + }, { + link: "http://google.lv", + start: 11, + end: 20, + }, { + link: "http://google.museum", + start: 21, + end: 34, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should find .onion domains", () => { + const input = "facebookcorewwwi.onion/test?url"; + const expected = [{ + link: "http://facebookcorewwwi.onion/test?url", + start: 0, + end: 31, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should not consider invalid TLDs as domains", () => { + const input = "google.wtfgugl google.xx www.google.wtfgugl www.google.xx"; + const expected = []; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should consider invalid TLDs as domains if protocol is specified", () => { + const input = "http://google.wtfgugl http://google.xx http://www.google.wtfgugl http://www.google.xx"; + const expected = [{ + link: "http://google.wtfgugl", + start: 0, + end: 21, + }, { + link: "http://google.xx", + start: 22, + end: 38, + }, { + link: "http://www.google.wtfgugl", + start: 39, + end: 64, + }, { + link: "http://www.google.xx", + start: 65, + end: 85, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should correctly stop at punctuation", () => { // Issue #2351 + const input = + "https://en.wikipedia.org/wiki/Dig! " + + "https://en.wikipedia.org/wiki/Dig? " + + "https://en.wikipedia.org/wiki/Dig. " + + "https://www.google.com* " + + "https://www.google.com/test* " + + "https://www.google.com@ " + + "https://www.google.com/test@ " + + "https://www.google.com! "; + const expected = [{ + link: "https://en.wikipedia.org/wiki/Dig", + start: 0, + end: 33, + }, { + link: "https://en.wikipedia.org/wiki/Dig", + start: 35, + end: 68, + }, { + link: "https://en.wikipedia.org/wiki/Dig", + start: 70, + end: 103, + }, { + link: "https://www.google.com", + start: 105, + end: 127, + }, { + link: "https://www.google.com/test*", + start: 129, + end: 157, + }, { + link: "https://www.google.com", + start: 158, + end: 180, + }, { + link: "https://www.google.com/test@", + start: 182, + end: 210, + }, { + link: "https://www.google.com", + start: 211, + end: 233, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + + it("should correctly stop at apostrophe", () => { + const input = "https://www.google.com's www.google.com's google.com's"; // Issue #1302 + const expected = [{ + link: "https://www.google.com", + start: 0, + end: 22, + }, { + link: "http://www.google.com", + start: 25, + end: 39, + }, { + link: "http://google.com", + start: 42, + end: 52, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); + it("does not find invalid urls", () => { const input = "www.example.com ssh://-oProxyCommand=whois"; // Issue #1412 const expected = [{ diff --git a/test/client/js/libs/handlebars/parse.js b/test/client/js/libs/handlebars/parse.js index 98d79cd0..2e0dea8b 100644 --- a/test/client/js/libs/handlebars/parse.js +++ b/test/client/js/libs/handlebars/parse.js @@ -7,7 +7,7 @@ describe("parse Handlebars helper", () => { it("should not introduce xss", () => { const testCases = [{ input: "", - expected: "<img onerror='location.href="//youtube.com"'>", + expected: "<img onerror='location.href="//youtube.com"'>", }, { input: '#&">bug', expected: '#&">bug', diff --git a/yarn.lock b/yarn.lock index d0330abd..8cd59910 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4310,6 +4310,12 @@ levn@^0.3.0, levn@~0.3.0: prelude-ls "~1.1.2" type-check "~0.3.2" +linkify-it@2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/linkify-it/-/linkify-it-2.0.3.tgz#d94a4648f9b1c179d64fa97291268bdb6ce9434f" + dependencies: + uc.micro "^1.0.1" + listr-silent-renderer@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/listr-silent-renderer/-/listr-silent-renderer-1.1.1.tgz#924b5a3757153770bf1a8e3fbf74b8bbf3f9242e" @@ -7446,6 +7452,10 @@ timers-browserify@^2.0.4: dependencies: setimmediate "^1.0.4" +tlds@1.203.1: + version "1.203.1" + resolved "https://registry.yarnpkg.com/tlds/-/tlds-1.203.1.tgz#4dc9b02f53de3315bc98b80665e13de3edfc1dfc" + tmp@^0.0.33: version "0.0.33" resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.33.tgz#6d34335889768d21b2bcda0aa277ced3b1bfadf9" @@ -7559,6 +7569,10 @@ ua-parser-js@0.7.18: version "0.7.18" resolved "https://registry.yarnpkg.com/ua-parser-js/-/ua-parser-js-0.7.18.tgz#a7bfd92f56edfb117083b69e31d2aa8882d4b1ed" +uc.micro@^1.0.1: + version "1.0.5" + resolved "https://registry.yarnpkg.com/uc.micro/-/uc.micro-1.0.5.tgz#0c65f15f815aa08b560a61ce8b4db7ffc3f45376" + uglify-es@^3.3.4: version "3.3.9" resolved "https://registry.yarnpkg.com/uglify-es/-/uglify-es-3.3.9.tgz#0c1c4f0700bed8dbc124cdb304d2592ca203e677" From 15c14c6dea0626040188394e6525fd8bb3046314 Mon Sep 17 00:00:00 2001 From: Pavel Djundik Date: Fri, 27 Apr 2018 13:55:49 +0300 Subject: [PATCH 2/6] Change test cases to match linkify-it --- .../handlebars/ircmessageparser/findLinks.js | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js index 90113e6b..ad04580c 100644 --- a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -69,12 +69,12 @@ describe("findLinks", () => { expect(actual).to.deep.equal(expected); }); - it("should find urls with starting with www. and odd surroundings", () => { - const input = ".:www.github.com:."; + it("should find urls with starting with http:// and odd surroundings", () => { + const input = ".:http://www.github.com:. .:www.github.com:."; const expected = [{ link: "http://www.github.com", start: 2, - end: 16, + end: 23, }]; const actual = findLinks(input); @@ -248,10 +248,6 @@ describe("findLinks", () => { start: 0, end: 15, link: "http://www.example.com", - }, { - end: 42, - start: 16, - link: "ssh://-oProxyCommand=whois", }]; const actual = findLinks(input); @@ -263,6 +259,10 @@ describe("findLinks", () => { start: 0, end: 15, link: "http://www.example.com", + }, { + start: 16, + end: 57, + link: "http://root:'some%pass'@hostname/database", }]; const actual2 = findLinks(input2); @@ -276,6 +276,10 @@ describe("findLinks", () => { start: 0, end: 15, link: "http://www.example.com", + }, { + start: 16, + end: 29, + link: "http://a:%p@c", }, { start: 30, end: 51, From 629ae8bfa442b23e9fba21f5ec7d1d29e62c5bd0 Mon Sep 17 00:00:00 2001 From: Pavel Djundik Date: Fri, 27 Apr 2018 14:11:54 +0300 Subject: [PATCH 3/6] Fix protocol-aware urls, add better link validation in previews --- .../handlebars/ircmessageparser/findLinks.js | 17 +++++++---- src/plugins/irc-events/link.js | 26 +++++++++++++++-- .../handlebars/ircmessageparser/findLinks.js | 13 +++++++++ test/client/js/libs/handlebars/parse.js | 2 +- test/plugins/link.js | 29 +++++++++++++++++++ 5 files changed, 78 insertions(+), 9 deletions(-) diff --git a/client/js/libs/handlebars/ircmessageparser/findLinks.js b/client/js/libs/handlebars/ircmessageparser/findLinks.js index 2c162f50..fadcb2f2 100644 --- a/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -25,11 +25,18 @@ function findLinks(text) { return []; } - return matches.map((url) => ({ - start: url.index, - end: url.lastIndex, - link: url.url, - })); + return matches.map((url) => { + // Prefix protocol to protocol-aware urls + if (url.schema === "//") { + url.url = `http:${url.url}`; + } + + return { + start: url.index, + end: url.lastIndex, + link: url.url, + }; + }); } module.exports = findLinks; diff --git a/src/plugins/irc-events/link.js b/src/plugins/irc-events/link.js index aec71826..9029716e 100644 --- a/src/plugins/irc-events/link.js +++ b/src/plugins/irc-events/link.js @@ -11,7 +11,6 @@ const findLinks = require("../../../client/js/libs/handlebars/ircmessageparser/f const storage = require("../storage"); const mediaTypeRegex = /^(audio|video)\/.+/; -const linkRegex = /^https?:\/\//; // Fix ECDH curve client compatibility in Node v8/v9 // This is fixed in Node 10, but The Lounge supports LTS versions @@ -34,7 +33,7 @@ module.exports = function(client, chan, msg) { const cleanText = cleanIrcMessage(msg.text); // We will only try to prefetch http(s) links - const links = findLinks(cleanText).filter((w) => linkRegex.test(w.link)); + const links = findLinks(cleanText).filter((w) => isValidLink(w.link)); if (links.length === 0) { return; @@ -99,7 +98,7 @@ function parseHtml(preview, res, client) { } // Make sure thumbnail is a valid url - if (!linkRegex.test(preview.thumb)) { + if (!isValidLink(preview.thumb)) { preview.thumb = ""; } @@ -364,3 +363,24 @@ function fetch(uri, headers, cb) { function normalizeURL(header) { return URI(header).normalize().toString(); } + +function isValidLink(link) { + try { + const uri = URI(link); + const protocol = uri.protocol(); + + // Only fetch http and https links + if (protocol !== "http" && protocol !== "https") { + return false; + } + + // Do not fetch links without hostname or ones that contain authorization + if (!uri.hostname() || uri.username() || uri.password()) { + return false; + } + } catch (e) { + return false; + } + + return true; +} diff --git a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js index ad04580c..e6e8420f 100644 --- a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -290,4 +290,17 @@ describe("findLinks", () => { expect(actual).to.deep.equal(expected); }); + + it("should add protocol to protocol-aware urls", () => { + const input = "//example.com"; + const expected = [{ + link: "http://example.com", + start: 0, + end: 13, + }]; + + const actual = findLinks(input); + + expect(actual).to.deep.equal(expected); + }); }); diff --git a/test/client/js/libs/handlebars/parse.js b/test/client/js/libs/handlebars/parse.js index 2e0dea8b..801d92f0 100644 --- a/test/client/js/libs/handlebars/parse.js +++ b/test/client/js/libs/handlebars/parse.js @@ -7,7 +7,7 @@ describe("parse Handlebars helper", () => { it("should not introduce xss", () => { const testCases = [{ input: "", - expected: "<img onerror='location.href="//youtube.com"'>", + expected: "<img onerror='location.href="//youtube.com"'>", }, { input: '#&">bug', expected: '#&">bug', diff --git a/test/plugins/link.js b/test/plugins/link.js index 22a9c633..82278f41 100644 --- a/test/plugins/link.js +++ b/test/plugins/link.js @@ -371,4 +371,33 @@ describe("Link plugin", function() { } }); }); + + it("should fetch protocol-aware links", function(done) { + const message = this.irc.createMessage({ + text: "//localhost:9002", + }); + + link(this.irc, this.network.channels[0], message); + + this.irc.once("msg:preview", function(data) { + expect(data.preview.link).to.equal("http://localhost:9002"); + done(); + }); + }); + + it("should not try to fetch links with wrong protocol", function() { + const message = this.irc.createMessage({ + text: "ssh://example.com ftp://example.com irc://example.com http:////////example.com", + }); + + expect(message.previews).to.be.empty; + }); + + it("should not try to fetch links with username or password", function() { + const message = this.irc.createMessage({ + text: "http://root:'some%pass'@hostname/database http://a:%p@c http://a:%p@example.com http://test@example.com", + }); + + expect(message.previews).to.be.empty; + }); }); From d4fa6bbcb06a7a911b8351cbab4bde152985eb3e Mon Sep 17 00:00:00 2001 From: Pavel Djundik Date: Fri, 27 Apr 2018 16:27:26 +0300 Subject: [PATCH 4/6] Use WHATWG URL parser in link prefetcher --- src/plugins/irc-events/link.js | 100 +++++++++++++++++++-------------- test/plugins/link.js | 22 ++++++++ 2 files changed, 79 insertions(+), 43 deletions(-) diff --git a/src/plugins/irc-events/link.js b/src/plugins/irc-events/link.js index 9029716e..6d0bede6 100644 --- a/src/plugins/irc-events/link.js +++ b/src/plugins/irc-events/link.js @@ -2,8 +2,7 @@ const cheerio = require("cheerio"); const request = require("request"); -const url = require("url"); -const URI = require("urijs"); +const URL = require("url").URL; const mime = require("mime-types"); const Helper = require("../../helper"); const cleanIrcMessage = require("../../../client/js/libs/handlebars/ircmessageparser/cleanIrcMessage"); @@ -32,26 +31,36 @@ module.exports = function(client, chan, msg) { // Remove all IRC formatting characters before searching for links const cleanText = cleanIrcMessage(msg.text); - // We will only try to prefetch http(s) links - const links = findLinks(cleanText).filter((w) => isValidLink(w.link)); + msg.previews = findLinks(cleanText).reduce((cleanLinks, link) => { + const url = normalizeURL(link.link); - if (links.length === 0) { - return; - } + // If the URL is invalid and cannot be normalized, don't fetch it + if (url === null) { + return cleanLinks; + } - msg.previews = Array.from(new Set( // Remove duplicate links - links.map((link) => link.link) - )).map((link) => ({ - type: "loading", - head: "", - body: "", - thumb: "", - link: link, - shown: true, - })).slice(0, 5); // Only preview the first 5 URLs in message to avoid abuse + // If there are too many urls in this message, only fetch first X valid links + if (cleanLinks.length > 4) { + return cleanLinks; + } - msg.previews.forEach((preview) => { - fetch(normalizeURL(preview.link), { + // Do not fetch duplicate links twice + if (cleanLinks.some((l) => l.link === link.link)) { + return cleanLinks; + } + + const preview = { + type: "loading", + head: "", + body: "", + thumb: "", + link: link.link, // Send original matched link to the client + shown: true, + }; + + cleanLinks.push(preview); + + fetch(url, { accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", language: client.language, }, function(res, err) { @@ -68,7 +77,9 @@ module.exports = function(client, chan, msg) { parse(msg, preview, res, client); }); - }); + + return cleanLinks; + }, []); }; function parseHtml(preview, res, client) { @@ -93,18 +104,14 @@ function parseHtml(preview, res, client) { || $('link[rel="image_src"]').attr("href") || ""; + // Make sure thumbnail is a valid and absolute url if (preview.thumb.length) { - preview.thumb = url.resolve(preview.link, preview.thumb); - } - - // Make sure thumbnail is a valid url - if (!isValidLink(preview.thumb)) { - preview.thumb = ""; + preview.thumb = normalizeURL(preview.thumb, preview.link) || ""; } // Verify that thumbnail pic exists and is under allowed size if (preview.thumb.length) { - fetch(normalizeURL(preview.thumb), {language: client.language}, (resThumb) => { + fetch(preview.thumb, {language: client.language}, (resThumb) => { if (resThumb === null || !(/^image\/.+/.test(resThumb.type)) || resThumb.size > (Helper.config.prefetchMaxImageSize * 1024)) { @@ -134,16 +141,19 @@ function parseHtmlMedia($, preview, res, client) { if (mediaTypeRegex.test(mimeType)) { // If we match a clean video or audio tag, parse that as a preview instead - const mediaUrl = $($(`meta[property="og:${type}"]`).get(i)).attr("content"); + let mediaUrl = $($(`meta[property="og:${type}"]`).get(i)).attr("content"); // Make sure media is a valid url - if (!mediaUrl.startsWith("https://")) { + mediaUrl = normalizeURL(mediaUrl, preview.link, true); + + // Make sure media is a valid url + if (!mediaUrl) { return; } foundMedia = true; - fetch(normalizeURL(mediaUrl), { + fetch(mediaUrl, { accept: type === "video" ? "video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5" : "audio/webm, audio/ogg, audio/wav, audio/*;q=0.9, application/ogg;q=0.7, video/*;q=0.6; */*;q=0.5", @@ -360,27 +370,31 @@ function fetch(uri, headers, cb) { }); } -function normalizeURL(header) { - return URI(header).normalize().toString(); -} - -function isValidLink(link) { +function normalizeURL(link, baseLink, disallowHttp = false) { try { - const uri = URI(link); - const protocol = uri.protocol(); + const url = new URL(link, baseLink); // Only fetch http and https links - if (protocol !== "http" && protocol !== "https") { - return false; + if (url.protocol !== "http:" && url.protocol !== "https:") { + return null; + } + + if (disallowHttp && url.protocol === "http:") { + return null; } // Do not fetch links without hostname or ones that contain authorization - if (!uri.hostname() || uri.username() || uri.password()) { - return false; + if (!url.hostname || url.username || url.password) { + return null; } + + // Drop hash from the url, if any + url.hash = ""; + + return url.toString(); } catch (e) { - return false; + // if an exception was thrown, the url is not valid } - return true; + return null; } diff --git a/test/plugins/link.js b/test/plugins/link.js index 82278f41..121f7048 100644 --- a/test/plugins/link.js +++ b/test/plugins/link.js @@ -385,6 +385,28 @@ describe("Link plugin", function() { }); }); + it("should de-duplicate links", function(done) { + const message = this.irc.createMessage({ + text: "//localhost:9002 http://localhost:9002 http://localhost:9002", + }); + + link(this.irc, this.network.channels[0], message); + + expect(message.previews).to.deep.equal([{ + type: "loading", + head: "", + body: "", + thumb: "", + link: "http://localhost:9002", + shown: true, + }]); + + this.irc.once("msg:preview", function(data) { + expect(data.preview.link).to.equal("http://localhost:9002"); + done(); + }); + }); + it("should not try to fetch links with wrong protocol", function() { const message = this.irc.createMessage({ text: "ssh://example.com ftp://example.com irc://example.com http:////////example.com", From 42344302de178fed0d89995046283b2a3e3c0ac1 Mon Sep 17 00:00:00 2001 From: Pavel Djundik Date: Fri, 27 Apr 2018 20:09:05 +0300 Subject: [PATCH 5/6] Bump required node version to 6.13.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index accbc349..f1b53e43 100644 --- a/package.json +++ b/package.json @@ -36,7 +36,7 @@ ], "license": "MIT", "engines": { - "node": ">=6" + "node": ">=6.13.0" }, "dependencies": { "bcryptjs": "2.4.3", From 97dfdbf7c09acd01d43cbaa31f53e2f1eb25f6c2 Mon Sep 17 00:00:00 2001 From: Pavel Djundik Date: Wed, 23 May 2018 16:50:59 +0300 Subject: [PATCH 6/6] Default to https: for urls with no scheme --- .../handlebars/ircmessageparser/findLinks.js | 37 ++++++++++++------- .../handlebars/ircmessageparser/findLinks.js | 26 ++++++------- test/client/js/libs/handlebars/parse.js | 10 ++--- test/plugins/link.js | 8 ++-- 4 files changed, 46 insertions(+), 35 deletions(-) diff --git a/client/js/libs/handlebars/ircmessageparser/findLinks.js b/client/js/libs/handlebars/ircmessageparser/findLinks.js index fadcb2f2..91f69958 100644 --- a/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -1,6 +1,24 @@ "use strict"; -const linkify = require("linkify-it")() +const LinkifyIt = require("linkify-it"); + +LinkifyIt.prototype.normalize = function normalize(match) { + if (!match.schema) { + match.schema = "https:"; + match.url = "https://" + match.url; + } + + if (match.schema === "//") { + match.schema = "https:"; + match.url = "https:" + match.url; + } + + if (match.schema === "mailto:" && !/^mailto:/i.test(match.url)) { + match.url = "mailto:" + match.url; + } +}; + +const linkify = LinkifyIt() .tlds(require("tlds")) .tlds("onion", true); @@ -25,18 +43,11 @@ function findLinks(text) { return []; } - return matches.map((url) => { - // Prefix protocol to protocol-aware urls - if (url.schema === "//") { - url.url = `http:${url.url}`; - } - - return { - start: url.index, - end: url.lastIndex, - link: url.url, - }; - }); + return matches.map((url) => ({ + start: url.index, + end: url.lastIndex, + link: url.url, + })); } module.exports = findLinks; diff --git a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js index e6e8420f..36cfa8e3 100644 --- a/test/client/js/libs/handlebars/ircmessageparser/findLinks.js +++ b/test/client/js/libs/handlebars/ircmessageparser/findLinks.js @@ -22,7 +22,7 @@ describe("findLinks", () => { const expected = [{ start: 0, end: 24, - link: "http://www.nooooooooooooooo.com", + link: "https://www.nooooooooooooooo.com", }]; const actual = findLinks(input); @@ -46,7 +46,7 @@ describe("findLinks", () => { it("should find urls in strings starting with www", () => { const input = "use www.duckduckgo.com for privacy reasons"; const expected = [{ - link: "http://www.duckduckgo.com", + link: "https://www.duckduckgo.com", start: 4, end: 22, }]; @@ -94,7 +94,7 @@ describe("findLinks", () => { it("should handle multiple www. correctly", () => { const input = "www.www.test.com"; const expected = [{ - link: "http://www.www.test.com", + link: "https://www.www.test.com", start: 0, end: 16, }]; @@ -107,15 +107,15 @@ describe("findLinks", () => { it("should find domains without www. but valid tld", () => { const input = "google.com google.lv google.museum"; const expected = [{ - link: "http://google.com", + link: "https://google.com", start: 0, end: 10, }, { - link: "http://google.lv", + link: "https://google.lv", start: 11, end: 20, }, { - link: "http://google.museum", + link: "https://google.museum", start: 21, end: 34, }]; @@ -128,7 +128,7 @@ describe("findLinks", () => { it("should find .onion domains", () => { const input = "facebookcorewwwi.onion/test?url"; const expected = [{ - link: "http://facebookcorewwwi.onion/test?url", + link: "https://facebookcorewwwi.onion/test?url", start: 0, end: 31, }]; @@ -228,11 +228,11 @@ describe("findLinks", () => { start: 0, end: 22, }, { - link: "http://www.google.com", + link: "https://www.google.com", start: 25, end: 39, }, { - link: "http://google.com", + link: "https://google.com", start: 42, end: 52, }]; @@ -247,7 +247,7 @@ describe("findLinks", () => { const expected = [{ start: 0, end: 15, - link: "http://www.example.com", + link: "https://www.example.com", }]; const actual = findLinks(input); @@ -258,7 +258,7 @@ describe("findLinks", () => { const expected2 = [{ start: 0, end: 15, - link: "http://www.example.com", + link: "https://www.example.com", }, { start: 16, end: 57, @@ -275,7 +275,7 @@ describe("findLinks", () => { const expected = [{ start: 0, end: 15, - link: "http://www.example.com", + link: "https://www.example.com", }, { start: 16, end: 29, @@ -294,7 +294,7 @@ describe("findLinks", () => { it("should add protocol to protocol-aware urls", () => { const input = "//example.com"; const expected = [{ - link: "http://example.com", + link: "https://example.com", start: 0, end: 13, }]; diff --git a/test/client/js/libs/handlebars/parse.js b/test/client/js/libs/handlebars/parse.js index 801d92f0..4b247429 100644 --- a/test/client/js/libs/handlebars/parse.js +++ b/test/client/js/libs/handlebars/parse.js @@ -7,7 +7,7 @@ describe("parse Handlebars helper", () => { it("should not introduce xss", () => { const testCases = [{ input: "", - expected: "<img onerror='location.href="//youtube.com"'>", + expected: "<img onerror='location.href="//youtube.com"'>", }, { input: '#&">bug', expected: '#&">bug', @@ -41,7 +41,7 @@ describe("parse Handlebars helper", () => { }, { input: "www.nooooooooooooooo.com", expected: - '' + + '' + "www.nooooooooooooooo.com" + "", }, { @@ -56,7 +56,7 @@ describe("parse Handlebars helper", () => { input: "use www.duckduckgo.com for privacy reasons", expected: "use " + - '' + + '' + "www.duckduckgo.com" + "" + " for privacy reasons", @@ -101,7 +101,7 @@ describe("parse Handlebars helper", () => { input: "abc (www.example.com)", expected: "abc (" + - '' + + '' + "www.example.com" + "" + ")", @@ -114,7 +114,7 @@ describe("parse Handlebars helper", () => { }, { input: "www.example.com/Test_(Page)", expected: - '' + + '' + "www.example.com/Test_(Page)" + "", }]; diff --git a/test/plugins/link.js b/test/plugins/link.js index 121f7048..78a27bcb 100644 --- a/test/plugins/link.js +++ b/test/plugins/link.js @@ -380,14 +380,14 @@ describe("Link plugin", function() { link(this.irc, this.network.channels[0], message); this.irc.once("msg:preview", function(data) { - expect(data.preview.link).to.equal("http://localhost:9002"); + expect(data.preview.link).to.equal("https://localhost:9002"); done(); }); }); it("should de-duplicate links", function(done) { const message = this.irc.createMessage({ - text: "//localhost:9002 http://localhost:9002 http://localhost:9002", + text: "//localhost:9002 https://localhost:9002 https://localhost:9002", }); link(this.irc, this.network.channels[0], message); @@ -397,12 +397,12 @@ describe("Link plugin", function() { head: "", body: "", thumb: "", - link: "http://localhost:9002", + link: "https://localhost:9002", shown: true, }]); this.irc.once("msg:preview", function(data) { - expect(data.preview.link).to.equal("http://localhost:9002"); + expect(data.preview.link).to.equal("https://localhost:9002"); done(); }); });