Use linkify-it
This commit is contained in:
parent
c7c2587079
commit
58ec2768ec
@ -1,13 +1,12 @@
|
||||
"use strict";
|
||||
|
||||
const URI = require("urijs");
|
||||
const linkify = require("linkify-it")()
|
||||
.tlds(require("tlds"))
|
||||
.tlds("onion", true);
|
||||
|
||||
// Known schemes to detect in a text. If a text contains `foo...bar://foo.com`,
|
||||
// the parsed scheme should be `foo...bar` but if it contains
|
||||
// `foo...http://foo.com`, we assume the scheme to extract will be `http`.
|
||||
// Known schemes to detect in text
|
||||
const commonSchemes = [
|
||||
"http", "https",
|
||||
"ftp", "sftp",
|
||||
"sftp",
|
||||
"smb", "file",
|
||||
"irc", "ircs",
|
||||
"svn", "git",
|
||||
@ -15,54 +14,22 @@ const commonSchemes = [
|
||||
"svn+ssh", "ssh",
|
||||
];
|
||||
|
||||
for (const schema of commonSchemes) {
|
||||
linkify.add(schema + ":", "http:");
|
||||
}
|
||||
|
||||
function findLinks(text) {
|
||||
const result = [];
|
||||
const matches = linkify.match(text);
|
||||
|
||||
// URI.withinString() identifies URIs within text, e.g. to translate them to
|
||||
// <a>-Tags.
|
||||
// See https://medialize.github.io/URI.js/docs.html#static-withinString
|
||||
// In our case, we store each URI encountered in a result array.
|
||||
try {
|
||||
URI.withinString(text, function(url, start, end) {
|
||||
let parsedScheme;
|
||||
|
||||
try {
|
||||
// Extract the scheme of the URL detected, if there is one
|
||||
parsedScheme = URI(url).scheme().toLowerCase();
|
||||
} catch (e) {
|
||||
// URI may throw an exception for malformed urls,
|
||||
// as to why withinString finds these in the first place is a mystery
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if the scheme of the detected URL matches a common one above.
|
||||
// In a URL like `foo..http://example.com`, the scheme would be `foo..http`,
|
||||
// so we need to clean up the end of the scheme and filter out the rest.
|
||||
const matchedScheme = commonSchemes.find((scheme) => parsedScheme.endsWith(scheme));
|
||||
|
||||
// A known scheme was found, extract the unknown part from the URL
|
||||
if (matchedScheme) {
|
||||
const prefix = parsedScheme.length - matchedScheme.length;
|
||||
start += prefix;
|
||||
url = url.slice(prefix);
|
||||
}
|
||||
|
||||
// The URL matched but does not start with a scheme (`www.foo.com`), add it
|
||||
if (!parsedScheme.length) {
|
||||
url = "http://" + url;
|
||||
}
|
||||
|
||||
result.push({
|
||||
start: start,
|
||||
end: end,
|
||||
link: url,
|
||||
});
|
||||
});
|
||||
} catch (e) {
|
||||
// withinString is wrapped in a try/catch due to https://github.com/medialize/URI.js/issues/359
|
||||
if (!matches) {
|
||||
return [];
|
||||
}
|
||||
|
||||
return result;
|
||||
return matches.map((url) => ({
|
||||
start: url.index,
|
||||
end: url.lastIndex,
|
||||
link: url.url,
|
||||
}));
|
||||
}
|
||||
|
||||
module.exports = findLinks;
|
||||
|
@ -46,6 +46,7 @@
|
||||
"express": "4.16.3",
|
||||
"fs-extra": "6.0.1",
|
||||
"irc-framework": "2.11.0",
|
||||
"linkify-it": "2.0.3",
|
||||
"lodash": "4.17.10",
|
||||
"mime-types": "2.1.18",
|
||||
"moment": "2.22.1",
|
||||
@ -58,6 +59,7 @@
|
||||
"spdy": "3.4.7",
|
||||
"sqlite3": "4.0.0",
|
||||
"thelounge-ldapjs-non-maintained-fork": "1.0.2",
|
||||
"tlds": "1.203.1",
|
||||
"ua-parser-js": "0.7.18",
|
||||
"urijs": "1.19.1",
|
||||
"uuid": "3.2.1",
|
||||
|
@ -104,6 +104,144 @@ describe("findLinks", () => {
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
|
||||
it("should find domains without www. but valid tld", () => {
|
||||
const input = "google.com google.lv google.museum";
|
||||
const expected = [{
|
||||
link: "http://google.com",
|
||||
start: 0,
|
||||
end: 10,
|
||||
}, {
|
||||
link: "http://google.lv",
|
||||
start: 11,
|
||||
end: 20,
|
||||
}, {
|
||||
link: "http://google.museum",
|
||||
start: 21,
|
||||
end: 34,
|
||||
}];
|
||||
|
||||
const actual = findLinks(input);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
|
||||
it("should find .onion domains", () => {
|
||||
const input = "facebookcorewwwi.onion/test?url";
|
||||
const expected = [{
|
||||
link: "http://facebookcorewwwi.onion/test?url",
|
||||
start: 0,
|
||||
end: 31,
|
||||
}];
|
||||
|
||||
const actual = findLinks(input);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
|
||||
it("should not consider invalid TLDs as domains", () => {
|
||||
const input = "google.wtfgugl google.xx www.google.wtfgugl www.google.xx";
|
||||
const expected = [];
|
||||
|
||||
const actual = findLinks(input);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
|
||||
it("should consider invalid TLDs as domains if protocol is specified", () => {
|
||||
const input = "http://google.wtfgugl http://google.xx http://www.google.wtfgugl http://www.google.xx";
|
||||
const expected = [{
|
||||
link: "http://google.wtfgugl",
|
||||
start: 0,
|
||||
end: 21,
|
||||
}, {
|
||||
link: "http://google.xx",
|
||||
start: 22,
|
||||
end: 38,
|
||||
}, {
|
||||
link: "http://www.google.wtfgugl",
|
||||
start: 39,
|
||||
end: 64,
|
||||
}, {
|
||||
link: "http://www.google.xx",
|
||||
start: 65,
|
||||
end: 85,
|
||||
}];
|
||||
|
||||
const actual = findLinks(input);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
|
||||
it("should correctly stop at punctuation", () => { // Issue #2351
|
||||
const input =
|
||||
"https://en.wikipedia.org/wiki/Dig! " +
|
||||
"https://en.wikipedia.org/wiki/Dig? " +
|
||||
"https://en.wikipedia.org/wiki/Dig. " +
|
||||
"https://www.google.com* " +
|
||||
"https://www.google.com/test* " +
|
||||
"https://www.google.com@ " +
|
||||
"https://www.google.com/test@ " +
|
||||
"https://www.google.com! ";
|
||||
const expected = [{
|
||||
link: "https://en.wikipedia.org/wiki/Dig",
|
||||
start: 0,
|
||||
end: 33,
|
||||
}, {
|
||||
link: "https://en.wikipedia.org/wiki/Dig",
|
||||
start: 35,
|
||||
end: 68,
|
||||
}, {
|
||||
link: "https://en.wikipedia.org/wiki/Dig",
|
||||
start: 70,
|
||||
end: 103,
|
||||
}, {
|
||||
link: "https://www.google.com",
|
||||
start: 105,
|
||||
end: 127,
|
||||
}, {
|
||||
link: "https://www.google.com/test*",
|
||||
start: 129,
|
||||
end: 157,
|
||||
}, {
|
||||
link: "https://www.google.com",
|
||||
start: 158,
|
||||
end: 180,
|
||||
}, {
|
||||
link: "https://www.google.com/test@",
|
||||
start: 182,
|
||||
end: 210,
|
||||
}, {
|
||||
link: "https://www.google.com",
|
||||
start: 211,
|
||||
end: 233,
|
||||
}];
|
||||
|
||||
const actual = findLinks(input);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
|
||||
it("should correctly stop at apostrophe", () => {
|
||||
const input = "https://www.google.com's www.google.com's google.com's"; // Issue #1302
|
||||
const expected = [{
|
||||
link: "https://www.google.com",
|
||||
start: 0,
|
||||
end: 22,
|
||||
}, {
|
||||
link: "http://www.google.com",
|
||||
start: 25,
|
||||
end: 39,
|
||||
}, {
|
||||
link: "http://google.com",
|
||||
start: 42,
|
||||
end: 52,
|
||||
}];
|
||||
|
||||
const actual = findLinks(input);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
|
||||
it("does not find invalid urls", () => {
|
||||
const input = "www.example.com ssh://-oProxyCommand=whois"; // Issue #1412
|
||||
const expected = [{
|
||||
|
@ -7,7 +7,7 @@ describe("parse Handlebars helper", () => {
|
||||
it("should not introduce xss", () => {
|
||||
const testCases = [{
|
||||
input: "<img onerror='location.href=\"//youtube.com\"'>",
|
||||
expected: "<img onerror='location.href="//youtube.com"'>",
|
||||
expected: "<img onerror='location.href="<a href=\"//youtube.com\" target=\"_blank\" rel=\"noopener\">//youtube.com</a>"'>",
|
||||
}, {
|
||||
input: '#&">bug',
|
||||
expected: '<span class="inline-channel" role="button" tabindex="0" data-chan="#&">bug">#&">bug</span>',
|
||||
|
14
yarn.lock
14
yarn.lock
@ -4310,6 +4310,12 @@ levn@^0.3.0, levn@~0.3.0:
|
||||
prelude-ls "~1.1.2"
|
||||
type-check "~0.3.2"
|
||||
|
||||
linkify-it@2.0.3:
|
||||
version "2.0.3"
|
||||
resolved "https://registry.yarnpkg.com/linkify-it/-/linkify-it-2.0.3.tgz#d94a4648f9b1c179d64fa97291268bdb6ce9434f"
|
||||
dependencies:
|
||||
uc.micro "^1.0.1"
|
||||
|
||||
listr-silent-renderer@^1.1.1:
|
||||
version "1.1.1"
|
||||
resolved "https://registry.yarnpkg.com/listr-silent-renderer/-/listr-silent-renderer-1.1.1.tgz#924b5a3757153770bf1a8e3fbf74b8bbf3f9242e"
|
||||
@ -7446,6 +7452,10 @@ timers-browserify@^2.0.4:
|
||||
dependencies:
|
||||
setimmediate "^1.0.4"
|
||||
|
||||
tlds@1.203.1:
|
||||
version "1.203.1"
|
||||
resolved "https://registry.yarnpkg.com/tlds/-/tlds-1.203.1.tgz#4dc9b02f53de3315bc98b80665e13de3edfc1dfc"
|
||||
|
||||
tmp@^0.0.33:
|
||||
version "0.0.33"
|
||||
resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.33.tgz#6d34335889768d21b2bcda0aa277ced3b1bfadf9"
|
||||
@ -7559,6 +7569,10 @@ ua-parser-js@0.7.18:
|
||||
version "0.7.18"
|
||||
resolved "https://registry.yarnpkg.com/ua-parser-js/-/ua-parser-js-0.7.18.tgz#a7bfd92f56edfb117083b69e31d2aa8882d4b1ed"
|
||||
|
||||
uc.micro@^1.0.1:
|
||||
version "1.0.5"
|
||||
resolved "https://registry.yarnpkg.com/uc.micro/-/uc.micro-1.0.5.tgz#0c65f15f815aa08b560a61ce8b4db7ffc3f45376"
|
||||
|
||||
uglify-es@^3.3.4:
|
||||
version "3.3.9"
|
||||
resolved "https://registry.yarnpkg.com/uglify-es/-/uglify-es-3.3.9.tgz#0c1c4f0700bed8dbc124cdb304d2592ca203e677"
|
||||
|
Loading…
Reference in New Issue
Block a user