2017-04-18 03:28:35 +00:00
|
|
|
"use strict";
|
|
|
|
|
|
|
|
const URI = require("urijs");
|
|
|
|
|
2017-04-04 04:36:03 +00:00
|
|
|
// Known schemes to detect in a text. If a text contains `foo...bar://foo.com`,
// the parsed scheme should be `foo...bar` but if it contains
// `foo...http://foo.com`, we assume the scheme to extract will be `http`.
//
// Ordering rule: a scheme that is a suffix of another one (`ftp` / `sftp`,
// `ssh` / `svn+ssh`) must be listed AFTER the longer scheme, so that a
// first-match suffix search over this list picks the longer, more specific
// scheme instead of truncating `sftp://...` into `ftp://...`.
const commonSchemes = [
	"http", "https",
	"sftp", "ftp",
	"smb", "file",
	"irc", "ircs",
	"svn", "git",
	"steam", "mumble", "ts3server",
	"svn+ssh", "ssh",
];
|
|
|
|
|
|
|
|
/**
 * Finds the URLs contained in a text and reports where each one sits.
 *
 * @param {string} text - Text to scan for URLs.
 * @returns {Array<{start: number, end: number, link: string}>} One entry per
 *   URL reported by URI.withinString(), in reporting order. `start` and `end`
 *   are the offsets handed over by URI.withinString(); `start` is moved
 *   forward when leading characters glued to a known scheme are dropped.
 *   `link` is the detected URL, stripped of such leading characters, and
 *   prefixed with `http://` when it carries no scheme at all.
 */
function findLinks(text) {
	const result = [];

	// URI.withinString() identifies URIs within text, e.g. to translate them to
	// <a>-Tags.
	// See https://medialize.github.io/URI.js/docs.html#static-withinString
	// In our case, we store each URI encountered in a result array.
	URI.withinString(text, (url, start, end) => {
		let parsedScheme;

		try {
			// Extract the scheme of the URL detected, if there is one
			parsedScheme = URI(url).scheme().toLowerCase();
		} catch (e) {
			// URI may throw an exception for malformed urls. Skipping the
			// candidate is deliberate: a malformed match is simply not a link.
			return;
		}

		// Check if the scheme of the detected URL matches a common one above.
		// In a URL like `foo..http://example.com`, the scheme would be `foo..http`,
		// so we need to clean up the end of the scheme and filter out the rest.
		// The longest matching suffix wins, so that `sftp` is never mistaken for
		// `ftp` whatever the order of `commonSchemes` is. The empty string
		// stands for "no known scheme matched".
		const matchedScheme = commonSchemes
			.filter((scheme) => parsedScheme.endsWith(scheme))
			.reduce((best, scheme) => (scheme.length > best.length ? scheme : best), "");

		let linkStart = start;
		let link = url;

		// A known scheme was found, extract the unknown part from the URL
		if (matchedScheme) {
			const prefix = parsedScheme.length - matchedScheme.length;
			linkStart += prefix;
			link = link.slice(prefix);
		}

		// The URL matched but does not start with a scheme (`www.foo.com`), add it
		if (parsedScheme.length === 0) {
			link = "http://" + link;
		}

		result.push({
			start: linkStart,
			end: end,
			link: link,
		});
	});

	return result;
}
|
|
|
|
|
|
|
|
// Export findLinks as this module's export value.
module.exports = findLinks;
|