hardlounge/client/js/libs/handlebars/ircmessageparser/findLinks.js

"use strict";

const URI = require("urijs");

// Well-known URI schemes that are recognised at the *end* of a parsed scheme.
// Text such as `foo...http://foo.com` yields the parsed scheme `foo...http`;
// because it ends in a scheme from this list, the link is taken to be
// `http://foo.com`. A scheme that ends in none of these (`foo...bar://foo.com`)
// is kept as-is (`foo...bar`).
const commonSchemes = [
	"http",
	"https",
	"ftp",
	"sftp",
	"smb",
	"file",
	"irc",
	"ircs",
	"svn",
	"git",
	"steam",
	"mumble",
	"ts3server",
	"svn+ssh",
	"ssh",
];

/**
 * Collect every link that URI.withinString() detects in a piece of text.
 *
 * @param {string} text - Text to scan for links.
 * @returns {Object[]} One `{start, end, link}` object per detected link, in
 *   the order URI.withinString() reports them. `start` and `end` are the
 *   positions reported by URI.withinString(), with `start` moved forward past
 *   any junk that preceded a known scheme. `link` is the detected URL with
 *   that junk removed, or with `http://` prepended when it had no scheme.
 */
function findLinks(text) {
	const result = [];

	// URI.withinString() identifies URIs within text, e.g. to translate them to
	// <a>-Tags.
	// See https://medialize.github.io/URI.js/docs.html#static-withinString
	// In our case, we store each URI encountered in a result array. The callback
	// returns nothing, so the text itself is never rewritten.
	URI.withinString(text, function(url, start, end) {
		let parsedScheme;

		try {
			// Extract the scheme of the URL detected, if there is one
			parsedScheme = URI(url).scheme().toLowerCase();
		} catch (e) {
			// URI may throw an exception for malformed urls; such matches are
			// deliberately skipped instead of failing the whole scan.
			return;
		}

		// Check if the scheme of the detected URL ends with a common one above.
		// In a URL like `foo..http://example.com`, the scheme would be `foo..http`,
		// so we need to clean up the end of the scheme and filter out the rest.
		// Several known schemes can be a suffix of the same parsed scheme (`ftp`
		// and `sftp` both end `sftp`); the longest one wins so that `sftp://host`
		// is not truncated to `ftp://host`, regardless of list order.
		const matchedScheme = commonSchemes
			.filter((scheme) => parsedScheme.endsWith(scheme))
			.reduce((longest, scheme) => (scheme.length > longest.length ? scheme : longest), "");

		// A known scheme was found, drop the unknown part in front of it and move
		// the start position forward by the same amount.
		if (matchedScheme) {
			const prefix = parsedScheme.length - matchedScheme.length;
			start += prefix;
			url = url.slice(prefix);
		}

		// The URL matched but does not start with a scheme (`www.foo.com`), add it.
		// `start`/`end` keep describing the text as it was written.
		if (!parsedScheme.length) {
			url = "http://" + url;
		}

		result.push({
			start: start,
			end: end,
			link: url
		});
	});

	return result;
}

// CommonJS export: the module's value is the findLinks function itself.
module.exports = findLinks;
Extract findLinks into its own file and add tests Tests were taken from https://github.com/Bonuspunkt/ircmessageparser/blob/5a249c30b1e0379e5df62f10734a3dc7ce02306b/test/findLinks.js. The underlying code is different but the tests are the same. 2017-04-18 03:28:35 +00:00			`"use strict";`

			`const URI = require("urijs");`

Explain the modules of the message parser and add tests - Add comments and descriptions to: - `findChannels.js` - `parseStyle` - `findLinks` - `fill` - `anyIntersection` - `merge` - `parse` - Minor optimizations to `parseStyle` - Add tests for `fill` 2017-04-04 04:36:03 +00:00			// Known schemes to detect in a text. If a text contains `foo...bar://foo.com`,
			// the parsed scheme should be `foo...bar` but if it contains
			// `foo...http://foo.com`, we assume the scheme to extract will be `http`.
Extract findLinks into its own file and add tests Tests were taken from https://github.com/Bonuspunkt/ircmessageparser/blob/5a249c30b1e0379e5df62f10734a3dc7ce02306b/test/findLinks.js. The underlying code is different but the tests are the same. 2017-04-18 03:28:35 +00:00			`const commonSchemes = [`
			`"http", "https",`
			`"ftp", "sftp",`
			`"smb", "file",`
			`"irc", "ircs",`
			`"svn", "git",`
			`"steam", "mumble", "ts3server",`
			`"svn+ssh", "ssh",`
			`];`

			`function findLinks(text) {`
Update to eslint 4 and enforce extra rules 2017-04-08 12:34:31 +00:00			`const result = [];`
Extract findLinks into its own file and add tests Tests were taken from https://github.com/Bonuspunkt/ircmessageparser/blob/5a249c30b1e0379e5df62f10734a3dc7ce02306b/test/findLinks.js. The underlying code is different but the tests are the same. 2017-04-18 03:28:35 +00:00
Explain the modules of the message parser and add tests - Add comments and descriptions to: - `findChannels.js` - `parseStyle` - `findLinks` - `fill` - `anyIntersection` - `merge` - `parse` - Minor optimizations to `parseStyle` - Add tests for `fill` 2017-04-04 04:36:03 +00:00			`// URI.withinString() identifies URIs within text, e.g. to translate them to`
			`// <a>-Tags.`
			`// See https://medialize.github.io/URI.js/docs.html#static-withinString`
			`// In our case, we store each URI encountered in a result array.`
Extract findLinks into its own file and add tests Tests were taken from https://github.com/Bonuspunkt/ircmessageparser/blob/5a249c30b1e0379e5df62f10734a3dc7ce02306b/test/findLinks.js. The underlying code is different but the tests are the same. 2017-04-18 03:28:35 +00:00			`URI.withinString(text, function(url, start, end) {`
Do not throw an exception when URI parsing fails 2017-08-14 12:18:41 +00:00			`let parsedScheme;`

			`try {`
			`// Extract the scheme of the URL detected, if there is one`
			`parsedScheme = URI(url).scheme().toLowerCase();`
			`} catch (e) {`
Fix general spelling errors 2017-08-25 15:58:16 +00:00			`// URI may throw an exception for malformed urls,`
Do not throw an exception when URI parsing fails 2017-08-14 12:18:41 +00:00			`// as to why withinString finds these in the first place is a mystery`
			`return;`
			`}`
Explain the modules of the message parser and add tests - Add comments and descriptions to: - `findChannels.js` - `parseStyle` - `findLinks` - `fill` - `anyIntersection` - `merge` - `parse` - Minor optimizations to `parseStyle` - Add tests for `fill` 2017-04-04 04:36:03 +00:00
			`// Check if the scheme of the detected URL matches a common one above.`
			// In a URL like `foo..http://example.com`, the scheme would be `foo..http`,
			`// so we need to clean up the end of the scheme and filter out the rest.`
Update to eslint 4 and enforce extra rules 2017-04-08 12:34:31 +00:00			`const matchedScheme = commonSchemes.find((scheme) => parsedScheme.endsWith(scheme));`
Extract findLinks into its own file and add tests Tests were taken from https://github.com/Bonuspunkt/ircmessageparser/blob/5a249c30b1e0379e5df62f10734a3dc7ce02306b/test/findLinks.js. The underlying code is different but the tests are the same. 2017-04-18 03:28:35 +00:00
Explain the modules of the message parser and add tests - Add comments and descriptions to: - `findChannels.js` - `parseStyle` - `findLinks` - `fill` - `anyIntersection` - `merge` - `parse` - Minor optimizations to `parseStyle` - Add tests for `fill` 2017-04-04 04:36:03 +00:00			`// A known scheme was found, extract the unknown part from the URL`
Extract findLinks into its own file and add tests Tests were taken from https://github.com/Bonuspunkt/ircmessageparser/blob/5a249c30b1e0379e5df62f10734a3dc7ce02306b/test/findLinks.js. The underlying code is different but the tests are the same. 2017-04-18 03:28:35 +00:00			`if (matchedScheme) {`
			`const prefix = parsedScheme.length - matchedScheme.length;`
			`start += prefix;`
			`url = url.slice(prefix);`
			`}`

Explain the modules of the message parser and add tests - Add comments and descriptions to: - `findChannels.js` - `parseStyle` - `findLinks` - `fill` - `anyIntersection` - `merge` - `parse` - Minor optimizations to `parseStyle` - Add tests for `fill` 2017-04-04 04:36:03 +00:00			// The URL matched but does not start with a scheme (`www.foo.com`), add it
Extract findLinks into its own file and add tests Tests were taken from https://github.com/Bonuspunkt/ircmessageparser/blob/5a249c30b1e0379e5df62f10734a3dc7ce02306b/test/findLinks.js. The underlying code is different but the tests are the same. 2017-04-18 03:28:35 +00:00			`if (!parsedScheme.length) {`
			`url = "http://" + url;`
			`}`

			`result.push({`
			`start: start,`
			`end: end,`
			`link: url`
			`});`
			`});`

			`return result;`
			`}`

			`module.exports = findLinks;`