Explain the modules of the message parser and add tests

- Add comments and descriptions to: - `findChannels.js` - `parseStyle` - `findLinks` - `fill` - `anyIntersection` - `merge` - `parse` - Minor optimizations to `parseStyle` - Add tests for `fill`
2017-04-04 00:36:03 -04:00 · 2017-04-04 00:36:03 -04:00 · 03e3444a35
commit 03e3444a35
parent 90f4a94bb2
9 changed files with 195 additions and 63 deletions
--- a/client/js/libs/handlebars/ircmessageparser/anyIntersection.js
+++ b/client/js/libs/handlebars/ircmessageparser/anyIntersection.js
@ -1,5 +1,7 @@
 "use strict";
 // Return true if any section of "a" or "b" parts (defined by their start/end
 // markers) intersect each other, false otherwise.
 function anyIntersection(a, b) {
 	return a.start <= b.start && b.start < a.end ||
 		a.start < b.end && b.end <= a.end ||
--- a/client/js/libs/handlebars/ircmessageparser/fill.js
+++ b/client/js/libs/handlebars/ircmessageparser/fill.js
@ -1,21 +1,26 @@
 "use strict";
 // Create plain text entries corresponding to areas of the text that match no
 // existing entries. Returns an empty array if all parts of the text have been
 // parsed into recognizable entries already.
 function fill(existingEntries, text) {
 	let position = 0;
 	const result = [];
 	for (let i = 0; i < existingEntries.length; i++) {
 		const textSegment = existingEntries[i];
 	// Fill inner parts of the text. For example, if text is `foobarbaz` and both
 	// `foo` and `baz` have matched into an entry, this will return a dummy entry
 	// corresponding to `bar`.
 	const result = existingEntries.reduce((acc, textSegment) => {
 		if (textSegment.start > position) {
-			result.push({
+			acc.push({
 				start: position,
 				end: textSegment.start
 			});
 		}
 		position = textSegment.end;
-	}
+		return acc;
 	}, []);
 	// Complete the unmatched end of the text with a dummy entry
 	if (position < text.length) {
 		result.push({
 			start: position,
--- a/client/js/libs/handlebars/ircmessageparser/findChannels.js
+++ b/client/js/libs/handlebars/ircmessageparser/findChannels.js
@ -1,20 +1,31 @@
 "use strict";
 // Escapes the RegExp special characters "^", "$", "\", ".", "*", "+", "?", "(",
 // ")", "[", "]", "{", "}", and "|" in string.
 // See https://lodash.com/docs/#escapeRegExp
 const escapeRegExp = require("lodash/escapeRegExp");
-// NOTE: channel prefixes should be RPL_ISUPPORT.CHANTYPES
+// Given an array of channel prefixes (such as "#" and "&") and an array of user
-// NOTE: userModes should be RPL_ISUPPORT.PREFIX
+// modes (such as "@" and "+"), this function extracts channels and nicks from a
 // text.
 // It returns an array of objects for each channel found with their start index,
 // end index and channel name.
 function findChannels(text, channelPrefixes, userModes) {
 	// `userModePattern` is necessary to ignore user modes in /whois responses.
 	// For example, a voiced user in #thelounge will have a /whois response of:
 	// > foo is on the following channels: +#thelounge
 	// We need to explicitly ignore user modes to parse such channels correctly.
 	const userModePattern = userModes.map(escapeRegExp).join("");
 	const channelPrefixPattern = channelPrefixes.map(escapeRegExp).join("");
-
+	const channelPattern = `(?:^|\\s)[${userModePattern}]*([${channelPrefixPattern}][^ \u0007]+)`;
 	const channelPattern = `(?:^|\\s)[${ userModePattern }]*([${ channelPrefixPattern }][^ \u0007]+)`;
 	const channelRegExp = new RegExp(channelPattern, "g");
 	const result = [];
 	let match;
 	do {
 		// With global ("g") regexes, calling `exec` multiple times will find
 		// successive matches in the same string.
 		match = channelRegExp.exec(text);
 		if (match) {
--- a/client/js/libs/handlebars/ircmessageparser/findLinks.js
+++ b/client/js/libs/handlebars/ircmessageparser/findLinks.js
@ -2,6 +2,9 @@
 const URI = require("urijs");
 // Known schemes to detect in a text. If a text contains `foo...bar://foo.com`,
 // the parsed scheme should be `foo...bar` but if it contains
 // `foo...http://foo.com`, we assume the scheme to extract will be `http`.
 const commonSchemes = [
 	"http", "https",
 	"ftp", "sftp",
@ -16,6 +19,10 @@ function findLinks(text) {
 	let result = [];
 	let lastPosition = 0;
 	// URI.withinString() identifies URIs within text, e.g. to translate them to
 	// <a>-Tags.
 	// See https://medialize.github.io/URI.js/docs.html#static-withinString
 	// In our case, we store each URI encountered in a result array.
 	URI.withinString(text, function(url, start, end) {
 		// v-- fix: url was modified and does not match input string -> can't be mapped
 		if (text.indexOf(url, lastPosition) < 0) {
@ -23,19 +30,22 @@ function findLinks(text) {
 		}
 		// ^-- /fix: url was modified and does not match input string -> can't be mapped
-		// v-- fix: use prefered scheme
+		// Extract the scheme of the URL detected, if there is one
-		const parsed = URI(url);
+		const parsedScheme = URI(url).scheme().toLowerCase();
-		const parsedScheme = parsed.scheme().toLowerCase();
+
 		// Check if the scheme of the detected URL matches a common one above.
 		// In a URL like `foo..http://example.com`, the scheme would be `foo..http`,
 		// so we need to clean up the end of the scheme and filter out the rest.
 		const matchedScheme = commonSchemes.find(scheme => parsedScheme.endsWith(scheme));
 		// A known scheme was found, extract the unknown part from the URL
 		if (matchedScheme) {
 			const prefix = parsedScheme.length - matchedScheme.length;
 			start += prefix;
 			url = url.slice(prefix);
 		}
 		// ^-- /fix: use preferred scheme
-		// URL matched, but does not start with a protocol, add it
+		// The URL matched but does not start with a scheme (`www.foo.com`), add it
 		if (!parsedScheme.length) {
 			url = "http://" + url;
 		}
--- a/client/js/libs/handlebars/ircmessageparser/merge.js
+++ b/client/js/libs/handlebars/ircmessageparser/merge.js
@ -16,6 +16,7 @@ if (typeof Object_assign !== "function") {
 	};
 }
 // Merge text part information within a styling fragment
 function assign(textPart, fragment) {
 	const fragStart = fragment.start;
 	const start = Math.max(fragment.start, textPart.start);
@ -28,13 +29,25 @@ function assign(textPart, fragment) {
 	});
 }
 // Merge the style fragments within the text parts, taking into account
 // boundaries and text sections that have not matched to links or channels.
 // For example, given a string "foobar" where "foo" and "bar" have been
 // identified as parts (channels, links, etc.) and "fo", "ob" and "ar" have 3
 // different styles, the first resulting part will contain fragments "fo" and
 // "o", and the second resulting part will contain "b" and "ar". "o" and "b"
 // fragments will contain duplicate styling attributes.
 function merge(textParts, styleFragments) {
-	const cleanText = styleFragments.map(fragment => fragment.text).join("");
+	// Re-build the overall text (without control codes) from the style fragments
 	const cleanText = styleFragments.reduce((acc, frag) => acc + frag.text, "");
 	// Every section of the original text that has not been captured in a "part"
 	// is filled with "text" parts, dummy objects with start/end but no extra
 	// metadata.
 	const allParts = textParts
 		.concat(fill(textParts, cleanText))
 		.sort((a, b) => a.start - b.start);
 	// Distribute the style fragments within the text parts
 	return allParts.map(textPart => {
 		textPart.fragments = styleFragments
 			.filter(fragment => anyIntersection(textPart, fragment))
--- a/client/js/libs/handlebars/ircmessageparser/parseStyle.js
+++ b/client/js/libs/handlebars/ircmessageparser/parseStyle.js
@ -1,5 +1,6 @@
 "use strict";
 // Styling control codes
 const BOLD = "\x02";
 const COLOR = "\x03";
 const RESET = "\x0f";
@ -7,14 +8,24 @@ const REVERSE = "\x16";
 const ITALIC = "\x1d";
 const UNDERLINE = "\x1f";
 // Color code matcher, with format `XX,YY` where both `XX` and `YY` are
 // integers, `XX` is the text color and `YY` is an optional background color.
 const colorRx = /^(\d{1,2})(?:,(\d{1,2}))?/;
 // Represents all other control codes that are to be ignored/filtered from the text
 const controlCodesRx = /[\u0000-\u001F]/g;
 // Converts a given text into an array of objects, each of them representing a
 // similarly styled section of the text. Each object carries the `text`, style
 // information (`bold`, `textColor`, `bgcolor`, `reverse`, `italic`,
 // `underline`), and `start`/`end` cursors.
 function parseStyle(text) {
 	const result = [];
 	let start = 0;
 	let position = 0;
 	// At any given time, these carry style information since last time a styling
 	// control code was met.
 	let colorCodes, bold, textColor, bgColor, reverse, italic, underline;
 	const resetStyle = () => {
@ -27,27 +38,42 @@ function parseStyle(text) {
 	};
 	resetStyle();
 	// When called, this "closes" the current fragment by adding an entry to the
 	// `result` array using the styling information set last time a control code
 	// was met.
 	const emitFragment = () => {
 		// Uses the text fragment starting from the last control code position up to
 		// the current position
 		const textPart = text.slice(start, position);
 		start = position + 1;
 		// Filters out all non-style related control codes present in this text
 		const processedText = textPart.replace(controlCodesRx, "");
-		if (!processedText.length) {
+		if (processedText.length) {
-			return;
+			// Current fragment starts where the previous one ends, or at 0 if none
 			const fragmentStart = result.length ? result[result.length - 1].end : 0;
 			result.push({
 				bold,
 				textColor,
 				bgColor,
 				reverse,
 				italic,
 				underline,
 				text: processedText,
 				start: fragmentStart,
 				end: fragmentStart + processedText.length
 			});
 		}
-		result.push({
+		// Now that a fragment has been "closed", the next one will start after that
-			bold,
+		start = position + 1;
 			textColor,
 			bgColor,
 			reverse,
 			italic,
 			underline,
 			text: processedText
 		});
 	};
 	// This loop goes through each character of the given text one by one by
 	// bumping the `position` cursor. Every time a new special "styling" character
 	// is met, an object gets created (with `emitFragment()`) carrying information on text
 	// encountered since the previous styling character.
 	while (position < text.length) {
 		switch (text[position]) {
@ -56,6 +82,10 @@ function parseStyle(text) {
 			resetStyle();
 			break;
 		// Meeting a BOLD character means that the ongoing text is either going to
 		// be in bold or that the previous one was in bold and the following one
 		// must be reset.
 		// This same behavior applies to COLOR, REVERSE, ITALIC, and UNDERLINE.
 		case BOLD:
 			emitFragment();
 			bold = !bold;
@ -64,20 +94,23 @@ function parseStyle(text) {
 		case COLOR:
 			emitFragment();
 			// Go one step further to find the corresponding color
 			colorCodes = text.slice(position + 1).match(colorRx);
 			if (colorCodes) {
 				textColor = Number(colorCodes[1]);
-				bgColor = Number(colorCodes[2]);
+				if (colorCodes[2]) {
-				if (Number.isNaN(bgColor)) {
+					bgColor = Number(colorCodes[2]);
 					bgColor = undefined;
 				}
 				// Color code length is > 1, so bump the current position cursor by as
 				// much (and reset the start cursor for the current text block as well)
 				position += colorCodes[0].length;
 				start = position + 1;
 			} else {
 				// If no color codes were found, toggles back to no colors (like BOLD).
 				textColor = undefined;
 				bgColor = undefined;
 			}
 			start = position + 1;
 			break;
 		case REVERSE:
@ -95,9 +128,12 @@ function parseStyle(text) {
 			underline = !underline;
 			break;
 		}
 		// Evaluate the next character at the next iteration
 		position += 1;
 	}
 	// The entire text has been parsed, so we finalize the current text fragment.
 	emitFragment();
 	return result;
@ -107,25 +143,19 @@ const properties = ["bold", "textColor", "bgColor", "italic", "underline", "reve
 function prepare(text) {
 	return parseStyle(text)
-		.filter(fragment => fragment.text.length)
+		// This optimizes fragments by combining them together when all their values
-		.reduce((prev, curr, i) => {
+		// for the properties defined above are equal.
-			if (i === 0) {
+		.reduce((prev, curr) => {
-				return prev.concat([curr]);
+			if (prev.length) {
 				const lastEntry = prev[prev.length - 1];
 				if (properties.every(key => curr[key] === lastEntry[key])) {
 					lastEntry.text += curr.text;
 					lastEntry.end += curr.text.length;
 					return prev;
 				}
 			}
-
+			return prev.concat([curr]);
-			const lastEntry = prev[prev.length - 1];
+		}, []);
 			if (properties.some(key => curr[key] !== lastEntry[key])) {
 				return prev.concat([curr]);
 			}
 			lastEntry.text += curr.text;
 			return prev;
 		}, [])
 		.map((fragment, i, array) => {
 			fragment.start = i === 0 ? 0 : array[i - 1].end;
 			fragment.end = fragment.start + fragment.text.length;
 			return fragment;
 		});
 }
 module.exports = prepare;
--- a/client/js/libs/handlebars/parse.js
+++ b/client/js/libs/handlebars/parse.js
@ -6,6 +6,7 @@ const findChannels = require("./ircmessageparser/findChannels");
 const findLinks = require("./ircmessageparser/findLinks");
 const merge = require("./ircmessageparser/merge");
 // Create an HTML `span` with styling information for a given fragment
 function createFragment(fragment) {
 	let classes = [];
 	if (fragment.bold) {
@ -30,23 +31,33 @@ function createFragment(fragment) {
 	return escapedText;
 }
 // Transform an IRC message potentially filled with styling control codes, URLs
 // and channels into a string of HTML elements to display on the client.
 module.exports = function parse(text) {
 	// Extract the styling information and get the plain text version from it
 	const styleFragments = parseStyle(text);
 	const cleanText = styleFragments.map(fragment => fragment.text).join("");
-	const channelPrefixes = ["#", "&"]; // RPL_ISUPPORT.CHANTYPES
+	// On the plain text, find channels and URLs, returned as "parts". Parts are
-	const userModes = ["!", "@", "%", "+"]; // RPL_ISUPPORT.PREFIX
+	// arrays of objects containing start and end markers, as well as metadata
 	// depending on what was found (channel or link).
 	const channelPrefixes = ["#", "&"]; // TODO Channel prefixes should be RPL_ISUPPORT.CHANTYPES
 	const userModes = ["!", "@", "%", "+"]; // TODO User modes should be RPL_ISUPPORT.PREFIX
 	const channelParts = findChannels(cleanText, channelPrefixes, userModes);
 	const linkParts = findLinks(cleanText);
 	// Sort all parts identified based on their position in the original text
 	const parts = channelParts
 		.concat(linkParts)
 		.sort((a, b) => a.start - b.start);
 	// Merge the styling information with the channels / URLs / text objects and
 	// generate HTML strings with the resulting fragments
 	return merge(parts, styleFragments).map(textPart => {
 		// Create HTML strings with styling information
 		const fragments = textPart.fragments.map(createFragment).join("");
 		// Wrap these potentially styled fragments with links and channel buttons
 		if (textPart.link) {
 			const escapedLink = Handlebars.Utils.escapeExpression(textPart.link);
 			return `<a href="${escapedLink}" target="_blank" rel="noopener">${fragments}</a>`;
--- a/test/client/js/libs/handlebars/ircmessageparser/fill.js
+++ b/test/client/js/libs/handlebars/ircmessageparser/fill.js
@ -0,0 +1,50 @@
 "use strict";
 const expect = require("chai").expect;
 const fill = require("../../../../../../client/js/libs/handlebars/ircmessageparser/fill");
 describe("fill", () => {
 	const text = "01234567890123456789";
 	it("should return an entry for the unmatched end of string", () => {
 		const existingEntries = [
 			{start: 0, end: 10},
 			{start: 5, end: 15},
 		];
 		const expected = [
 			{start: 15, end: 20},
 		];
 		const actual = fill(existingEntries, text);
 		expect(actual).to.deep.equal(expected);
 	});
 	it("should return an entry per unmatched areas of the text", () => {
 		const existingEntries = [
 			{start: 0, end: 5},
 			{start: 10, end: 15},
 		];
 		const expected = [
 			{start: 5, end: 10},
 			{start: 15, end: 20},
 		];
 		const actual = fill(existingEntries, text);
 		expect(actual).to.deep.equal(expected);
 	});
 	it("should not return anything when entries match all text", () => {
 		const existingEntries = [
 			{start: 0, end: 10},
 			{start: 10, end: 20},
 		];
 		const actual = fill(existingEntries, text);
 		expect(actual).to.be.empty;
 	});
 });
--- a/test/client/js/libs/handlebars/ircmessageparser/findChannels.js
+++ b/test/client/js/libs/handlebars/ircmessageparser/findChannels.js
@ -1,7 +1,7 @@
 "use strict";
 const expect = require("chai").expect;
-const analyseText = require("../../../../../../client/js/libs/handlebars/ircmessageparser/findChannels");
+const findChannels = require("../../../../../../client/js/libs/handlebars/ircmessageparser/findChannels");
 describe("findChannels", () => {
 	it("should find single letter channel", () => {
@ -12,7 +12,7 @@ describe("findChannels", () => {
 			end: 2
 		}];
-		const actual = analyseText(input, ["#"], ["@", "+"]);
+		const actual = findChannels(input, ["#"], ["@", "+"]);
 		expect(actual).to.deep.equal(expected);
 	});
@ -25,7 +25,7 @@ describe("findChannels", () => {
 			end: 4
 		}];
-		const actual = analyseText(input, ["#"], ["@", "+"]);
+		const actual = findChannels(input, ["#"], ["@", "+"]);
 		expect(actual).to.deep.equal(expected);
 	});
@ -38,7 +38,7 @@ describe("findChannels", () => {
 			end: 15
 		}];
-		const actual = analyseText(input, ["#"], ["@", "+"]);
+		const actual = findChannels(input, ["#"], ["@", "+"]);
 		expect(actual).to.deep.equal(expected);
 	});
@ -51,7 +51,7 @@ describe("findChannels", () => {
 			end: 5
 		}];
-		const actual = analyseText(input, ["#"], ["@", "+"]);
+		const actual = findChannels(input, ["#"], ["@", "+"]);
 		expect(actual).to.deep.equal(expected);
 	});
@ -64,7 +64,7 @@ describe("findChannels", () => {
 			end: 6
 		}];
-		const actual = analyseText(input, ["#"], ["@", "+"]);
+		const actual = findChannels(input, ["#"], ["@", "+"]);
 		expect(actual).to.deep.equal(expected);
 	});
@ -77,7 +77,7 @@ describe("findChannels", () => {
 			end: 3
 		}];
-		const actual = analyseText(input, ["#"], ["@", "+"]);
+		const actual = findChannels(input, ["#"], ["@", "+"]);
 		expect(actual).to.deep.equal(expected);
 	});
@ -90,7 +90,7 @@ describe("findChannels", () => {
 			end: 6
 		}];
-		const actual = analyseText(input, ["#"], ["!", "@", "%", "+"]);
+		const actual = findChannels(input, ["#"], ["!", "@", "%", "+"]);
 		expect(actual).to.deep.equal(expected);
 	});
@ -103,7 +103,7 @@ describe("findChannels", () => {
 			end: 2
 		}];
-		const actual = analyseText(input, ["@"], ["#", "+"]);
+		const actual = findChannels(input, ["@"], ["#", "+"]);
 		expect(actual).to.deep.equal(expected);
 	});
@ -116,7 +116,7 @@ describe("findChannels", () => {
 			end: 6
 		}];
-		const actual = analyseText(input, ["#"], ["@", "+"]);
+		const actual = findChannels(input, ["#"], ["@", "+"]);
 		expect(actual).to.deep.equal(expected);
 	});