Explain the modules of the message parser and add tests
- Add comments and descriptions to: - `findChannels.js` - `parseStyle` - `findLinks` - `fill` - `anyIntersection` - `merge` - `parse` - Minor optimizations to `parseStyle` - Add tests for `fill`
This commit is contained in:
parent
90f4a94bb2
commit
03e3444a35
@ -1,5 +1,7 @@
|
||||
"use strict";
|
||||
|
||||
// Return true if any section of "a" or "b" parts (defined by their start/end
|
||||
// markers) intersect each other, false otherwise.
|
||||
function anyIntersection(a, b) {
|
||||
return a.start <= b.start && b.start < a.end ||
|
||||
a.start < b.end && b.end <= a.end ||
|
||||
|
@ -1,21 +1,26 @@
|
||||
"use strict";
|
||||
|
||||
// Create plain text entries corresponding to areas of the text that match no
|
||||
// existing entries. Returns an empty array if all parts of the text have been
|
||||
// parsed into recognizable entries already.
|
||||
function fill(existingEntries, text) {
|
||||
let position = 0;
|
||||
const result = [];
|
||||
|
||||
for (let i = 0; i < existingEntries.length; i++) {
|
||||
const textSegment = existingEntries[i];
|
||||
|
||||
// Fill inner parts of the text. For example, if text is `foobarbaz` and both
|
||||
// `foo` and `baz` have matched into an entry, this will return a dummy entry
|
||||
// corresponding to `bar`.
|
||||
const result = existingEntries.reduce((acc, textSegment) => {
|
||||
if (textSegment.start > position) {
|
||||
result.push({
|
||||
acc.push({
|
||||
start: position,
|
||||
end: textSegment.start
|
||||
});
|
||||
}
|
||||
position = textSegment.end;
|
||||
}
|
||||
return acc;
|
||||
}, []);
|
||||
|
||||
// Complete the unmatched end of the text with a dummy entry
|
||||
if (position < text.length) {
|
||||
result.push({
|
||||
start: position,
|
||||
|
@ -1,13 +1,22 @@
|
||||
"use strict";
|
||||
|
||||
// Escapes the RegExp special characters "^", "$", "\", ".", "*", "+", "?", "(",
|
||||
// ")", "[", "]", "{", "}", and "|" in string.
|
||||
// See https://lodash.com/docs/#escapeRegExp
|
||||
const escapeRegExp = require("lodash/escapeRegExp");
|
||||
|
||||
// NOTE: channel prefixes should be RPL_ISUPPORT.CHANTYPES
|
||||
// NOTE: userModes should be RPL_ISUPPORT.PREFIX
|
||||
// Given an array of channel prefixes (such as "#" and "&") and an array of user
|
||||
// modes (such as "@" and "+"), this function extracts channels and nicks from a
|
||||
// text.
|
||||
// It returns an array of objects for each channel found with their start index,
|
||||
// end index and channel name.
|
||||
function findChannels(text, channelPrefixes, userModes) {
|
||||
// `userModePattern` is necessary to ignore user modes in /whois responses.
|
||||
// For example, a voiced user in #thelounge will have a /whois response of:
|
||||
// > foo is on the following channels: +#thelounge
|
||||
// We need to explicitly ignore user modes to parse such channels correctly.
|
||||
const userModePattern = userModes.map(escapeRegExp).join("");
|
||||
const channelPrefixPattern = channelPrefixes.map(escapeRegExp).join("");
|
||||
|
||||
const channelPattern = `(?:^|\\s)[${userModePattern}]*([${channelPrefixPattern}][^ \u0007]+)`;
|
||||
const channelRegExp = new RegExp(channelPattern, "g");
|
||||
|
||||
@ -15,6 +24,8 @@ function findChannels(text, channelPrefixes, userModes) {
|
||||
let match;
|
||||
|
||||
do {
|
||||
// With global ("g") regexes, calling `exec` multiple times will find
|
||||
// successive matches in the same string.
|
||||
match = channelRegExp.exec(text);
|
||||
|
||||
if (match) {
|
||||
|
@ -2,6 +2,9 @@
|
||||
|
||||
const URI = require("urijs");
|
||||
|
||||
// Known schemes to detect in a text. If a text contains `foo...bar://foo.com`,
|
||||
// the parsed scheme should be `foo...bar` but if it contains
|
||||
// `foo...http://foo.com`, we assume the scheme to extract will be `http`.
|
||||
const commonSchemes = [
|
||||
"http", "https",
|
||||
"ftp", "sftp",
|
||||
@ -16,6 +19,10 @@ function findLinks(text) {
|
||||
let result = [];
|
||||
let lastPosition = 0;
|
||||
|
||||
// URI.withinString() identifies URIs within text, e.g. to translate them to
|
||||
// <a>-Tags.
|
||||
// See https://medialize.github.io/URI.js/docs.html#static-withinString
|
||||
// In our case, we store each URI encountered in a result array.
|
||||
URI.withinString(text, function(url, start, end) {
|
||||
// v-- fix: url was modified and does not match input string -> can't be mapped
|
||||
if (text.indexOf(url, lastPosition) < 0) {
|
||||
@ -23,19 +30,22 @@ function findLinks(text) {
|
||||
}
|
||||
// ^-- /fix: url was modified and does not match input string -> can't be mapped
|
||||
|
||||
// v-- fix: use preferred scheme
|
||||
const parsed = URI(url);
|
||||
const parsedScheme = parsed.scheme().toLowerCase();
|
||||
// Extract the scheme of the URL detected, if there is one
|
||||
const parsedScheme = URI(url).scheme().toLowerCase();
|
||||
|
||||
// Check if the scheme of the detected URL matches a common one above.
|
||||
// In a URL like `foo..http://example.com`, the scheme would be `foo..http`,
|
||||
// so we need to clean up the end of the scheme and filter out the rest.
|
||||
const matchedScheme = commonSchemes.find(scheme => parsedScheme.endsWith(scheme));
|
||||
|
||||
// A known scheme was found, extract the unknown part from the URL
|
||||
if (matchedScheme) {
|
||||
const prefix = parsedScheme.length - matchedScheme.length;
|
||||
start += prefix;
|
||||
url = url.slice(prefix);
|
||||
}
|
||||
// ^-- /fix: use preferred scheme
|
||||
|
||||
// URL matched, but does not start with a protocol, add it
|
||||
// The URL matched but does not start with a scheme (`www.foo.com`), add it
|
||||
if (!parsedScheme.length) {
|
||||
url = "http://" + url;
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ if (typeof Object_assign !== "function") {
|
||||
};
|
||||
}
|
||||
|
||||
// Merge text part information within a styling fragment
|
||||
function assign(textPart, fragment) {
|
||||
const fragStart = fragment.start;
|
||||
const start = Math.max(fragment.start, textPart.start);
|
||||
@ -28,13 +29,25 @@ function assign(textPart, fragment) {
|
||||
});
|
||||
}
|
||||
|
||||
// Merge the style fragments within the text parts, taking into account
|
||||
// boundaries and text sections that have not matched to links or channels.
|
||||
// For example, given a string "foobar" where "foo" and "bar" have been
|
||||
// identified as parts (channels, links, etc.) and "fo", "ob" and "ar" have 3
|
||||
// different styles, the first resulting part will contain fragments "fo" and
|
||||
// "o", and the second resulting part will contain "b" and "ar". "o" and "b"
|
||||
// fragments will contain duplicate styling attributes.
|
||||
function merge(textParts, styleFragments) {
|
||||
const cleanText = styleFragments.map(fragment => fragment.text).join("");
|
||||
// Re-build the overall text (without control codes) from the style fragments
|
||||
const cleanText = styleFragments.reduce((acc, frag) => acc + frag.text, "");
|
||||
|
||||
// Every section of the original text that has not been captured in a "part"
|
||||
// is filled with "text" parts, dummy objects with start/end but no extra
|
||||
// metadata.
|
||||
const allParts = textParts
|
||||
.concat(fill(textParts, cleanText))
|
||||
.sort((a, b) => a.start - b.start);
|
||||
|
||||
// Distribute the style fragments within the text parts
|
||||
return allParts.map(textPart => {
|
||||
textPart.fragments = styleFragments
|
||||
.filter(fragment => anyIntersection(textPart, fragment))
|
||||
|
@ -1,5 +1,6 @@
|
||||
"use strict";
|
||||
|
||||
// Styling control codes
|
||||
const BOLD = "\x02";
|
||||
const COLOR = "\x03";
|
||||
const RESET = "\x0f";
|
||||
@ -7,14 +8,24 @@ const REVERSE = "\x16";
|
||||
const ITALIC = "\x1d";
|
||||
const UNDERLINE = "\x1f";
|
||||
|
||||
// Color code matcher, with format `XX,YY` where both `XX` and `YY` are
|
||||
// integers, `XX` is the text color and `YY` is an optional background color.
|
||||
const colorRx = /^(\d{1,2})(?:,(\d{1,2}))?/;
|
||||
|
||||
// Represents all other control codes that are to be ignored/filtered from the text
|
||||
const controlCodesRx = /[\u0000-\u001F]/g;
|
||||
|
||||
// Converts a given text into an array of objects, each of them representing a
|
||||
// similarly styled section of the text. Each object carries the `text`, style
|
||||
// information (`bold`, `textColor`, `bgcolor`, `reverse`, `italic`,
|
||||
// `underline`), and `start`/`end` cursors.
|
||||
function parseStyle(text) {
|
||||
const result = [];
|
||||
let start = 0;
|
||||
let position = 0;
|
||||
|
||||
// At any given time, these carry style information since last time a styling
|
||||
// control code was met.
|
||||
let colorCodes, bold, textColor, bgColor, reverse, italic, underline;
|
||||
|
||||
const resetStyle = () => {
|
||||
@ -27,15 +38,20 @@ function parseStyle(text) {
|
||||
};
|
||||
resetStyle();
|
||||
|
||||
// When called, this "closes" the current fragment by adding an entry to the
|
||||
// `result` array using the styling information set last time a control code
|
||||
// was met.
|
||||
const emitFragment = () => {
|
||||
// Uses the text fragment starting from the last control code position up to
|
||||
// the current position
|
||||
const textPart = text.slice(start, position);
|
||||
start = position + 1;
|
||||
|
||||
// Filters out all non-style related control codes present in this text
|
||||
const processedText = textPart.replace(controlCodesRx, "");
|
||||
|
||||
if (!processedText.length) {
|
||||
return;
|
||||
}
|
||||
if (processedText.length) {
|
||||
// Current fragment starts where the previous one ends, or at 0 if none
|
||||
const fragmentStart = result.length ? result[result.length - 1].end : 0;
|
||||
|
||||
result.push({
|
||||
bold,
|
||||
@ -44,10 +60,20 @@ function parseStyle(text) {
|
||||
reverse,
|
||||
italic,
|
||||
underline,
|
||||
text: processedText
|
||||
text: processedText,
|
||||
start: fragmentStart,
|
||||
end: fragmentStart + processedText.length
|
||||
});
|
||||
}
|
||||
|
||||
// Now that a fragment has been "closed", the next one will start after that
|
||||
start = position + 1;
|
||||
};
|
||||
|
||||
// This loop goes through each character of the given text one by one by
|
||||
// bumping the `position` cursor. Every time a new special "styling" character
|
||||
// is met, an object gets created (with `emitFragment()`) carrying information on the text
|
||||
// encountered since the previous styling character.
|
||||
while (position < text.length) {
|
||||
switch (text[position]) {
|
||||
|
||||
@ -56,6 +82,10 @@ function parseStyle(text) {
|
||||
resetStyle();
|
||||
break;
|
||||
|
||||
// Meeting a BOLD character means that the ongoing text is either going to
|
||||
// be in bold or that the previous one was in bold and the following one
|
||||
// must be reset.
|
||||
// This same behavior applies to COLOR, REVERSE, ITALIC, and UNDERLINE.
|
||||
case BOLD:
|
||||
emitFragment();
|
||||
bold = !bold;
|
||||
@ -64,20 +94,23 @@ function parseStyle(text) {
|
||||
case COLOR:
|
||||
emitFragment();
|
||||
|
||||
// Go one step further to find the corresponding color
|
||||
colorCodes = text.slice(position + 1).match(colorRx);
|
||||
|
||||
if (colorCodes) {
|
||||
textColor = Number(colorCodes[1]);
|
||||
if (colorCodes[2]) {
|
||||
bgColor = Number(colorCodes[2]);
|
||||
if (Number.isNaN(bgColor)) {
|
||||
bgColor = undefined;
|
||||
}
|
||||
// Color code length is > 1, so bump the current position cursor by as
|
||||
// much (and reset the start cursor for the current text block as well)
|
||||
position += colorCodes[0].length;
|
||||
start = position + 1;
|
||||
} else {
|
||||
// If no color codes were found, toggles back to no colors (like BOLD).
|
||||
textColor = undefined;
|
||||
bgColor = undefined;
|
||||
}
|
||||
start = position + 1;
|
||||
break;
|
||||
|
||||
case REVERSE:
|
||||
@ -95,9 +128,12 @@ function parseStyle(text) {
|
||||
underline = !underline;
|
||||
break;
|
||||
}
|
||||
|
||||
// Evaluate the next character at the next iteration
|
||||
position += 1;
|
||||
}
|
||||
|
||||
// The entire text has been parsed, so we finalize the current text fragment.
|
||||
emitFragment();
|
||||
|
||||
return result;
|
||||
@ -107,25 +143,19 @@ const properties = ["bold", "textColor", "bgColor", "italic", "underline", "reve
|
||||
|
||||
function prepare(text) {
|
||||
return parseStyle(text)
|
||||
.filter(fragment => fragment.text.length)
|
||||
.reduce((prev, curr, i) => {
|
||||
if (i === 0) {
|
||||
return prev.concat([curr]);
|
||||
}
|
||||
|
||||
// This optimizes fragments by combining them together when all their values
|
||||
// for the properties defined above are equal.
|
||||
.reduce((prev, curr) => {
|
||||
if (prev.length) {
|
||||
const lastEntry = prev[prev.length - 1];
|
||||
if (properties.some(key => curr[key] !== lastEntry[key])) {
|
||||
return prev.concat([curr]);
|
||||
}
|
||||
|
||||
if (properties.every(key => curr[key] === lastEntry[key])) {
|
||||
lastEntry.text += curr.text;
|
||||
lastEntry.end += curr.text.length;
|
||||
return prev;
|
||||
}, [])
|
||||
.map((fragment, i, array) => {
|
||||
fragment.start = i === 0 ? 0 : array[i - 1].end;
|
||||
fragment.end = fragment.start + fragment.text.length;
|
||||
return fragment;
|
||||
});
|
||||
}
|
||||
}
|
||||
return prev.concat([curr]);
|
||||
}, []);
|
||||
}
|
||||
|
||||
module.exports = prepare;
|
||||
|
@ -6,6 +6,7 @@ const findChannels = require("./ircmessageparser/findChannels");
|
||||
const findLinks = require("./ircmessageparser/findLinks");
|
||||
const merge = require("./ircmessageparser/merge");
|
||||
|
||||
// Create an HTML `span` with styling information for a given fragment
|
||||
function createFragment(fragment) {
|
||||
let classes = [];
|
||||
if (fragment.bold) {
|
||||
@ -30,23 +31,33 @@ function createFragment(fragment) {
|
||||
return escapedText;
|
||||
}
|
||||
|
||||
// Transform an IRC message potentially filled with styling control codes, URLs
|
||||
// and channels into a string of HTML elements to display on the client.
|
||||
module.exports = function parse(text) {
|
||||
// Extract the styling information and get the plain text version from it
|
||||
const styleFragments = parseStyle(text);
|
||||
const cleanText = styleFragments.map(fragment => fragment.text).join("");
|
||||
|
||||
const channelPrefixes = ["#", "&"]; // RPL_ISUPPORT.CHANTYPES
|
||||
const userModes = ["!", "@", "%", "+"]; // RPL_ISUPPORT.PREFIX
|
||||
// On the plain text, find channels and URLs, returned as "parts". Parts are
|
||||
// arrays of objects containing start and end markers, as well as metadata
|
||||
// depending on what was found (channel or link).
|
||||
const channelPrefixes = ["#", "&"]; // TODO Channel prefixes should be RPL_ISUPPORT.CHANTYPES
|
||||
const userModes = ["!", "@", "%", "+"]; // TODO User modes should be RPL_ISUPPORT.PREFIX
|
||||
const channelParts = findChannels(cleanText, channelPrefixes, userModes);
|
||||
|
||||
const linkParts = findLinks(cleanText);
|
||||
|
||||
// Sort all parts identified based on their position in the original text
|
||||
const parts = channelParts
|
||||
.concat(linkParts)
|
||||
.sort((a, b) => a.start - b.start);
|
||||
|
||||
// Merge the styling information with the channels / URLs / text objects and
|
||||
// generate HTML strings with the resulting fragments
|
||||
return merge(parts, styleFragments).map(textPart => {
|
||||
// Create HTML strings with styling information
|
||||
const fragments = textPart.fragments.map(createFragment).join("");
|
||||
|
||||
// Wrap these potentially styled fragments with links and channel buttons
|
||||
if (textPart.link) {
|
||||
const escapedLink = Handlebars.Utils.escapeExpression(textPart.link);
|
||||
return `<a href="${escapedLink}" target="_blank" rel="noopener">${fragments}</a>`;
|
||||
|
50
test/client/js/libs/handlebars/ircmessageparser/fill.js
Normal file
50
test/client/js/libs/handlebars/ircmessageparser/fill.js
Normal file
@ -0,0 +1,50 @@
|
||||
"use strict";
|
||||
|
||||
const expect = require("chai").expect;
|
||||
const fill = require("../../../../../../client/js/libs/handlebars/ircmessageparser/fill");
|
||||
|
||||
describe("fill", () => {
|
||||
const text = "01234567890123456789";
|
||||
|
||||
it("should return an entry for the unmatched end of string", () => {
|
||||
const existingEntries = [
|
||||
{start: 0, end: 10},
|
||||
{start: 5, end: 15},
|
||||
];
|
||||
|
||||
const expected = [
|
||||
{start: 15, end: 20},
|
||||
];
|
||||
|
||||
const actual = fill(existingEntries, text);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
|
||||
it("should return an entry per unmatched areas of the text", () => {
|
||||
const existingEntries = [
|
||||
{start: 0, end: 5},
|
||||
{start: 10, end: 15},
|
||||
];
|
||||
|
||||
const expected = [
|
||||
{start: 5, end: 10},
|
||||
{start: 15, end: 20},
|
||||
];
|
||||
|
||||
const actual = fill(existingEntries, text);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
|
||||
it("should not return anything when entries match all text", () => {
|
||||
const existingEntries = [
|
||||
{start: 0, end: 10},
|
||||
{start: 10, end: 20},
|
||||
];
|
||||
|
||||
const actual = fill(existingEntries, text);
|
||||
|
||||
expect(actual).to.be.empty;
|
||||
});
|
||||
});
|
@ -1,7 +1,7 @@
|
||||
"use strict";
|
||||
|
||||
const expect = require("chai").expect;
|
||||
const analyseText = require("../../../../../../client/js/libs/handlebars/ircmessageparser/findChannels");
|
||||
const findChannels = require("../../../../../../client/js/libs/handlebars/ircmessageparser/findChannels");
|
||||
|
||||
describe("findChannels", () => {
|
||||
it("should find single letter channel", () => {
|
||||
@ -12,7 +12,7 @@ describe("findChannels", () => {
|
||||
end: 2
|
||||
}];
|
||||
|
||||
const actual = analyseText(input, ["#"], ["@", "+"]);
|
||||
const actual = findChannels(input, ["#"], ["@", "+"]);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
@ -25,7 +25,7 @@ describe("findChannels", () => {
|
||||
end: 4
|
||||
}];
|
||||
|
||||
const actual = analyseText(input, ["#"], ["@", "+"]);
|
||||
const actual = findChannels(input, ["#"], ["@", "+"]);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
@ -38,7 +38,7 @@ describe("findChannels", () => {
|
||||
end: 15
|
||||
}];
|
||||
|
||||
const actual = analyseText(input, ["#"], ["@", "+"]);
|
||||
const actual = findChannels(input, ["#"], ["@", "+"]);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
@ -51,7 +51,7 @@ describe("findChannels", () => {
|
||||
end: 5
|
||||
}];
|
||||
|
||||
const actual = analyseText(input, ["#"], ["@", "+"]);
|
||||
const actual = findChannels(input, ["#"], ["@", "+"]);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
@ -64,7 +64,7 @@ describe("findChannels", () => {
|
||||
end: 6
|
||||
}];
|
||||
|
||||
const actual = analyseText(input, ["#"], ["@", "+"]);
|
||||
const actual = findChannels(input, ["#"], ["@", "+"]);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
@ -77,7 +77,7 @@ describe("findChannels", () => {
|
||||
end: 3
|
||||
}];
|
||||
|
||||
const actual = analyseText(input, ["#"], ["@", "+"]);
|
||||
const actual = findChannels(input, ["#"], ["@", "+"]);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
@ -90,7 +90,7 @@ describe("findChannels", () => {
|
||||
end: 6
|
||||
}];
|
||||
|
||||
const actual = analyseText(input, ["#"], ["!", "@", "%", "+"]);
|
||||
const actual = findChannels(input, ["#"], ["!", "@", "%", "+"]);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
@ -103,7 +103,7 @@ describe("findChannels", () => {
|
||||
end: 2
|
||||
}];
|
||||
|
||||
const actual = analyseText(input, ["@"], ["#", "+"]);
|
||||
const actual = findChannels(input, ["@"], ["#", "+"]);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
@ -116,7 +116,7 @@ describe("findChannels", () => {
|
||||
end: 6
|
||||
}];
|
||||
|
||||
const actual = analyseText(input, ["#"], ["@", "+"]);
|
||||
const actual = findChannels(input, ["#"], ["@", "+"]);
|
||||
|
||||
expect(actual).to.deep.equal(expected);
|
||||
});
|
||||
|
Loading…
Reference in New Issue
Block a user