Use WHATWG URL parser in link prefetcher
This commit is contained in:
parent
629ae8bfa4
commit
d4fa6bbcb0
@ -2,8 +2,7 @@
|
|||||||
|
|
||||||
const cheerio = require("cheerio");
|
const cheerio = require("cheerio");
|
||||||
const request = require("request");
|
const request = require("request");
|
||||||
const url = require("url");
|
const URL = require("url").URL;
|
||||||
const URI = require("urijs");
|
|
||||||
const mime = require("mime-types");
|
const mime = require("mime-types");
|
||||||
const Helper = require("../../helper");
|
const Helper = require("../../helper");
|
||||||
const cleanIrcMessage = require("../../../client/js/libs/handlebars/ircmessageparser/cleanIrcMessage");
|
const cleanIrcMessage = require("../../../client/js/libs/handlebars/ircmessageparser/cleanIrcMessage");
|
||||||
@ -32,26 +31,36 @@ module.exports = function(client, chan, msg) {
|
|||||||
// Remove all IRC formatting characters before searching for links
|
// Remove all IRC formatting characters before searching for links
|
||||||
const cleanText = cleanIrcMessage(msg.text);
|
const cleanText = cleanIrcMessage(msg.text);
|
||||||
|
|
||||||
// We will only try to prefetch http(s) links
|
msg.previews = findLinks(cleanText).reduce((cleanLinks, link) => {
|
||||||
const links = findLinks(cleanText).filter((w) => isValidLink(w.link));
|
const url = normalizeURL(link.link);
|
||||||
|
|
||||||
if (links.length === 0) {
|
// If the URL is invalid and cannot be normalized, don't fetch it
|
||||||
return;
|
if (url === null) {
|
||||||
|
return cleanLinks;
|
||||||
}
|
}
|
||||||
|
|
||||||
msg.previews = Array.from(new Set( // Remove duplicate links
|
// If there are too many urls in this message, only fetch first X valid links
|
||||||
links.map((link) => link.link)
|
if (cleanLinks.length > 4) {
|
||||||
)).map((link) => ({
|
return cleanLinks;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do not fetch duplicate links twice
|
||||||
|
if (cleanLinks.some((l) => l.link === link.link)) {
|
||||||
|
return cleanLinks;
|
||||||
|
}
|
||||||
|
|
||||||
|
const preview = {
|
||||||
type: "loading",
|
type: "loading",
|
||||||
head: "",
|
head: "",
|
||||||
body: "",
|
body: "",
|
||||||
thumb: "",
|
thumb: "",
|
||||||
link: link,
|
link: link.link, // Send original matched link to the client
|
||||||
shown: true,
|
shown: true,
|
||||||
})).slice(0, 5); // Only preview the first 5 URLs in message to avoid abuse
|
};
|
||||||
|
|
||||||
msg.previews.forEach((preview) => {
|
cleanLinks.push(preview);
|
||||||
fetch(normalizeURL(preview.link), {
|
|
||||||
|
fetch(url, {
|
||||||
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
language: client.language,
|
language: client.language,
|
||||||
}, function(res, err) {
|
}, function(res, err) {
|
||||||
@ -68,7 +77,9 @@ module.exports = function(client, chan, msg) {
|
|||||||
|
|
||||||
parse(msg, preview, res, client);
|
parse(msg, preview, res, client);
|
||||||
});
|
});
|
||||||
});
|
|
||||||
|
return cleanLinks;
|
||||||
|
}, []);
|
||||||
};
|
};
|
||||||
|
|
||||||
function parseHtml(preview, res, client) {
|
function parseHtml(preview, res, client) {
|
||||||
@ -93,18 +104,14 @@ function parseHtml(preview, res, client) {
|
|||||||
|| $('link[rel="image_src"]').attr("href")
|
|| $('link[rel="image_src"]').attr("href")
|
||||||
|| "";
|
|| "";
|
||||||
|
|
||||||
|
// Make sure thumbnail is a valid and absolute url
|
||||||
if (preview.thumb.length) {
|
if (preview.thumb.length) {
|
||||||
preview.thumb = url.resolve(preview.link, preview.thumb);
|
preview.thumb = normalizeURL(preview.thumb, preview.link) || "";
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure thumbnail is a valid url
|
|
||||||
if (!isValidLink(preview.thumb)) {
|
|
||||||
preview.thumb = "";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify that thumbnail pic exists and is under allowed size
|
// Verify that thumbnail pic exists and is under allowed size
|
||||||
if (preview.thumb.length) {
|
if (preview.thumb.length) {
|
||||||
fetch(normalizeURL(preview.thumb), {language: client.language}, (resThumb) => {
|
fetch(preview.thumb, {language: client.language}, (resThumb) => {
|
||||||
if (resThumb === null
|
if (resThumb === null
|
||||||
|| !(/^image\/.+/.test(resThumb.type))
|
|| !(/^image\/.+/.test(resThumb.type))
|
||||||
|| resThumb.size > (Helper.config.prefetchMaxImageSize * 1024)) {
|
|| resThumb.size > (Helper.config.prefetchMaxImageSize * 1024)) {
|
||||||
@ -134,16 +141,19 @@ function parseHtmlMedia($, preview, res, client) {
|
|||||||
|
|
||||||
if (mediaTypeRegex.test(mimeType)) {
|
if (mediaTypeRegex.test(mimeType)) {
|
||||||
// If we match a clean video or audio tag, parse that as a preview instead
|
// If we match a clean video or audio tag, parse that as a preview instead
|
||||||
const mediaUrl = $($(`meta[property="og:${type}"]`).get(i)).attr("content");
|
let mediaUrl = $($(`meta[property="og:${type}"]`).get(i)).attr("content");
|
||||||
|
|
||||||
// Make sure media is a valid url
|
// Make sure media is a valid url
|
||||||
if (!mediaUrl.startsWith("https://")) {
|
mediaUrl = normalizeURL(mediaUrl, preview.link, true);
|
||||||
|
|
||||||
|
// Make sure media is a valid url
|
||||||
|
if (!mediaUrl) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
foundMedia = true;
|
foundMedia = true;
|
||||||
|
|
||||||
fetch(normalizeURL(mediaUrl), {
|
fetch(mediaUrl, {
|
||||||
accept: type === "video" ?
|
accept: type === "video" ?
|
||||||
"video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5" :
|
"video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5" :
|
||||||
"audio/webm, audio/ogg, audio/wav, audio/*;q=0.9, application/ogg;q=0.7, video/*;q=0.6; */*;q=0.5",
|
"audio/webm, audio/ogg, audio/wav, audio/*;q=0.9, application/ogg;q=0.7, video/*;q=0.6; */*;q=0.5",
|
||||||
@ -360,27 +370,31 @@ function fetch(uri, headers, cb) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function normalizeURL(header) {
|
function normalizeURL(link, baseLink, disallowHttp = false) {
|
||||||
return URI(header).normalize().toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
function isValidLink(link) {
|
|
||||||
try {
|
try {
|
||||||
const uri = URI(link);
|
const url = new URL(link, baseLink);
|
||||||
const protocol = uri.protocol();
|
|
||||||
|
|
||||||
// Only fetch http and https links
|
// Only fetch http and https links
|
||||||
if (protocol !== "http" && protocol !== "https") {
|
if (url.protocol !== "http:" && url.protocol !== "https:") {
|
||||||
return false;
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (disallowHttp && url.protocol === "http:") {
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do not fetch links without hostname or ones that contain authorization
|
// Do not fetch links without hostname or ones that contain authorization
|
||||||
if (!uri.hostname() || uri.username() || uri.password()) {
|
if (!url.hostname || url.username || url.password) {
|
||||||
return false;
|
return null;
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
// Drop hash from the url, if any
|
||||||
|
url.hash = "";
|
||||||
|
|
||||||
|
return url.toString();
|
||||||
|
} catch (e) {
|
||||||
|
// if an exception was thrown, the url is not valid
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -385,6 +385,28 @@ describe("Link plugin", function() {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("should de-duplicate links", function(done) {
|
||||||
|
const message = this.irc.createMessage({
|
||||||
|
text: "//localhost:9002 http://localhost:9002 http://localhost:9002",
|
||||||
|
});
|
||||||
|
|
||||||
|
link(this.irc, this.network.channels[0], message);
|
||||||
|
|
||||||
|
expect(message.previews).to.deep.equal([{
|
||||||
|
type: "loading",
|
||||||
|
head: "",
|
||||||
|
body: "",
|
||||||
|
thumb: "",
|
||||||
|
link: "http://localhost:9002",
|
||||||
|
shown: true,
|
||||||
|
}]);
|
||||||
|
|
||||||
|
this.irc.once("msg:preview", function(data) {
|
||||||
|
expect(data.preview.link).to.equal("http://localhost:9002");
|
||||||
|
done();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
it("should not try to fetch links with wrong protocol", function() {
|
it("should not try to fetch links with wrong protocol", function() {
|
||||||
const message = this.irc.createMessage({
|
const message = this.irc.createMessage({
|
||||||
text: "ssh://example.com ftp://example.com irc://example.com http:////////example.com",
|
text: "ssh://example.com ftp://example.com irc://example.com http:////////example.com",
|
||||||
|
Loading…
Reference in New Issue
Block a user