Merge pull request #2538 from thelounge/xpaw/fetch-links-once

De-duplicate link fetching to one request at once
This commit is contained in:
Pavel Djundik 2018-06-11 10:49:29 +03:00 committed by GitHub
commit e420a210b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 165 additions and 69 deletions

View File

@ -8,7 +8,7 @@ const Helper = require("../../helper");
const cleanIrcMessage = require("../../../client/js/libs/handlebars/ircmessageparser/cleanIrcMessage"); const cleanIrcMessage = require("../../../client/js/libs/handlebars/ircmessageparser/cleanIrcMessage");
const findLinks = require("../../../client/js/libs/handlebars/ircmessageparser/findLinks"); const findLinks = require("../../../client/js/libs/handlebars/ircmessageparser/findLinks");
const storage = require("../storage"); const storage = require("../storage");
const currentFetchPromises = new Map();
const mediaTypeRegex = /^(audio|video)\/.+/; const mediaTypeRegex = /^(audio|video)\/.+/;
// Fix ECDH curve client compatibility in Node v8/v9 // Fix ECDH curve client compatibility in Node v8/v9
@ -63,19 +63,13 @@ module.exports = function(client, chan, msg) {
fetch(url, { fetch(url, {
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
language: client.language, language: client.language,
}, function(res, err) { }).then((res) => {
if (err) {
preview.type = "error";
preview.error = "message";
preview.message = err.message;
handlePreview(client, msg, preview, res);
}
if (res === null) {
return;
}
parse(msg, preview, res, client); parse(msg, preview, res, client);
}).catch((err) => {
preview.type = "error";
preview.error = "message";
preview.message = err.message;
handlePreview(client, msg, preview, null);
}); });
return cleanLinks; return cleanLinks;
@ -111,7 +105,7 @@ function parseHtml(preview, res, client) {
// Verify that thumbnail pic exists and is under allowed size // Verify that thumbnail pic exists and is under allowed size
if (preview.thumb.length) { if (preview.thumb.length) {
fetch(preview.thumb, {language: client.language}, (resThumb) => { fetch(preview.thumb, {language: client.language}).then((resThumb) => {
if (resThumb === null if (resThumb === null
|| !(/^image\/.+/.test(resThumb.type)) || !(/^image\/.+/.test(resThumb.type))
|| resThumb.size > (Helper.config.prefetchMaxImageSize * 1024)) { || resThumb.size > (Helper.config.prefetchMaxImageSize * 1024)) {
@ -119,6 +113,9 @@ function parseHtml(preview, res, client) {
} }
resolve(resThumb); resolve(resThumb);
}).catch(() => {
preview.thumb = "";
resolve(null);
}); });
} else { } else {
resolve(res); resolve(res);
@ -158,7 +155,7 @@ function parseHtmlMedia($, preview, res, client) {
"video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5" : "video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5" :
"audio/webm, audio/ogg, audio/wav, audio/*;q=0.9, application/ogg;q=0.7, video/*;q=0.6; */*;q=0.5", "audio/webm, audio/ogg, audio/wav, audio/*;q=0.9, application/ogg;q=0.7, video/*;q=0.6; */*;q=0.5",
language: client.language, language: client.language,
}, (resMedia) => { }).then((resMedia) => {
if (resMedia === null || !mediaTypeRegex.test(resMedia.type)) { if (resMedia === null || !mediaTypeRegex.test(resMedia.type)) {
return reject(); return reject();
} }
@ -168,7 +165,7 @@ function parseHtmlMedia($, preview, res, client) {
preview.mediaType = resMedia.type; preview.mediaType = resMedia.type;
resolve(resMedia); resolve(resMedia);
}); }).catch(reject);
return false; return false;
} }
@ -313,71 +310,89 @@ function getRequestHeaders(headers) {
return formattedHeaders; return formattedHeaders;
} }
function fetch(uri, headers, cb) { function fetch(uri, headers) {
let req; // Stringify the object otherwise the objects won't compute to the same value
const cacheKey = JSON.stringify([uri, headers]);
let promise = currentFetchPromises.get(cacheKey);
try { if (promise) {
req = request.get({ return promise;
url: uri,
maxRedirects: 5,
timeout: 5000,
headers: getRequestHeaders(headers),
});
} catch (e) {
return cb(null, e);
} }
const buffers = []; promise = new Promise((resolve, reject) => {
let length = 0; let req;
let limit = Helper.config.prefetchMaxImageSize * 1024;
req try {
.on("response", function(res) { req = request.get({
if (/^image\/.+/.test(res.headers["content-type"])) { url: uri,
// response is an image maxRedirects: 5,
// if Content-Length header reports a size exceeding the prefetch limit, abort fetch timeout: 5000,
const contentLength = parseInt(res.headers["content-length"], 10) || 0; headers: getRequestHeaders(headers),
});
} catch (e) {
return reject(e);
}
if (contentLength > limit) { const buffers = [];
let length = 0;
let limit = Helper.config.prefetchMaxImageSize * 1024;
req
.on("response", function(res) {
if (/^image\/.+/.test(res.headers["content-type"])) {
// response is an image
// if Content-Length header reports a size exceeding the prefetch limit, abort fetch
const contentLength = parseInt(res.headers["content-length"], 10) || 0;
if (contentLength > limit) {
req.abort();
}
} else if (mediaTypeRegex.test(res.headers["content-type"])) {
// We don't need to download the file any further after we received content-type header
req.abort();
} else {
// if not image, limit download to 50kb, since we need only meta tags
// twitter.com sends opengraph meta tags within ~20kb of data for individual tweets
limit = 1024 * 50;
}
})
.on("error", (e) => reject(e))
.on("data", (data) => {
length += data.length;
buffers.push(data);
if (length > limit) {
req.abort(); req.abort();
} }
} else if (mediaTypeRegex.test(res.headers["content-type"])) { })
// We don't need to download the file any further after we received content-type header .on("end", () => {
req.abort(); if (req.response.statusCode < 200 || req.response.statusCode > 299) {
} else { return reject(new Error(`HTTP ${req.response.statusCode}`));
// if not image, limit download to 50kb, since we need only meta tags }
// twitter.com sends opengraph meta tags within ~20kb of data for individual tweets
limit = 1024 * 50;
}
})
.on("error", (e) => cb(null, e))
.on("data", (data) => {
length += data.length;
buffers.push(data);
if (length > limit) { let type = "";
req.abort(); let size = parseInt(req.response.headers["content-length"], 10) || length;
}
})
.on("end", () => {
if (req.response.statusCode < 200 || req.response.statusCode > 299) {
return cb(null, new Error(`HTTP ${req.response.statusCode}`));
}
let type = ""; if (size < length) {
let size = parseInt(req.response.headers["content-length"], 10) || length; size = length;
}
if (size < length) { if (req.response.headers["content-type"]) {
size = length; type = req.response.headers["content-type"].split(/ *; */).shift();
} }
if (req.response.headers["content-type"]) { const data = Buffer.concat(buffers, length);
type = req.response.headers["content-type"].split(/ *; */).shift(); resolve({data, type, size});
} });
});
const data = Buffer.concat(buffers, length); const removeCache = () => currentFetchPromises.delete(cacheKey);
cb({data, type, size});
}); promise.then(removeCache).catch(removeCache);
currentFetchPromises.set(cacheKey, promise);
return promise;
} }
function normalizeURL(link, baseLink, disallowHttp = false) { function normalizeURL(link, baseLink, disallowHttp = false) {

View File

@ -422,4 +422,85 @@ describe("Link plugin", function() {
expect(message.previews).to.be.empty; expect(message.previews).to.be.empty;
}); });
it("should fetch same link only once at the same time", function(done) {
	// Requesting a preview for the same URL three times while the first
	// request is still pending must hit the HTTP server exactly once.
	const message = this.irc.createMessage({
		text: "http://localhost:9002/basic-og-once",
	});

	let requests = 0;
	let responses = 0;

	this.irc.language = "very nice language";

	// Two calls in the same tick and one on the next tick
	link(this.irc, this.network.channels[0], message);
	link(this.irc, this.network.channels[0], message);
	process.nextTick(() => link(this.irc, this.network.channels[0], message));

	app.get("/basic-og-once", function(req, res) {
		requests++;

		expect(req.header("accept-language")).to.equal("very nice language");

		// delay the request so it doesn't resolve immediately
		setTimeout(() => {
			res.send("<title>test prefetch</title>");
		}, 100);
	});

	const cb = (data) => {
		responses++;

		// expect(value, message) on its own asserts nothing, so the title
		// has to be compared explicitly.
		expect(data.preview.head).to.equal("test prefetch");

		// Every link() call must still receive its own preview event
		if (responses === 3) {
			this.irc.removeListener("msg:preview", cb);
			expect(requests).to.equal(1);
			done();
		}
	};

	this.irc.on("msg:preview", cb);
});
it("should fetch same link with different languages multiple times", function(done) {
	// The same URL requested with two different client languages must
	// produce two separate HTTP requests, one per language.
	const message = this.irc.createMessage({
		text: "http://localhost:9002/basic-og-once-lang",
	});

	const requests = [];
	let responses = 0;

	this.irc.language = "first language";
	link(this.irc, this.network.channels[0], message);

	this.irc.language = "second language";
	link(this.irc, this.network.channels[0], message);

	app.get("/basic-og-once-lang", function(req, res) {
		// Record the language each request was sent with, in arrival order
		requests.push(req.header("accept-language"));

		// delay the request so it doesn't resolve immediately
		setTimeout(() => {
			res.send("<title>test prefetch</title>");
		}, 100);
	});

	const cb = (data) => {
		responses++;

		// expect(value, message) on its own asserts nothing, so the title
		// has to be compared explicitly.
		expect(data.preview.head).to.equal("test prefetch");

		if (responses === 2) {
			this.irc.removeListener("msg:preview", cb);
			expect(requests).to.deep.equal([
				"first language",
				"second language",
			]);
			done();
		}
	};

	this.irc.on("msg:preview", cb);
});
}); });