// hardlounge/src/plugins/irc-events/link.js
"use strict";
// Third-party and Node.js modules
const cheerio = require("cheerio");
const request = require("request");
const url = require("url");
const mime = require("mime-types");

// Project-local modules
const Helper = require("../../helper");
const cleanIrcMessage = require("../../../client/js/libs/handlebars/ircmessageparser/cleanIrcMessage");
const findLinks = require("../../../client/js/libs/handlebars/ircmessageparser/findLinks");
const storage = require("../storage");

// A value of 0 removes the cap on the number of listeners that may be
// registered per event on `process` (and the warning Node prints when the
// default cap is exceeded).
// NOTE(review): presumably needed because of the many outgoing requests made
// by this module; confirm before removing.
process.setMaxListeners(0);

module.exports = function(client, chan, msg) {
if (!Helper.config.prefetch) {
return;
}
2017-06-26 09:01:55 +00:00
// Remove all IRC formatting characters before searching for links
const cleanText = cleanIrcMessage(msg.text);
2017-06-26 09:01:55 +00:00
// We will only try to prefetch http(s) links
const links = findLinks(cleanText).filter((w) => /^https?:\/\//.test(w.link));
if (links.length === 0) {
return;
}
msg.previews = Array.from(new Set( // Remove duplicate links
links.map((link) => escapeHeader(link.link))
)).map((link) => ({
type: "loading",
head: "",
body: "",
thumb: "",
link: link,
shown: true,
})).slice(0, 5); // Only preview the first 5 URLs in message to avoid abuse
msg.previews.forEach((preview) => {
fetch(preview.link, function(res) {
if (res === null) {
return;
}
parse(msg, preview, res, client);
});
});
2014-09-27 19:17:05 +00:00
};
/**
 * Fill in `preview` according to the content type of a fetched response and
 * pass it on to `handlePreview`.
 *
 * - "text/html": title, description and thumbnail are read from the page's
 *   meta tags; a thumbnail, if any, is fetched and validated first.
 * - supported image types: the link itself becomes the thumbnail, unless the
 *   response is larger than `Helper.config.prefetchMaxImageSize` KiB.
 * - supported audio/video types: only previewed for https:// links.
 * - anything else: no preview is produced.
 *
 * @param {Object} msg - message the preview belongs to
 * @param {Object} preview - preview placeholder, mutated in place
 * @param {{type: string, size: number, data: Buffer}} res - result from `fetch`
 * @param {Object} client - passed through to `handlePreview`
 */
function parse(msg, preview, res, client) {
	switch (res.type) {
		case "text/html": {
			const $ = cheerio.load(res.data);

			preview.type = "link";
			preview.head =
				$('meta[property="og:title"]').attr("content")
				|| $("title").text()
				|| "";
			preview.body =
				$('meta[property="og:description"]').attr("content")
				|| $('meta[name="description"]').attr("content")
				|| "";
			preview.thumb =
				$('meta[property="og:image"]').attr("content")
				|| $('meta[name="twitter:image:src"]').attr("content")
				|| $('link[rel="image_src"]').attr("href")
				|| "";

			// The thumbnail may be given relative to the page
			if (preview.thumb.length) {
				preview.thumb = url.resolve(preview.link, preview.thumb);
			}

			// Make sure thumbnail is a valid url
			if (!/^https?:\/\//.test(preview.thumb)) {
				preview.thumb = "";
			}

			// Verify that thumbnail pic exists and is under allowed size
			if (preview.thumb.length) {
				fetch(escapeHeader(preview.thumb), (resThumb) => {
					if (resThumb === null
						|| !(/^image\/.+/.test(resThumb.type))
						|| resThumb.size > (Helper.config.prefetchMaxImageSize * 1024)) {
						preview.thumb = "";
					}

					handlePreview(client, msg, preview, resThumb);
				});

				// handlePreview is called from the thumbnail callback above
				return;
			}

			break;
		}

		case "image/png":
		case "image/gif":
		case "image/jpg":
		case "image/jpeg":
		case "image/webp":
			if (res.size > (Helper.config.prefetchMaxImageSize * 1024)) {
				return;
			}

			preview.type = "image";
			preview.thumb = preview.link;
			break;

		case "audio/midi":
		case "audio/mpeg":
		case "audio/mpeg3":
		case "audio/ogg":
		case "audio/wav":
		case "audio/x-mid":
		case "audio/x-midi":
		case "audio/x-mpeg":
		case "audio/x-mpeg-3":
			// Only https:// media is previewed. Return instead of falling
			// through so that no preview still typed "loading" gets emitted.
			if (!preview.link.startsWith("https://")) {
				return;
			}

			preview.type = "audio";
			preview.res = res.type;
			break;

		case "video/webm":
		case "video/ogg":
		case "video/mp4":
			// Same https-only rule as for audio
			if (!preview.link.startsWith("https://")) {
				return;
			}

			preview.res = res.type;
			preview.type = "video";
			break;

		default:
			return;
	}

	handlePreview(client, msg, preview, res);
}
/**
 * Optionally store the preview thumbnail through `storage`, then emit the preview.
 *
 * When the preview has no thumbnail, or `Helper.config.prefetchStorage` is
 * falsy, the preview is emitted as is. Otherwise `res.data` is stored and the
 * thumbnail is replaced with the URI handed back by `storage.store`.
 *
 * @param {Object} client - passed through to `emitPreview`
 * @param {Object} msg - passed through to `emitPreview`
 * @param {Object} preview - preview object; `thumb` may be rewritten
 * @param {Object} res - fetched response (`type`, `data`); only read when storing
 */
function handlePreview(client, msg, preview, res) {
	const wantsStorage = preview.thumb.length && Helper.config.prefetchStorage;

	if (!wantsStorage) {
		return emitPreview(client, msg, preview);
	}

	// The extension is derived from the content-type rather than from the URL,
	// so that no user-supplied input ends up in the stored file name
	const fileExtension = mime.extension(res.type);

	if (fileExtension) {
		storage.store(res.data, fileExtension, (storedUri) => {
			preview.thumb = storedUri;
			emitPreview(client, msg, preview);
		});

		return;
	}

	// Unknown content-type: a link preview is still shown, just without its
	// thumbnail; any other preview type is not displayed at all
	if (preview.type === "link") {
		preview.thumb = "";

		return emitPreview(client, msg, preview);
	}
}
/**
 * Send a finished preview to the client as a "msg:preview" event.
 *
 * A link preview without a title is given the placeholder title
 * "Untitled page" when it has a thumbnail or a description; when it has
 * neither, nothing is emitted.
 *
 * @param {Object} client - object whose `emit` method receives the event
 * @param {Object} msg - message the preview belongs to; only `msg.id` is sent
 * @param {Object} preview - preview to send; `head` may be filled in
 */
function emitPreview(client, msg, preview) {
	// If there is no title but there is preview or description, set title
	// otherwise bail out and show no preview
	if (!preview.head.length && preview.type === "link") {
		if (preview.thumb.length || preview.body.length) {
			preview.head = "Untitled page";
		} else {
			return;
		}
	}

	client.emit("msg:preview", {
		id: msg.id,
		preview: preview,
	});
}
/**
 * Download `uri` with a GET request and report the outcome through `cb`.
 *
 * `cb` is called at most once, with `null` on failure (request could not be
 * created, request error, missing response, or a status code outside
 * 200-299), or otherwise with:
 *   - `data`: Buffer holding the bytes received,
 *   - `type`: lower-cased content type without parameters ("" if absent),
 *   - `size`: the larger of the Content-Length header and the bytes received.
 *
 * The download is aborted once it exceeds a limit: for images
 * `Helper.config.prefetchMaxImageSize` KiB, for audio/video immediately, and
 * for everything else 50 KiB.
 *
 * @param {string} uri - URL to download
 * @param {function(?Object)} cb - completion callback
 */
function fetch(uri, cb) {
	// Guard against the request emitting both "error" and "end"
	let finished = false;
	const done = (result) => {
		if (finished) {
			return;
		}

		finished = true;
		return cb(result);
	};

	let req;

	try {
		req = request.get({
			url: uri,
			maxRedirects: 5,
			timeout: 5000,
			headers: {
				"User-Agent": "Mozilla/5.0 (compatible; The Lounge IRC Client; +https://github.com/thelounge/lounge)",
			},
		});
	} catch (e) {
		return done(null);
	}

	const buffers = [];
	let length = 0;
	let limit = Helper.config.prefetchMaxImageSize * 1024;

	req
		.on("response", (res) => {
			// Media types are case-insensitive, so normalise before matching
			const contentType = String(res.headers["content-type"] || "").toLowerCase();

			if (/^image\/.+/.test(contentType)) {
				// response is an image
				// if Content-Length header reports a size exceeding the prefetch limit, abort fetch
				const contentLength = parseInt(res.headers["content-length"], 10) || 0;

				if (contentLength > limit) {
					req.abort();
				}
			} else if (/^(audio|video)\/.+/.test(contentType)) {
				req.abort(); // ensure server doesn't download the audio file
			} else {
				// if not image, limit download to 50kb, since we need only meta tags
				// twitter.com sends opengraph meta tags within ~20kb of data for individual tweets
				limit = 1024 * 50;
			}
		})
		.on("error", () => done(null))
		.on("data", (data) => {
			length += data.length;
			buffers.push(data);

			if (length > limit) {
				req.abort();
			}
		})
		.on("end", () => {
			const response = req.response;

			if (!response || response.statusCode < 200 || response.statusCode > 299) {
				return done(null);
			}

			// Never report less than what was actually received
			const reported = parseInt(response.headers["content-length"], 10) || length;
			const size = Math.max(reported, length);

			// Strip parameters such as "; charset=utf-8" and normalise the case
			const header = response.headers["content-type"];
			const type = header ? String(header).split(";")[0].trim().toLowerCase() : "";

			done({
				data: Buffer.concat(buffers, length),
				type: type,
				size: size,
			});
		});
}
// Background on why the value has to be escaped before it is handed to `request`:
// https://github.com/request/request/issues/2120
// https://github.com/nodejs/node/issues/1693
// https://github.com/alexeyten/descript/commit/50ee540b30188324198176e445330294922665fc
/**
 * Percent-encode control characters and non-ASCII characters in `header`,
 * leaving printable ASCII untouched.
 *
 * @param {string} header - value to escape
 * @returns {string} the escaped value
 */
function escapeHeader(header) {
	// Runs of well-formed surrogate pairs are encoded first...
	const pairsEncoded = header.replace(
		/([\uD800-\uDBFF][\uDC00-\uDFFF])+/g,
		(run) => encodeURI(run)
	);

	// ...so that whatever surrogate is left over is unpaired and can be dropped
	// (encodeURI throws a URIError on lone surrogates)
	const withoutLoneSurrogates = pairsEncoded.replace(/[\uD800-\uDFFF]/g, "");

	// Finally encode the remaining control and non-ASCII characters
	return withoutLoneSurrogates.replace(
		/[\u0000-\u001F\u007F-\uFFFF]+/g,
		(run) => encodeURI(run)
	);
}