Try to find og:video and og:audio on html pages

This commit is contained in:
Pavel Djundik 2017-12-14 13:14:45 +02:00
parent fe51c6d7e7
commit 58d9490c2a
2 changed files with 108 additions and 41 deletions

View File

@ -7,13 +7,13 @@
{{/equal}} {{/equal}}
{{#equal type "audio"}} {{#equal type "audio"}}
<audio controls preload="metadata"> <audio controls preload="metadata">
<source src="{{link}}" type="{{res}}"> <source src="{{media}}" type="{{mediaType}}">
Your browser does not support the audio element. Your browser does not support the audio element.
</audio> </audio>
{{/equal}} {{/equal}}
{{#equal type "video"}} {{#equal type "video"}}
<video preload="metadata" controls> <video preload="metadata" controls>
<source src="{{link}}" type="{{res}}"> <source src="{{media}}" type="{{mediaType}}">
Your browser does not support the video element. Your browser does not support the video element.
</video> </video>
{{/equal}} {{/equal}}

View File

@ -9,6 +9,9 @@ const cleanIrcMessage = require("../../../client/js/libs/handlebars/ircmessagepa
const findLinks = require("../../../client/js/libs/handlebars/ircmessageparser/findLinks"); const findLinks = require("../../../client/js/libs/handlebars/ircmessageparser/findLinks");
const storage = require("../storage"); const storage = require("../storage");
const mediaTypeRegex = /^(audio|video)\/.+/;
const linkRegex = /^https?:\/\//;
// Fix ECDH curve client compatibility in Node v8/v9 // Fix ECDH curve client compatibility in Node v8/v9
// This is fixed in Node 10, but The Lounge supports LTS versions // This is fixed in Node 10, but The Lounge supports LTS versions
// https://github.com/nodejs/node/issues/16196 // https://github.com/nodejs/node/issues/16196
@ -30,7 +33,7 @@ module.exports = function(client, chan, msg) {
const cleanText = cleanIrcMessage(msg.text); const cleanText = cleanIrcMessage(msg.text);
// We will only try to prefetch http(s) links // We will only try to prefetch http(s) links
const links = findLinks(cleanText).filter((w) => /^https?:\/\//.test(w.link)); const links = findLinks(cleanText).filter((w) => linkRegex.test(w.link));
if (links.length === 0) { if (links.length === 0) {
return; return;
@ -65,10 +68,13 @@ module.exports = function(client, chan, msg) {
}); });
}; };
function parse(msg, preview, res, client) { function parseHtml(preview, res, client) {
switch (res.type) { return new Promise((resolve) => {
case "text/html": {
const $ = cheerio.load(res.data); const $ = cheerio.load(res.data);
return parseHtmlMedia($, preview, res, client)
.then((newRes) => resolve(newRes))
.catch(() => {
preview.type = "link"; preview.type = "link";
preview.head = preview.head =
$('meta[property="og:title"]').attr("content") $('meta[property="og:title"]').attr("content")
@ -89,7 +95,7 @@ function parse(msg, preview, res, client) {
} }
// Make sure thumbnail is a valid url // Make sure thumbnail is a valid url
if (!/^https?:\/\//.test(preview.thumb)) { if (!linkRegex.test(preview.thumb)) {
preview.thumb = ""; preview.thumb = "";
} }
@ -102,15 +108,69 @@ function parse(msg, preview, res, client) {
preview.thumb = ""; preview.thumb = "";
} }
handlePreview(client, msg, preview, resThumb); resolve(resThumb);
}); });
} else {
resolve(res);
}
});
});
}
function parseHtmlMedia($, preview, res, client) {
return new Promise((resolve, reject) => {
let foundMedia = false;
["video", "audio"].forEach((type) => {
if (foundMedia) {
return; return;
} }
break; $(`meta[property="og:${type}:type"]`).each(function(i) {
const mimeType = $(this).attr("content");
if (mediaTypeRegex.test(mimeType)) {
// If we match a clean video or audio tag, parse that as a preview instead
const mediaUrl = $($(`meta[property="og:${type}"]`).get(i)).attr("content");
// Make sure media is a valid url
if (!mediaUrl.startsWith("https://")) {
return;
} }
foundMedia = true;
fetch(escapeHeader(mediaUrl), {language: client.language}, (resMedia) => {
if (resMedia === null || !mediaTypeRegex.test(resMedia.type)) {
return reject();
}
preview.type = type;
preview.media = mediaUrl;
preview.mediaType = resMedia.type;
resolve(resMedia);
});
return false;
}
});
});
if (!foundMedia) {
reject();
}
});
}
function parse(msg, preview, res, client) {
let promise;
switch (res.type) {
case "text/html":
promise = parseHtml(preview, res, client);
break;
case "image/png": case "image/png":
case "image/gif": case "image/gif":
case "image/jpg": case "image/jpg":
@ -141,7 +201,8 @@ function parse(msg, preview, res, client) {
} }
preview.type = "audio"; preview.type = "audio";
preview.res = res.type; preview.media = preview.link;
preview.mediaType = res.type;
break; break;
@ -152,8 +213,9 @@ function parse(msg, preview, res, client) {
break; break;
} }
preview.res = res.type;
preview.type = "video"; preview.type = "video";
preview.media = preview.link;
preview.mediaType = res.type;
break; break;
@ -161,7 +223,11 @@ function parse(msg, preview, res, client) {
return; return;
} }
handlePreview(client, msg, preview, res); if (!promise) {
return handlePreview(client, msg, preview, res);
}
promise.then((newRes) => handlePreview(client, msg, preview, newRes));
} }
function handlePreview(client, msg, preview, res) { function handlePreview(client, msg, preview, res) {
@ -248,8 +314,9 @@ function fetch(uri, {language}, cb) {
if (contentLength > limit) { if (contentLength > limit) {
req.abort(); req.abort();
} }
} else if (/^(audio|video)\/.+/.test(res.headers["content-type"])) { } else if (mediaTypeRegex.test(res.headers["content-type"])) {
req.abort(); // ensure server doesn't download the audio file // We don't need to download the file any further after we received content-type header
req.abort();
} else { } else {
// if not image, limit download to 50kb, since we need only meta tags // if not image, limit download to 50kb, since we need only meta tags
// twitter.com sends opengraph meta tags within ~20kb of data for individual tweets // twitter.com sends opengraph meta tags within ~20kb of data for individual tweets