Update useragent
This commit is contained in:
parent
9e8588cc38
commit
f96e947a3d
@ -1,11 +1,11 @@
|
|||||||
import * as cheerio from "cheerio";
|
import * as cheerio from "cheerio";
|
||||||
import got from "got";
|
import got from "got";
|
||||||
import {URL} from "url";
|
import { URL } from "url";
|
||||||
import mime from "mime-types";
|
import mime from "mime-types";
|
||||||
|
|
||||||
import log from "../../log";
|
import log from "../../log";
|
||||||
import Config from "../../config";
|
import Config from "../../config";
|
||||||
import {findLinksWithSchema} from "../../../shared/linkify";
|
import { findLinksWithSchema } from "../../../shared/linkify";
|
||||||
import storage from "../storage";
|
import storage from "../storage";
|
||||||
import Client from "../../client";
|
import Client from "../../client";
|
||||||
import Chan from "../../models/chan";
|
import Chan from "../../models/chan";
|
||||||
@ -37,57 +37,65 @@ export type LinkPreview = {
|
|||||||
thumbActualUrl?: string;
|
thumbActualUrl?: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
export default function (client: Client, chan: Chan, msg: Msg, cleanText: string) {
|
export default function (
|
||||||
|
client: Client,
|
||||||
|
chan: Chan,
|
||||||
|
msg: Msg,
|
||||||
|
cleanText: string
|
||||||
|
) {
|
||||||
if (!Config.values.prefetch) {
|
if (!Config.values.prefetch) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
msg.previews = findLinksWithSchema(cleanText).reduce((cleanLinks: LinkPreview[], link) => {
|
msg.previews = findLinksWithSchema(cleanText).reduce(
|
||||||
const url = normalizeURL(link.link);
|
(cleanLinks: LinkPreview[], link) => {
|
||||||
|
const url = normalizeURL(link.link);
|
||||||
|
|
||||||
// If the URL is invalid and cannot be normalized, don't fetch it
|
// If the URL is invalid and cannot be normalized, don't fetch it
|
||||||
if (!url) {
|
if (!url) {
|
||||||
return cleanLinks;
|
return cleanLinks;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If there are too many urls in this message, only fetch first X valid links
|
// If there are too many urls in this message, only fetch first X valid links
|
||||||
if (cleanLinks.length > 4) {
|
if (cleanLinks.length > 4) {
|
||||||
return cleanLinks;
|
return cleanLinks;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do not fetch duplicate links twice
|
// Do not fetch duplicate links twice
|
||||||
if (cleanLinks.some((l) => l.link === link.link)) {
|
if (cleanLinks.some((l) => l.link === link.link)) {
|
||||||
return cleanLinks;
|
return cleanLinks;
|
||||||
}
|
}
|
||||||
|
|
||||||
const preview: LinkPreview = {
|
const preview: LinkPreview = {
|
||||||
type: "loading",
|
type: "loading",
|
||||||
head: "",
|
head: "",
|
||||||
body: "",
|
body: "",
|
||||||
thumb: "",
|
thumb: "",
|
||||||
size: -1,
|
size: -1,
|
||||||
link: link.link, // Send original matched link to the client
|
link: link.link, // Send original matched link to the client
|
||||||
shown: null,
|
shown: null,
|
||||||
};
|
};
|
||||||
|
|
||||||
cleanLinks.push(preview);
|
cleanLinks.push(preview);
|
||||||
|
|
||||||
fetch(url, {
|
fetch(url, {
|
||||||
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
language: client.config.browser?.language || "",
|
language: client.config.browser?.language || "",
|
||||||
})
|
|
||||||
.then((res) => {
|
|
||||||
parse(msg, chan, preview, res, client);
|
|
||||||
})
|
})
|
||||||
.catch((err) => {
|
.then((res) => {
|
||||||
preview.type = "error";
|
parse(msg, chan, preview, res, client);
|
||||||
preview.error = "message";
|
})
|
||||||
preview.message = err.message;
|
.catch((err) => {
|
||||||
emitPreview(client, chan, msg, preview);
|
preview.type = "error";
|
||||||
});
|
preview.error = "message";
|
||||||
|
preview.message = err.message;
|
||||||
|
emitPreview(client, chan, msg, preview);
|
||||||
|
});
|
||||||
|
|
||||||
return cleanLinks;
|
return cleanLinks;
|
||||||
}, []);
|
},
|
||||||
|
[]
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseHtml(preview, res, client: Client) {
|
function parseHtml(preview, res, client: Client) {
|
||||||
@ -117,7 +125,10 @@ function parseHtml(preview, res, client: Client) {
|
|||||||
preview.body = preview.body.substr(0, 300);
|
preview.body = preview.body.substr(0, 300);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Config.values.prefetchStorage && Config.values.disableMediaPreview) {
|
if (
|
||||||
|
!Config.values.prefetchStorage &&
|
||||||
|
Config.values.disableMediaPreview
|
||||||
|
) {
|
||||||
resolve(res);
|
resolve(res);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -135,12 +146,15 @@ function parseHtml(preview, res, client: Client) {
|
|||||||
|
|
||||||
// Verify that thumbnail pic exists and is under allowed size
|
// Verify that thumbnail pic exists and is under allowed size
|
||||||
if (thumb.length) {
|
if (thumb.length) {
|
||||||
fetch(thumb, {language: client.config.browser?.language || ""})
|
fetch(thumb, {
|
||||||
|
language: client.config.browser?.language || "",
|
||||||
|
})
|
||||||
.then((resThumb) => {
|
.then((resThumb) => {
|
||||||
if (
|
if (
|
||||||
resThumb !== null &&
|
resThumb !== null &&
|
||||||
imageTypeRegex.test(resThumb.type) &&
|
imageTypeRegex.test(resThumb.type) &&
|
||||||
resThumb.size <= Config.values.prefetchMaxImageSize * 1024
|
resThumb.size <=
|
||||||
|
Config.values.prefetchMaxImageSize * 1024
|
||||||
) {
|
) {
|
||||||
preview.thumbActualUrl = thumb;
|
preview.thumbActualUrl = thumb;
|
||||||
}
|
}
|
||||||
@ -156,7 +170,11 @@ function parseHtml(preview, res, client: Client) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: type $
|
// TODO: type $
|
||||||
function parseHtmlMedia($: any, preview, client: Client): Promise<FetchRequest> {
|
function parseHtmlMedia(
|
||||||
|
$: any,
|
||||||
|
preview,
|
||||||
|
client: Client
|
||||||
|
): Promise<FetchRequest> {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
if (Config.values.disableMediaPreview) {
|
if (Config.values.disableMediaPreview) {
|
||||||
reject();
|
reject();
|
||||||
@ -183,7 +201,10 @@ function parseHtmlMedia($: any, preview, client: Client): Promise<FetchRequest>
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
$(`meta[property="og:${type}:type"]`).each(function (this: cheerio.Element, i: number) {
|
$(`meta[property="og:${type}:type"]`).each(function (
|
||||||
|
this: cheerio.Element,
|
||||||
|
i: number
|
||||||
|
) {
|
||||||
const mimeType = $(this).attr("content");
|
const mimeType = $(this).attr("content");
|
||||||
|
|
||||||
if (!mimeType) {
|
if (!mimeType) {
|
||||||
@ -192,7 +213,9 @@ function parseHtmlMedia($: any, preview, client: Client): Promise<FetchRequest>
|
|||||||
|
|
||||||
if (mediaTypeRegex.test(mimeType)) {
|
if (mediaTypeRegex.test(mimeType)) {
|
||||||
// If we match a clean video or audio tag, parse that as a preview instead
|
// If we match a clean video or audio tag, parse that as a preview instead
|
||||||
let mediaUrl = $($(`meta[property="og:${type}"]`).get(i)).attr("content");
|
let mediaUrl = $(
|
||||||
|
$(`meta[property="og:${type}"]`).get(i)
|
||||||
|
).attr("content");
|
||||||
|
|
||||||
if (!mediaUrl) {
|
if (!mediaUrl) {
|
||||||
return;
|
return;
|
||||||
@ -216,7 +239,10 @@ function parseHtmlMedia($: any, preview, client: Client): Promise<FetchRequest>
|
|||||||
language: client.config.browser?.language || "",
|
language: client.config.browser?.language || "",
|
||||||
})
|
})
|
||||||
.then((resMedia) => {
|
.then((resMedia) => {
|
||||||
if (resMedia === null || !mediaTypeRegex.test(resMedia.type)) {
|
if (
|
||||||
|
resMedia === null ||
|
||||||
|
!mediaTypeRegex.test(resMedia.type)
|
||||||
|
) {
|
||||||
return reject();
|
return reject();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -239,7 +265,13 @@ function parseHtmlMedia($: any, preview, client: Client): Promise<FetchRequest>
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function parse(msg: Msg, chan: Chan, preview: LinkPreview, res: FetchRequest, client: Client) {
|
function parse(
|
||||||
|
msg: Msg,
|
||||||
|
chan: Chan,
|
||||||
|
preview: LinkPreview,
|
||||||
|
res: FetchRequest,
|
||||||
|
client: Client
|
||||||
|
) {
|
||||||
let promise: Promise<FetchRequest | null> | null = null;
|
let promise: Promise<FetchRequest | null> | null = null;
|
||||||
|
|
||||||
preview.size = res.size;
|
preview.size = res.size;
|
||||||
@ -262,7 +294,10 @@ function parse(msg: Msg, chan: Chan, preview: LinkPreview, res: FetchRequest, cl
|
|||||||
case "image/jxl":
|
case "image/jxl":
|
||||||
case "image/webp":
|
case "image/webp":
|
||||||
case "image/avif":
|
case "image/avif":
|
||||||
if (!Config.values.prefetchStorage && Config.values.disableMediaPreview) {
|
if (
|
||||||
|
!Config.values.prefetchStorage &&
|
||||||
|
Config.values.disableMediaPreview
|
||||||
|
) {
|
||||||
return removePreview(msg, preview);
|
return removePreview(msg, preview);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -330,10 +365,18 @@ function parse(msg: Msg, chan: Chan, preview: LinkPreview, res: FetchRequest, cl
|
|||||||
return handlePreview(client, chan, msg, preview, res);
|
return handlePreview(client, chan, msg, preview, res);
|
||||||
}
|
}
|
||||||
|
|
||||||
void promise.then((newRes) => handlePreview(client, chan, msg, preview, newRes));
|
void promise.then((newRes) =>
|
||||||
|
handlePreview(client, chan, msg, preview, newRes)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function handlePreview(client: Client, chan: Chan, msg: Msg, preview: LinkPreview, res) {
|
function handlePreview(
|
||||||
|
client: Client,
|
||||||
|
chan: Chan,
|
||||||
|
msg: Msg,
|
||||||
|
preview: LinkPreview,
|
||||||
|
res
|
||||||
|
) {
|
||||||
const thumb = preview.thumbActualUrl || "";
|
const thumb = preview.thumbActualUrl || "";
|
||||||
delete preview.thumbActualUrl;
|
delete preview.thumbActualUrl;
|
||||||
|
|
||||||
@ -363,7 +406,12 @@ function handlePreview(client: Client, chan: Chan, msg: Msg, preview: LinkPrevie
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function emitPreview(client: Client, chan: Chan, msg: Msg, preview: LinkPreview) {
|
function emitPreview(
|
||||||
|
client: Client,
|
||||||
|
chan: Chan,
|
||||||
|
msg: Msg,
|
||||||
|
preview: LinkPreview
|
||||||
|
) {
|
||||||
// If there is no title but there is preview or description, set title
|
// If there is no title but there is preview or description, set title
|
||||||
// otherwise bail out and show no preview
|
// otherwise bail out and show no preview
|
||||||
if (!preview.head.length && preview.type === "link") {
|
if (!preview.head.length && preview.type === "link") {
|
||||||
@ -396,7 +444,7 @@ function getRequestHeaders(headers: Record<string, string>) {
|
|||||||
// Certain websites like Amazon only add <meta> tags to known bots,
|
// Certain websites like Amazon only add <meta> tags to known bots,
|
||||||
// lets pretend to be them to get the metadata
|
// lets pretend to be them to get the metadata
|
||||||
"User-Agent":
|
"User-Agent":
|
||||||
"Mozilla/5.0 (compatible; Hard Lounge IRC Client; COLD HARD CHATS; +https://git.supernets.org/supernets/hardlounge)" +
|
"Mozilla/5.0 (compatible; Hard Lounge IRC Client; COLD HARD CHATS ONLY ON IRC.SUPERNETS.ORG; +https://git.supernets.org/supernets/hardlounge)" +
|
||||||
" facebookexternalhit/1.1 Twitterbot/1.0",
|
" facebookexternalhit/1.1 Twitterbot/1.0",
|
||||||
Accept: headers.accept || "*/*",
|
Accept: headers.accept || "*/*",
|
||||||
"X-Purpose": "preview",
|
"X-Purpose": "preview",
|
||||||
@ -442,17 +490,24 @@ function fetch(uri: string, headers: Record<string, string>) {
|
|||||||
|
|
||||||
gotStream
|
gotStream
|
||||||
.on("response", function (res) {
|
.on("response", function (res) {
|
||||||
contentLength = parseInt(res.headers["content-length"], 10) || 0;
|
contentLength =
|
||||||
|
parseInt(res.headers["content-length"], 10) || 0;
|
||||||
contentType = res.headers["content-type"];
|
contentType = res.headers["content-type"];
|
||||||
|
|
||||||
if (contentType && imageTypeRegex.test(contentType)) {
|
if (contentType && imageTypeRegex.test(contentType)) {
|
||||||
// response is an image
|
// response is an image
|
||||||
// if Content-Length header reports a size exceeding the prefetch limit, abort fetch
|
// if Content-Length header reports a size exceeding the prefetch limit, abort fetch
|
||||||
// and if file is not to be stored we don't need to download further either
|
// and if file is not to be stored we don't need to download further either
|
||||||
if (contentLength > limit || !Config.values.prefetchStorage) {
|
if (
|
||||||
|
contentLength > limit ||
|
||||||
|
!Config.values.prefetchStorage
|
||||||
|
) {
|
||||||
gotStream.destroy();
|
gotStream.destroy();
|
||||||
}
|
}
|
||||||
} else if (contentType && mediaTypeRegex.test(contentType)) {
|
} else if (
|
||||||
|
contentType &&
|
||||||
|
mediaTypeRegex.test(contentType)
|
||||||
|
) {
|
||||||
// We don't need to download the file any further after we received content-type header
|
// We don't need to download the file any further after we received content-type header
|
||||||
gotStream.destroy();
|
gotStream.destroy();
|
||||||
} else {
|
} else {
|
||||||
@ -482,13 +537,16 @@ function fetch(uri: string, headers: Record<string, string>) {
|
|||||||
let type = "";
|
let type = "";
|
||||||
|
|
||||||
// If we downloaded more data then specified in Content-Length, use real data size
|
// If we downloaded more data then specified in Content-Length, use real data size
|
||||||
const size = contentLength > buffer.length ? contentLength : buffer.length;
|
const size =
|
||||||
|
contentLength > buffer.length
|
||||||
|
? contentLength
|
||||||
|
: buffer.length;
|
||||||
|
|
||||||
if (contentType) {
|
if (contentType) {
|
||||||
type = contentType.split(/ *; */).shift() || "";
|
type = contentType.split(/ *; */).shift() || "";
|
||||||
}
|
}
|
||||||
|
|
||||||
resolve({data: buffer, type, size});
|
resolve({ data: buffer, type, size });
|
||||||
});
|
});
|
||||||
} catch (e: any) {
|
} catch (e: any) {
|
||||||
return reject(e);
|
return reject(e);
|
||||||
|
Loading…
Reference in New Issue
Block a user