Update useragent

This commit is contained in:
hgw 2023-12-10 04:44:10 +00:00
parent 9e8588cc38
commit f96e947a3d
Signed by: hgw
SSH Key Fingerprint: SHA256:diG7RVYHjd3aDYkZWHYcBJbImu+6zfptuUP+3k/wol4

View File

@ -1,11 +1,11 @@
import * as cheerio from "cheerio"; import * as cheerio from "cheerio";
import got from "got"; import got from "got";
import {URL} from "url"; import { URL } from "url";
import mime from "mime-types"; import mime from "mime-types";
import log from "../../log"; import log from "../../log";
import Config from "../../config"; import Config from "../../config";
import {findLinksWithSchema} from "../../../shared/linkify"; import { findLinksWithSchema } from "../../../shared/linkify";
import storage from "../storage"; import storage from "../storage";
import Client from "../../client"; import Client from "../../client";
import Chan from "../../models/chan"; import Chan from "../../models/chan";
@ -37,57 +37,65 @@ export type LinkPreview = {
thumbActualUrl?: string; thumbActualUrl?: string;
}; };
export default function (client: Client, chan: Chan, msg: Msg, cleanText: string) { export default function (
client: Client,
chan: Chan,
msg: Msg,
cleanText: string
) {
if (!Config.values.prefetch) { if (!Config.values.prefetch) {
return; return;
} }
msg.previews = findLinksWithSchema(cleanText).reduce((cleanLinks: LinkPreview[], link) => { msg.previews = findLinksWithSchema(cleanText).reduce(
const url = normalizeURL(link.link); (cleanLinks: LinkPreview[], link) => {
const url = normalizeURL(link.link);
// If the URL is invalid and cannot be normalized, don't fetch it // If the URL is invalid and cannot be normalized, don't fetch it
if (!url) { if (!url) {
return cleanLinks; return cleanLinks;
} }
// If there are too many URLs in this message, only fetch the first X valid links // If there are too many URLs in this message, only fetch the first X valid links
if (cleanLinks.length > 4) { if (cleanLinks.length > 4) {
return cleanLinks; return cleanLinks;
} }
// Do not fetch duplicate links twice // Do not fetch duplicate links twice
if (cleanLinks.some((l) => l.link === link.link)) { if (cleanLinks.some((l) => l.link === link.link)) {
return cleanLinks; return cleanLinks;
} }
const preview: LinkPreview = { const preview: LinkPreview = {
type: "loading", type: "loading",
head: "", head: "",
body: "", body: "",
thumb: "", thumb: "",
size: -1, size: -1,
link: link.link, // Send original matched link to the client link: link.link, // Send original matched link to the client
shown: null, shown: null,
}; };
cleanLinks.push(preview); cleanLinks.push(preview);
fetch(url, { fetch(url, {
accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
language: client.config.browser?.language || "", language: client.config.browser?.language || "",
})
.then((res) => {
parse(msg, chan, preview, res, client);
}) })
.catch((err) => { .then((res) => {
preview.type = "error"; parse(msg, chan, preview, res, client);
preview.error = "message"; })
preview.message = err.message; .catch((err) => {
emitPreview(client, chan, msg, preview); preview.type = "error";
}); preview.error = "message";
preview.message = err.message;
emitPreview(client, chan, msg, preview);
});
return cleanLinks; return cleanLinks;
}, []); },
[]
);
} }
function parseHtml(preview, res, client: Client) { function parseHtml(preview, res, client: Client) {
@ -117,7 +125,10 @@ function parseHtml(preview, res, client: Client) {
preview.body = preview.body.substr(0, 300); preview.body = preview.body.substr(0, 300);
} }
if (!Config.values.prefetchStorage && Config.values.disableMediaPreview) { if (
!Config.values.prefetchStorage &&
Config.values.disableMediaPreview
) {
resolve(res); resolve(res);
return; return;
} }
@ -135,12 +146,15 @@ function parseHtml(preview, res, client: Client) {
// Verify that thumbnail pic exists and is under allowed size // Verify that thumbnail pic exists and is under allowed size
if (thumb.length) { if (thumb.length) {
fetch(thumb, {language: client.config.browser?.language || ""}) fetch(thumb, {
language: client.config.browser?.language || "",
})
.then((resThumb) => { .then((resThumb) => {
if ( if (
resThumb !== null && resThumb !== null &&
imageTypeRegex.test(resThumb.type) && imageTypeRegex.test(resThumb.type) &&
resThumb.size <= Config.values.prefetchMaxImageSize * 1024 resThumb.size <=
Config.values.prefetchMaxImageSize * 1024
) { ) {
preview.thumbActualUrl = thumb; preview.thumbActualUrl = thumb;
} }
@ -156,7 +170,11 @@ function parseHtml(preview, res, client: Client) {
} }
// TODO: type $ // TODO: type $
function parseHtmlMedia($: any, preview, client: Client): Promise<FetchRequest> { function parseHtmlMedia(
$: any,
preview,
client: Client
): Promise<FetchRequest> {
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {
if (Config.values.disableMediaPreview) { if (Config.values.disableMediaPreview) {
reject(); reject();
@ -183,7 +201,10 @@ function parseHtmlMedia($: any, preview, client: Client): Promise<FetchRequest>
return; return;
} }
$(`meta[property="og:${type}:type"]`).each(function (this: cheerio.Element, i: number) { $(`meta[property="og:${type}:type"]`).each(function (
this: cheerio.Element,
i: number
) {
const mimeType = $(this).attr("content"); const mimeType = $(this).attr("content");
if (!mimeType) { if (!mimeType) {
@ -192,7 +213,9 @@ function parseHtmlMedia($: any, preview, client: Client): Promise<FetchRequest>
if (mediaTypeRegex.test(mimeType)) { if (mediaTypeRegex.test(mimeType)) {
// If we match a clean video or audio tag, parse that as a preview instead // If we match a clean video or audio tag, parse that as a preview instead
let mediaUrl = $($(`meta[property="og:${type}"]`).get(i)).attr("content"); let mediaUrl = $(
$(`meta[property="og:${type}"]`).get(i)
).attr("content");
if (!mediaUrl) { if (!mediaUrl) {
return; return;
@ -216,7 +239,10 @@ function parseHtmlMedia($: any, preview, client: Client): Promise<FetchRequest>
language: client.config.browser?.language || "", language: client.config.browser?.language || "",
}) })
.then((resMedia) => { .then((resMedia) => {
if (resMedia === null || !mediaTypeRegex.test(resMedia.type)) { if (
resMedia === null ||
!mediaTypeRegex.test(resMedia.type)
) {
return reject(); return reject();
} }
@ -239,7 +265,13 @@ function parseHtmlMedia($: any, preview, client: Client): Promise<FetchRequest>
}); });
} }
function parse(msg: Msg, chan: Chan, preview: LinkPreview, res: FetchRequest, client: Client) { function parse(
msg: Msg,
chan: Chan,
preview: LinkPreview,
res: FetchRequest,
client: Client
) {
let promise: Promise<FetchRequest | null> | null = null; let promise: Promise<FetchRequest | null> | null = null;
preview.size = res.size; preview.size = res.size;
@ -262,7 +294,10 @@ function parse(msg: Msg, chan: Chan, preview: LinkPreview, res: FetchRequest, cl
case "image/jxl": case "image/jxl":
case "image/webp": case "image/webp":
case "image/avif": case "image/avif":
if (!Config.values.prefetchStorage && Config.values.disableMediaPreview) { if (
!Config.values.prefetchStorage &&
Config.values.disableMediaPreview
) {
return removePreview(msg, preview); return removePreview(msg, preview);
} }
@ -330,10 +365,18 @@ function parse(msg: Msg, chan: Chan, preview: LinkPreview, res: FetchRequest, cl
return handlePreview(client, chan, msg, preview, res); return handlePreview(client, chan, msg, preview, res);
} }
void promise.then((newRes) => handlePreview(client, chan, msg, preview, newRes)); void promise.then((newRes) =>
handlePreview(client, chan, msg, preview, newRes)
);
} }
function handlePreview(client: Client, chan: Chan, msg: Msg, preview: LinkPreview, res) { function handlePreview(
client: Client,
chan: Chan,
msg: Msg,
preview: LinkPreview,
res
) {
const thumb = preview.thumbActualUrl || ""; const thumb = preview.thumbActualUrl || "";
delete preview.thumbActualUrl; delete preview.thumbActualUrl;
@ -363,7 +406,12 @@ function handlePreview(client: Client, chan: Chan, msg: Msg, preview: LinkPrevie
}); });
} }
function emitPreview(client: Client, chan: Chan, msg: Msg, preview: LinkPreview) { function emitPreview(
client: Client,
chan: Chan,
msg: Msg,
preview: LinkPreview
) {
// If there is no title but there is preview or description, set title // If there is no title but there is preview or description, set title
// otherwise bail out and show no preview // otherwise bail out and show no preview
if (!preview.head.length && preview.type === "link") { if (!preview.head.length && preview.type === "link") {
@ -396,7 +444,7 @@ function getRequestHeaders(headers: Record<string, string>) {
// Certain websites like Amazon only add <meta> tags to known bots, // Certain websites like Amazon only add <meta> tags to known bots,
// let's pretend to be them to get the metadata // let's pretend to be them to get the metadata
"User-Agent": "User-Agent":
"Mozilla/5.0 (compatible; Hard Lounge IRC Client; COLD HARD CHATS; +https://git.supernets.org/supernets/hardlounge)" + "Mozilla/5.0 (compatible; Hard Lounge IRC Client; COLD HARD CHATS ONLY ON IRC.SUPERNETS.ORG; +https://git.supernets.org/supernets/hardlounge)" +
" facebookexternalhit/1.1 Twitterbot/1.0", " facebookexternalhit/1.1 Twitterbot/1.0",
Accept: headers.accept || "*/*", Accept: headers.accept || "*/*",
"X-Purpose": "preview", "X-Purpose": "preview",
@ -442,17 +490,24 @@ function fetch(uri: string, headers: Record<string, string>) {
gotStream gotStream
.on("response", function (res) { .on("response", function (res) {
contentLength = parseInt(res.headers["content-length"], 10) || 0; contentLength =
parseInt(res.headers["content-length"], 10) || 0;
contentType = res.headers["content-type"]; contentType = res.headers["content-type"];
if (contentType && imageTypeRegex.test(contentType)) { if (contentType && imageTypeRegex.test(contentType)) {
// response is an image // response is an image
// if Content-Length header reports a size exceeding the prefetch limit, abort fetch // if Content-Length header reports a size exceeding the prefetch limit, abort fetch
// and if file is not to be stored we don't need to download further either // and if file is not to be stored we don't need to download further either
if (contentLength > limit || !Config.values.prefetchStorage) { if (
contentLength > limit ||
!Config.values.prefetchStorage
) {
gotStream.destroy(); gotStream.destroy();
} }
} else if (contentType && mediaTypeRegex.test(contentType)) { } else if (
contentType &&
mediaTypeRegex.test(contentType)
) {
// We don't need to download the file any further after we received content-type header // We don't need to download the file any further after we received content-type header
gotStream.destroy(); gotStream.destroy();
} else { } else {
@ -482,13 +537,16 @@ function fetch(uri: string, headers: Record<string, string>) {
let type = ""; let type = "";
// If we downloaded more data than specified in Content-Length, use real data size // If we downloaded more data than specified in Content-Length, use real data size
const size = contentLength > buffer.length ? contentLength : buffer.length; const size =
contentLength > buffer.length
? contentLength
: buffer.length;
if (contentType) { if (contentType) {
type = contentType.split(/ *; */).shift() || ""; type = contentType.split(/ *; */).shift() || "";
} }
resolve({data: buffer, type, size}); resolve({ data: buffer, type, size });
}); });
} catch (e: any) { } catch (e: any) {
return reject(e); return reject(e);