diff --git a/api/package.json b/api/package.json index 829106c3..8f049e9c 100644 --- a/api/package.json +++ b/api/package.json @@ -1,7 +1,7 @@ { "name": "@imput/cobalt-api", "description": "save what you love", - "version": "10.6", + "version": "10.7", "author": "imput", "exports": "./src/cobalt.js", "type": "module", diff --git a/api/src/misc/run-test.js b/api/src/misc/run-test.js index 21d97d04..2dc1a28a 100644 --- a/api/src/misc/run-test.js +++ b/api/src/misc/run-test.js @@ -25,6 +25,11 @@ export async function runTest(url, params, expect) { error.push(`status mismatch: ${detail}`); } + if (expect.errorCode && expect.errorCode !== result.body?.error?.code) { + const detail = `${expect.errorCode} (expected) != ${result.body.error.code} (actual)` + error.push(`error mismatch: ${detail}`); + } + if (expect.code !== result.status) { const detail = `${expect.code} (expected) != ${result.status} (actual)`; error.push(`status code mismatch: ${detail}`); diff --git a/api/src/misc/utils.js b/api/src/misc/utils.js index 331528d4..76d7a3eb 100644 --- a/api/src/misc/utils.js +++ b/api/src/misc/utils.js @@ -1,12 +1,14 @@ +import { request } from 'undici'; const redirectStatuses = new Set([301, 302, 303, 307, 308]); -export async function getRedirectingURL(url, dispatcher) { - const location = await fetch(url, { - redirect: 'manual', +export async function getRedirectingURL(url, dispatcher, userAgent) { + const location = await request(url, { dispatcher, - }).then((r) => { - if (redirectStatuses.has(r.status) && r.headers.has('location')) { - return r.headers.get('location'); + method: 'HEAD', + headers: { 'user-agent': userAgent } + }).then(r => { + if (redirectStatuses.has(r.statusCode) && r.headers['location']) { + return r.headers['location']; } }).catch(() => null); diff --git a/api/src/processing/service-config.js b/api/src/processing/service-config.js index 86352f9a..68301adc 100644 --- a/api/src/processing/service-config.js +++ b/api/src/processing/service-config.js @@ -41,7 +41,8 @@ export const services = { "p/:postId", ":username/p/:postId", "tv/:postId", - "stories/:username/:storyId" + "stories/:username/:storyId", + "share/:shareId" ], altDomains: ["ddinstagram.com"], }, diff --git a/api/src/processing/service-patterns.js b/api/src/processing/service-patterns.js index 42f64d26..cf89d574 100644 --- a/api/src/processing/service-patterns.js +++ b/api/src/processing/service-patterns.js @@ -6,7 +6,8 @@ export const testers = { "dailymotion": pattern => pattern.id?.length <= 32, "instagram": pattern => - pattern.postId?.length <= 12 + pattern.postId?.length <= 48 + || pattern.shareId?.length <= 16 || (pattern.username?.length <= 30 && pattern.storyId?.length <= 24), "loom": pattern => diff --git a/api/src/processing/services/bilibili.js b/api/src/processing/services/bilibili.js index 47932711..8747a781 100644 --- a/api/src/processing/services/bilibili.js +++ b/api/src/processing/services/bilibili.js @@ -1,19 +1,8 @@ import { genericUserAgent, env } from "../../config.js"; +import { resolveRedirectingURL } from "../url.js"; // TO-DO: higher quality downloads (currently requires an account) -function com_resolveShortlink(shortId) { - return fetch(`https://b23.tv/${shortId}`, { redirect: 'manual' }) - .then(r => r.status > 300 && r.status < 400 && r.headers.get('location')) - .then(url => { - if (!url) return; - const path = new URL(url).pathname; - if (path.startsWith('/video/')) - return path.split('/')[2]; - }) - .catch(() => {}) -} - function getBest(content) { return content?.filter(v => v.baseUrl || v.url) .map(v => (v.baseUrl = v.baseUrl || v.url, v)) @@ -100,7 +89,8 @@ async function tv_download(id) { export default async function({ comId, tvId, comShortLink }) { if (comShortLink) { - comId = await com_resolveShortlink(comShortLink); + const patternMatch = await resolveRedirectingURL(`https://b23.tv/${comShortLink}`); + comId = patternMatch?.comId; } if (comId) { diff --git a/api/src/processing/services/instagram.js b/api/src/processing/services/instagram.js index d9a646aa..eab4776e 100644 --- a/api/src/processing/services/instagram.js +++ b/api/src/processing/services/instagram.js @@ -1,3 +1,5 @@ +import { randomBytes } from "node:crypto"; +import { resolveRedirectingURL } from "../url.js"; import { genericUserAgent } from "../../config.js"; import { createStream } from "../../stream/manage.js"; import { getCookie, updateCookie } from "../cookie/manager.js"; @@ -8,6 +10,7 @@ const commonHeaders = { "sec-fetch-site": "same-origin", "x-ig-app-id": "936619743392459" } + const mobileHeaders = { "x-ig-app-locale": "en_US", "x-ig-device-locale": "en_US", @@ -19,6 +22,7 @@ const mobileHeaders = { "x-fb-server-cluster": "True", "content-length": "0", } + const embedHeaders = { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Accept-Language": "en-GB,en;q=0.9", @@ -33,7 +37,7 @@ const embedHeaders = { "Sec-Fetch-Site": "none", "Sec-Fetch-User": "?1", "Upgrade-Insecure-Requests": "1", - "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", + "User-Agent": genericUserAgent, } const cachedDtsg = { @@ -41,7 +45,17 @@ const cachedDtsg = { expiry: 0 } -export default function(obj) { +const getNumberFromQuery = (name, data) => { + const s = data?.match(new RegExp(name + '=(\\d+)'))?.[1]; + if (+s) return +s; +} + +const getObjectFromEntries = (name, data) => { + const obj = data?.match(new RegExp('\\["' + name + '",.*?,({.*?}),\\d+\\]'))?.[1]; + return obj && JSON.parse(obj); +} + +export default function instagram(obj) { const dispatcher = obj.dispatcher; async function findDtsgId(cookie) { @@ -91,6 +105,7 @@ export default function(obj) { updateCookie(cookie, data.headers); return data.json(); } + async function getMediaId(id, { cookie, token } = {}) { const oembedURL = new URL('https://i.instagram.com/api/v1/oembed/'); oembedURL.searchParams.set('url', `https://www.instagram.com/p/${id}/`); @@ -119,6 +134,7 @@ export default function(obj) { return mediaInfo?.items?.[0]; } + async function requestHTML(id, cookie) { const data = await fetch(`https://www.instagram.com/p/${id}/embed/captioned/`, { headers: { @@ -136,40 +152,167 @@ export default function(obj) { return embedData; } - async function requestGQL(id, cookie) { - let dtsgId; - if (cookie) { - dtsgId = await findDtsgId(cookie); - } - const url = new URL('https://www.instagram.com/api/graphql/'); + async function getGQLParams(id, cookie) { + const req = await fetch(`https://www.instagram.com/p/${id}/`, { + headers: { + ...embedHeaders, + cookie + }, + dispatcher + }); - const requestData = { - jazoest: '26406', - variables: JSON.stringify({ - shortcode: id, - __relay_internal__pv__PolarisShareMenurelayprovider: false - }), - doc_id: '7153618348081770' + const html = await req.text(); + const siteData = getObjectFromEntries('SiteData', html); + const polarisSiteData = getObjectFromEntries('PolarisSiteData', html); + const webConfig = getObjectFromEntries('DGWWebConfig', html); + const pushInfo = getObjectFromEntries('InstagramWebPushInfo', html); + const lsd = getObjectFromEntries('LSD', html)?.token || randomBytes(8).toString('base64url'); + const csrf = getObjectFromEntries('InstagramSecurityConfig', html)?.csrf_token; + + const anon_cookie = [ + csrf && "csrftoken=" + csrf, + polarisSiteData?.device_id && "ig_did=" + polarisSiteData?.device_id, + "wd=1280x720", + "dpr=2", + polarisSiteData?.machine_id && "mid=" + polarisSiteData.machine_id, + "ig_nrcb=1" + ].filter(a => a).join('; '); + + return { + headers: { + 'x-ig-app-id': webConfig?.appId || '936619743392459', + 'X-FB-LSD': lsd, + 'X-CSRFToken': csrf, + 'X-Bloks-Version-Id': getObjectFromEntries('WebBloksVersioningID', html)?.versioningID, + 'x-asbd-id': 129477, + cookie: anon_cookie + }, + body: { + __d: 'www', + __a: '1', + __s: '::' + Math.random().toString(36).substring(2).replace(/\d/g, '').slice(0, 6), + __hs: siteData?.haste_session || '20126.HYP:instagram_web_pkg.2.1...0', + __req: 'b', + __ccg: 'EXCELLENT', + __rev: pushInfo?.rollout_hash || '1019933358', + __hsi: siteData?.hsi || '7436540909012459023', + __dyn: randomBytes(154).toString('base64url'), + __csr: randomBytes(154).toString('base64url'), + __user: '0', + __comet_req: getNumberFromQuery('__comet_req', html) || '7', + av: '0', + dpr: '2', + lsd, + jazoest: getNumberFromQuery('jazoest', html) || Math.floor(Math.random() * 10000), + __spin_r: siteData?.__spin_r || '1019933358', + __spin_b: siteData?.__spin_b || 'trunk', + __spin_t: siteData?.__spin_t || Math.floor(new Date().getTime() / 1000), + } }; - if (dtsgId) { - requestData.fb_dtsg = dtsgId; + } + + async function requestGQL(id, cookie) { + const { headers, body } = await getGQLParams(id, cookie); + + const req = await fetch('https://www.instagram.com/graphql/query', { + method: 'POST', + dispatcher, + headers: { + ...embedHeaders, + ...headers, + cookie, + 'content-type': 'application/x-www-form-urlencoded', + 'X-FB-Friendly-Name': 'PolarisPostActionLoadPostQueryQuery', + }, + body: new URLSearchParams({ + ...body, + fb_api_caller_class: 'RelayModern', + fb_api_req_friendly_name: 'PolarisPostActionLoadPostQueryQuery', + variables: JSON.stringify({ + shortcode: id, + fetch_tagged_user_count: null, + hoisted_comment_id: null, + hoisted_reply_id: null + }), + server_timestamps: true, + doc_id: '8845758582119845' + }).toString() + }); + + return { + gql_data: await req.json() + .then(r => r.data) + .catch(() => null) + }; + } + + async function getErrorContext(id) { + try { + const { headers, body } = await getGQLParams(id); + + const req = await fetch('https://www.instagram.com/ajax/bulk-route-definitions/', { + method: 'POST', + dispatcher, + headers: { + ...embedHeaders, + ...headers, + 'content-type': 'application/x-www-form-urlencoded', + 'X-Ig-D': 'www', + }, + body: new URLSearchParams({ + 'route_urls[0]': `/p/${id}/`, + routing_namespace: 'igx_www', + ...body + }).toString() + }); + + const response = await req.text(); + if (response.includes('"tracePolicy":"polaris.privatePostPage"')) + return { error: 'content.post.private' }; + + const [, mediaId, mediaOwnerId] = response.match( + /"media_id":\s*?"(\d+)","media_owner_id":\s*?"(\d+)"/ + ) || []; + + if (mediaId && mediaOwnerId) { + const rulingURL = new URL('https://www.instagram.com/api/v1/web/get_ruling_for_media_content_logged_out'); + rulingURL.searchParams.set('media_id', mediaId); + rulingURL.searchParams.set('owner_id', mediaOwnerId); + + const rulingResponse = await fetch(rulingURL, { + headers: { + ...headers, + ...commonHeaders + }, + dispatcher, + }).then(a => a.json()).catch(() => ({})); + + if (rulingResponse?.title?.includes('Restricted')) + return { error: "content.post.age" }; + } + } catch { + return { error: "fetch.fail" }; } - return (await request(url, cookie, 'POST', requestData)) - .data - ?.xdt_api__v1__media__shortcode__web_info - ?.items - ?.[0]; + return { error: "fetch.empty" }; } function extractOldPost(data, id, alwaysProxy) { - const sidecar = data?.gql_data?.shortcode_media?.edge_sidecar_to_children; + const shortcodeMedia = data?.gql_data?.shortcode_media || data?.gql_data?.xdt_shortcode_media; + const sidecar = shortcodeMedia?.edge_sidecar_to_children; + if (sidecar) { const picker = sidecar.edges.filter(e => e.node?.display_url) .map((e, i) => { const type = e.node?.is_video ? "video" : "photo"; - const url = type === "video" ? e.node?.video_url : e.node?.display_url; + + let url; + if (type === 'video') { + url = e.node?.video_url; + } else if (type === 'photo') { + url = e.node?.display_url; + } let itemExt = type === "video" ? "mp4" : "jpg"; @@ -196,16 +339,21 @@ export default function(obj) { }); if (picker.length) return { picker } - } else if (data?.gql_data?.shortcode_media?.video_url) { + } + + if (shortcodeMedia?.video_url) { return { - urls: data.gql_data.shortcode_media.video_url, + urls: shortcodeMedia.video_url, filename: `instagram_${id}.mp4`, audioFilename: `instagram_${id}_audio` } - } else if (data?.gql_data?.shortcode_media?.display_url) { + } + + if (shortcodeMedia?.display_url) { return { - urls: data.gql_data?.shortcode_media.display_url, - isPhoto: true + urls: shortcodeMedia.display_url, + isPhoto: true, + filename: `instagram_${id}.jpg`, } } } @@ -266,7 +414,9 @@ export default function(obj) { } async function getPost(id, alwaysProxy) { - const hasData = (data) => data && data.gql_data !== null; + const hasData = (data) => data + && data.gql_data !== null + && data?.gql_data?.xdt_shortcode_media !== null; let data, result; try { const cookie = getCookie('instagram'); @@ -295,7 +445,9 @@ export default function(obj) { if (!hasData(data) && cookie) data = await requestGQL(id, cookie); } catch {} - if (!data) return { error: "fetch.fail" }; + if (!hasData(data)) { + return getErrorContext(id); + } if (data?.gql_data) { result = extractOldPost(data, id, alwaysProxy) @@ -358,14 +510,30 @@ export default function(obj) { if (item.image_versions2?.candidates) { return { urls: item.image_versions2.candidates[0].url, - isPhoto: true + isPhoto: true, + filename: `instagram_${id}.jpg`, } } return { error: "link.unsupported" }; } - const { postId, storyId, username, alwaysProxy } = obj; + const { postId, shareId, storyId, username, alwaysProxy } = obj; + + if (shareId) { + return resolveRedirectingURL( + `https://www.instagram.com/share/${shareId}/`, + dispatcher, + // for some reason instagram decides to return HTML + // instead of a redirect when requesting with a normal + // browser user-agent + 'curl/7.88.1' + ).then(match => instagram({ + ...obj, ...match, + shareId: undefined + })); + } + if (postId) return getPost(postId, alwaysProxy); if (username && storyId) return getStory(username, storyId); diff --git a/api/src/processing/services/pinterest.js b/api/src/processing/services/pinterest.js index 9c0ac9c5..ea4275cb 100644 --- a/api/src/processing/services/pinterest.js +++ b/api/src/processing/services/pinterest.js @@ -1,4 +1,5 @@ import { genericUserAgent } from "../../config.js"; +import { resolveRedirectingURL } from "../url.js"; const videoRegex = /"url":"(https:\/\/v1\.pinimg\.com\/videos\/.*?)"/g; const imageRegex = /src="(https:\/\/i\.pinimg\.com\/.*\.(jpg|gif))"/g; @@ -7,10 +8,10 @@ export default async function(o) { let id = o.id; if (!o.id && o.shortLink) { - id = await fetch(`https://api.pinterest.com/url_shortener/${o.shortLink}/redirect/`, { redirect: "manual" }) - .then(r => r.headers.get("location").split('pin/')[1].split('/')[0]) - .catch(() => {}); + const patternMatch = await resolveRedirectingURL(`https://api.pinterest.com/url_shortener/${o.shortLink}/redirect/`); + id = patternMatch?.id; } + if (id.includes("--")) id = id.split("--")[1]; if (!id) return { error: "fetch.fail" }; @@ -26,8 +27,8 @@ export default async function(o) { if (videoLink) return { urls: videoLink, - filename: `pinterest_${o.id}.mp4`, - audioFilename: `pinterest_${o.id}_audio` + filename: `pinterest_${id}.mp4`, + audioFilename: `pinterest_${id}_audio` } const imageLink = [...html.matchAll(imageRegex)] @@ -39,7 +40,7 @@ export default async function(o) { if (imageLink) return { urls: imageLink, isPhoto: true, - filename: `pinterest_${o.id}.${imageType}` + filename: `pinterest_${id}.${imageType}` } return { error: "fetch.empty" }; diff --git a/api/src/processing/services/snapchat.js b/api/src/processing/services/snapchat.js index 4c62a5ff..10359a03 100644 --- a/api/src/processing/services/snapchat.js +++ b/api/src/processing/services/snapchat.js @@ -1,7 +1,6 @@ -import { extract, normalizeURL } from "../url.js"; +import { resolveRedirectingURL } from "../url.js"; import { genericUserAgent } from "../../config.js"; import { createStream } from "../../stream/manage.js"; -import { getRedirectingURL } from "../../misc/utils.js"; const SPOTLIGHT_VIDEO_REGEX = //; const NEXT_DATA_REGEX = /