canopy/src/utils/media/yanker.js

173 lines
No EOL
6.8 KiB
JavaScript

/*Canopy - The next generation of stoner streaming software
Copyright (C) 2024-2025 Rainbownapkin and the TTN Community
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.*/
//NPM Imports
//const url = require("node:url");
const validator = require('validator');//No express here, so regular validator it is!
const {sanitizeUrl} = require("@braintree/sanitize-url");
//local import
const iaUtil = require('./internetArchiveUtils');
const ytdlpUtil = require('./ytdlpUtils');
/**
* Checks a given URL and runs the proper metadata fetching function to create a media object from any supported URL
* @param {String} url - URL to yank media against
* @param {String} title - Title to apply to yanked media
* @returns {Array} Returns list of yanked media objects on success
*/
module.exports.yankMedia = async function(url, title){
//Get pull type
const pullType = await this.getMediaType(url);
//Check pull type
switch(pullType.type){
case "ia":
//return media object list from IA module
return await iaUtil.fetchMetadata(pullType.id, title);
case "yt":
//return media object list from the YT-DLP module's youtube function
return await ytdlpUtil.fetchYoutubeMetadata(pullType.id, title);
case "ytp":
//return media object list from YT-DLP module's youtube playlist function
//return await ytdlpUtil.fetchYoutubePlaylistMetadata(pullType.id, title);
//Holding off on this since YT-DLP takes 10 years to do a playlist as it needs to pull each and every video one-by-one
//Maybe in the future a piped alternative might be in order, however this would most likely require us to host our own local instance.
//Though it could give us added resistance against youtube/google's rolling IP bans
return null;
case "dm":
//return mediao object list from the YT-DLP module's dailymotion function
return await ytdlpUtil.fetchDailymotionMetadata(pullType.id, title);
default:
//return null to signify a bad url
return null;
}
}
/**
* Refreshes raw links on relevant media objects
*
* Useful for sources like youtube, who only provide expiring raw links
* @param {ScheduledMedia} mediaObj - Media Object to refresh
* @returns {ScheduledMedia} Refreshed media object
*/
module.exports.refreshRawLink = async function(mediaObj){
switch(mediaObj.type){
case 'yt':
//Create boolean to hold expired state
let expired = false;
//Create boolean to hold whether or not rawLink object is empty
let empty = true;
//For each link map in the rawLink object
for(const key of Object.keys(mediaObj.rawLink)){
//Ignore da wombo-combo since it's probably just the fuckin regular URL
if(key != "combo"){
for(const link of mediaObj.rawLink[key]){
//Let it be known, this bitch got links
empty = false;
//Get expiration parameter from the link
const expires = new URL(link[1]).searchParams.get("expire") * 1000;
//If this shit's already expired
if(expires < Date.now()){
//Set expired to true, don't directly set the bool because we don't ever want to unset this flag
expired = true;
}
}
}
}
//If the raw link object is empty or expired
if(empty || expired){
//Re-fetch media metadata
metadata = await ytdlpUtil.fetchYoutubeMetadata(mediaObj.id);
//Refresh media rawlink from metadata
mediaObj.rawLink = metadata[0].rawLink;
//return media object
return mediaObj;
}
}
//Return null to tell the calling function there is no refresh required for this media type
return null;
}
/**
* Detects media type by URL
*
* I'd be lying if this didn't take at least some inspiration/regex patterns from extractQueryParam() in cytube/forest's browser-side 'util.js'
* Still this has some improvements like url pre-checks and the fact that it's handled serverside, recuing possibility of bad requests.
* Some of the regex expressions for certain services have also been improved, such as youtube, and the fore.st-unique archive.org
*
* @param {String} dirtyURL - URL to determine media type of
* @returns {Object} containing URL type and clipped ID string
*/
module.exports.getMediaType = async function(dirtyURL){
//Sanatize our URL
const url = sanitizeUrl(dirtyURL);
//Check if we have a valid url, encode it on the fly in case it's too humie-friendly
if(!validator.isURL(encodeURI(url,{require_valid_protocol: true}))){
//If not toss the fucker out
return {
type: null,
id: null
}
}
//If we have link to a resource from archive.org
if(match = url.match(/archive\.org\/(?:details|download)\/(.+)/)){
//return internet archive upload id and filepath
return {
type: "ia",
id: match[1]
}
}
//If we have a match to a youtube video
if((match = url.match(/youtube\.com\/watch\?v=([a-zA-Z0-9_-]{11})/)) || (match = url.match(/youtu\.be\/([a-zA-Z0-9_-]{11})/))){
//return youtube video id
return {
type: "yt",
id: match[1]
}
}
//If we have a match to a youtube playlist
if((match = url.match(/youtube\.com\/playlist\?list=([a-zA-Z0-9_-]{34})/)) || (match = url.match(/youtu\.be\/playlist\?list=([a-zA-Z0-9_-]{34})/))){
//return youtube playlist id
return {
type: "ytp",
id: match[1]
}
}
//If we have a match to a dailymotion video
if(match = url.match(/dailymotion\.com\/video\/([a-zA-Z0-9]+)/)){
return {
type: "dm",
id: match[1]
}
}
//If we fell through all of our media types without a match
return{
type: null,
id: null
}
}