canopy/src/utils/media/yanker.js

/*Canopy - The next generation of stoner streaming software
Copyright (C) 2024-2025 Rainbownapkin and the TTN Community

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.*/

//NPM Imports
//const url = require("node:url");
const validator = require('validator');//No express here, so regular validator it is!
const {sanitizeUrl} = require("@braintree/sanitize-url");

//local import
const iaUtil = require('./internetArchiveUtils');
const ytdlpUtil = require('./ytdlpUtils');

/**
 * Checks a given URL and runs the proper metadata fetching function to create a media object from any supported URL
 * @param {String} url - URL to yank media against
 * @param {String} title - Title to apply to yanked media
 * @returns {Promise<Array|null>} Resolves to the list of yanked media objects handed back by the matching fetcher on success, or null when the URL is invalid or its type is unsupported
 */
module.exports.yankMedia = async function(url, title){
    //Get pull type
    //Resolved through module.exports instead of 'this' so the function keeps working when it is called unbound (e.g. destructured out of require())
    const pullType = await module.exports.getMediaType(url);

    //Check pull type
    switch(pullType.type){
        case "ia":
            //return media object list from IA module
            return await iaUtil.fetchMetadata(pullType.id, title);
        case "yt":
            //return media object list from the YT-DLP module's youtube function
            return await ytdlpUtil.fetchYoutubeMetadata(pullType.id, title);
        case "ytp":
            //Youtube playlists are recognized but intentionally not fetched yet:
            //return await ytdlpUtil.fetchYoutubePlaylistMetadata(pullType.id, title);
            //Holding off on this since YT-DLP takes 10 years to do a playlist as it needs to pull each and every video one-by-one
            //Maybe in the future a piped alternative might be in order, however this would most likely require us to host our own local instance.
            //Though it could give us added resistance against youtube/google's rolling IP bans
            return null;
        case "dm":
            //return media object list from the YT-DLP module's dailymotion function
            return await ytdlpUtil.fetchDailymotionMetadata(pullType.id, title);
        default:
            //return null to signify a bad url
            return null;
    }
};

/**
 * Refreshes raw links on relevant media objects
 *
 * Useful for sources like youtube, who only provide expiring raw links
 * @param {ScheduledMedia} mediaObj - Media Object to refresh, its rawLink property is overwritten in place when a refresh happens
 * @returns {Promise<ScheduledMedia|null>} Resolves to the same media object with refreshed raw links, or null when no refresh was required for this media
 */
module.exports.refreshRawLink = async function(mediaObj){
    switch(mediaObj.type){
        //Braces give the declarations below their own scope instead of leaking them across the whole switch
        case 'yt': {
            //Tracks whether any raw link has already expired
            let expired = false;
            //Tracks whether the rawLink object holds no links at all
            let empty = true;

            //For each link map in the rawLink object
            for(const key of Object.keys(mediaObj.rawLink)){
                //Skip the combo entry, since it's probably just the regular URL
                if(key === "combo"){
                    continue;
                }

                for(const link of mediaObj.rawLink[key]){
                    //At least one link exists
                    empty = false;

                    //Get expiration parameter from the link, scaled by 1000 so it compares against Date.now() (presumably a unix timestamp in seconds)
                    //A link without an "expire" parameter yields null * 1000 === 0 and is therefore treated as expired
                    const expires = new URL(link[1]).searchParams.get("expire") * 1000;

                    //Only ever set the flag, never assign the comparison result directly, so one fresh link can't unset it
                    if(expires < Date.now()){
                        expired = true;
                    }
                }
            }

            //If the raw link object is empty or expired
            if(empty || expired){
                //Re-fetch media metadata (declared locally, the original leaked this as an implicit global)
                const metadata = await ytdlpUtil.fetchYoutubeMetadata(mediaObj.id);

                //Refresh media rawlink from metadata
                mediaObj.rawLink = metadata[0].rawLink;

                //return media object
                return mediaObj;
            }

            break;
        }
    }

    //Return null to tell the calling function there is no refresh required for this media type
    return null;
};
/**
 * Detects media type by URL
 *
 * I'd be lying if this didn't take at least some inspiration/regex patterns from extractQueryParam() in cytube/forest's browser-side 'util.js'
 * Still this has some improvements like url pre-checks and the fact that it's handled serverside, reducing possibility of bad requests.
 * Some of the regex expressions for certain services have also been improved, such as youtube, and the fore.st-unique archive.org
 *
 * @param {String} dirtyURL - URL to determine media type of
 * @returns {Promise<Object>} Resolves to an object of shape {type, id}: type is one of "ia", "yt", "ytp", "dm" and id is the clipped ID string, both are null when the URL is invalid or unrecognized
 */
module.exports.getMediaType = async function(dirtyURL){
    //Sanitize our URL
    const url = sanitizeUrl(dirtyURL);

    //Check if we have a valid url, encode it on the fly in case it's too humie-friendly
    //The options object belongs to isURL(), the original handed it to encodeURI() which silently ignores extra arguments
    if(!validator.isURL(encodeURI(url), {require_valid_protocol: true})){
        //If not, toss it out
        return {
            type: null,
            id: null
        };
    }

    //Block-scoped match result, the original assigned this without a declaration and leaked it as an implicit global
    let match;

    //If we have link to a resource from archive.org
    if((match = url.match(/archive\.org\/(?:details|download)\/(.+)/))){
        //return internet archive upload id and filepath
        return {
            type: "ia",
            id: match[1]
        };
    }

    //If we have a match to a youtube video
    if((match = url.match(/youtube\.com\/watch\?v=([a-zA-Z0-9_-]{11})/)) || (match = url.match(/youtu\.be\/([a-zA-Z0-9_-]{11})/))){
        //return youtube video id
        return {
            type: "yt",
            id: match[1]
        };
    }

    //If we have a match to a youtube playlist
    if((match = url.match(/youtube\.com\/playlist\?list=([a-zA-Z0-9_-]{34})/)) || (match = url.match(/youtu\.be\/playlist\?list=([a-zA-Z0-9_-]{34})/))){
        //return youtube playlist id
        return {
            type: "ytp",
            id: match[1]
        };
    }

    //If we have a match to a dailymotion video
    if((match = url.match(/dailymotion\.com\/video\/([a-zA-Z0-9]+)/))){
        //return dailymotion video id
        return {
            type: "dm",
            id: match[1]
        };
    }

    //If we fell through all of our media types without a match
    return {
        type: null,
        id: null
    };
};