//JSDoc: Source: utils/media/yanker.js

/*Canopy - The next generation of stoner streaming software
Copyright (C) 2024-2025 Rainbownapkin and the TTN Community

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.*/

//NPM Imports
//const url = require("node:url");
const validator = require('validator');//No express here, so regular validator it is!

//local import
const iaUtil = require('./internetArchiveUtils');
const ytdlpUtil = require('./ytdlpUtils');

/**
 * Checks a given URL and runs the proper metadata fetching function to create a media object from any supported URL
 * @param {String} url - URL to yank media against
 * @param {String} title - Title to apply to yanked media
 * @returns {Promise<Array|null>} Resolves to the result of the matching source module's fetcher (list of yanked media objects on success), or null for unsupported/invalid URLs and for youtube playlists
 */
module.exports.yankMedia = async function(url, title){
    //Get pull type
    //Resolved through module.exports instead of 'this' so the function keeps working when it's destructured off the module or handed around as a callback
    const pullType = await module.exports.getMediaType(url);

    //Check pull type
    switch(pullType.type){
        case "ia":
            //return media object list from IA module
            return await iaUtil.fetchMetadata(pullType.id, title);
        case "yt":
            //return media object list from the YT-DLP module's youtube function
            return await ytdlpUtil.fetchYoutubeMetadata(pullType.id, title);
        case "dm":
            //return media object list from the YT-DLP module's dailymotion function
            return await ytdlpUtil.fetchDailymotionMetadata(pullType.id, title);
        case "ytp":
            //Playlists are recognized but intentionally not fetched, so they fall through to the bad-url result below.
            //The call would be: ytdlpUtil.fetchYoutubePlaylistMetadata(pullType.id, title);
            //Holding off on this since YT-DLP takes 10 years to do a playlist as it needs to pull each and every video one-by-one
            //Maybe in the future a piped alternative might be in order, however this would most likely require us to host our own local instance.
            //Though it could give us added resistance against youtube/google's rolling IP bans
        default:
            //return null to signify a bad (or currently unsupported) url
            return null;
    }
}

/**
 * Refreshes raw links on relevant media objects
 * 
 * Useful for sources like youtube, who only provide expiring raw links
 * @param {ScheduledMedia} mediaObj - Media Object to refresh, its rawLink is overwritten in place when a refresh happens
 * @returns {Promise<ScheduledMedia|null>} The same media object with a fresh rawLink, or null when no refresh is required (link still good, or media type has no expiring links)
 */
module.exports.refreshRawLink = async function(mediaObj){
    switch(mediaObj.type){
        //Braces give the case its own scope so the const declarations below can't leak into other cases
        case 'yt': {
            //Scrape expiration from query strings
            const expireMatch = mediaObj.rawLink.match(/expire=([0-9]+)/);
            //Went with regex for speed, but I figure I'd keep this around in case we want the accuracy of a battle-tested implementation
            //const expires = new URL(mediaObj.rawLink).searchParams.get("expire");

            //match() hands back the whole match array, so pull the captured digits out explicitly.
            //Doing math on the array itself coerces to NaN, which made the check below fail every time and forced a re-fetch on every call.
            const expires = expireMatch == null ? null : Number(expireMatch[1]);

            //If we have a valid raw file link that will be good by the end of the video
            //NOTE(review): the *1000 assumes 'expire' is in seconds and getEndTime() is in milliseconds — confirm against ScheduledMedia
            if(expires != null && (expires * 1000) > mediaObj.getEndTime()){
                //Return null to tell the calling function there is no refresh required for this video at this time
                return null;
            }

            //Re-fetch media metadata (declared locally, the original leaked this as an implicit global)
            const metadata = await ytdlpUtil.fetchYoutubeMetadata(mediaObj.id);
            //Refresh media rawlink from metadata
            mediaObj.rawLink = metadata[0].rawLink;

            //return media object
            return mediaObj;
        }
    }

    //Return null to tell the calling function there is no refresh required for this media type
    return null;
}
/**
 * Detects media type by URL
 * 
 * I'd be lying if this didn't take at least some inspiration/regex patterns from extractQueryParam() in cytube/forest's browser-side 'util.js'
 * Still this has some improvements like url pre-checks and the fact that it's handled serverside, reducing possibility of bad requests.
 * Some of the regex expressions for certain services have also been improved, such as youtube, and the fore.st-unique archive.org
 * 
 * @param {String} url - URL to determine media type of
 * @returns {Promise<Object>} Object containing URL type ("ia", "yt", "ytp", "dm" or null) and clipped ID string (null when nothing matched)
 */
module.exports.getMediaType = async function(url){
    //Anything that isn't a string can't be matched against below.
    //Without this, values like single-item arrays coerce into a string that passes validation and then blow up on .match()
    if(typeof url !== 'string'){
        return {
            type: null,
            id: null
        };
    }

    //Encode the url on the fly in case it's too humie-friendly
    let encodedURL;
    try{
        encodedURL = encodeURI(url);
    }catch(err){
        //encodeURI throws URIError on lone surrogates, which just means we were handed a garbage url
        if(err instanceof URIError){
            return {
                type: null,
                id: null
            };
        }

        //Anything else is unexpected, let it bubble
        throw err;
    }

    //Check if we have a valid url
    if(!validator.isURL(encodedURL)){
        //If not toss the fucker out
        return {
            type: null,
            id: null
        };
    }

    //Supported services in the order they are checked, first hit wins.
    //Capture group 1 of each pattern is the clipped ID handed back to the caller.
    const servicePatterns = [
        //archive.org resource, ID is the upload id and filepath
        {type: "ia", pattern: /archive\.org\/(?:details|download)\/([a-zA-Z0-9\/._-\s\%]+)/},
        //youtube video, long and short link forms
        {type: "yt", pattern: /youtube\.com\/watch\?v=([a-zA-Z0-9_-]{11})/},
        {type: "yt", pattern: /youtu\.be\/([a-zA-Z0-9_-]{11})/},
        //youtube playlist, long and short link forms
        {type: "ytp", pattern: /youtube\.com\/playlist\?list=([a-zA-Z0-9_-]{34})/},
        {type: "ytp", pattern: /youtu\.be\/playlist\?list=([a-zA-Z0-9_-]{34})/},
        //dailymotion video
        {type: "dm", pattern: /dailymotion\.com\/video\/([a-zA-Z0-9]+)/}
    ];

    for(const {type, pattern} of servicePatterns){
        //Kept local to the loop, the original assigned to an undeclared 'match' and leaked it as a global
        const match = url.match(pattern);

        if(match != null){
            return {
                type,
                id: match[1]
            };
        }
    }

    //If we fell through all of our media types without a match
    return {
        type: null,
        id: null
    };
}