JSDoc: Source: utils/media/internetArchiveUtils.js

/*Canopy - The next generation of stoner streaming software
Copyright (C) 2024-2025 Rainbownapkin and the TTN Community

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.*/

//Node Imports
const validator = require('validator');

//Local Imports
const media = require('../../app/channel/media/media.js');
const regexUtils = require('../regexUtils.js');
const loggerUtils = require('../loggerUtils.js')

/**
 * Pulls metadata for a given archive.org item and builds media objects for its web-safe video files
 * @param {String} fullID - Item identifier, optionally followed by a slash and a file path prefix within the item (URI-decoded before use)
 * @param {String} title - Title to add to media objects, the escaped file name is used instead when null or empty
 * @returns {Promise<Array>} Generated list of media objects from given upload path
 * @throws Exception built by loggerUtils.exceptionSmith when archive.org answers with an error status, or returns no file list/metadata for the item
 */
module.exports.fetchMetadata = async function(fullID, title){
    //File formats which are accepted as web-safe by exact name
    const webSafeFormats = ["h.264 IA", "h.264", "Ogg Video"];

    //Split fullID by first slash, giving us the item ID and the (optional) path requested within that item
    const [itemID, requestedPath] = decodeURIComponent(fullID).split(/\/(.*)/);

    //Create metadata link from itemID, encoding it so stray characters can't change the request we make
    const metadataLink = `https://archive.org/metadata/${encodeURIComponent(itemID)}`;

    //Fetch item metadata from the internet archive
    const response = await fetch(metadataLink,
        {
            method: "GET"
        }
    );

    //If we hit a snag
    if(!response.ok){
        //Scream and shout
        const errorBody = await response.text();
        throw loggerUtils.exceptionSmith(`Internet Archive Error '${response.status}': ${errorBody}`, "queue");
    }

    //Collect our metadata
    const rawMetadata = await response.json();

    //A successful response can still lack a file list (an empty object for instance), so complain properly instead of crashing on it
    if(rawMetadata == null || !Array.isArray(rawMetadata.files) || rawMetadata.metadata == null){
        throw loggerUtils.exceptionSmith("Internet Archive Error: Item not found or contains no files", "queue");
    }

    //Filter out any incompatible files
    let files = rawMetadata.files.filter(compatibilityFilter);

    //If we're requesting a specific path
    if(requestedPath != null && requestedPath !== ''){
        //Filter files out that don't start with the requested path
        files = files.filter((file) => file.name.startsWith(requestedPath));
    }

    //Encode item identifier once for use in every download link
    const identifier = encodeURIComponent(rawMetadata.metadata.identifier);

    //Create empty list to hold media objects
    const mediaList = [];

    //For every compatible and relevant file returned from IA
    for(const file of files){
        //Split file path by directories
        const segments = file.name.split('/');

        //Pull filename from path and escape in-case someone put something nasty in there
        const name = validator.escape(validator.trim(segments[segments.length - 1]));

        //Construct link from pulled info, encoding each directory/file name so characters like '#', '?', and '%' don't break the URL
        const link = `https://archive.org/download/${identifier}/${segments.map(encodeURIComponent).join('/')}`;

        //If we were handed a null or empty title, substitute the filename for it
        const mediaTitle = (title == null || title === '') ? name : title;

        //Create new media object from file info
        mediaList.push(new media(mediaTitle, name, link, link, 'ia', Number(file.length)));
    }

    //Return media object list
    return mediaList;

    //Function declarations are hoisted, so this is available to the filter call above
    function compatibilityFilter(file){
        //Files missing a format string can't be judged, so they're treated as incompatible instead of throwing
        if(typeof file.format !== 'string'){
            return false;
        }

        //Return true for all files that match for web-safe formats
        return webSafeFormats.includes(file.format) || file.format.includes("MPEG4");
    }
}