Refactored media link parsing, started work on YT-DLP integration.

rainbow napkin 2025-05-06 06:32:16 -04:00
parent 67c687a8d3
commit 0ce0685fd5
7 changed files with 98 additions and 30 deletions

View file

@@ -5,6 +5,7 @@
     "proxied": false,
     "protocol": "http",
     "domain": "localhost",
+    "ytdlpPath": "/home/canopy/.local/pipx/venvs/yt-dlp/bin/yt-dlp",
     "sessionSecret": "CHANGE_ME",
     "altchaSecret": "CHANGE_ME",
     "ipSecret": "CHANGE_ME",

View file

@@ -13,6 +13,9 @@
     "protocol": "http",
     //Domain the server is available at, used for server-side link generation
     "domain": "localhost",
+    //Path to YT-DLP Executable for scraping youtube, dailymotion, and vimeo
+    //Dailymotion and Vimeo could work using official apis w/o keys, but you wouldn't have any raw file playback options :P
+    "ytdlpPath": "/home/canopy/.local/pipx/venvs/yt-dlp/bin/yt-dlp",
     //Be careful with what you keep in secrets, you should use special chars, but test your deployment, as some chars may break account registration
     //An update to either kill the server and bitch it's planned so it's not so confusing for new admins
     //Session secret used to secure session keys
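For admins wiring this up, a minimal startup sanity check for the configured binary might look like the sketch below. This is illustrative only, not part of the commit; the config require path and log messages are assumptions.

//Illustrative sketch (not in this commit): verify the configured yt-dlp binary actually runs.
//The relative path to config.json here is an assumption.
const { execFile } = require('node:child_process');
const config = require('./config.json');

execFile(config.ytdlpPath, ['--version'], (err, stdout) => {
    if(err){
        console.error(`yt-dlp is not executable at ${config.ytdlpPath}: ${err.message}`);
    }else{
        console.log(`Found yt-dlp ${stdout.trim()} at ${config.ytdlpPath}`);
    }
});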

View file

@@ -16,7 +16,8 @@
     "mongoose": "^8.4.3",
     "node-cron": "^3.0.3",
     "nodemailer": "^6.9.16",
-    "socket.io": "^4.8.1"
+    "socket.io": "^4.8.1",
+    "youtube-dl-exec": "^3.0.20"
   },
   "scripts": {
     "start": "node ./src/server.js",

View file

@@ -14,16 +14,14 @@ GNU Affero General Public License for more details.
 You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see <https://www.gnu.org/licenses/>.*/
-//Node imports
-const crypto = require('node:crypto');
 module.exports = class{
-    constructor(title, fileName, url, id, type, duration){
+    constructor(title, fileName, url, id, type, duration, rawLink = url){
         this.title = title;
         this.fileName = fileName
         this.url = url;
         this.id = id;
         this.type = type;
         this.duration = duration;
+        this.rawLink = rawLink;
     }
 }
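The new trailing parameter defaults to the playback url, so existing call sites keep working while the yt-dlp path can pass a separate direct-stream link. An illustrative sketch, not part of the commit; the require path, identifiers, and durations below are made up.

//Illustrative usage of the new optional rawLink argument (example values only).
const Media = require('./media.js'); //assumed relative path to the class above

//Archive.org-style entry: no rawLink passed, so it defaults to the playback url
const iaItem = new Media('Some Title', 'movie.mp4', 'https://archive.org/download/SomeItem/movie.mp4', 'SomeItem', 'ia', 120);
console.log(iaItem.rawLink); //same value as iaItem.url

//yt-dlp-style entry: the scraped direct stream url is kept separately as rawLink
const ytItem = new Media('Some Video', 'Some Video', 'youtu.be/someVideoID', 'someVideoID', 'yt', 300, 'https://example.com/direct-stream');
console.log(ytItem.rawLink); //'https://example.com/direct-stream'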

View file

@@ -23,17 +23,9 @@ const media = require('../../app/channel/media/media.js');
 const regexUtils = require('../regexUtils.js');
 const loggerUtils = require('../loggerUtils.js')
-module.exports.fetchMetadata = async function(link, title){
-    //Parse link
-    const parsedLink = new url.URL(link);
-    //Split link path
-    const splitPath = parsedLink.pathname.split('/');
-    //Get ItemID from link path
-    const itemID = splitPath[2]
-    //Splice the empty string, request type, and item ID out from link path
-    splitPath.splice(0,3)
-    //Join remaining link path back together to get requested file path within the given archive.org upload
-    const requestedPath = decodeURIComponent(splitPath.join('/'));
+module.exports.fetchMetadata = async function(fullID, title){
+    //Split fullID by first slash
+    const [itemID, requestedPath] = decodeURIComponent(fullID).split(/\/(.*)/);
     //Create empty list to hold media objects
     const mediaList = [];
     //Create empty variable to hold return data object
@@ -64,7 +56,7 @@ module.exports.fetchMetadata = async function(link, title){
     //If we're requesting an empty path
-    if(requestedPath == ''){
+    if(requestedPath == '' || requestedPath == null){
         //Return item metadata and compatible files
         data = {
             files: compatibleFiles,
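The one-liner replacement leans on a detail of String.prototype.split: a capturing group keeps the captured remainder, so the first slash separates the archive.org item ID from the requested path inside that item. A standalone illustration follows; the item ID and path are made up.

//Quick illustration of the new fullID parsing (example values only).
const [itemID, requestedPath] = decodeURIComponent('SomeItem/disc1/track02.flac').split(/\/(.*)/);
console.log(itemID);        //'SomeItem'
console.log(requestedPath); //'disc1/track02.flac'

//With a bare item ID there is no slash, so the second element comes back undefined,
//which is why the empty-path check now also accepts null/undefined.
const [bareID, barePath] = decodeURIComponent('SomeItem').split(/\/(.*)/);
console.log(bareID);   //'SomeItem'
console.log(barePath); //undefined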

View file

@@ -19,37 +19,45 @@ const validator = require('validator');//No express here, so regular validator i
 //local import
 const iaUtil = require('./internetArchiveUtils');
+const ytdlpUtil = require('./ytdlpUtils');
 module.exports.yankMedia = async function(url, title){
     //Get pull type
     const pullType = await this.getMediaType(url);
     //Check pull type
-    switch(pullType){
+    switch(pullType.type){
         case "ia":
             //return media object list from IA module
-            return await iaUtil.fetchMetadata(url, title);
+            return await iaUtil.fetchMetadata(pullType.id, title);
         default:
             //return null to signify a bad url
             return null;
     }
 }
+//I'd be lying if this didn't take at least some inspiration/regex patterns from extractQueryParam() in cytube/forest's browser-side 'util.js'
+//Still this has some improvements like url pre-checks and the fact that it's handled serverside, reducing the possibility of bad requests
 module.exports.getMediaType = async function(url){
-    //Encode URI in-case we where handed something a little too humie friendly
-    url = encodeURI(url);
-    //Check if we have a valid url
-    if(!validator.isURL(url)){
+    //Check if we have a valid url, encode it on the fly in case it's too humie-friendly
+    if(!validator.isURL(encodeURI(url))){
         //If not toss the fucker out
-        return null;
-    }
+        return {
+            type: null,
+            id: url
+        }
     //If we have link to a resource from archive.org
-    if(url.match(/^https\:\/\/archive.org\//g)){
+    }else if(match = url.match(/archive\.org\/(?:details|download)\/([a-zA-Z0-9\/._-\s\%]+)/)){
         //return internet archive code
-        return "ia";
+        return {
+            type: "ia",
+            id: match[1]
+        }
     }
-    return null;
+    //If we fell through all of our media types without a match
+    return{
+        type: null,
+        id: url
+    }
 }
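getMediaType now reports an object instead of a bare string, so callers get both the detected service and the already-extracted ID. A hedged usage sketch follows; the module filename is assumed and the inputs are example values.

//Rough illustration of the new {type, id} return shape (example values only).
const mediaUtils = require('./mediaUtils.js'); //assumed filename for the module above

(async () => {
    //archive.org details/download links resolve to the item ID plus any inner path
    console.log(await mediaUtils.getMediaType('https://archive.org/details/SomeItem/track01.mp3'));
    //-> { type: 'ia', id: 'SomeItem/track01.mp3' }

    //Anything that fails the URL check (or matches no known service) falls through with type: null
    console.log(await mediaUtils.getMediaType('not a link'));
    //-> { type: null, id: 'not a link' }
})();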

View file

@@ -0,0 +1,65 @@
+/*Canopy - The next generation of stoner streaming software
+Copyright (C) 2024-2025 Rainbownapkin and the TTN Community
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.*/
+
+//Config
+const config = require('../../../config.json');
+
+//Node Imports
+const { create: ytdlpMaker } = require('youtube-dl-exec');
+//Import ytdlp w/ custom path from config so we can force the newest build of yt-dlp from pip
+const ytdlp = ytdlpMaker(config.ytdlpPath);
+const url = require("node:url");
+const validator = require('validator');
+
+//Local Imports
+const media = require('../../app/channel/media/media.js');
+const regexUtils = require('../regexUtils.js');
+const loggerUtils = require('../loggerUtils.js')
+
+module.exports.fetchYoutubeVideoMetadata = async function(id, title){
+    const media = await fetchMetadata(`youtu.be/${id}`, title, 'yt');
+    return media;
+}
+
+//Generic YTDLP function meant to be used by service-specific fetchers which will then be used to fetch video metadata
+async function fetchMetadata(link, title, type){
+    //Create media list
+    const mediaList = [];
+    //Pull raw metadata
+    const rawMetadata = await ytdlp(link, {
+        dumpSingleJson: true,
+        format: 'b'
+    });
+    //Pull data from rawMetadata, sanitizing title to prevent XSS
+    const name = validator.escape(validator.trim(rawMetadata.title));
+    const rawLink = rawMetadata.requested_downloads[0].url;
+    const id = rawMetadata.id;
+    //If we were handed a null title
+    if(title == null || title == ''){
+        //Create new media object from file info substituting filename for title
+        mediaList.push(new media(name, name, link, id, type, Number(rawMetadata.duration), rawLink));
+    }else{
+        //Create new media object from file info
+        mediaList.push(new media(title, name, link, id, type, Number(rawMetadata.duration), rawLink));
+    }
+    //Return list of media
+    return mediaList;
+}
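How the new module is meant to be called, as a hedged sketch; the require path and video ID are placeholders, not values from the commit.

//Hedged usage sketch of the new yt-dlp backed fetcher.
const ytdlpUtil = require('./ytdlpUtils.js'); //assumed relative path

(async () => {
    //Resolves youtu.be/<id> through yt-dlp and returns a single-element media list
    const [video] = await ytdlpUtil.fetchYoutubeVideoMetadata('someVideoID', null);
    console.log(video.title, video.fileName);   //escaped title (also used as fileName when no title is given)
    console.log(video.duration, video.rawLink); //duration in seconds and the direct stream url chosen by format 'b'
})();

A natural follow-up, not in this commit, would be teaching getMediaType to recognize YouTube links and adding a 'yt' case to yankMedia's switch, which is presumably why ytdlpUtils is already imported there.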