diff --git a/config.example.json b/config.example.json index 68c5002..22c388c 100644 --- a/config.example.json +++ b/config.example.json @@ -5,6 +5,7 @@ "proxied": false, "protocol": "http", "domain": "localhost", + "ytdlpPath": "/home/canopy/.local/pipx/venvs/yt-dlp/bin/yt-dlp", "sessionSecret": "CHANGE_ME", "altchaSecret": "CHANGE_ME", "ipSecret": "CHANGE_ME", diff --git a/config.example.jsonc b/config.example.jsonc index 8f99353..818ca58 100644 --- a/config.example.jsonc +++ b/config.example.jsonc @@ -13,6 +13,9 @@ "protocol": "http", //Domain the server is available at, used for server-side link generation "domain": "localhost", + //Path to YT-DLP Executable for scraping youtube, dailymotion, and vimeo + //Dailymotion and Vimeo could work using official apis w/o keys, but you wouldn't have any raw file playback options :P + "ytdlpPath": "/home/canopy/.local/pipx/venvs/yt-dlp/bin/yt-dlp", //Be careful with what you keep in secrets, you should use special chars, but test your deployment, as some chars may break account registration //An update to either kill the server and bitch it's planned so it's not so confusing for new admins //Session secret used to secure session keys diff --git a/package.json b/package.json index b1c6957..ecc3f6a 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,8 @@ "mongoose": "^8.4.3", "node-cron": "^3.0.3", "nodemailer": "^6.9.16", - "socket.io": "^4.8.1" + "socket.io": "^4.8.1", + "youtube-dl-exec": "^3.0.20" }, "scripts": { "start": "node ./src/server.js", diff --git a/src/app/channel/media/media.js b/src/app/channel/media/media.js index 97dfeee..d138a58 100644 --- a/src/app/channel/media/media.js +++ b/src/app/channel/media/media.js @@ -14,16 +14,14 @@ GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. 
If not, see <https://www.gnu.org/licenses/>.*/ -//Node imports -const crypto = require('node:crypto'); - module.exports = class{ - constructor(title, fileName, url, id, type, duration){ + constructor(title, fileName, url, id, type, duration, rawLink = url){ this.title = title; this.fileName = fileName this.url = url; this.id = id; this.type = type; this.duration = duration; + this.rawLink = rawLink; } } \ No newline at end of file diff --git a/src/utils/media/internetArchiveUtils.js b/src/utils/media/internetArchiveUtils.js index 0b940be..8467935 100644 --- a/src/utils/media/internetArchiveUtils.js +++ b/src/utils/media/internetArchiveUtils.js @@ -23,17 +23,9 @@ const media = require('../../app/channel/media/media.js'); const regexUtils = require('../regexUtils.js'); const loggerUtils = require('../loggerUtils.js') -module.exports.fetchMetadata = async function(link, title){ - //Parse link - const parsedLink = new url.URL(link); - //Split link path - const splitPath = parsedLink.pathname.split('/'); - //Get ItemID from link path - const itemID = splitPath[2] - //Splice the empty string, request type, and item ID out from link path - splitPath.splice(0,3) - //Join remaining link path back together to get requested file path within the given archive.org upload - const requestedPath = decodeURIComponent(splitPath.join('/')); +module.exports.fetchMetadata = async function(fullID, title){ + //Split fullID by first slash + const [itemID, requestedPath] = decodeURIComponent(fullID).split(/\/(.*)/); //Create empty list to hold media objects const mediaList = []; //Create empty variable to hold return data object @@ -64,7 +56,7 @@ module.exports.fetchMetadata = async function(link, title){ //If we're requesting an empty path - if(requestedPath == ''){ + if(requestedPath == '' || requestedPath == null){ //Return item metadata and compatible files data = { files: compatibleFiles, diff --git a/src/utils/media/yanker.js b/src/utils/media/yanker.js index 5b8dde9..581448a 100644 --- a/src/utils/media/yanker.js 
+++ b/src/utils/media/yanker.js @@ -19,37 +19,45 @@ const validator = require('validator');//No express here, so regular validator i //local import const iaUtil = require('./internetArchiveUtils'); +const ytdlpUtil = require('./ytdlpUtils'); module.exports.yankMedia = async function(url, title){ //Get pull type const pullType = await this.getMediaType(url); //Check pull type - switch(pullType){ + switch(pullType.type){ case "ia": //return media object list from IA module - return await iaUtil.fetchMetadata(url, title); + return await iaUtil.fetchMetadata(pullType.id, title); default: //return null to signify a bad url return null; } } +//I'd be lying if this didn't take at least some inspiration/regex patterns from extractQueryParam() in cytube/forest's browser-side 'util.js' +//Still this has some improvements like url pre-checks and the fact that it's handled serverside, reducing the possibility of bad requests module.exports.getMediaType = async function(url){ - //Encode URI in-case we where handed something a little too humie friendly - url = encodeURI(url); - - //Check if we have a valid url - if(!validator.isURL(url)){ + //Check if we have a valid url, encode it on the fly in case it's too humie-friendly + if(!validator.isURL(encodeURI(url))){ //If not toss the fucker out - return null; - } - + return { + type: null, + id: url + } //If we have link to a resource from archive.org - if(url.match(/^https\:\/\/archive.org\//g)){ + }else if(match = url.match(/archive\.org\/(?:details|download)\/([a-zA-Z0-9\/._-\s\%]+)/)){ //return internet archive code - return "ia"; + return { + type: "ia", + id: match[1] + } } - return null; + //If we fell through all of our media types without a match + return{ + type: null, + id: url + } } \ No newline at end of file diff --git a/src/utils/media/ytdlpUtils.js b/src/utils/media/ytdlpUtils.js new file mode 100644 index 0000000..794c49f --- /dev/null +++ b/src/utils/media/ytdlpUtils.js @@ -0,0 +1,65 @@ +/*Canopy - The next generation 
of stoner streaming software +Copyright (C) 2024-2025 Rainbownapkin and the TTN Community + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>.*/ + +//Config +const config = require('../../../config.json'); + +//Node Imports +const { create: ytdlpMaker } = require('youtube-dl-exec'); +//Import ytdlp w/ custom path from config so we can force the newest build of yt-dlp from pip +const ytdlp = ytdlpMaker(config.ytdlpPath); +const url = require("node:url"); +const validator = require('validator'); + +//Local Imports +const media = require('../../app/channel/media/media.js'); +const regexUtils = require('../regexUtils.js'); +const loggerUtils = require('../loggerUtils.js') + +module.exports.fetchYoutubeVideoMetadata = async function(id, title){ + const media = await fetchMetadata(`youtu.be/${id}`, title,'yt'); + + return media; +} + +//Generic YTDLP function meant to be used by service-specific fetchers which will then be used to fetch video metadata +async function fetchMetadata(link, title, type){ + //Create media list + const mediaList = []; + + //Pull raw metadata + const rawMetadata = await ytdlp(link, { + dumpSingleJson: true, + format: 'b' + }); + + //Pull data from rawMetadata, sanitizing title to prevent XSS + const name = validator.escape(validator.trim(rawMetadata.title)); + const rawLink = rawMetadata.requested_downloads[0].url; + const id = rawMetadata.id; + + //if we were handed a null title + if(title 
== null || title == ''){ + //Create new media object from file info substituting filename for title + mediaList.push(new media(name, name, link, id, type, Number(rawMetadata.duration), rawLink)); + }else{ + //Create new media object from file info + mediaList.push(new media(title, name, link, id, type, Number(rawMetadata.duration), rawLink)); + } + + //Return list of media + return mediaList; +} \ No newline at end of file