Refactored media link parsing, started work on YT-DLP integration.

rainbow napkin 2025-05-06 06:32:16 -04:00
parent 67c687a8d3
commit 0ce0685fd5
7 changed files with 98 additions and 30 deletions

View file

@@ -5,6 +5,7 @@
     "proxied": false,
     "protocol": "http",
     "domain": "localhost",
+    "ytdlpPath": "/home/canopy/.local/pipx/venvs/yt-dlp/bin/yt-dlp",
     "sessionSecret": "CHANGE_ME",
     "altchaSecret": "CHANGE_ME",
     "ipSecret": "CHANGE_ME",

View file

@@ -13,6 +13,9 @@
     "protocol": "http",
     //Domain the server is available at, used for server-side link generation
     "domain": "localhost",
+    //Path to YT-DLP Executable for scraping youtube, dailymotion, and vimeo
+    //Dailymotion and Vimeo could work using official apis w/o keys, but you wouldn't have any raw file playback options :P
+    "ytdlpPath": "/home/canopy/.local/pipx/venvs/yt-dlp/bin/yt-dlp",
     //Be careful with what you keep in secrets, you should use special chars, but test your deployment, as some chars may break account registration
     //An update to either kill the server and bitch it's planned so it's not so confusing for new admins
     //Session secret used to secure session keys
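For admins wiring this up, a minimal startup sanity check for the configured binary might look like the sketch below. This is illustrative only, not part of the commit; the config require path and log messages are assumptions.

//Illustrative sketch (not in this commit): verify the configured yt-dlp binary actually runs.
//The relative path to config.json here is an assumption.
const { execFile } = require('node:child_process');
const config = require('./config.json');

execFile(config.ytdlpPath, ['--version'], (err, stdout) => {
    if(err){
        console.error(`yt-dlp is not executable at ${config.ytdlpPath}: ${err.message}`);
    }else{
        console.log(`Found yt-dlp ${stdout.trim()} at ${config.ytdlpPath}`);
    }
});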

View file

@@ -16,7 +16,8 @@
     "mongoose": "^8.4.3",
     "node-cron": "^3.0.3",
     "nodemailer": "^6.9.16",
-    "socket.io": "^4.8.1"
+    "socket.io": "^4.8.1",
+    "youtube-dl-exec": "^3.0.20"
   },
   "scripts": {
     "start": "node ./src/server.js",

View file

@@ -14,16 +14,14 @@ GNU Affero General Public License for more details.
 You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see <https://www.gnu.org/licenses/>.*/
-//Node imports
-const crypto = require('node:crypto');
 module.exports = class{
-    constructor(title, fileName, url, id, type, duration){
+    constructor(title, fileName, url, id, type, duration, rawLink = url){
         this.title = title;
         this.fileName = fileName
         this.url = url;
         this.id = id;
         this.type = type;
         this.duration = duration;
+        this.rawLink = rawLink;
     }
 }
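The new trailing parameter defaults to the playback url, so existing call sites keep working while the yt-dlp path can pass a separate direct-stream link. An illustrative sketch, not part of the commit; the require path, identifiers, and durations below are made up.

//Illustrative usage of the new optional rawLink argument (example values only).
const Media = require('./media.js'); //assumed relative path to the class above

//Archive.org-style entry: no rawLink passed, so it defaults to the playback url
const iaItem = new Media('Some Title', 'movie.mp4', 'https://archive.org/download/SomeItem/movie.mp4', 'SomeItem', 'ia', 120);
console.log(iaItem.rawLink); //same value as iaItem.url

//yt-dlp-style entry: the scraped direct stream url is kept separately as rawLink
const ytItem = new Media('Some Video', 'Some Video', 'youtu.be/someVideoID', 'someVideoID', 'yt', 300, 'https://example.com/direct-stream');
console.log(ytItem.rawLink); //'https://example.com/direct-stream'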

View file

@@ -23,17 +23,9 @@ const media = require('../../app/channel/media/media.js');
 const regexUtils = require('../regexUtils.js');
 const loggerUtils = require('../loggerUtils.js')
-module.exports.fetchMetadata = async function(link, title){
-    //Parse link
-    const parsedLink = new url.URL(link);
-    //Split link path
-    const splitPath = parsedLink.pathname.split('/');
-    //Get ItemID from link path
-    const itemID = splitPath[2]
-    //Splice the empty string, request type, and item ID out from link path
-    splitPath.splice(0,3)
-    //Join remaining link path back together to get requested file path within the given archive.org upload
-    const requestedPath = decodeURIComponent(splitPath.join('/'));
+module.exports.fetchMetadata = async function(fullID, title){
+    //Split fullID by first slash
+    const [itemID, requestedPath] = decodeURIComponent(fullID).split(/\/(.*)/);
     //Create empty list to hold media objects
     const mediaList = [];
     //Create empty variable to hold return data object
@@ -64,7 +56,7 @@ module.exports.fetchMetadata = async function(link, title){
     //If we're requesting an empty path
-    if(requestedPath == ''){
+    if(requestedPath == '' || requestedPath == null){
         //Return item metadata and compatible files
         data = {
             files: compatibleFiles,
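The one-liner replacement leans on a detail of String.prototype.split: a capturing group keeps the captured remainder, so the first slash separates the archive.org item ID from the requested path inside that item. A standalone illustration follows; the item ID and path are made up.

//Quick illustration of the new fullID parsing (example values only).
const [itemID, requestedPath] = decodeURIComponent('SomeItem/disc1/track02.flac').split(/\/(.*)/);
console.log(itemID);        //'SomeItem'
console.log(requestedPath); //'disc1/track02.flac'

//With a bare item ID there is no slash, so the second element comes back undefined,
//which is why the empty-path check now also accepts null/undefined.
const [bareID, barePath] = decodeURIComponent('SomeItem').split(/\/(.*)/);
console.log(bareID);   //'SomeItem'
console.log(barePath); //undefined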

View file

@@ -19,37 +19,45 @@ const validator = require('validator');//No express here, so regular validator i
 //local import
 const iaUtil = require('./internetArchiveUtils');
+const ytdlpUtil = require('./ytdlpUtils');
 module.exports.yankMedia = async function(url, title){
     //Get pull type
     const pullType = await this.getMediaType(url);
     //Check pull type
-    switch(pullType){
+    switch(pullType.type){
         case "ia":
             //return media object list from IA module
-            return await iaUtil.fetchMetadata(url, title);
+            return await iaUtil.fetchMetadata(pullType.id, title);
         default:
             //return null to signify a bad url
             return null;
     }
 }
+//I'd be lying if this didn't take at least some inspiration/regex patterns from extractQueryParam() in cytube/forest's browser-side 'util.js'
+//Still this has some improvements like url pre-checks and the fact that it's handled serverside, reducing the possibility of bad requests
 module.exports.getMediaType = async function(url){
-    //Encode URI in-case we where handed something a little too humie friendly
-    url = encodeURI(url);
-    //Check if we have a valid url
-    if(!validator.isURL(url)){
+    //Check if we have a valid url, encode it on the fly in case it's too humie-friendly
+    if(!validator.isURL(encodeURI(url))){
         //If not toss the fucker out
-        return null;
-    }
+        return {
+            type: null,
+            id: url
+        }
     //If we have link to a resource from archive.org
-    if(url.match(/^https\:\/\/archive.org\//g)){
+    }else if(match = url.match(/archive\.org\/(?:details|download)\/([a-zA-Z0-9\/._-\s\%]+)/)){
         //return internet archive code
-        return "ia";
+        return {
+            type: "ia",
+            id: match[1]
+        }
     }
-    return null;
+    //If we fell through all of our media types without a match
+    return{
+        type: null,
+        id: url
+    }
 }
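getMediaType now reports an object instead of a bare string, so callers get both the detected service and the already-extracted ID. A hedged usage sketch follows; the module filename is assumed and the inputs are example values.

//Rough illustration of the new {type, id} return shape (example values only).
const mediaUtils = require('./mediaUtils.js'); //assumed filename for the module above

(async () => {
    //archive.org details/download links resolve to the item ID plus any inner path
    console.log(await mediaUtils.getMediaType('https://archive.org/details/SomeItem/track01.mp3'));
    //-> { type: 'ia', id: 'SomeItem/track01.mp3' }

    //Anything that fails the URL check (or matches no known service) falls through with type: null
    console.log(await mediaUtils.getMediaType('not a link'));
    //-> { type: null, id: 'not a link' }
})();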

View file

@@ -0,0 +1,65 @@
+/*Canopy - The next generation of stoner streaming software
+Copyright (C) 2024-2025 Rainbownapkin and the TTN Community
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.*/
+
+//Config
+const config = require('../../../config.json');
+
+//Node Imports
+const { create: ytdlpMaker } = require('youtube-dl-exec');
+//Import ytdlp w/ custom path from config so we can force the newest build of yt-dlp from pip
+const ytdlp = ytdlpMaker(config.ytdlpPath);
+const url = require("node:url");
+const validator = require('validator');
+
+//Local Imports
+const media = require('../../app/channel/media/media.js');
+const regexUtils = require('../regexUtils.js');
+const loggerUtils = require('../loggerUtils.js')
+
+module.exports.fetchYoutubeVideoMetadata = async function(id, title){
+    const media = await fetchMetadata(`youtu.be/${id}`, title, 'yt');
+    return media;
+}
+
+//Generic YTDLP function meant to be used by service-specific fetchers which will then be used to fetch video metadata
+async function fetchMetadata(link, title, type){
+    //Create media list
+    const mediaList = [];
+    //Pull raw metadata
+    const rawMetadata = await ytdlp(link, {
+        dumpSingleJson: true,
+        format: 'b'
+    });
+    //Pull data from rawMetadata, sanitizing title to prevent XSS
+    const name = validator.escape(validator.trim(rawMetadata.title));
+    const rawLink = rawMetadata.requested_downloads[0].url;
+    const id = rawMetadata.id;
+    //If we were handed a null title
+    if(title == null || title == ''){
+        //Create new media object from file info substituting filename for title
+        mediaList.push(new media(name, name, link, id, type, Number(rawMetadata.duration), rawLink));
+    }else{
+        //Create new media object from file info
+        mediaList.push(new media(title, name, link, id, type, Number(rawMetadata.duration), rawLink));
+    }
+    //Return list of media
+    return mediaList;
+}
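How the new module is meant to be called, as a hedged sketch; the require path and video ID are placeholders, not values from the commit.

//Hedged usage sketch of the new yt-dlp backed fetcher.
const ytdlpUtil = require('./ytdlpUtils.js'); //assumed relative path

(async () => {
    //Resolves youtu.be/<id> through yt-dlp and returns a single-element media list
    const [video] = await ytdlpUtil.fetchYoutubeVideoMetadata('someVideoID', null);
    console.log(video.title, video.fileName);   //escaped title (also used as fileName when no title is given)
    console.log(video.duration, video.rawLink); //duration in seconds and the direct stream url chosen by format 'b'
})();

A natural follow-up, not in this commit, would be teaching getMediaType to recognize YouTube links and adding a 'yt' case to yankMedia's switch, which is presumably why ytdlpUtils is already imported there.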