1
0
This repository has been archived on 2023-12-27. You can view files and clone it, but cannot push or open issues or pull requests.
youtube-dl-subscriptions/dl.py
Tony Grosinger ea994e3d9f
Move subs.xml and last.txt into configs dir
New runtime option allows specifying an optional config directory in
which the two required configuration files will be stored. It is useful
to make this directory a mounted volume when running in a docker
container.
2019-11-07 06:45:49 -08:00

108 lines
3.9 KiB
Python

#!/usr/bin/env python3
import argparse
import calendar
import os
import sys
from datetime import datetime
from pathlib import Path
from time import time, mktime, strptime

import feedparser
import opml
import youtube_dl
from dateutil.relativedelta import relativedelta
# Refuse to run on interpreters older than 3.6.  Comparing the version tuple
# directly handles every (major, minor) combination; checking the two fields
# separately with `and` let 3.0-3.5 (and 2.6+) slip through.
if sys.version_info < (3, 6):
    raise Exception('Must be using Python 3.6 or greater')
_SECONDS_PER_DAY = 24 * 60 * 60


def _config_file(config_dir, name):
    """Return the path of configuration file *name*.

    When *config_dir* is given the result is absolute, so it keeps pointing
    at the same file after the working directory changes.  Without a config
    directory the bare relative name is returned and resolves against
    whatever the working directory is when the path is used.
    """
    if config_dir is None:
        return Path(name)
    return Path(config_dir).absolute() / name


def _days_ago(now, days):
    """Return the epoch timestamp *days* (possibly fractional) before *now*."""
    return now - float(days) * _SECONDS_PER_DAY


def _published_epoch(item):
    """Return a feed entry's publication time in epoch seconds, or None.

    feedparser documents its ``*_parsed`` dates as UTC time tuples, so
    ``calendar.timegm`` (the UTC inverse of ``time.gmtime``) is used rather
    than ``time.mktime``, which would interpret the tuple as local time.
    """
    published = item.get('published_parsed')
    if published is None:
        return None
    return calendar.timegm(published)


def _read_threshold(last_path, now):
    """Return the epoch time of the previous run stored in *last_path*.

    If the file does not exist yet it is created holding *now*, and *now* is
    returned so the caller always has a threshold to compare against.
    """
    if not last_path.exists():
        last_path.write_text(str(now))
        print('Initialized a last.txt file with current timestamp.')
        return now
    return float(last_path.read_text())


def _prune_old_files(root, keep_after, protected):
    """Delete files under *root* last modified before epoch *keep_after*.

    Paths containing ``last.txt`` and any file whose resolved path is in
    *protected* are never removed.  Directories matched by the glob are
    skipped because they cannot be unlinked.
    """
    # Materialise the listing first so deletions do not disturb the walk.
    for candidate in list(root.glob('**/*.*')):
        if not candidate.is_file():
            continue
        if 'last.txt' in str(candidate) or candidate.resolve() in protected:
            continue
        if candidate.stat().st_mtime < keep_after:
            print(f'Removing {str(candidate)}.')
            candidate.unlink()


def _collect_new_videos(urls, threshold):
    """Return the links of all feed entries published after epoch *threshold*."""
    links = []
    for index, url in enumerate(urls, start=1):
        print(f'Parsing through channel {index} of {len(urls)}', end='\r')
        feed = feedparser.parse(url)
        for item in feed['items']:
            published = _published_epoch(item)
            # Entries without a parsed publication date cannot be compared.
            if published is not None and published > threshold:
                links.append(item['link'])
    # Blank out the progress line.
    print(' ' * 100, end='\r')
    return links


def main(argv=None):
    """Download videos published since the last run for every subscription.

    *argv* is the list of command-line arguments; ``None`` means use
    ``sys.argv[1:]``.  Exits with status 2 when no output directory is given.
    """
    parser = argparse.ArgumentParser('Download YouTube subscriptions.')
    parser.add_argument('--save-directory', '-o',
                        dest='output',
                        default=None,
                        help='The directory to which to save the videos.')
    parser.add_argument('--retain', '-c',
                        dest='retain',
                        default=None,
                        help='Retain videos up to the given number of days since today.')
    parser.add_argument('--since', '-s',
                        dest='since',
                        default=None,
                        help='Only download videos newer than the given number of days.')
    parser.add_argument('--config-directory', '-f',
                        dest='config',
                        default=None,
                        help='The directory to which config is saved.')
    args = parser.parse_args(argv)

    # Everything below needs the output directory, so fail before doing work.
    if args.output is None:
        parser.error('Must specify an output directory with -o')

    # The current run time, in epoch seconds.
    script_time = time()

    # Both paths are computed before chdir so a relative config directory is
    # anchored to the directory the script was started from.
    subs_path = _config_file(args.config, 'subs.xml').absolute()
    last_path = _config_file(args.config, 'last.txt')
    outlines = opml.parse(str(subs_path))

    output_dir = Path(args.output).absolute()
    os.chdir(output_dir)
    # Without a config directory last.txt lives in the output directory, so
    # it is only made absolute after the chdir.
    last_path = last_path.absolute()

    threshold = _read_threshold(last_path, script_time)

    # Overrule the time from which to download videos if we've been asked to
    # fetch everything since a certain number of days ago.
    if args.since is not None:
        threshold = _days_ago(script_time, args.since)

    if args.retain is not None:
        # Remove files in the output directory older than the retention window.
        _prune_old_files(Path('.'),
                         _days_ago(script_time, args.retain),
                         {subs_path.resolve(), last_path.resolve()})

    urls = [outline.xmlUrl for outline in outlines[0]]
    videos = _collect_new_videos(urls, threshold)

    if not videos:
        print('Sorry, no new video found')
        sys.exit()
    print(f'{len(videos)} new videos found')

    ydl_opts = {
        'ignoreerrors': True,
        'quiet': True,
        'outtmpl': (output_dir / Path('%(uploader)s', '%(title)s.%(ext)s')).as_posix(),
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(videos)

    # Record this run's start time as the threshold for the next run.
    last_path.write_text(str(script_time))


if __name__ == '__main__':
    main()