1
0
This repository has been archived on 2023-12-27. You can view files and clone it, but cannot push or open issues or pull requests.
youtube-dl-subscriptions/dl.py

108 lines
3.9 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
import os
2015-05-11 21:22:27 -07:00
import opml
import feedparser
import youtube_dl
2018-02-07 08:43:37 -08:00
import sys
2018-09-28 14:41:33 -07:00
from pathlib import Path
import argparse
2015-05-11 21:22:27 -07:00
from time import time, mktime, strptime
from datetime import datetime
from dateutil.relativedelta import relativedelta
2018-10-08 04:39:45 -07:00
# Oldest interpreter this script supports, as a (major, minor) tuple.
MIN_PYTHON = (3, 6)

# Compare the version as a tuple. Testing `major < 3 and minor < 6` is wrong:
# it accepts every 3.x release (major is not < 3) and also accepts 2.7
# (minor 7 is not < 6), so the guard would practically never fire.
if sys.version_info < MIN_PYTHON:
    raise Exception('Must be using Python 3.6 or greater')
def parse_args(argv=None):
    """Parse the command-line options.

    Args:
        argv: Argument strings to parse. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The parsed namespace with ``output``, ``retain``, ``since`` and
        ``config`` attributes. ``retain`` and ``since`` are floats (or
        ``None``), so a non-numeric value is rejected with a usage error
        instead of a traceback later on.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would place the sentence
    # in the usage line as if it were the program name.
    parser = argparse.ArgumentParser(
        description='Download YouTube subscriptions.')
    parser.add_argument('--save-directory', '-o',
                        dest='output',
                        default=None,
                        help='The directory to which to save the videos.')
    parser.add_argument('--retain', '-c',
                        dest='retain',
                        default=None,
                        type=float,
                        help='Retain videos up to the given number of days since today.')
    parser.add_argument('--since', '-s',
                        dest='since',
                        default=None,
                        type=float,
                        help='Only download videos newer than the given number of days.')
    parser.add_argument('--config-directory', '-f',
                        dest='config',
                        default=None,
                        help='The directory to which config is saved.')
    return parser.parse_args(argv)


def utc_days_ago(now, days):
    """Return the naive UTC datetime ``days`` days before POSIX time ``now``.

    Every timestamp this script compares is expressed as naive UTC, so the
    cut-offs are built from ``utcfromtimestamp`` as well; using the local
    ``fromtimestamp`` here would skew them by the machine's UTC offset.
    """
    return datetime.utcfromtimestamp(now) - relativedelta(days=days)


def published_utc(item):
    """Return a feed entry's publication time as a naive UTC datetime.

    Args:
        item: A mapping for one feed entry; its ``published_parsed`` value is
            expected to be a time tuple (feedparser documents it as UTC).

    Returns:
        The publication time, or ``None`` when the entry carries no parsed
        publication date.
    """
    parsed = item.get('published_parsed')
    if parsed is None:
        return None
    # Build the datetime straight from the UTC fields. A round trip through
    # mktime()/fromtimestamp() reinterprets them as local time and can come
    # back shifted by an hour while daylight saving time is in effect.
    return datetime(*parsed[:6])


def remove_stale_files(directory, keep_time):
    """Delete files under ``directory`` last modified before ``keep_time``.

    Args:
        directory: Root directory that is searched recursively.
        keep_time: Naive UTC datetime; files modified earlier are removed.
    """
    for video in Path(directory).glob('**/*.*'):
        # Never delete the run-time bookkeeping file.
        if 'last.txt' in str(video):
            continue
        # The pattern also matches directories whose name contains a dot;
        # unlink() would fail on those.
        if not video.is_file():
            continue
        modified_time = datetime.utcfromtimestamp(os.path.getmtime(video))
        if modified_time < keep_time:
            print(f'Removing {str(video)}.')
            video.unlink()


def find_new_videos(urls, threshold_time):
    """Collect links of feed entries published after ``threshold_time``.

    Args:
        urls: Feed URLs, one per subscribed channel.
        threshold_time: Naive UTC datetime; only later entries are returned.

    Returns:
        A list of video links in feed order.
    """
    videos = []
    for i, url in enumerate(urls):
        print(f'Parsing through channel {i + 1} of {len(urls)}', end='\r')
        feed = feedparser.parse(url)
        for item in feed['items']:
            video_time = published_utc(item)
            # Entries without a usable date are skipped rather than aborting
            # the whole run.
            if video_time is not None and video_time > threshold_time:
                videos.append(item['link'])
    # Blank out the progress line.
    print(' ' * 100, end='\r')
    return videos


def main(argv=None):
    """Download videos published since the last run.

    The first run (no ``last.txt`` yet) only records the current time and
    stops; later runs download every subscription video newer than the
    recorded time (or newer than ``--since`` days) and then update the record.

    Args:
        argv: Argument strings; ``None`` means ``sys.argv[1:]``.

    Returns:
        The process exit status: ``0`` on success, ``1`` when no output
        directory was given.
    """
    args = parse_args(argv)

    # Stop right away without an output directory: continuing would run the
    # --retain clean-up in whatever directory the script was started from.
    if args.output is None:
        print('Must specify an ouput directory with -o')
        return 1

    # The current run time.
    script_time = time()

    # Resolve every path before changing directory so that a relative
    # --config-directory refers to the same place for both files.
    output_dir = Path(args.output).absolute()
    if args.config is not None:
        config_dir = Path(args.config).absolute()
        subs_path = config_dir / 'subs.xml'
        last_path = config_dir / 'last.txt'
    else:
        # Without a config directory, subs.xml is read from the launch
        # directory and last.txt lives next to the downloaded videos.
        subs_path = Path('subs.xml').absolute()
        last_path = output_dir / 'last.txt'

    outlines = opml.parse(str(subs_path))
    os.chdir(output_dir)

    if not last_path.exists():
        with open(last_path, 'w') as f:
            f.write(str(time()))
        print('Initialized a last.txt file with current timestamp.')
        return 0

    with open(last_path, 'r') as f:
        # The last run time.
        threshold_time = datetime.utcfromtimestamp(float(f.read()))

    # Overrule the time from which to download video if we've been asked to
    # keep videos since a certain number of days ago.
    if args.since is not None:
        threshold_time = utc_days_ago(script_time, args.since)

    if args.retain is not None:
        # Remove the files in the output directory that are older than the
        # retention window.
        remove_stale_files(Path('.'), utc_days_ago(script_time, args.retain))

    urls = [outline.xmlUrl for outline in outlines[0]]
    videos = find_new_videos(urls, threshold_time)

    if not videos:
        print('Sorry, no new video found')
        return 0
    print(f'{len(videos)} new videos found')

    ydl_opts = {
        'ignoreerrors': True,
        'quiet': True,
        'outtmpl': (output_dir / Path('%(uploader)s', '%(title)s.%(ext)s')).as_posix(),
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(videos)

    # Record this run's start time only after the downloads were attempted.
    with open(last_path, 'w') as f:
        f.write(str(script_time))
    return 0


if __name__ == '__main__':
    sys.exit(main())