2023-01-05 12:31:44 +00:00
from threading import Thread , get_ident
import pickle
from spotify_client import SpotifyClient
from spotify_scraper import SpotifyScraper
from config import *
import base64
from time import sleep
from datetime import datetime
2023-01-05 13:36:57 +00:00
import random
2023-01-05 12:31:44 +00:00
# Authenticated Spotify session; SP_KEY / SP_DC come from config (cookie-based auth).
client = SpotifyClient(sp_key=SP_KEY, sp_dc=SP_DC)
client.get_me()  # sanity-check the session by fetching the current user's profile
scraper = SpotifyScraper(client=client)

# Cross-thread progress state: spotify_ids already processed.  Appended to from
# multiple worker threads without a lock -- list.append is atomic under the GIL,
# but NOTE(review): confirm no compound read-modify-write races are added later.
g_downloaded_artist_covers = []
g_downloaded_songs = []
# Reference count of active full_download() calls; save_globals_save_file()'s
# periodic save loop keeps running while this is > 0.
g_keep_saving = 0
class Console:
    """Thread-shared, colour-tagged in-memory log buffer.

    Each entry is a dict with 'time', 'value' and 'color' keys; 'color' is a
    CSS colour string consumed by the front-end rendering get()'s output.
    """

    def __init__(self):
        # Instance attribute rather than a class-level mutable: avoids the
        # shared-state pitfall should a second Console ever be created.
        self.console_output = []

    def log(self, value: str):
        """Record a neutral message (front-end default colour)."""
        self.cout(value, 'inherit')

    def error(self, value: str):
        """Record an error message (red)."""
        self.cout(value, 'rgba(255,30,30,0.9)')

    def info(self, value: str):
        """Record an informational message (cyan)."""
        self.cout(value, 'rgba(30,255,255,0.9)')

    def happy(self, value: str):
        """Record a success message (green)."""
        self.cout(value, 'rgba(30,255,30,0.9)')

    def cout(self, value: str, color: str):
        """Append a timestamped entry with an explicit colour string."""
        self.console_output.append(
            {
                'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                'value': value,
                'color': color,
            }
        )

    def get(self):
        """Return all log entries, oldest first."""
        return self.console_output


# Module-wide singleton used by every function below.
console = Console()
def download_track_list(download_dir: str, track_list: list, recursive_artist: bool = False, recursive_album: bool = False, recursive: bool = False, recursive_limit=1024):
    """Worker body: download every track in track_list into download_dir.

    May grow track_list in place while iterating (recursive album/artist
    scraping), bounded by recursive_limit.  Progress is shared across worker
    threads through the module-level g_downloaded_songs /
    g_downloaded_artist_covers lists.
    """
    global g_downloaded_songs, g_downloaded_artist_covers
    # Zero-padded thread id, used only to tag log lines.
    my_thread_id = str(get_ident()).zfill(6)
    artist_images_download_dir = f'{download_dir}/{settings.ARTIST_IMAGES_SUB_DIR}'
    downloaded_count = 0
    for track in track_list:
        try:
            # Refresh the Spotify session every 20 processed tracks so a
            # long-running worker does not hit token expiry mid-batch.
            if downloaded_count % 20 == 0:
                client.refresh_tokens()
            if track.spotify_id in g_downloaded_songs:
                console.info(f'Thread<{my_thread_id}> | Skipping already downloaded song: {track.title}')
                downloaded_count += 1
                continue
            # NOTE(review): no '/' between download_dir and the artist folder --
            # presumably download_dir already ends with one; confirm at call sites.
            track_path = f'{download_dir}{clean_file_path(track.artists[0].name)}/{clean_file_path(track.album.title)}'
            track.download_to_file(scraper, track_path)
            console.happy(f'Thread<{my_thread_id}> | Downloaded: {track.preview_title()}')
            g_downloaded_songs.append(track.spotify_id)
            # Recursive-album mode: append the album's remaining tracks to the
            # very list being iterated (appending only extends the iteration).
            if (recursive_album or recursive) and len(track_list) < recursive_limit:
                new_tracks = list(scraper.scrape_album_tracks(track.album.spotify_id))
                for new_track in new_tracks:
                    if new_track not in track_list and len(track_list) < recursive_limit:
                        track_list.append(new_track)
                console.log(f'Thread<{my_thread_id}> | Scraped {len(new_tracks)} new songs through recursive album!')
            for artist in track.artists:
                if artist.spotify_id not in g_downloaded_artist_covers:
                    try:
                        # Save the artist cover under a base64-encoded file
                        # name to sidestep unsafe filesystem characters.
                        artist_image = artist.download_image(scraper)
                        artist_name = base64.b64encode(artist.name.encode()).decode()
                        with open(f'{artist_images_download_dir}/{artist_name}.jpg', 'wb') as f:
                            f.write(artist_image)
                    except Exception as ex:
                        console.error(str(ex))
                    # Mark the artist handled even if the image failed, so we
                    # don't retry it on every subsequent track.
                    g_downloaded_artist_covers.append(artist.spotify_id)
                    # Recursive-artist mode: pull the artist's tracks (and, for
                    # recursive_artist, their full albums) into the work list.
                    # NOTE(review): the limit is checked only before the bulk
                    # appends, so track_list can overshoot recursive_limit here
                    # -- confirm this is intended.
                    if (recursive_artist or recursive) and len(track_list) < recursive_limit:
                        old_size = len(track_list)
                        track_list += list(scraper.scrape_artist_tracks(artist.spotify_id))
                        if recursive_artist:
                            albums = list(scraper.scrape_artist_albums(artist.spotify_id))
                            for album in albums:
                                track_list += list(scraper.scrape_album_tracks(album['id']))
                        console.log(f'Thread<{my_thread_id}> | Scraped {len(track_list) - old_size} new songs through recursive artist!')
        except Exception as ex:
            # Best-effort per-track: log and move on to the next track.
            console.error(f'Thread<{my_thread_id}> | Exception: {ex}')
        downloaded_count += 1
        if settings.VERBOSE_OUTPUTS:
            console.log(f'Thread<{my_thread_id}> | Processed {downloaded_count}/{len(track_list)}')
def save_globals_save_file():
    """Background saver: restore persisted progress, then persist it periodically.

    On entry, loads g_downloaded_songs / g_downloaded_artist_covers from
    settings.GLOBALS_SAVE_FILE.  If that file exists but cannot be parsed, the
    process exits rather than risk overwriting it with empty state.  It then
    rewrites the file every 15 seconds for as long as g_keep_saving > 0
    (i.e. while at least one full_download() is in flight).
    """
    global g_keep_saving, g_downloaded_artist_covers, g_downloaded_songs
    try:
        with open(settings.GLOBALS_SAVE_FILE, 'r') as f:
            data = json.loads(f.read())
        # Each value is itself a JSON-encoded string inside the outer JSON
        # object, hence the double decode.
        g_downloaded_songs = json.loads(data['songs'])
        g_downloaded_artist_covers = json.loads(data['artists'])
        console.log(f'Loaded {len(g_downloaded_songs)} songs & {len(g_downloaded_artist_covers)} artists')
    except Exception as ex:
        console.error(f'Failed to load globals save file! Exception: {ex}')
        # A missing file is normal on first run; an existing-but-unreadable
        # file means we would clobber real progress in the loop below -- bail.
        if os.path.exists(settings.GLOBALS_SAVE_FILE):
            console.error('To avoid data loss, SpotiFile will now exit.')
            exit(1)
    while g_keep_saving > 0:
        with open(settings.GLOBALS_SAVE_FILE, 'w') as f:
            data = {
                'songs': json.dumps(g_downloaded_songs),
                'artists': json.dumps(g_downloaded_artist_covers),
            }
            f.write(json.dumps(data))
        if settings.VERBOSE_OUTPUTS:
            console.log('Saved globals file!')
        sleep(15)
def full_download(download_dir: str, identifier: str, recursive_artist: bool = False, recursive_album: bool = False, recursive: bool = False, recursive_limit: int = 1024, thread_count: int = 5):
    """Scrape all tracks behind `identifier` and download them across worker threads.

    Tracks are streamed from the scraper; once one thread's share of the
    recursion limit has accumulated, a worker is started early with a copy of
    the buffer, and a final worker handles the complete list.  Blocks until
    all workers finish.  Exceptions are logged, never raised.
    """
    global g_downloaded_songs, g_downloaded_artist_covers, g_keep_saving
    try:
        artist_images_download_dir = f'{download_dir}/{settings.ARTIST_IMAGES_SUB_DIR}'
        os.makedirs(artist_images_download_dir, exist_ok=True)
        os.makedirs('temp', exist_ok=True)
        g_keep_saving += 1
        try:
            client.refresh_tokens()
            console.log(f'Received scrape command on identifier: {identifier}, {recursive=}, {recursive_artist=}, {recursive_album=}, {recursive_limit=}, {thread_count=}')
            download_threads = []
            track_list = []
            for track in scraper.scrape_tracks(identifier, console=console):
                track_list.append(track)
                # Start an early worker once one thread's share of the limit is
                # buffered.  Floor division is required: the original compared
                # against `recursive_limit / thread_count` (a float, e.g.
                # 204.8), which an int length can never equal, so the early
                # worker never started.
                if len(track_list) == recursive_limit // thread_count:
                    download_threads.append(Thread(target=download_track_list, args=(download_dir, list(track_list), recursive_artist, recursive_album, recursive, recursive_limit)))
                    download_threads[-1].start()
                    sleep(0.05)
            # Final worker gets the full list; duplicates with the early batch
            # are skipped via g_downloaded_songs inside download_track_list.
            download_threads.append(Thread(target=download_track_list, args=(download_dir, list(track_list), recursive_artist, recursive_album, recursive, recursive_limit)))
            download_threads[-1].start()
            for worker in download_threads:
                worker.join()
            console.log(f'Completely done scraping identifier: {identifier}!')
        finally:
            # Always release the saver refcount -- the original decremented it
            # only on success, so one failure left save_globals_save_file()
            # looping forever.
            g_keep_saving -= 1
    except Exception as ex:
        console.error(f'Full download exception: {ex}')
2023-01-05 12:31:44 +00:00
2023-01-14 16:48:46 +00:00
def download_category_playlists(category_id, category_index, category_ids, download_meta_data_only):
    """Scrape every playlist of one category (in random order); optionally
    download its tracks as well when download_meta_data_only is falsy."""
    playlist_ids = scraper.get_category_playlist_ids(category_id)
    random.shuffle(playlist_ids)
    total = len(playlist_ids)
    for playlist_index, playlist_id in enumerate(playlist_ids):
        console.log(f'Scraping playlist data from playlist {playlist_id} ({playlist_index + 1}/{total}) from category {category_id} ({category_index + 1}/{len(category_ids)})')
        try:
            playlist = scraper.get_playlist(playlist_id)
            playlist.export_to_file()
            if not download_meta_data_only:
                full_download(f'{settings.DEFAULT_DOWNLOAD_DIRECTORY}', identifier=playlist.href, thread_count=15)
        except Exception as ex:
            # Per-playlist best effort: log and continue with the next one.
            console.error(f'Scraping categories exception: {ex}')
def download_all_categories_playlists(download_meta_data_only=True, query: str = ''):
    """Spawn one worker thread per matching category and wait for all of them.

    Categories are fetched for `query`, shuffled, and each is handed to
    download_category_playlists on its own thread.
    """
    client.refresh_tokens()
    os.makedirs(f'{settings.DEFAULT_DOWNLOAD_DIRECTORY}/{settings.PLAYLIST_METADATA_SUB_DIR}/', exist_ok=True)
    console.log(f'Scraping playlists from "{query}" categories')
    categories = scraper.get_categories_full(query=query)
    random.shuffle(categories)
    workers = []
    for index, category in enumerate(categories):
        console.log(f'Scraping playlists from category {category.name} ({index + 1}/{len(categories)})')
        try:
            worker = Thread(target=download_category_playlists, args=(category.spotify_id, index, categories, download_meta_data_only))
            worker.start()
            workers.append(worker)
        except Exception as ex:
            console.error(f'Scraping categories exception: {ex}')
    # Block until every category worker has finished.
    for worker in workers:
        worker.join()