2023-01-05 12:31:44 +00:00
from threading import Thread , get_ident
import pickle
from spotify_client import SpotifyClient
from spotify_scraper import SpotifyScraper
from config import *
import base64
from time import sleep
from datetime import datetime
2023-01-05 13:36:57 +00:00
import random
2023-01-05 12:31:44 +00:00
# Authenticated Spotify session; SP_KEY / SP_DC come from config (cookie-based auth).
client = SpotifyClient(sp_key=SP_KEY, sp_dc=SP_DC)
client.get_me()  # sanity-check the session by fetching the current user's profile
scraper = SpotifyScraper(client=client)

# Cross-thread progress state: spotify_ids already processed.  Appended to from
# multiple worker threads without a lock -- list.append is atomic under the GIL,
# but NOTE(review): confirm no compound read-modify-write races are added later.
g_downloaded_artist_covers = []
g_downloaded_songs = []
# Reference count of active full_download() calls; save_globals_save_file()'s
# periodic save loop keeps running while this is > 0.
g_keep_saving = 0
class Console:
    """Thread-shared, colour-tagged in-memory log buffer.

    Each entry is a dict with 'time', 'value' and 'color' keys; 'color' is a
    CSS colour string consumed by the front-end rendering get()'s output.
    """

    def __init__(self):
        # Instance attribute rather than a class-level mutable: avoids the
        # shared-state pitfall should a second Console ever be created.
        self.console_output = []

    def log(self, value: str):
        """Record a neutral message (front-end default colour)."""
        self.cout(value, 'inherit')

    def error(self, value: str):
        """Record an error message (red)."""
        self.cout(value, 'rgba(255,30,30,0.9)')

    def info(self, value: str):
        """Record an informational message (cyan)."""
        self.cout(value, 'rgba(30,255,255,0.9)')

    def happy(self, value: str):
        """Record a success message (green)."""
        self.cout(value, 'rgba(30,255,30,0.9)')

    def cout(self, value: str, color: str):
        """Append a timestamped entry with an explicit colour string."""
        self.console_output.append(
            {
                'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                'value': value,
                'color': color,
            }
        )

    def get(self):
        """Return all log entries, oldest first."""
        return self.console_output


# Module-wide singleton used by every function below.
console = Console()
def download_track_list(download_dir: str, track_list: list, recursive_artist: bool = False, recursive_album: bool = False, recursive: bool = False, recursive_limit=1024):
    """Worker body: download every track in track_list into download_dir.

    May grow track_list in place while iterating (recursive album/artist
    scraping), bounded by recursive_limit.  Progress is shared across worker
    threads through the module-level g_downloaded_songs /
    g_downloaded_artist_covers lists.
    """
    global g_downloaded_songs, g_downloaded_artist_covers
    # Zero-padded thread id, used only to tag log lines.
    my_thread_id = str(get_ident()).zfill(6)
    artist_images_download_dir = f'{download_dir}/{settings.ARTIST_IMAGES_SUB_DIR}'
    downloaded_count = 0
    for track in track_list:
        try:
            # Refresh the Spotify session every 20 processed tracks so a
            # long-running worker does not hit token expiry mid-batch.
            if downloaded_count % 20 == 0:
                client.refresh_tokens()
            if track.spotify_id in g_downloaded_songs:
                console.info(f'Thread<{my_thread_id}> | Skipping already downloaded song: {track.title}')
                downloaded_count += 1
                continue
            # NOTE(review): no '/' between download_dir and the artist folder --
            # presumably download_dir already ends with one; confirm at call sites.
            track_path = f'{download_dir}{clean_file_path(track.artists[0].name)}/{clean_file_path(track.album.title)}'
            track.download_to_file(scraper, track_path)
            console.happy(f'Thread<{my_thread_id}> | Downloaded: {track.preview_title()}')
            g_downloaded_songs.append(track.spotify_id)
            # Recursive-album mode: append the album's remaining tracks to the
            # very list being iterated (appending only extends the iteration).
            if (recursive_album or recursive) and len(track_list) < recursive_limit:
                new_tracks = list(scraper.scrape_album_tracks(track.album.spotify_id))
                for new_track in new_tracks:
                    if new_track not in track_list and len(track_list) < recursive_limit:
                        track_list.append(new_track)
                console.log(f'Thread<{my_thread_id}> | Scraped {len(new_tracks)} new songs through recursive album!')
            for artist in track.artists:
                if artist.spotify_id not in g_downloaded_artist_covers:
                    try:
                        # Save the artist cover under a base64-encoded file
                        # name to sidestep unsafe filesystem characters.
                        artist_image = artist.download_image(scraper)
                        artist_name = base64.b64encode(artist.name.encode()).decode()
                        with open(f'{artist_images_download_dir}/{artist_name}.jpg', 'wb') as f:
                            f.write(artist_image)
                    except Exception as ex:
                        console.error(str(ex))
                    # Mark the artist handled even if the image failed, so we
                    # don't retry it on every subsequent track.
                    g_downloaded_artist_covers.append(artist.spotify_id)
                    # Recursive-artist mode: pull the artist's tracks (and, for
                    # recursive_artist, their full albums) into the work list.
                    # NOTE(review): the limit is checked only before the bulk
                    # appends, so track_list can overshoot recursive_limit here
                    # -- confirm this is intended.
                    if (recursive_artist or recursive) and len(track_list) < recursive_limit:
                        old_size = len(track_list)
                        track_list += list(scraper.scrape_artist_tracks(artist.spotify_id))
                        if recursive_artist:
                            albums = list(scraper.scrape_artist_albums(artist.spotify_id))
                            for album in albums:
                                track_list += list(scraper.scrape_album_tracks(album['id']))
                        console.log(f'Thread<{my_thread_id}> | Scraped {len(track_list) - old_size} new songs through recursive artist!')
        except Exception as ex:
            # Best-effort per-track: log and move on to the next track.
            console.error(f'Thread<{my_thread_id}> | Exception: {ex}')
        downloaded_count += 1
        if settings.VERBOSE_OUTPUTS:
            console.log(f'Thread<{my_thread_id}> | Processed {downloaded_count}/{len(track_list)}')
def save_globals_save_file():
    """Background saver: restore persisted progress, then persist it periodically.

    On entry, loads g_downloaded_songs / g_downloaded_artist_covers from
    settings.GLOBALS_SAVE_FILE.  If that file exists but cannot be parsed, the
    process exits rather than risk overwriting it with empty state.  It then
    rewrites the file every 15 seconds for as long as g_keep_saving > 0
    (i.e. while at least one full_download() is in flight).
    """
    global g_keep_saving, g_downloaded_artist_covers, g_downloaded_songs
    try:
        with open(settings.GLOBALS_SAVE_FILE, 'r') as f:
            data = json.loads(f.read())
        # Each value is itself a JSON-encoded string inside the outer JSON
        # object, hence the double decode.
        g_downloaded_songs = json.loads(data['songs'])
        g_downloaded_artist_covers = json.loads(data['artists'])
        console.log(f'Loaded {len(g_downloaded_songs)} songs & {len(g_downloaded_artist_covers)} artists')
    except Exception as ex:
        console.error(f'Failed to load globals save file! Exception: {ex}')
        # A missing file is normal on first run; an existing-but-unreadable
        # file means we would clobber real progress in the loop below -- bail.
        if os.path.exists(settings.GLOBALS_SAVE_FILE):
            console.error('To avoid data loss, SpotiFile will now exit.')
            exit(1)
    while g_keep_saving > 0:
        with open(settings.GLOBALS_SAVE_FILE, 'w') as f:
            data = {
                'songs': json.dumps(g_downloaded_songs),
                'artists': json.dumps(g_downloaded_artist_covers),
            }
            f.write(json.dumps(data))
        if settings.VERBOSE_OUTPUTS:
            console.log('Saved globals file!')
        sleep(15)
def full_download(download_dir: str, identifier: str, recursive_artist: bool = False, recursive_album: bool = False, recursive: bool = False, recursive_limit: int = 1024, thread_count: int = 5):
    """Scrape all tracks behind `identifier` and download them across worker threads.

    Tracks are streamed from the scraper; once one thread's share of the
    recursion limit has accumulated, a worker is started early with a copy of
    the buffer, and a final worker handles the complete list.  Blocks until
    all workers finish.  Exceptions are logged, never raised.
    """
    global g_downloaded_songs, g_downloaded_artist_covers, g_keep_saving
    try:
        artist_images_download_dir = f'{download_dir}/{settings.ARTIST_IMAGES_SUB_DIR}'
        os.makedirs(artist_images_download_dir, exist_ok=True)
        os.makedirs('temp', exist_ok=True)
        g_keep_saving += 1
        try:
            client.refresh_tokens()
            console.log(f'Received scrape command on identifier: {identifier}, {recursive=}, {recursive_artist=}, {recursive_album=}, {recursive_limit=}, {thread_count=}')
            download_threads = []
            track_list = []
            for track in scraper.scrape_tracks(identifier, console=console):
                track_list.append(track)
                # Start an early worker once one thread's share of the limit is
                # buffered.  Floor division is required: the original compared
                # against `recursive_limit / thread_count` (a float, e.g.
                # 204.8), which an int length can never equal, so the early
                # worker never started.
                if len(track_list) == recursive_limit // thread_count:
                    download_threads.append(Thread(target=download_track_list, args=(download_dir, list(track_list), recursive_artist, recursive_album, recursive, recursive_limit)))
                    download_threads[-1].start()
                    sleep(0.05)
            # Final worker gets the full list; duplicates with the early batch
            # are skipped via g_downloaded_songs inside download_track_list.
            download_threads.append(Thread(target=download_track_list, args=(download_dir, list(track_list), recursive_artist, recursive_album, recursive, recursive_limit)))
            download_threads[-1].start()
            for worker in download_threads:
                worker.join()
            console.log(f'Completely done scraping identifier: {identifier}!')
        finally:
            # Always release the saver refcount -- the original decremented it
            # only on success, so one failure left save_globals_save_file()
            # looping forever.
            g_keep_saving -= 1
    except Exception as ex:
        console.error(f'Full download exception: {ex}')
2023-01-05 12:31:44 +00:00
2023-01-14 16:48:46 +00:00
def download_category_playlists(category_id, category_index, category_ids, download_meta_data_only):
    """Scrape every playlist of one category (in random order); optionally
    download its tracks as well when download_meta_data_only is falsy."""
    playlist_ids = scraper.get_category_playlist_ids(category_id)
    random.shuffle(playlist_ids)
    total = len(playlist_ids)
    for playlist_index, playlist_id in enumerate(playlist_ids):
        console.log(f'Scraping playlist data from playlist {playlist_id} ({playlist_index + 1}/{total}) from category {category_id} ({category_index + 1}/{len(category_ids)})')
        try:
            playlist = scraper.get_playlist(playlist_id)
            playlist.export_to_file()
            if not download_meta_data_only:
                full_download(f'{settings.DEFAULT_DOWNLOAD_DIRECTORY}', identifier=playlist.href, thread_count=15)
        except Exception as ex:
            # Per-playlist best effort: log and continue with the next one.
            console.error(f'Scraping categories exception: {ex}')
def download_all_categories_playlists(download_meta_data_only=True, query: str = ''):
    """Spawn one worker thread per matching category and wait for all of them.

    Categories are fetched for `query`, shuffled, and each is handed to
    download_category_playlists on its own thread.
    """
    client.refresh_tokens()
    os.makedirs(f'{settings.DEFAULT_DOWNLOAD_DIRECTORY}/{settings.PLAYLIST_METADATA_SUB_DIR}/', exist_ok=True)
    console.log(f'Scraping playlists from "{query}" categories')
    categories = scraper.get_categories_full(query=query)
    random.shuffle(categories)
    workers = []
    for index, category in enumerate(categories):
        console.log(f'Scraping playlists from category {category.name} ({index + 1}/{len(categories)})')
        try:
            worker = Thread(target=download_category_playlists, args=(category.spotify_id, index, categories, download_meta_data_only))
            worker.start()
            workers.append(worker)
        except Exception as ex:
            console.error(f'Scraping categories exception: {ex}')
    # Block until every category worker has finished.
    for worker in workers:
        worker.join()