kopia lustrzana https://github.com/Michael-K-Stein/SpotiFile
Switch to generators for scrapers
rodzic
a4131dadf0
commit
d54485d025
|
@ -1,5 +1,6 @@
|
|||
from threading import Thread, get_ident
|
||||
import pickle
|
||||
from typing import Generator
|
||||
from spotify_client import SpotifyClient
|
||||
from spotify_scraper import SpotifyScraper
|
||||
from config import *
|
||||
|
@ -8,6 +9,7 @@ from time import sleep
|
|||
from datetime import datetime
|
||||
import random
|
||||
from utils.utils import clean_file_path
|
||||
from utils.spotify_track import SpotifyTrack
|
||||
|
||||
client = SpotifyClient(sp_key=SP_KEY, sp_dc=SP_DC)
|
||||
client.get_me()
|
||||
|
@ -48,7 +50,7 @@ class Console:
|
|||
console = Console()
|
||||
|
||||
|
||||
def download_track_list(download_dir: str, track_list: list, recursive_artist: bool=False, recursive_album: bool=False, recursive: bool=False, recursive_limit=1024):
|
||||
def download_track_list(download_dir: str, track_list: Generator[SpotifyTrack, None, None], recursive_artist: bool=False, recursive_album: bool=False, recursive: bool=False, recursive_limit=1024):
|
||||
global g_downloaded_songs, g_downloaded_artist_covers
|
||||
my_thread_id = str(get_ident()).zfill(6)
|
||||
artist_images_download_dir = f'{download_dir}/{settings.ARTIST_IMAGES_SUB_DIR}'
|
||||
|
@ -65,12 +67,8 @@ def download_track_list(download_dir: str, track_list: list, recursive_artist: b
|
|||
track.download_to_file(scraper, track_path)
|
||||
console.happy(f'Thread<{my_thread_id}> | Downloaded: {track.preview_title()}')
|
||||
g_downloaded_songs.append(track.spotify_id)
|
||||
if (recursive_album or recursive) and len(track_list) < recursive_limit:
|
||||
new_tracks = list(scraper.scrape_album_tracks(track.album.spotify_id))
|
||||
for new_track in new_tracks:
|
||||
if new_track not in track_list and len(track_list) < recursive_limit:
|
||||
track_list.append(new_track)
|
||||
console.log(f'Thread<{my_thread_id}> | Scraped {len(new_tracks)} new songs through recursive album!')
|
||||
if (recursive_album or recursive):
|
||||
download_track_list(download_dir=download_dir, track_list=scraper.scrape_album_tracks(track.album.spotify_id), recursive=False)
|
||||
|
||||
for artist in track.artists:
|
||||
if artist.spotify_id not in g_downloaded_artist_covers:
|
||||
|
@ -83,19 +81,16 @@ def download_track_list(download_dir: str, track_list: list, recursive_artist: b
|
|||
console.error(str(ex))
|
||||
g_downloaded_artist_covers.append(artist.spotify_id)
|
||||
|
||||
if (recursive_artist or recursive) and len(track_list) < recursive_limit:
|
||||
old_size = len(track_list)
|
||||
track_list += list(scraper.scrape_artist_tracks(artist.spotify_id))
|
||||
if (recursive_artist or recursive):
|
||||
download_track_list(download_dir=download_dir, track_list=scraper.scrape_artist_tracks(track.artist.spotify_id), recursive=False)
|
||||
if recursive_artist:
|
||||
albums = list(scraper.scrape_artist_albums(artist.spotify_id))
|
||||
for album in albums:
|
||||
track_list += list(scraper.scrape_album_tracks(album['id']))
|
||||
console.log(f'Thread<{my_thread_id}> | Scraped {len(track_list) - old_size} new songs through recursive artist!')
|
||||
for album in scraper.scrape_artist_albums(artist.spotify_id):
|
||||
download_track_list(download_dir=download_dir, track_list=scraper.scrape_album_tracks(album['id']), recursive=False)
|
||||
except Exception as ex:
|
||||
console.error(f'Thread<{my_thread_id}> | Exception: {ex}')
|
||||
downloaded_count += 1
|
||||
if settings.VERBOSE_OUTPUTS:
|
||||
console.log(f'Thread<{my_thread_id}> | Processed {downloaded_count} / {len(track_list)}')
|
||||
console.log(f'Thread<{my_thread_id}> | Processed {downloaded_count} tracks')
|
||||
|
||||
|
||||
def save_globals_save_file():
|
||||
|
@ -133,18 +128,7 @@ def full_download(download_dir: str, identifier: str, recursive_artist: bool=Fal
|
|||
|
||||
client.refresh_tokens()
|
||||
console.log(f'Recieved scrape command on identifier: {identifier}, {recursive=}, {recursive_artist=}, {recursive_album=}, {recursive_limit=}, {thread_count=}')
|
||||
download_threads = []
|
||||
track_list = []
|
||||
for track in scraper.scrape_tracks(identifier, console=console):
|
||||
track_list.append(track)
|
||||
if len(track_list) == recursive_limit / thread_count:
|
||||
download_threads.append(Thread(target=download_track_list, args=(download_dir, list(track_list), recursive_artist, recursive_album, recursive, recursive_limit)))
|
||||
download_threads[-1].start()
|
||||
sleep(0.05)
|
||||
download_threads.append(Thread(target=download_track_list, args=(download_dir, list(track_list), recursive_artist, recursive_album, recursive, recursive_limit)))
|
||||
download_threads[-1].start()
|
||||
|
||||
[x.join() for x in download_threads]
|
||||
download_track_list(download_dir=download_dir, track_list=scraper.scrape_tracks(identifier, console=console), recursive=recursive, recursive_album=recursive_album, recursive_artist=recursive_artist, recursive_limit=recursive_limit)
|
||||
|
||||
console.log(f'Comletely done scraping identifier: {identifier}!')
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
from enum import Enum
|
||||
from typing import Generator
|
||||
from config import *
|
||||
from utils.spotify_track import SpotifyTrack
|
||||
from utils.spotify_album import SpotifyAlbum
|
||||
from utils.spotify_playlist import SpotifyPlaylist
|
||||
from utils.spotify_category import SpotifyCategory
|
||||
from spotify_client import SpotifyClient
|
||||
|
@ -39,7 +41,7 @@ class SpotifyScraper:
|
|||
def extract_id_from_link(self, link: str) -> str:
    """Return the ID held in the last path segment of ``link``.

    Any query string (``?...``), fragment (``#...``) and trailing slashes
    are discarded first, so a share link such as
    ``https://open.spotify.com/track/<id>?si=<token>`` yields just ``<id>``.

    Args:
        link: A URL-like string whose final ``/``-separated segment is the ID.

    Returns:
        The text after the last ``/`` of the trimmed link.

    Raises:
        ValueError: If the trimmed link contains no ``/``.
    """
    # Drop everything from the first '?' and then from the first '#', so
    # neither a query string nor a fragment can end up inside the ID.
    path = link.split('?', 1)[0].split('#', 1)[0].rstrip('/')
    return path[path.rindex('/') + 1:]
|
||||
|
||||
def scrape_tracks(self, link: str, console=None) -> list[SpotifyTrack]:
|
||||
def scrape_tracks(self, link: str, console=None) -> Generator[SpotifyTrack, None, None]:
|
||||
id_type = self.identify_link_type(link)
|
||||
if id_type == self.IDTypes.Playlist:
|
||||
return self.scrape_playlist_tracks(self.extract_id_from_link(link))
|
||||
|
@ -80,7 +82,7 @@ class SpotifyScraper:
|
|||
def scrape_album(self, album_id: str):
    """Fetch the album resource for ``album_id`` and return the decoded JSON body."""
    album_url = f'https://api.spotify.com/v1/albums/{album_id}'
    response = self._client.get(album_url)
    return response.json()
|
||||
|
||||
def scrape_album_tracks(self, album_id: str):
|
||||
def scrape_album_tracks(self, album_id: str) -> Generator[SpotifyTrack, None, None]:
|
||||
limit = 50
|
||||
offset = 0
|
||||
ret = self._client.get(f'https://api.spotify.com/v1/albums/{album_id}/tracks?limit={limit}').json()
|
||||
|
@ -95,34 +97,30 @@ class SpotifyScraper:
|
|||
def scrape_artist(self, artist_id: str):
    """Fetch the top-tracks listing for ``artist_id`` and return the decoded JSON body."""
    top_tracks_url = f'https://api.spotify.com/v1/artists/{artist_id}/top-tracks?market=from_token'
    response = self.get(top_tracks_url)
    return response.json()
|
||||
|
||||
def scrape_artist_albums(self, artist_id: str) -> Generator[SpotifyAlbum, None, None]:
    """Lazily yield every album listed for ``artist_id``.

    Pages of 50 entries are requested one at a time; each raw item of a page
    is wrapped in ``SpotifyAlbum`` and yielded before the next page is
    fetched. Paging stops once a response's ``'next'`` field is ``None``.
    """
    page_size = 50
    position = 0
    while True:
        page = self.get(f'https://api.spotify.com/v1/artists/{artist_id}/albums?market=from_token&limit={page_size}&offset={position}').json()
        yield from (SpotifyAlbum(raw_album) for raw_album in page['items'])
        if page['next'] is None:
            return
        position += page_size
|
||||
|
||||
def scrape_artist_tracks(self, artist_id: str, intense:bool=False, console=None) -> Generator[SpotifyTrack, None, None]:
    """Lazily yield tracks belonging to ``artist_id``.

    The entries under ``'tracks'`` in the ``scrape_artist`` response are
    yielded first, each wrapped in ``SpotifyTrack``. When ``intense`` is
    true, every album from ``scrape_artist_albums`` is then walked and all
    tracks from ``scrape_album_tracks`` for that album are yielded as well.

    Args:
        artist_id: Artist identifier passed to the scraper helpers.
        intense: Also yield the tracks of every album of the artist.
        console: Kept in the signature for existing callers; this
            implementation does not use it.

    Yields:
        ``SpotifyTrack`` objects for the top tracks, followed (if
        ``intense``) by whatever ``scrape_album_tracks`` yields.
    """
    top_tracks = self.scrape_artist(artist_id)['tracks']
    for track_data in top_tracks:
        yield SpotifyTrack(track_data)

    if not intense:
        return

    # Albums arrive as a generator, so they are consumed one at a time
    # rather than being collected (and counted) up front.
    for album in self.scrape_artist_albums(artist_id):
        yield from self.scrape_album_tracks(album.spotify_id)
|
||||
|
||||
def get(self, url: str) -> Response:
    """Issue a GET for ``url`` through the wrapped client and hand back its response."""
    response = self._client.get(url)
    return response
|
||||
|
@ -180,7 +178,7 @@ class SpotifyScraper:
|
|||
tracks = self.scrape_playlist_tracks(playlist_id)
|
||||
return SpotifyPlaylist(spotify_id=playlist_id, tracks=tracks, data=playlist_data)
|
||||
|
||||
def scrape_user_items(self, user_id: str, limit:int=50) -> list[SpotifyTrack]:
|
||||
def scrape_user_items(self, user_id: str, limit:int=50) -> Generator[SpotifyTrack, None, None]:
|
||||
has_next = True
|
||||
user_playlists = []
|
||||
while has_next:
|
||||
|
|
Ładowanie…
Reference in New Issue