Merge pull request #1 from Michael-K-Stein/dev

Merge dev into master
2023-01-05 15:41:39 +02:00 · 2023-01-05 15:41:39 +02:00 · 205b062bb1
commit 205b062bb1
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,4 @@
+__pycache__/
+music/
+*.mp3
+*.json
--- a/README.md
+++ b/README.md
@ -0,0 +1,36 @@
+# SpotiFile
+## A simple and open source spotify scraper.
+
+---
+
+## What?
+SpotiFile is a script that lets users simply and easily scrape Spotify playlists, albums, artists, etc. through a web GUI.
+More advanced use is possible by importing the relevant classes (e.g. `from spotify_scraper import SpotifyScraper`) and then using IPython to access specific Spotify API features.
+### Advantages
+The main advantage of using SpotiFile is that it completely circumvents all of Spotify's API call limits and restrictions. SpotiFile offers an API to communicate with Spotify's API as if it were a real user.
+This allows SpotiFile to download information en-masse quickly.
+
+---
+
+## Why?
+Downloading massive amounts of songs and metadata can help if you prefer listening to music offline, or if you are designing a music server which runs on an air-gapped network.
+*We do not encourage music piracy or condone any illegal activity. SpotiFile is a useful research tool. Usage of SpotiFile for other purposes is at the user's own risk.*
+
+---
+
+## How?
+SpotiFile starts its life by authenticating as a normal Spotify user, and then performs a wide range of conventional and unconventional API calls to Spotify in order to retrieve relevant information.
+
+---
+
+## Features
+ Authenticating as a legitimate Spotify user.
+ Scraping tracks from a playlist.
+ Scraping tracks from an album.
+ Scraping tracks from an artist.
+ Scraping a track from a track url.
+ Scraping artist images.
+ Scraping popular playlists' metadata and tracks.
+ Premium user token snatching (experimental).
+ Scraping song lyrics (time synced when possible).
+ Scraping track metadata.
--- a/config.py
+++ b/config.py
@ -25,9 +25,10 @@ FULL_DOWNLOAD_RECURISVE_LIMIT = 0x4000
 FULL_DOWNLOAD_THREAD_LIMIT = 50
 VERBOSE_OUTPUTS = False

-DEFAULT_DOWNLOAD_DIRECTORY = 'music_05-01-2023'
+DEFAULT_DOWNLOAD_DIRECTORY = 'music'
 ARTIST_IMAGES_SUB_DIR = '_Artists'
-GLOBALS_SAVE_FILE = '_downloaded_store.pkl'
+PLAYLIST_METADATA_SUB_DIR = '_Playlists'
+GLOBALS_SAVE_FILE = '_downloaded_store.json'

 def clean_file_path(prompt: str):
    return prompt.replace('?', '').replace('"', '').replace('*', '').replace('|', '').replace('\\', '').replace(':', '').replace('>', '').replace('<', '')
--- a/main.py
+++ b/main.py
@ -3,15 +3,16 @@ from webgui import app
 import spotify_mass_download
 from spotify_mass_download import full_download, save_globals_save_file
 from threading import Thread
+import webbrowser

 def main():
-    print(f'Spotify Fuzzer')
-    print('\n\n\n')
-
+    print(f'=== SpotiFile ===')
    spotify_mass_download.g_keep_saving += 1

    save_globals_save_file_thread = Thread(target=save_globals_save_file)
    save_globals_save_file_thread.start()
+
+    webbrowser.open('http://127.0.0.1:8888/')
    app.run(host='127.0.0.1', port=8888, debug=False)

    spotify_mass_download.g_keep_saving -= 1
--- a/requirements.txt
+++ b/requirements.txt
@ -1,2 +1,5 @@
 cryptography
-py-deezer
+py-deezer
+flask
+lxml
+eyed3
--- a/spotify_client.py
+++ b/spotify_client.py
@ -98,6 +98,8 @@ class SpotifyClient:

            response_json = session.get('https://api.spotify.com/v1/me', verify=self._verify_ssl).json()
        self.user_data = response_json
+        if not 'product' in self.user_data:
+            raise Exception('Spotify client keys are invalid.')
        if self.user_data['product'] == 'premium':
            raise Exception('THIS USER IS PREMIUM!')
        return response_json
--- a/spotify_mass_download.py
+++ b/spotify_mass_download.py
@ -6,6 +6,7 @@ from config import *
 import base64
 from time import sleep
 from datetime import datetime
+import random

 client = SpotifyClient(sp_key=SP_KEY, sp_dc=SP_DC)
 client.get_me()
@ -144,14 +145,25 @@ def full_download(download_dir: str, identifier: str, recursive_artist: bool=Fal
    g_keep_saving -= 1


-def download_all_categories_playlists():
+def download_all_categories_playlists(download_meta_data_only=True):
    client.refresh_tokens()
-    os.makedirs(f'{DEFAULT_DOWNLOAD_DIRECTORY}/_Playlists/', exist_ok=True)
+    os.makedirs(f'{DEFAULT_DOWNLOAD_DIRECTORY}/{PLAYLIST_METADATA_SUB_DIR}/', exist_ok=True)
+    console.log(f'Scraping playlists from all categories')
    category_ids = scraper.get_categories_ids()
-    for category_id in category_ids:
-        playlist_ids = scraper.get_category_playlist_ids(category_id)
-        for playlist_id in playlist_ids:
-            playlist = scraper.get_playlist(playlist_id)
-            with open(f'{DEFAULT_DOWNLOAD_DIRECTORY}/_Playlists/{playlist.spotify_id}.playlist', 'w') as f:
-                f.write(playlist.export())
-            full_download(f'{DEFAULT_DOWNLOAD_DIRECTORY}', identifier=playlist.href)
+    random.shuffle(category_ids)
+    for category_index, category_id in enumerate(category_ids):
+        console.log(f'Scraping playlists from category {category_id} ({category_index + 1}/{len(category_ids)})')
+        try:
+            playlist_ids = scraper.get_category_playlist_ids(category_id)
+            for playlist_index, playlist_id in enumerate(playlist_ids):
+                console.log(f'Scraping playlist data from playlist {playlist_id} ({playlist_index + 1}/{len(playlist_ids)}) from category {category_id} ({category_index + 1}/{len(category_ids)})')
+                try:
+                    playlist = scraper.get_playlist(playlist_id)
+                    with open(f'{DEFAULT_DOWNLOAD_DIRECTORY}/{PLAYLIST_METADATA_SUB_DIR}/{playlist.spotify_id}.playlist', 'w') as f:
+                        f.write(playlist.export())
+                    if not download_meta_data_only:
+                        full_download(f'{DEFAULT_DOWNLOAD_DIRECTORY}', identifier=playlist.href)
+                except Exception as ex:
+                    console.error(f'Scraping categories exception: {ex}')
+        except Exception as ex:
+                    console.error(f'Scraping categories exception: {ex}')
--- a/spotify_scraper.py
+++ b/spotify_scraper.py
@ -43,6 +43,8 @@ class SpotifyScraper:
            return self.scrape_album_tracks(self.extract_id_from_link(link))
        elif id_type == self.IDTypes.Artist:
            return self.scrape_artist_tracks(self.extract_id_from_link(link), intense=True, console=console)
+        elif id_type == self.IDTypes.Track:
+            return [SpotifyTrack(self.get(f'https://api.spotify.com/v1/tracks/{self.extract_id_from_link(link)}').json())]

    def scrape_playlist(self, playlist_id: str):
        return self._client.get(f'https://api.spotify.com/v1/playlists/{playlist_id}').json()