Merge pull request #1 from Michael-K-Stein/dev

Merge dev into master
dev
Michael Kuperfish Steinberg 2023-01-05 15:41:39 +02:00 zatwierdzone przez GitHub
commit 205b062bb1
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
8 zmienionych plików z 76 dodań i 15 usunięć

4
.gitignore vendored 100644
Wyświetl plik

@ -0,0 +1,4 @@
__pycache__/
music/
*.mp3
*.json

36
README.md 100644
Wyświetl plik

@ -0,0 +1,36 @@
# SpotiFile
## A simple and open source spotify scraper.
---
## What?
SpotiFile is a script which allows users to simply and easily scrape Spotify playlists, albums, artists, etc. using a web GUI.
More advanced usage is possible by importing the relevant classes (e.g. `from spotify_scraper import SpotifyScraper`) and then using IPython to access specific Spotify API features.
### Advantages
The main advantage of using SpotiFile is that it completely circumvents all of Spotify's API call limits and restrictions. SpotiFile offers an API to communicate with Spotify's API as if it were a real user.
This allows SpotiFile to download information en masse quickly.
---
## Why?
Downloading massive amounts of songs and metadata can help if you prefer listening to music offline, or if you are designing a music server which runs on an air-gapped network.
*We do not encourage music piracy or condone any illegal activity. SpotiFile is a useful research tool. Usage of SpotiFile for other purposes is at the user's own risk.*
---
## How?
SpotiFile starts its life by authenticating as a normal Spotify user, and then performs a wide range of conventional and unconventional API calls to Spotify in order to retrieve relevant information.
---
## Features
+ Authenticating as a legitimate Spotify user.
+ Scraping tracks from a playlist.
+ Scraping tracks from an album.
+ Scraping tracks from an artist.
+ Scraping a track from a track url.
+ Scraping artist images.
+ Scraping popular playlists' metadata and tracks.
+ Premium user token snatching (experimental).
+ Scraping song lyrics (time synced when possible).
+ Scraping track metadata.

Wyświetl plik

@ -25,9 +25,10 @@ FULL_DOWNLOAD_RECURISVE_LIMIT = 0x4000
# Module-level configuration constants.
# NOTE(review): DEFAULT_DOWNLOAD_DIRECTORY and GLOBALS_SAVE_FILE each appear
# twice in this view (these look like the old and new lines of a diff). If the
# lines are executed as written, the later assignment of each name wins.
FULL_DOWNLOAD_THREAD_LIMIT = 50
VERBOSE_OUTPUTS = False
DEFAULT_DOWNLOAD_DIRECTORY = 'music_05-01-2023'
DEFAULT_DOWNLOAD_DIRECTORY = 'music'  # later assignment: overrides the dated name above
ARTIST_IMAGES_SUB_DIR = '_Artists'
GLOBALS_SAVE_FILE = '_downloaded_store.pkl'
PLAYLIST_METADATA_SUB_DIR = '_Playlists'
GLOBALS_SAVE_FILE = '_downloaded_store.json'  # later assignment: overrides the .pkl name above
# Translation table that deletes the characters  ? " * | \ : > <  in one pass.
# Built once at import time instead of on every call.
_STRIPPED_PATH_CHARS = str.maketrans('', '', '?"*|\\:><')


def clean_file_path(prompt: str) -> str:
    r"""Return *prompt* with the characters ``? " * | \ : > <`` removed.

    Every occurrence of each listed character is deleted; all other
    characters, including the forward slash ``/``, are kept unchanged.

    Args:
        prompt: The path (or path component) to clean.

    Returns:
        A copy of ``prompt`` without any of the stripped characters.
    """
    return prompt.translate(_STRIPPED_PATH_CHARS)

Wyświetl plik

@ -3,15 +3,16 @@ from webgui import app
import spotify_mass_download
from spotify_mass_download import full_download, save_globals_save_file
from threading import Thread
import webbrowser
def main():
    """Print the banner, start the background saver thread and serve the web GUI.

    The ``g_keep_saving`` counter in ``spotify_mass_download`` is incremented
    before the saver thread starts and decremented again when this function
    leaves, whether the web app returned normally or raised.
    """
    print('Spotify Fuzzer')
    print('\n\n\n')
    print('=== SpotiFile ===')

    spotify_mass_download.g_keep_saving += 1
    try:
        save_globals_save_file_thread = Thread(target=save_globals_save_file)
        save_globals_save_file_thread.start()

        # Open the GUI in the default browser, then block serving it.
        webbrowser.open('http://127.0.0.1:8888/')
        app.run(host='127.0.0.1', port=8888, debug=False)
    finally:
        # Always release the counter, even if app.run() raises or is
        # interrupted. NOTE(review): presumably the saver thread keeps looping
        # while this counter is positive - confirm in spotify_mass_download.
        spotify_mass_download.g_keep_saving -= 1

Wyświetl plik

@ -1,2 +1,5 @@
cryptography
py-deezer
py-deezer
flask
lxml
eyed3

Wyświetl plik

@ -98,6 +98,8 @@ class SpotifyClient:
response_json = session.get('https://api.spotify.com/v1/me', verify=self._verify_ssl).json()
self.user_data = response_json
if not 'product' in self.user_data:
raise Exception('Spotify client keys are invalid.')
if self.user_data['product'] == 'premium':
raise Exception('THIS USER IS PREMIUM!')
return response_json

Wyświetl plik

@ -6,6 +6,7 @@ from config import *
import base64
from time import sleep
from datetime import datetime
import random
client = SpotifyClient(sp_key=SP_KEY, sp_dc=SP_DC)
client.get_me()
@ -144,14 +145,25 @@ def full_download(download_dir: str, identifier: str, recursive_artist: bool=Fal
g_keep_saving -= 1
# Scrape playlists from every category: for each playlist, fetch it through
# `scraper`, write its export() output to a `<spotify_id>.playlist` file under
# the download directory, and optionally run full_download on it.
# NOTE(review): this span is a diff view with indentation stripped; the old and
# the new signature/body lines are interleaved, so the code is left untouched
# and only documented here.
def download_all_categories_playlists():
def download_all_categories_playlists(download_meta_data_only=True):
client.refresh_tokens()
os.makedirs(f'{DEFAULT_DOWNLOAD_DIRECTORY}/_Playlists/', exist_ok=True)
os.makedirs(f'{DEFAULT_DOWNLOAD_DIRECTORY}/{PLAYLIST_METADATA_SUB_DIR}/', exist_ok=True)
console.log(f'Scraping playlists from all categories')
category_ids = scraper.get_categories_ids()
for category_id in category_ids:
playlist_ids = scraper.get_category_playlist_ids(category_id)
for playlist_id in playlist_ids:
playlist = scraper.get_playlist(playlist_id)
with open(f'{DEFAULT_DOWNLOAD_DIRECTORY}/_Playlists/{playlist.spotify_id}.playlist', 'w') as f:
f.write(playlist.export())
full_download(f'{DEFAULT_DOWNLOAD_DIRECTORY}', identifier=playlist.href)
random.shuffle(category_ids)  # shuffles in place, so categories are visited in random order
for category_index, category_id in enumerate(category_ids):
console.log(f'Scraping playlists from category {category_id} ({category_index + 1}/{len(category_ids)})')
try:
playlist_ids = scraper.get_category_playlist_ids(category_id)
for playlist_index, playlist_id in enumerate(playlist_ids):
console.log(f'Scraping playlist data from playlist {playlist_id} ({playlist_index + 1}/{len(playlist_ids)}) from category {category_id} ({category_index + 1}/{len(category_ids)})')
try:
playlist = scraper.get_playlist(playlist_id)
with open(f'{DEFAULT_DOWNLOAD_DIRECTORY}/{PLAYLIST_METADATA_SUB_DIR}/{playlist.spotify_id}.playlist', 'w') as f:
f.write(playlist.export())
# Tracks are only downloaded when the metadata-only flag is switched off.
if not download_meta_data_only:
full_download(f'{DEFAULT_DOWNLOAD_DIRECTORY}', identifier=playlist.href)
# Any exception is logged and then dropped rather than re-raised.
# NOTE(review): both handlers log the same 'Scraping categories exception'
# text, so the log alone does not show which one fired.
except Exception as ex:
console.error(f'Scraping categories exception: {ex}')
except Exception as ex:
console.error(f'Scraping categories exception: {ex}')

Wyświetl plik

@ -43,6 +43,8 @@ class SpotifyScraper:
return self.scrape_album_tracks(self.extract_id_from_link(link))
elif id_type == self.IDTypes.Artist:
return self.scrape_artist_tracks(self.extract_id_from_link(link), intense=True, console=console)
elif id_type == self.IDTypes.Track:
return [SpotifyTrack(self.get(f'https://api.spotify.com/v1/tracks/{self.extract_id_from_link(link)}').json())]
def scrape_playlist(self, playlist_id: str):
    """Request the playlist with id *playlist_id* and return the decoded JSON.

    The request goes through ``self._client.get`` to the
    ``https://api.spotify.com/v1/playlists/<playlist_id>`` endpoint, and the
    result of calling ``.json()`` on the response is returned as-is.
    """
    endpoint = f'https://api.spotify.com/v1/playlists/{playlist_id}'
    response = self._client.get(endpoint)
    return response.json()