kopia lustrzana https://github.com/Michael-K-Stein/SpotiFile
Update README.md
rodzic
91ccfd401c
commit
5e53e3e745
34
README.md
34
README.md
|
@ -95,3 +95,37 @@ Close the window without logging out (otherwise the cookies become invalid).
|
||||||
|
|
||||||
# Example usages:
|
# Example usages:
|
||||||
## Using SpotiFile to create a song recommendation module based on the semantic similarity of song lyrics:
|
## Using SpotiFile to create a song recommendation module based on the semantic similarity of song lyrics:
|
||||||
|
```python
|
||||||
|
from spotify_scraper import SpotifyScraper
|
||||||
|
import nltk
|
||||||
|
from nltk.corpus import stopwords
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.metrics.pairwise import cosine_similarity
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
def semantic_similarity(paragraph1, paragraph2):
    """Return the TF-IDF cosine similarity between two pieces of text.

    Both texts are tokenized with NLTK, lower-cased and stripped of English
    stop words, then vectorized together with TF-IDF and compared using
    cosine similarity.

    NOTE: the NLTK tokenizer models and the ``stopwords`` corpus must be
    installed beforehand (e.g. via ``nltk.download``).

    Args:
        paragraph1: First text to compare.
        paragraph2: Second text to compare.

    Returns:
        The similarity score as a float, or ``0.0`` when neither text
        contains a usable term after preprocessing.
    """
    stop_words = set(stopwords.words('english'))

    def _preprocess(text):
        # Lower-case each token once, then drop English stop words.
        tokens = (token.lower() for token in nltk.word_tokenize(text))
        return ' '.join(token for token in tokens if token not in stop_words)

    documents = [_preprocess(paragraph1), _preprocess(paragraph2)]

    # Compute similarity score
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(documents)
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no
        # document yields a term, e.g. lyrics that are empty or consist
        # only of stop words. Treat that as "nothing in common".
        return 0.0

    # Entry [0][1] of the pairwise matrix compares paragraph1 to paragraph2.
    return float(cosine_similarity(tfidf_matrix)[0][1])
|
||||||
|
|
||||||
|
|
||||||
|
# Usage: pass the two tracks to compare as command-line arguments.
if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit(f'Usage: {sys.argv[0]} <track_1> <track_2>')

scraper = SpotifyScraper()


def _fetch_lyrics(track):
    """Return the lyrics of ``track`` as newline-separated text."""
    lines = scraper.get_lyrics(track)['lyrics']['lines']
    return '\n'.join(line['words'] for line in lines)


lyrics1 = _fetch_lyrics(sys.argv[1])
lyrics2 = _fetch_lyrics(sys.argv[2])

sim = semantic_similarity(lyrics1, lyrics2)

print(f'The similarity between the two tracks is: {sim}')
|
||||||
|
|
||||||
|
```
|
||||||
|
|
Ładowanie…
Reference in New Issue