kopia lustrzana https://github.com/Michael-K-Stein/SpotiFile
Update README.md
rodzic
91ccfd401c
commit
5e53e3e745
34
README.md
34
README.md
|
@ -95,3 +95,37 @@ Close the window without logging out (Otherwise the cookies are made invalid).
|
|||
|
||||
# Example usages:
|
||||
## Using SpotiFile to create a song recommendation module based on the semantic similarity of song lyrics:
|
||||
```python
|
||||
from spotify_scraper import SpotifyScraper
|
||||
import nltk
|
||||
from nltk.corpus import stopwords
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
import sys
|
||||
|
||||
|
||||
def _strip_stop_words(text, stop_words):
    """Return *text* tokenized with NLTK, lower-cased, and without stop words.

    The surviving tokens are joined back into a single space-separated string.
    """
    lowered_tokens = (token.lower() for token in nltk.word_tokenize(text))
    return ' '.join(token for token in lowered_tokens if token not in stop_words)


def semantic_similarity(paragraph1, paragraph2):
    """Return the TF-IDF cosine similarity between two pieces of text.

    Both texts are tokenized, lower-cased and stripped of English stop words,
    then vectorized together with TF-IDF; the result is the cosine similarity
    of the two resulting vectors.

    Args:
        paragraph1: First text to compare.
        paragraph2: Second text to compare.

    Returns:
        The cosine similarity of the two TF-IDF vectors, or 0.0 when neither
        text contains a token the vectorizer can use.

    Note:
        Relies on NLTK's English stop-word list and tokenizer data being
        available locally (typically fetched once with ``nltk.download``).
    """
    # Preprocess text
    stop_words = set(stopwords.words('english'))
    documents = [
        _strip_stop_words(paragraph, stop_words)
        for paragraph in (paragraph1, paragraph2)
    ]

    # Compute similarity score
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(documents)
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no document
        # yields a usable token, e.g. both lyrics are empty or consist only of
        # stop words / punctuation. Such texts share nothing measurable.
        return 0.0

    # cosine_similarity returns the pairwise matrix; [0][1] is text 1 vs text 2.
    return cosine_similarity(tfidf_matrix)[0][1]
|
||||
|
||||
|
||||
# Usage: pass two track identifiers as the first and second command-line arguments.
scraper = SpotifyScraper()

# Each lyrics response holds its text line by line under ['lyrics']['lines'];
# stitch the 'words' of every line back together into one newline-separated string.
first_track_lines = scraper.get_lyrics(sys.argv[1])['lyrics']['lines']
lyrics1 = '\n'.join([entry['words'] for entry in first_track_lines])

second_track_lines = scraper.get_lyrics(sys.argv[2])['lyrics']['lines']
lyrics2 = '\n'.join([entry['words'] for entry in second_track_lines])

sim = semantic_similarity(lyrics1, lyrics2)

print(f'The similarity between the two tracks is: {sim}')
|
||||
|
||||
```
|
||||
|
|
Ładowanie…
Reference in New Issue