feat: use bulk Spotify API calls & inserts to make spotify crawling more efficient (#271)

* feat(spotify-api): bulk endpoints
* feat(music-library): allow bulk operations
* feat(spotify): bulk track+album+artist+genre import
* feat(spotify): use bulk import api for user crawl
* feat(spotify): bulk listen insert

For the benchmark case of a new user, where Listory imports 50 new listens along with all new tracks, artists, albums & genres, we significantly reduced the number of operations:

    Spotify API Requests: 208 => 8
    DB Insert: 96 => 8
    Tracing Spans: 1953 => 66
This commit is contained in:
Julian Tölle 2023-05-07 02:20:43 +02:00 committed by GitHub
parent 24b7308343
commit 8721fd101d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 427 additions and 23 deletions

View file

@ -45,6 +45,22 @@ export class SpotifyApiService {
return artist.data;
}
/**
 * Fetches several artists with a single request to `v1/artists`.
 *
 * @param accessToken Token sent in the `Authorization: Bearer` header.
 * @param spotifyIDs Artist IDs; comma-joined into the `ids` query parameter.
 * @returns The `artists` array from the response body.
 */
async getArtists(
  accessToken: string,
  spotifyIDs: string[]
): Promise<ArtistObject[]> {
  const ids = spotifyIDs.join(",");
  const request$ = this.httpService.get<{ artists: ArtistObject[] }>(
    `v1/artists`,
    {
      headers: { Authorization: `Bearer ${accessToken}` },
      params: { ids },
    }
  );

  const { data } = await firstValueFrom(request$);
  return data.artists;
}
async getAlbum(accessToken: string, spotifyID: string): Promise<AlbumObject> {
const album = await firstValueFrom(
this.httpService.get<AlbumObject>(`v1/albums/${spotifyID}`, {
@ -54,6 +70,21 @@ export class SpotifyApiService {
return album.data;
}
/**
 * Fetches several albums with a single request to `v1/albums`.
 *
 * @param accessToken Token sent in the `Authorization: Bearer` header.
 * @param spotifyIDs Album IDs; comma-joined into the `ids` query parameter.
 * @returns The `albums` array from the response body.
 */
async getAlbums(
  accessToken: string,
  spotifyIDs: string[]
): Promise<AlbumObject[]> {
  const ids = spotifyIDs.join(",");
  const request$ = this.httpService.get<{ albums: AlbumObject[] }>(
    `v1/albums`,
    {
      headers: { Authorization: `Bearer ${accessToken}` },
      params: { ids },
    }
  );

  const { data } = await firstValueFrom(request$);
  return data.albums;
}
async getTrack(accessToken: string, spotifyID: string): Promise<TrackObject> {
const track = await firstValueFrom(
this.httpService.get<TrackObject>(`v1/tracks/${spotifyID}`, {
@ -63,4 +94,20 @@ export class SpotifyApiService {
return track.data;
}
/**
 * Fetches several tracks with a single request to `v1/tracks`.
 *
 * @param accessToken Token sent in the `Authorization: Bearer` header.
 * @param spotifyIDs Track IDs; comma-joined into the `ids` query parameter.
 * @returns The `tracks` array from the response body.
 */
async getTracks(
  accessToken: string,
  spotifyIDs: string[]
): Promise<TrackObject[]> {
  const ids = spotifyIDs.join(",");
  const request$ = this.httpService.get<{ tracks: TrackObject[] }>(
    `v1/tracks`,
    {
      headers: { Authorization: `Bearer ${accessToken}` },
      params: { ids },
    }
  );

  const { data } = await firstValueFrom(request$);
  return data.tracks;
}
}

View file

@ -1,6 +1,7 @@
import type { Job } from "pg-boss";
import { Injectable, Logger } from "@nestjs/common";
import { chunk, uniq } from "lodash";
import { Span } from "nestjs-otel";
import type { Job } from "pg-boss";
import { ListensService } from "../../listens/listens.service";
import { Album } from "../../music-library/album.entity";
import { Artist } from "../../music-library/artist.entity";
@ -21,6 +22,11 @@ import { TrackObject } from "./spotify-api/entities/track-object";
import { SpotifyApiService } from "./spotify-api/spotify-api.service";
import { SpotifyAuthService } from "./spotify-auth/spotify-auth.service";
/**
 * Maximum number of IDs passed to a single Spotify Web API
 * "Get Several Artists" / "Get Several Tracks" call; used as the chunk size
 * when bulk-importing artists and tracks.
 */
const SPOTIFY_BULK_MAX_IDS = 50;
/**
 * Maximum number of IDs passed to a single Spotify Web API
 * "Get Several Albums" call; used as the chunk size when bulk-importing albums.
 */
const SPOTIFY_BULK_ALBUMS_MAX_IDS = 20;
@Injectable()
export class SpotifyService {
private readonly logger = new Logger(this.constructor.name);
@ -115,27 +121,27 @@ export class SpotifyService {
return;
}
await Promise.all(
playHistory.map(async (history) => {
const track = await this.importTrack(history.track.id);
const tracks = await this.importTracks(
uniq(playHistory.map((history) => history.track.id))
);
const { isDuplicate } = await this.listensService.createListen({
user,
track,
playedAt: new Date(history.played_at),
});
const listenData = playHistory.map((history) => ({
user,
track: tracks.find((track) => history.track.id === track.spotify.id),
playedAt: new Date(history.played_at),
}));
if (!isDuplicate) {
this.logger.debug(
{ userId: user.id },
`New listen found! ${user.id} listened to "${
track.name
}" by ${track.artists
?.map((artist) => `"${artist.name}"`)
.join(", ")}`
);
}
})
const results = await this.listensService.createListens(listenData);
results.forEach((listen) =>
this.logger.debug(
{ userId: user.id },
`New listen found! ${user.id} listened to "${
listen.track.name
}" by ${listen.track.artists
?.map((artist) => `"${artist.name}"`)
.join(", ")}`
)
);
const newestPlayTime = new Date(
@ -223,6 +229,94 @@ export class SpotifyService {
});
}
/**
 * Imports several tracks by their Spotify IDs in bulk.
 *
 * Tracks already present in the music library are loaded from there; only
 * the missing ones are requested from Spotify (in chunks of
 * `SPOTIFY_BULK_MAX_IDS`) and then created together with their artists and
 * albums.
 *
 * @param spotifyIDs Spotify IDs of the tracks to import. Repeated IDs are
 *   only requested and created once.
 * @param retryOnExpiredToken When true, an HTTP 401 from Spotify triggers a
 *   refresh of the app access token followed by a single retry.
 * @returns The tracks that already existed followed by the newly created
 *   ones.
 * @throws Rethrows any error from the Spotify API calls that is not handled
 *   by the 401 retry.
 */
@Span()
async importTracks(
  spotifyIDs: string[],
  retryOnExpiredToken: boolean = true
): Promise<Track[]> {
  const tracks = await this.musicLibraryService.findTracks(
    spotifyIDs.map((id) => ({ spotify: { id } }))
  );

  // Get missing ids. Deduplicated so that a repeated ID is neither requested
  // nor inserted twice.
  const knownIDs = new Set(tracks.map((track) => track.spotify.id));
  const missingIDs = uniq(spotifyIDs.filter((id) => !knownIDs.has(id)));

  // No need to make spotify api request if all data is available locally
  if (missingIDs.length === 0) {
    return tracks;
  }

  const spotifyTracks: TrackObject[] = [];

  // Split the import requests so we stay within the spotify api limits
  try {
    const batches = await Promise.all(
      chunk(missingIDs, SPOTIFY_BULK_MAX_IDS).map((ids) =>
        this.spotifyApi.getTracks(this.appAccessToken, ids)
      )
    );

    // Spotify may return `null` in place of an ID it cannot resolve; skip
    // those entries so the property accesses below do not throw.
    spotifyTracks.push(
      ...batches.flatMap((batch) =>
        batch.filter((track): track is TrackObject => track != null)
      )
    );
  } catch (err) {
    if (err.response && err.response.status === 401 && retryOnExpiredToken) {
      await this.refreshAppAccessToken();

      return this.importTracks(spotifyIDs, false);
    }

    throw err;
  }

  // We import albums & artist in series because the album import also
  // triggers an artist import. In the best case, all artists will already be
  // imported by the importArtists() call, and the album call can get them
  // from the database.
  const artists = await this.importArtists(
    uniq(
      spotifyTracks.flatMap((track) =>
        track.artists.map((artist) => artist.id)
      )
    )
  );
  const albums = await this.importAlbums(
    uniq(spotifyTracks.map((track) => track.album.id))
  );

  // Find the right albums & artists for each spotify track & create db entry
  const newTracks = await this.musicLibraryService.createTracks(
    spotifyTracks.map((spotifyTrack) => {
      const trackAlbum = albums.find(
        (album) => spotifyTrack.album.id === album.spotify.id
      );

      // Artists that could not be imported are left out instead of being
      // passed on as `undefined`.
      const trackArtists = spotifyTrack.artists
        .map((trackArtist) =>
          artists.find((artist) => trackArtist.id === artist.spotify.id)
        )
        .filter((artist): artist is Artist => artist !== undefined);

      return {
        name: spotifyTrack.name,
        album: trackAlbum,
        artists: trackArtists,
        spotify: {
          id: spotifyTrack.id,
          uri: spotifyTrack.uri,
          type: spotifyTrack.type,
          href: spotifyTrack.href,
        },
      };
    })
  );

  // Return new & existing tracks
  return [...tracks, ...newTracks];
}
@Span()
async importAlbum(
spotifyID: string,
@ -270,6 +364,80 @@ export class SpotifyService {
});
}
/**
 * Imports several albums by their Spotify IDs in bulk.
 *
 * Albums already present in the music library are loaded from there; only
 * the missing ones are requested from Spotify (in chunks of
 * `SPOTIFY_BULK_ALBUMS_MAX_IDS`) and then created together with their
 * artists.
 *
 * @param spotifyIDs Spotify IDs of the albums to import. Repeated IDs are
 *   only requested and created once.
 * @param retryOnExpiredToken When true, an HTTP 401 from Spotify triggers a
 *   refresh of the app access token followed by a single retry.
 * @returns The albums that already existed followed by the newly created
 *   ones.
 * @throws Rethrows any error from the Spotify API calls that is not handled
 *   by the 401 retry.
 */
@Span()
async importAlbums(
  spotifyIDs: string[],
  retryOnExpiredToken: boolean = true
): Promise<Album[]> {
  const albums = await this.musicLibraryService.findAlbums(
    spotifyIDs.map((id) => ({ spotify: { id } }))
  );

  // Get missing ids. Deduplicated so that a repeated ID is neither requested
  // nor inserted twice.
  const knownIDs = new Set(albums.map((album) => album.spotify.id));
  const missingIDs = uniq(spotifyIDs.filter((id) => !knownIDs.has(id)));

  // No need to make spotify api request if all data is available locally
  if (missingIDs.length === 0) {
    return albums;
  }

  const spotifyAlbums: AlbumObject[] = [];

  // Split the import requests so we stay within the spotify api limits
  try {
    const batches = await Promise.all(
      chunk(missingIDs, SPOTIFY_BULK_ALBUMS_MAX_IDS).map((ids) =>
        this.spotifyApi.getAlbums(this.appAccessToken, ids)
      )
    );

    // Spotify may return `null` in place of an ID it cannot resolve; skip
    // those entries so the property accesses below do not throw.
    spotifyAlbums.push(
      ...batches.flatMap((batch) =>
        batch.filter((album): album is AlbumObject => album != null)
      )
    );
  } catch (err) {
    if (err.response && err.response.status === 401 && retryOnExpiredToken) {
      await this.refreshAppAccessToken();

      return this.importAlbums(spotifyIDs, false);
    }

    throw err;
  }

  const artists = await this.importArtists(
    uniq(
      spotifyAlbums.flatMap((album) =>
        album.artists.map((artist) => artist.id)
      )
    )
  );

  // Find the right artists for each spotify album & create db entry
  const newAlbums = await this.musicLibraryService.createAlbums(
    spotifyAlbums.map((spotifyAlbum) => {
      // Artists that could not be imported are left out instead of being
      // passed on as `undefined`.
      const albumArtists = spotifyAlbum.artists
        .map((albumArtist) =>
          artists.find((artist) => albumArtist.id === artist.spotify.id)
        )
        .filter((artist): artist is Artist => artist !== undefined);

      return {
        name: spotifyAlbum.name,
        artists: albumArtists,
        spotify: {
          id: spotifyAlbum.id,
          uri: spotifyAlbum.uri,
          type: spotifyAlbum.type,
          href: spotifyAlbum.href,
        },
      };
    })
  );

  // Return new & existing albums
  return [...albums, ...newAlbums];
}
@Span()
async importArtist(
spotifyID: string,
@ -315,6 +483,76 @@ export class SpotifyService {
});
}
/**
 * Imports several artists by their Spotify IDs in bulk.
 *
 * Artists already present in the music library are loaded from there; only
 * the missing ones are requested from Spotify (in chunks of
 * `SPOTIFY_BULK_MAX_IDS`) and then created together with their genres.
 *
 * @param spotifyIDs Spotify IDs of the artists to import. Repeated IDs are
 *   only requested and created once.
 * @param retryOnExpiredToken When true, an HTTP 401 from Spotify triggers a
 *   refresh of the app access token followed by a single retry.
 * @returns The artists that already existed followed by the newly created
 *   ones.
 * @throws Rethrows any error from the Spotify API calls that is not handled
 *   by the 401 retry.
 */
@Span()
async importArtists(
  spotifyIDs: string[],
  retryOnExpiredToken: boolean = true
): Promise<Artist[]> {
  const artists = await this.musicLibraryService.findArtists(
    spotifyIDs.map((id) => ({ spotify: { id } }))
  );

  // Get missing ids. Deduplicated so that a repeated ID is neither requested
  // nor inserted twice.
  const knownIDs = new Set(artists.map((artist) => artist.spotify.id));
  const missingIDs = uniq(spotifyIDs.filter((id) => !knownIDs.has(id)));

  // No need to make spotify api request if all data is available locally
  if (missingIDs.length === 0) {
    return artists;
  }

  const spotifyArtists: ArtistObject[] = [];

  // Split the import requests so we stay within the spotify api limits
  try {
    const batches = await Promise.all(
      chunk(missingIDs, SPOTIFY_BULK_MAX_IDS).map((ids) =>
        this.spotifyApi.getArtists(this.appAccessToken, ids)
      )
    );

    // Spotify may return `null` in place of an ID it cannot resolve; skip
    // those entries so the property accesses below do not throw.
    spotifyArtists.push(
      ...batches.flatMap((batch) =>
        batch.filter((artist): artist is ArtistObject => artist != null)
      )
    );
  } catch (err) {
    if (err.response && err.response.status === 401 && retryOnExpiredToken) {
      await this.refreshAppAccessToken();

      return this.importArtists(spotifyIDs, false);
    }

    throw err;
  }

  const genres = await this.importGenres(
    uniq(spotifyArtists.flatMap((artist) => artist.genres))
  );

  // Find the right genres for each spotify artist & create db entry
  const newArtists = await this.musicLibraryService.createArtists(
    spotifyArtists.map((spotifyArtist) => {
      // Genres without a matching entry are left out instead of being
      // passed on as `undefined`.
      const artistGenres = spotifyArtist.genres
        .map((artistGenre) =>
          genres.find((genre) => artistGenre === genre.name)
        )
        .filter((genre): genre is Genre => genre !== undefined);

      return {
        name: spotifyArtist.name,
        genres: artistGenres,
        spotify: {
          id: spotifyArtist.id,
          uri: spotifyArtist.uri,
          type: spotifyArtist.type,
          href: spotifyArtist.href,
        },
      };
    })
  );

  // Return new & existing artists
  return [...artists, ...newArtists];
}
@Span()
async updateArtist(
spotifyID: string,
@ -368,6 +606,29 @@ export class SpotifyService {
});
}
/**
 * Makes sure a genre exists in the music library for each given name.
 *
 * Genres that already exist are loaded; the remaining names are created in
 * one bulk call.
 *
 * @param names Genre names to import. Repeated names are only created once.
 * @returns The genres that already existed followed by the newly created
 *   ones.
 */
@Span()
async importGenres(names: string[]): Promise<Genre[]> {
  const genres = await this.musicLibraryService.findGenres(
    names.map((name) => ({ name }))
  );

  // Get missing genres. Deduplicated so that a name that appears more than
  // once in the input is not created twice.
  const knownNames = new Set(genres.map((genre) => genre.name));
  const missingGenres = uniq(names.filter((name) => !knownNames.has(name)));

  // No need to create genres if all data is available locally
  if (missingGenres.length === 0) {
    return genres;
  }

  const newGenres = await this.musicLibraryService.createGenres(
    missingGenres.map((name) => ({ name }))
  );

  // Return new & existing genres
  return [...genres, ...newGenres];
}
@Span()
private async refreshAppAccessToken(): Promise<void> {
if (!this.appAccessTokenInProgress) {