feat: import listens from spotify extended streaming history (#305)

This commit is contained in:
Julian Tölle 2023-10-01 03:35:02 +02:00 committed by GitHub
parent 23d7ea0995
commit 7140cb0679
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
50 changed files with 1051 additions and 215 deletions

View file

@ -33,11 +33,11 @@ export class SchedulerService implements OnApplicationBootstrap {
}
private async setupSpotifyCrawlerSupervisor(): Promise<void> {
await this.superviseImportJobsJobService.schedule("*/1 * * * *", {}, {});
// await this.superviseImportJobsJobService.schedule("*/1 * * * *", {}, {});
}
@Span()
@CrawlerSupervisorJob.Handle()
// @CrawlerSupervisorJob.Handle()
async superviseImportJobs(): Promise<void> {
this.logger.log("Starting crawler jobs");
const userInfo = await this.spotifyService.getCrawlableUserInfo();

View file

@ -0,0 +1,13 @@
import { ApiProperty } from "@nestjs/swagger";
/**
 * Progress report for a user's extended streaming history import.
 */
export class ExtendedStreamingHistoryStatusDto {
  /** Number of import rows stored for the user. */
  @ApiProperty({ type: Number })
  total: number;

  /** Number of those rows that are already linked to a listen. */
  @ApiProperty({ type: Number })
  imported: number;
}

View file

@ -0,0 +1,13 @@
import { ApiProperty } from "@nestjs/swagger";
import { ArrayMaxSize } from "class-validator";
import { SpotifyExtendedStreamingHistoryItemDto } from "./spotify-extended-streaming-history-item.dto";
// Upper bound for the number of entries accepted in one request. Spotify's
// export files hold ~16k entries by default; this might need refactoring if
// Spotify starts exporting larger files.
const MAX_LISTENS_PER_REQUEST = 50_000;

/**
 * Request body for uploading a batch of entries from the Spotify extended
 * streaming history export.
 */
export class ImportExtendedStreamingHistoryDto {
  /** Entries to import, capped at MAX_LISTENS_PER_REQUEST items. */
  @ApiProperty({
    type: SpotifyExtendedStreamingHistoryItemDto,
    isArray: true,
    maxItems: MAX_LISTENS_PER_REQUEST,
  })
  @ArrayMaxSize(MAX_LISTENS_PER_REQUEST)
  listens: SpotifyExtendedStreamingHistoryItemDto[];
}

View file

@ -0,0 +1,9 @@
import { ApiProperty } from "@nestjs/swagger";
/**
 * A single entry of the Spotify extended streaming history export, reduced
 * to the two fields the import reads.
 */
export class SpotifyExtendedStreamingHistoryItemDto {
  /** Timestamp of the play, as an ISO 8601 string. */
  @ApiProperty({
    format: "iso8601",
    example: "2018-11-30T08:33:33Z",
  })
  ts: string;

  /** Spotify URI of the played track. */
  @ApiProperty({
    example: "spotify:track:6askbS4pEVWbbDnUGEXh3G",
  })
  spotify_track_uri: string;
}

View file

@ -0,0 +1,32 @@
import { Body as NestBody, Controller, Get, Post } from "@nestjs/common";
import { ApiBody, ApiTags } from "@nestjs/swagger";
import { AuthAccessToken } from "../../../auth/decorators/auth-access-token.decorator";
import { ReqUser } from "../../../auth/decorators/req-user.decorator";
import { User } from "../../../users/user.entity";
import { ExtendedStreamingHistoryStatusDto } from "./dto/extended-streaming-history-status.dto";
import { ImportExtendedStreamingHistoryDto } from "./dto/import-extended-streaming-history.dto";
import { ImportService } from "./import.service";
/**
 * HTTP endpoints for importing the Spotify extended streaming history and
 * for checking how far the import has progressed.
 */
@ApiTags("import")
@Controller("api/v1/import")
export class ImportController {
  constructor(private readonly importService: ImportService) {}

  /**
   * Accepts a batch of extended streaming history entries for the
   * authenticated user and hands them to the import service.
   */
  @Post("extended-streaming-history")
  @ApiBody({ type: () => ImportExtendedStreamingHistoryDto })
  @AuthAccessToken()
  async importExtendedStreamingHistory(
    @ReqUser() user: User,
    @NestBody() data: ImportExtendedStreamingHistoryDto,
  ): Promise<void> {
    await this.importService.importExtendedStreamingHistory(user, data);
  }

  /**
   * Returns the import progress (total vs. imported entries) for the
   * authenticated user.
   */
  @Get("extended-streaming-history/status")
  @AuthAccessToken()
  async getExtendedStreamingHistoryStatus(
    @ReqUser() user: User,
  ): Promise<ExtendedStreamingHistoryStatusDto> {
    const status =
      await this.importService.getExtendedStreamingHistoryStatus(user);

    return status;
  }
}

View file

@ -0,0 +1,177 @@
import { JobService } from "@apricote/nest-pg-boss";
import { Injectable, Logger } from "@nestjs/common";
import { uniq } from "lodash";
import { Span } from "nestjs-otel";
import type { Job } from "pg-boss";
import { ListensService } from "../../../listens/listens.service";
import { User } from "../../../users/user.entity";
import { SpotifyService } from "../spotify.service";
import { ExtendedStreamingHistoryStatusDto } from "./dto/extended-streaming-history-status.dto";
import { ImportExtendedStreamingHistoryDto } from "./dto/import-extended-streaming-history.dto";
import {
IProcessSpotifyExtendedStreamingHistoryListenJob,
ProcessSpotifyExtendedStreamingHistoryListenJob,
} from "./jobs";
import { SpotifyExtendedStreamingHistoryListenRepository } from "./listen.repository";
/**
 * Imports listens from the Spotify extended streaming history export.
 *
 * The import happens in two steps: uploaded entries are first stored in a
 * dedicated import table and a job is queued per stored row; the job handler
 * then resolves the track and creates the actual listen for each row.
 */
@Injectable()
export class ImportService {
  /** Prefix of the Spotify URIs this import is able to process. */
  private static readonly TRACK_URI_PREFIX = "spotify:track:";

  private readonly logger = new Logger(this.constructor.name);

  constructor(
    private readonly importListenRepository: SpotifyExtendedStreamingHistoryListenRepository,
    @ProcessSpotifyExtendedStreamingHistoryListenJob.Inject()
    private readonly processListenJobService: JobService<IProcessSpotifyExtendedStreamingHistoryListenJob>,
    private readonly spotifyService: SpotifyService,
    private readonly listensService: ListensService,
  ) {}

  /**
   * Stores the uploaded entries in the import table and queues one
   * processing job per stored row.
   *
   * Entries that cannot be imported (no track URI, unparsable timestamp) and
   * duplicates within the upload are dropped before anything is written.
   *
   * @param user Owner of the imported listens.
   * @param param1 Request body; only `listens` is read.
   */
  @Span()
  async importExtendedStreamingHistory(
    user: User,
    { listens: importListens }: ImportExtendedStreamingHistoryDto,
  ): Promise<void> {
    const importable = this.selectImportableListens(importListens);

    if (importable.length !== importListens.length) {
      this.logger.debug(
        { received: importListens.length, importable: importable.length },
        "skipping duplicate or unusable extended streaming history entries",
      );
    }

    if (importable.length === 0) {
      // Nothing to store, so there is nothing to schedule either.
      return;
    }

    const listens = importable.map(({ playedAt, spotifyTrackUri }) =>
      this.importListenRepository.create({
        user,
        playedAt,
        spotifyTrackUri,
      }),
    );

    // Save listens to import table
    const insertResult = await this.importListenRepository.upsert(listens, [
      "user",
      "playedAt",
      "spotifyTrackUri",
    ]);

    const processJobs = insertResult.identifiers.map((listen) => ({
      data: {
        id: listen.id,
      },
      // One job per import row, even if the same entry is uploaded again.
      singletonKey: listen.id,
      retryLimit: 10,
      retryDelay: 5,
      retryBackoff: true,
    }));

    // Schedule jobs to process imports
    await this.processListenJobService.insert(processJobs);
  }

  /**
   * Reduces the uploaded entries to the ones that can be stored.
   *
   * - Entries without a `spotify:track:` URI are skipped; the export also
   *   contains plays that have no track URI (e.g. podcast episodes).
   * - Entries whose timestamp is not a parsable string are skipped.
   * - Duplicates are removed. The key uses the parsed instant instead of the
   *   raw string so it matches the upsert conflict target; otherwise two
   *   spellings of the same instant would collide inside one statement.
   */
  private selectImportableListens(
    importListens: ImportExtendedStreamingHistoryDto["listens"],
  ): { playedAt: Date; spotifyTrackUri: string }[] {
    const seenKeys = new Set<string>();
    const importable: { playedAt: Date; spotifyTrackUri: string }[] = [];

    for (const entry of importListens) {
      const spotifyTrackUri: unknown = entry.spotify_track_uri;
      if (
        typeof spotifyTrackUri !== "string" ||
        !spotifyTrackUri.startsWith(ImportService.TRACK_URI_PREFIX)
      ) {
        continue;
      }

      const rawTimestamp: unknown = entry.ts;
      if (typeof rawTimestamp !== "string") {
        // `new Date(null)` would silently become 1970-01-01.
        continue;
      }

      const playedAt = new Date(rawTimestamp);
      const playedAtMs = playedAt.getTime();
      if (Number.isNaN(playedAtMs)) {
        continue;
      }

      const key = `${spotifyTrackUri}-${playedAtMs}`;
      if (seenKeys.has(key)) {
        continue;
      }

      seenKeys.add(key);
      importable.push({ playedAt, spotifyTrackUri });
    }

    return importable;
  }

  /**
   * Job handler: resolves tracks and creates listens for a batch of import
   * rows. Rows that already have a track or a listen keep them, so a retried
   * job only does the work that is still missing.
   *
   * @param jobs Batch of jobs, each carrying the id of one import row.
   * @throws Error when a track or the created listen cannot be matched to an
   *   import row; the error fails the whole batch.
   */
  @ProcessSpotifyExtendedStreamingHistoryListenJob.Handle({
    // Spotify API "Get Several XY" allows max 50 IDs
    batchSize: 50,
    newJobCheckInterval: 500,
  })
  @Span()
  async processListens(
    jobs: Job<IProcessSpotifyExtendedStreamingHistoryListenJob>[],
  ): Promise<void> {
    this.logger.debug(
      { jobs: jobs.length },
      "processing extended streaming history listens",
    );

    if (jobs.length === 0) {
      // An empty list of OR-conditions would not restrict the query below.
      return;
    }

    const importListens = await this.importListenRepository.findBy(
      jobs.map((job) => ({ id: job.data.id })),
    );

    const listensWithoutTracks = importListens.filter(
      (importListen) => !importListen.track,
    );

    if (listensWithoutTracks.length > 0) {
      const missingTrackIDs = uniq(
        listensWithoutTracks.map((importListen) =>
          importListen.spotifyTrackUri.replace(
            ImportService.TRACK_URI_PREFIX,
            "",
          ),
        ),
      );

      const tracks = await this.spotifyService.importTracks(missingTrackIDs);

      for (const importListen of listensWithoutTracks) {
        const matchingTrack = tracks.find(
          (track) => importListen.spotifyTrackUri === track.spotify.uri,
        );

        if (!matchingTrack) {
          this.logger.warn(
            { listen: importListen },
            "could not find track for extended streaming history listen",
          );
          throw new Error(
            `could not find track for extended streaming history listen`,
          );
        }

        importListen.track = matchingTrack;
      }

      // Using upsert instead of save to only do a single query
      await this.importListenRepository.upsert(listensWithoutTracks, ["id"]);
    }

    // Evaluated after the track step: every row has a track at this point.
    const listensWithoutListen = importListens.filter(
      (importListen) => !importListen.listen,
    );

    if (listensWithoutListen.length > 0) {
      const listens = await this.listensService.createListens(
        listensWithoutListen.map((listen) => ({
          user: listen.user,
          track: listen.track,
          playedAt: listen.playedAt,
        })),
      );

      for (const importListen of listensWithoutListen) {
        const matchingListen = listens.find(
          (listen) =>
            importListen.user.id === listen.user.id &&
            importListen.track.id === listen.track.id &&
            importListen.playedAt.getTime() === listen.playedAt.getTime(),
        );

        if (!matchingListen) {
          this.logger.warn(
            { listen: importListen, listens: listens },
            "could not find listen for extended streaming history listen",
          );
          throw new Error(
            `could not find listen for extended streaming history listen`,
          );
        }

        importListen.listen = matchingListen;
      }

      // Using upsert instead of save to only do a single query
      await this.importListenRepository.upsert(listensWithoutListen, ["id"]);
    }
  }

  /**
   * Counts the user's import rows and how many of them are already linked to
   * a listen.
   *
   * @param user User whose import progress is requested.
   * @returns `total` rows in the import table and `imported` rows with a
   *   listen attached.
   */
  @Span()
  async getExtendedStreamingHistoryStatus(
    user: User,
  ): Promise<ExtendedStreamingHistoryStatusDto> {
    const qb = this.importListenRepository
      .createQueryBuilder("listen")
      .where("listen.userId = :user", { user: user.id });

    // Both counts are independent, so run them concurrently.
    const [total, imported] = await Promise.all([
      qb.clone().getCount(),
      qb.clone().andWhere("listen.listenId IS NOT NULL").getCount(),
    ]);

    return { total, imported };
  }
}

View file

@ -0,0 +1,4 @@
export { ImportController } from "./import.controller";
export { ImportService } from "./import.service";
export { ProcessSpotifyExtendedStreamingHistoryListenJob } from "./jobs";
export { SpotifyExtendedStreamingHistoryListenRepository } from "./listen.repository";

View file

@ -0,0 +1,7 @@
import { createJob } from "@apricote/nest-pg-boss";
/** Payload of a processing job: the id of the import row to process. */
export type IProcessSpotifyExtendedStreamingHistoryListenJob = { id: string };

// Name under which the job is registered with the queue.
const PROCESS_LISTEN_JOB_NAME =
  "process-spotify-extended-streaming-history-listen";

/** Job definition used to inject the job service and register the handler. */
export const ProcessSpotifyExtendedStreamingHistoryListenJob =
  createJob<IProcessSpotifyExtendedStreamingHistoryListenJob>(
    PROCESS_LISTEN_JOB_NAME,
  );

View file

@ -0,0 +1,25 @@
import { Column, Entity, ManyToOne, PrimaryGeneratedColumn } from "typeorm";
import { Track } from "../../../music-library/track.entity";
import { User } from "../../../users/user.entity";
import { Listen } from "../../../listens/listen.entity";
/**
 * One entry of an uploaded Spotify extended streaming history, together with
 * the track and listen it has been resolved to (once processed).
 *
 * NOTE(review): the import upserts on (user, playedAt, spotifyTrackUri); the
 * matching unique constraint is not declared here and is presumably created
 * by a migration. Confirm before changing these columns.
 */
@Entity({
  name: "spotify_extended_streaming_history_listen",
})
export class SpotifyExtendedStreamingHistoryListen {
  @PrimaryGeneratedColumn("uuid")
  id: string;

  /** Owner of the entry; loaded eagerly. */
  @ManyToOne(() => User, {
    eager: true,
  })
  user: User;

  /** Time of the play as reported by the export. */
  @Column({
    type: "timestamp",
  })
  playedAt: Date;

  /** Spotify URI of the played track as reported by the export. */
  @Column()
  spotifyTrackUri: string;

  /** Track the URI was resolved to; unset until the entry is processed. */
  @ManyToOne(() => Track, {
    nullable: true,
    eager: true,
  })
  track?: Track;

  /** Listen created for the entry; unset until the entry is processed. */
  @ManyToOne(() => Listen, {
    nullable: true,
    eager: true,
  })
  listen?: Listen;
}

View file

@ -0,0 +1,6 @@
import { Repository } from "typeorm";
import { EntityRepository } from "../../../database/entity-repository";
import { SpotifyExtendedStreamingHistoryListen } from "./listen.entity";
/**
 * Repository for the extended streaming history import rows. It adds no
 * custom queries on top of the base repository.
 */
@EntityRepository(SpotifyExtendedStreamingHistoryListen)
export class SpotifyExtendedStreamingHistoryListenRepository
  extends Repository<SpotifyExtendedStreamingHistoryListen> {}

View file

@ -1,25 +1,37 @@
import { PGBossModule } from "@apricote/nest-pg-boss";
import { Module } from "@nestjs/common";
import { TypeOrmRepositoryModule } from "../../database/entity-repository/typeorm-repository.module";
import { ListensModule } from "../../listens/listens.module";
import { MusicLibraryModule } from "../../music-library/music-library.module";
import { UsersModule } from "../../users/users.module";
import { ImportSpotifyJob } from "../jobs";
import {
ImportController,
ImportService,
ProcessSpotifyExtendedStreamingHistoryListenJob,
SpotifyExtendedStreamingHistoryListenRepository,
} from "./import-extended-streaming-history";
import { SpotifyApiModule } from "./spotify-api/spotify-api.module";
import { SpotifyAuthModule } from "./spotify-auth/spotify-auth.module";
import { SpotifyService } from "./spotify.service";
@Module({
imports: [
PGBossModule.forJobs([ImportSpotifyJob]),
PGBossModule.forJobs([
ImportSpotifyJob,
ProcessSpotifyExtendedStreamingHistoryListenJob,
]),
TypeOrmRepositoryModule.for([
SpotifyExtendedStreamingHistoryListenRepository,
]),
UsersModule,
ListensModule,
MusicLibraryModule,
SpotifyApiModule,
SpotifyAuthModule,
],
providers: [SpotifyService],
providers: [SpotifyService, ImportService],
controllers: [ImportController],
exports: [SpotifyService],
})
export class SpotifyModule {
constructor(private readonly spotifyService: SpotifyService) {}
}
export class SpotifyModule {}