diff --git a/src/page-loaders.ts b/src/page-loaders.ts
index b4e6b00..074ca60 100644
--- a/src/page-loaders.ts
+++ b/src/page-loaders.ts
@@ -3,6 +3,7 @@ import {
   getTagWorksFeedAtomUrl,
   getTagWorksFeedUrl,
   getUserProfileUrl,
+  getUserWorksUrl,
   getWorkUrl,
 } from "./urls";
 
@@ -101,6 +102,26 @@ export const loadUserProfilePage = async ({
   });
 };
 
+export interface UserWorksPage extends CheerioAPI {
+  kind: "UserWorksPage";
+}
+/**
+ * Fetches one page of a user's works listing.
+ *
+ * `page` is optional; it defaults to 0 to match `getUserWorksUrl`.
+ */
+export const loadUserWorksList = async ({
+  username,
+  page = 0,
+}: {
+  username: string;
+  page?: number;
+}) => {
+  return await fetchPage<UserWorksPage>({
+    url: getUserWorksUrl({ username, page }),
+  });
+};
+
 export interface ChapterIndexPage extends CheerioAPI {
   kind: "ChapterIndexPage";
 }
diff --git a/src/urls.ts b/src/urls.ts
index 90c26ab..0d7843c 100644
--- a/src/urls.ts
+++ b/src/urls.ts
@@ -25,6 +25,10 @@ export const getWorkUrl = ({
 export const getUserProfileUrl = ({ username }: { username: string }) =>
   `https://archiveofourown.org/users/${encodeURI(username)}/profile`;
 
+// URL of a user's works listing; `page` is forwarded as the `page` query parameter.
+export const getUserWorksUrl = ({ username, page = 0 }: { username: string; page?: number }) =>
+  `https://archiveofourown.org/users/${encodeURI(username)}/works?page=${page}`;
+
 export const getTagUrl = (tagName: string) =>
   `https://archiveofourown.org/tags/${encodeURI(tagName)
     .replaceAll("/", "*s*")
diff --git a/src/users/getters.ts b/src/users/getters.ts
index 1838f2b..06e830d 100644
--- a/src/users/getters.ts
+++ b/src/users/getters.ts
@@ -1,4 +1,4 @@
-import { UserProfile } from "../page-loaders";
+import { UserProfile, UserWorksPage } from "../page-loaders";
 import { getUserProfileUrl } from "../urls";
 
 //Dates are ten characters long in the following format:
@@ -123,3 +123,44 @@ export const getUserProfileGifts = ($userProfile: UserProfile) => {
       .slice(GIFTS_PREFIX.length, -STAT_SUFFIX.length) || "0"
   );
 };
+
+/**
+ * Reads the total page count from the pagination control of a works listing.
+ *
+ * Returns 1 when no numbered pagination entry can be parsed.
+ */
+export const getTotalPages = ($page: UserWorksPage) => {
+  // The last numbered entry is the <li> directly before the "next" item.
+  // `.first()` avoids concatenating the text of several matches if the
+  // pagination control is rendered more than once on the page.
+  const lastNumberPagination = $page(".pagination li:has(+ .next)").first();
+  const totalPages = parseInt(lastNumberPagination.text(), 10);
+
+  return Number.isNaN(totalPages) ? 1 : totalPages;
+};
+
+// Each count getter below keeps only the digits of a navigation item's text.
+export const getWorkCount = ($page: UserWorksPage) => {
+  const worksNavItem = $page(".navigation.actions:nth-child(2) li:first-child");
+  return parseInt(worksNavItem.text().replaceAll(/\D/g, ""), 10);
+};
+
+export const getSeriesCount = ($page: UserWorksPage) => {
+  const seriesNavItem = $page(".navigation.actions:nth-child(2) li:nth-child(3)");
+  return parseInt(seriesNavItem.text().replaceAll(/\D/g, ""), 10);
+};
+
+export const getBookmarksCount = ($page: UserWorksPage) => {
+  const bookmarksNavItem = $page(".navigation.actions:nth-child(2) li:nth-child(4)");
+  return parseInt(bookmarksNavItem.text().replaceAll(/\D/g, ""), 10);
+};
+
+export const getCollectionsCount = ($page: UserWorksPage) => {
+  const collectionsNavItem = $page(".navigation.actions:nth-child(2) li:last-child");
+  return parseInt(collectionsNavItem.text().replaceAll(/\D/g, ""), 10);
+};
+
+export const getGiftsCount = ($page: UserWorksPage) => {
+  const giftsNavItem = $page(".navigation.actions:last-child li:last-child");
+  return parseInt(giftsNavItem.text().replaceAll(/\D/g, ""), 10);
+};
diff --git a/src/users/index.ts b/src/users/index.ts
index f13c475..47b986a 100644
--- a/src/users/index.ts
+++ b/src/users/index.ts
@@ -1,4 +1,9 @@
 import {
+  getBookmarksCount,
+  getCollectionsCount,
+  getGiftsCount,
+  getSeriesCount,
+  getTotalPages,
   getUserProfileBio,
   getUserProfileBirthday,
   getUserProfileBookmarks,
@@ -13,11 +18,12 @@ import {
   getUserProfilePseuds,
   getUserProfileSeries,
   getUserProfileWorks,
+  getWorkCount,
 } from "./getters";
 
-import { User } from "types/entities";
+import { User, UserWorks, WorkPreview } from "types/entities";
 import { getUserProfileUrl } from "../urls";
-import { loadUserProfilePage } from "../page-loaders";
+import { loadUserProfilePage, loadUserWorksList, loadWorkPage, UserWorksPage } from "../page-loaders";
 
 export const getUser = async ({
   username,
@@ -46,3 +52,77 @@ export const getUser = async ({
     bioHtml: getUserProfileBio(profilePage),
   };
 };
+
+/**
+ * Extracts a preview object for every work blurb on a user's works page.
+ *
+ * NOTE(review): the keys produced here are flat (`kudos`, `hits`, ...) and do
+ * not match the nested shape declared by `WorkPreview`; the result relies on
+ * that interface's index signature until the mapping is completed.
+ */
+const parseUserWorksIntoObject = ($userWorks: UserWorksPage) => {
+  const itemSelector = ".index.work.group > li";
+  const selectors = {
+    kudos: ".kudos + .kudos a",
+    comments: ".comments + .comments a",
+    chapters: ".chapters + .chapters a",
+    words: ".words + .words",
+    hits: ".hits + .hits",
+    bookmarks: ".bookmarks + .bookmarks a",
+    title: ".heading a:first-child",
+    fandom: ".fandoms a",
+    category: ".category .text",
+    rating: ".rating .text",
+    warnings: ".warnings .tag",
+    complete: ".iswip .text",
+    datetime: ".header.module .datetime",
+  };
+  const numberKeys = ["kudos", "comments", "chapters", "words", "hits", "bookmarks"];
+  const works: WorkPreview[] = [];
+  // $userWorks(selector).map returns a Cheerio object rather than an Array,
+  // so the previews are collected by hand.
+  $userWorks(itemSelector).each((_i, el) => {
+    const data = {} as WorkPreview;
+    const $item = $userWorks(el);
+    for (const [key, selector] of Object.entries(selectors)) {
+      const text = $item.find(selector).text();
+      // Drop thousands separators first: parseInt("1,234", 10) yields 1.
+      data[key] = numberKeys.includes(key)
+        ? parseInt(text.replaceAll(",", ""), 10)
+        : text;
+    }
+    works.push(data);
+  });
+
+  return works;
+};
+
+/**
+ * Loads one page of a user's works listing and returns its works together
+ * with the pagination info and the counts shown in the page navigation.
+ */
+export const getUserWorks = async ({
+  username,
+  page = 0,
+}: {
+  username: string;
+  page?: number;
+}): Promise<UserWorks> => {
+  const worksPage = await loadUserWorksList({ username, page });
+  // TODO: follow the pagination and aggregate the remaining pages.
+  return {
+    username,
+    pageInfo: {
+      currentPage: page,
+      totalPages: getTotalPages(worksPage),
+    },
+    counts: {
+      works: getWorkCount(worksPage),
+      series: getSeriesCount(worksPage),
+      bookmarks: getBookmarksCount(worksPage),
+      collections: getCollectionsCount(worksPage),
+      gifts: getGiftsCount(worksPage),
+    },
+    worksInPage: parseUserWorksIntoObject(worksPage),
+  };
+};
diff --git a/tests/user.test.ts b/tests/user.test.ts
index 4238a4e..0778c95 100644
--- a/tests/user.test.ts
+++ b/tests/user.test.ts
@@ -1,4 +1,4 @@
-import { getUser } from "src/index";
+import { getUser, getUserWorks } from "src/index";
 import { User } from "types/entities";
 
 //NOTE: Some of these tests may fail if the referenced user has updated their profile!
@@ -44,4 +44,12 @@ describe("Fetches id data.", () => {
       header: "Yes, it's really spelled with a Z",
     } satisfies Partial<User>);
   });
+
+  test("Fetches user works list", async () => {
+    const works = await getUserWorks({
+      username: "franzeska",
+    });
+    expect(works.username).toBe("franzeska");
+    expect(Array.isArray(works.worksInPage)).toBe(true);
+  });
 });
diff --git a/tsconfig.json b/tsconfig.json
index 4089157..0cab153 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -8,7 +8,8 @@
       "src/*": ["./src/*"],
       "types/*": ["./types/*"]
     },
-    "resolveJsonModule": true
+    "resolveJsonModule": true,
+    "lib": ["es2022"]
   },
   "include": ["**/*.ts"],
   "exclude": ["node_modules/"]
diff --git a/types/entities.ts b/types/entities.ts
index 0b3ee43..85459f0 100644
--- a/types/entities.ts
+++ b/types/entities.ts
@@ -123,14 +123,10 @@ export interface Author {
   anonymous: boolean;
 }
 
-export interface WorkSummary {
+export interface WorkPreview extends Record<string, unknown> {
   id: string;
   title: string;
   category: WorkCategory[] | null;
-  // Date in ISO format. See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/toISOString
-  // Note that AO3 doesn't publish the actual time of publish, just the date.
-  publishedAt: string;
-  updatedAt: string | null;
   // TODO: should this be in HTML?
   summary: string | null;
   rating: WorkRatings;
@@ -144,30 +140,39 @@ export interface WorkSummary {
     relationships: string[];
     additional: string[];
   };
+  language: string;
+  words: number;
+  complete: boolean;
+  series: BasicSeries[];
+  stats: {
+    bookmarks: number;
+    comments: number;
+    kudos: number;
+    hits: number;
+  };
+  locked: false;
   // If the author is anonymous this array will contain a single
   // entry whose "anonymous" property is "true".
   authors: Author[];
-  language: string;
-  words: number;
   chapters: {
     published: number;
     total: number | null;
   };
+  // Date in ISO format. See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/toISOString
+  // Note that AO3 doesn't publish the actual time of publish, just the date.
+  updatedAt: string | null;
+}
+
+export interface WorkSummary extends WorkPreview {
+  // Date in ISO format. See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Date/toISOString
+  // Note that AO3 doesn't publish the actual time of publish, just the date.
+  publishedAt: string;
   chapterInfo: {
     id: string;
     index: number;
     name: string | null;
     summary: string | null;
   } | null;
-  series: BasicSeries[];
-  complete: boolean;
-  stats: {
-    bookmarks: number;
-    comments: number;
-    kudos: number;
-    hits: number;
-  };
-  locked: false;
 }
 
 export interface LockedWorkSummary {
@@ -175,6 +180,25 @@ export interface LockedWorkSummary {
   locked: true;
 }
 
+
+export interface UserWorks {
+  username: string;
+  // very unsure about name
+  counts: {
+    works: number;
+    series: number;
+    bookmarks: number;
+    collections: number;
+    gifts: number;
+  };
+  pageInfo: {
+    currentPage: number;
+    totalPages: number;
+  };
+  worksInPage: WorkPreview[];
+}
+
+
 export interface Chapter {
   id: string;
   workId: string;
@@ -183,3 +207,4 @@ export interface Chapter {
   publishedAt: string;
   url: string;
 }
+