From 1edc9f0466affdcb272f0de842a928eb69a6ab9e Mon Sep 17 00:00:00 2001 From: Alexandre Stahmer Date: Sun, 20 Aug 2023 15:42:57 +0200 Subject: [PATCH 1/2] feat: summarize cool links youtube videos --- package.json | 3 ++- pnpm-lock.yaml | 21 +++++++++++++++++++-- src/cool-links-management.ts | 16 ++++++++++++++-- src/summarize-cool-videos.ts | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 5 deletions(-) create mode 100644 src/summarize-cool-videos.ts diff --git a/package.json b/package.json index 1bf377e1..58276214 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,8 @@ "env-var": "7.3.1", "keyv": "4.5.3", "open-graph-scraper": "6.2.2", - "param-case": "3.0.4" + "param-case": "3.0.4", + "playwright": "1.37.1" }, "devDependencies": { "@types/node": "20.4.10", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d0009e79..fe1ce8a0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -23,6 +23,9 @@ dependencies: param-case: specifier: 3.0.4 version: 3.0.4 + playwright: + specifier: 1.37.1 + version: 1.37.1 devDependencies: '@types/node': @@ -75,7 +78,7 @@ devDependencies: version: 5.1.6 vitest: specifier: 0.34.1 - version: 0.34.1 + version: 0.34.1(playwright@1.37.1) packages: @@ -2813,6 +2816,19 @@ packages: pathe: 1.1.1 dev: true + /playwright-core@1.37.1: + resolution: {integrity: sha512-17EuQxlSIYCmEMwzMqusJ2ztDgJePjrbttaefgdsiqeLWidjYz9BxXaTaZWxH1J95SHGk6tjE+dwgWILJoUZfA==} + engines: {node: '>=16'} + hasBin: true + + /playwright@1.37.1: + resolution: {integrity: sha512-bgUXRrQKhT48zHdxDYQTpf//0xDfDd5hLeEhjuSw8rXEGoT9YeElpfvs/izonTNY21IQZ7d3s22jLxYaAnubbQ==} + engines: {node: '>=16'} + hasBin: true + requiresBuild: true + dependencies: + playwright-core: 1.37.1 + /postcss-load-config@4.0.1: resolution: {integrity: sha512-vEJIc8RdiBRu3oRAI0ymerOn+7rPuMvRXslTvZUKZonDHFIczxztIyJ1urxM1x9JXEikvpWWTUUqal5j/8QgvA==} engines: {node: '>= 14'} @@ -3407,7 +3423,7 @@ packages: fsevents: 2.3.2 dev: true - /vitest@0.34.1: + /vitest@0.34.1(playwright@1.37.1): resolution: {integrity: sha512-G1PzuBEq9A75XSU88yO5G4vPT20UovbC/2osB2KEuV/FisSIIsw7m5y2xMdB7RsAGHAfg2lPmp2qKr3KWliVlQ==} engines: {node: '>=v14.18.0'} hasBin: true @@ -3455,6 +3471,7 @@ packages: magic-string: 0.30.2 pathe: 1.1.1 picocolors: 1.0.0 + playwright: 1.37.1 std-env: 3.3.3 strip-literal: 1.2.0 tinybench: 2.5.0 diff --git a/src/cool-links-management.ts b/src/cool-links-management.ts index dacf7ee8..0e42864f 100644 --- a/src/cool-links-management.ts +++ b/src/cool-links-management.ts @@ -1,5 +1,6 @@ import { type Message, ThreadAutoArchiveDuration } from 'discord.js'; import ogs from 'open-graph-scraper'; +import { getVideoSummary } from './summarize-cool-videos'; const getThreadNameFromOpenGraph = async (url: string): Promise => { try { @@ -24,6 +25,8 @@ const getThreadNameFromOpenGraph = async (url: string): Promise = return null; }; +const youtubeUrlRegex = new RegExp('^(https?)?(://)?(www.)?(m.)?((youtube.com)|(youtu.be))'); + export const coolLinksManagement = async (message: Message) => { const urlRegex = /(((https?:\/\/)|(www\.))[^\s]+)/g; const detectedURLs = message.content.match(urlRegex); @@ -36,9 +39,18 @@ export const coolLinksManagement = async (message: Message) => { await message.react('✅'); await message.react('❌'); - const threadName = await getThreadNameFromOpenGraph(detectedURLs[0]); - await message.startThread({ + const url = detectedURLs[0]; + const threadName = await getThreadNameFromOpenGraph(url); + const thread = await message.startThread({ name: threadName ?? message.content, autoArchiveDuration: ThreadAutoArchiveDuration.ThreeDays, }); + if (thread.joinable) await thread.join(); + + if (youtubeUrlRegex.test(url)) { + const summary = await getVideoSummary(url); + if (!summary) return; + + await thread.send(summary); + } }; diff --git a/src/summarize-cool-videos.ts b/src/summarize-cool-videos.ts new file mode 100644 index 00000000..763079ba --- /dev/null +++ b/src/summarize-cool-videos.ts @@ -0,0 +1,35 @@ +import { chromium } from 'playwright'; + +const summarizeUrl = 'https://www.summarize.tech/'; + +export const getVideoSummary = async (videoUrl: string) => { + const browser = await chromium.launch(); + const context = await browser.newContext(); + const page = await context.newPage(); + + await page.goto(summarizeUrl + videoUrl.replace('https://', '').replace('http://', '')); + + const getSummary = async () => { + const startTime = await page.getByRole('link', { name: '00:00:00' }).elementHandle(); + if (!startTime) return; + + const summaryEl = await startTime.evaluateHandle( + (node) => node.parentElement?.nextElementSibling, + ); + if (!summaryEl) return; + + return summaryEl.asElement()?.innerText(); + }; + const summary = await getSummary(); + if (!summary) return; + + // Teardown + await context.close(); + await browser.close(); + + return summary; +}; + +// Example usage: +// const videoUrl = 'https://www.youtube.com/watch?v=ruUlK6zRwS8'; +// const summary = await getVideoSummary(videoUrl); From 5600f409dfa9934f16a6cac3cccc79a262387e06 Mon Sep 17 00:00:00 2001 From: Alexandre Stahmer Date: Sun, 20 Aug 2023 16:32:33 +0200 Subject: [PATCH 2/2] refactor: rm playwright, just use fetch --- package.json | 3 +- pnpm-lock.yaml | 21 ++------------ src/summarize-cool-videos.ts | 56 ++++++++++++++++++++---------------- 3 files changed, 35 insertions(+), 45 deletions(-) diff --git a/package.json b/package.json index 58276214..1bf377e1 100644 --- a/package.json +++ b/package.json @@ -18,8 +18,7 @@ "env-var": "7.3.1", "keyv": "4.5.3", "open-graph-scraper": "6.2.2", - "param-case": "3.0.4", - "playwright": "1.37.1" + "param-case": "3.0.4" }, "devDependencies": { "@types/node": "20.4.10", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index fe1ce8a0..d0009e79 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -23,9 +23,6 @@ dependencies: param-case: specifier: 3.0.4 version: 3.0.4 - playwright: - specifier: 1.37.1 - version: 1.37.1 devDependencies: '@types/node': @@ -78,7 +75,7 @@ devDependencies: version: 5.1.6 vitest: specifier: 0.34.1 - version: 0.34.1(playwright@1.37.1) + version: 0.34.1 packages: @@ -2816,19 +2813,6 @@ packages: pathe: 1.1.1 dev: true - /playwright-core@1.37.1: - resolution: {integrity: sha512-17EuQxlSIYCmEMwzMqusJ2ztDgJePjrbttaefgdsiqeLWidjYz9BxXaTaZWxH1J95SHGk6tjE+dwgWILJoUZfA==} - engines: {node: '>=16'} - hasBin: true - - /playwright@1.37.1: - resolution: {integrity: sha512-bgUXRrQKhT48zHdxDYQTpf//0xDfDd5hLeEhjuSw8rXEGoT9YeElpfvs/izonTNY21IQZ7d3s22jLxYaAnubbQ==} - engines: {node: '>=16'} - hasBin: true - requiresBuild: true - dependencies: - playwright-core: 1.37.1 - /postcss-load-config@4.0.1: resolution: {integrity: sha512-vEJIc8RdiBRu3oRAI0ymerOn+7rPuMvRXslTvZUKZonDHFIczxztIyJ1urxM1x9JXEikvpWWTUUqal5j/8QgvA==} engines: {node: '>= 14'} @@ -3423,7 +3407,7 @@ packages: fsevents: 2.3.2 dev: true - /vitest@0.34.1(playwright@1.37.1): + /vitest@0.34.1: resolution: {integrity: sha512-G1PzuBEq9A75XSU88yO5G4vPT20UovbC/2osB2KEuV/FisSIIsw7m5y2xMdB7RsAGHAfg2lPmp2qKr3KWliVlQ==} engines: {node: '>=v14.18.0'} hasBin: true @@ -3471,7 +3455,6 @@ packages: magic-string: 0.30.2 pathe: 1.1.1 picocolors: 1.0.0 - playwright: 1.37.1 std-env: 3.3.3 strip-literal: 1.2.0 tinybench: 2.5.0 diff --git a/src/summarize-cool-videos.ts b/src/summarize-cool-videos.ts index 763079ba..a844747c 100644 --- a/src/summarize-cool-videos.ts +++ b/src/summarize-cool-videos.ts @@ -1,35 +1,43 @@ -import { chromium } from 'playwright'; - -const summarizeUrl = 'https://www.summarize.tech/'; +const baseUrl = 'https://www.summarize.tech/api/summary'; export const getVideoSummary = async (videoUrl: string) => { - const browser = await chromium.launch(); - const context = await browser.newContext(); - const page = await context.newPage(); - - await page.goto(summarizeUrl + videoUrl.replace('https://', '').replace('http://', '')); - - const getSummary = async () => { - const startTime = await page.getByRole('link', { name: '00:00:00' }).elementHandle(); - if (!startTime) return; - - const summaryEl = await startTime.evaluateHandle( - (node) => node.parentElement?.nextElementSibling, + const summary = await fetch(baseUrl, { + method: 'POST', + body: JSON.stringify({ url: videoUrl, deviceId: makeId(21) }), + headers: { 'content-type': 'application/json' }, + }) + .then((res) => res.json()) + .then((res) => + (Object.values((res as SummaryResponse).rollups) ?? []) + .map((chunk) => chunk.summary) + .join(' '), ); - if (!summaryEl) return; - return summaryEl.asElement()?.innerText(); - }; - const summary = await getSummary(); - if (!summary) return; + return summary; +}; + +const makeId = (length: number) => { + let result = ''; + const characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_'; + const charactersLength = characters.length; - // Teardown - await context.close(); - await browser.close(); + for (let i = 0; i < length; i++) + result += characters.charAt(Math.floor(Math.random() * charactersLength)); - return summary; + return result; }; // Example usage: // const videoUrl = 'https://www.youtube.com/watch?v=ruUlK6zRwS8'; // const summary = await getVideoSummary(videoUrl); +// console.log({ summary }); + +type SummaryResponse = { + rollups: Record; + title: string; +}; + +type SummaryChunk = { + children: Record; + summary: string; +};