From deefe65cbe115c15d4f9eeae16b87bf66c234167 Mon Sep 17 00:00:00 2001 From: Matt Joyce Date: Sat, 1 Jun 2024 19:16:56 +1000 Subject: [PATCH] Change the way the playwright response is parsed It was failing with a Type Error, but actually looked ok. This fixes the type error and stops the scraper fallback. --- apps/api/src/scraper/WebScraper/single_url.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/single_url.ts b/apps/api/src/scraper/WebScraper/single_url.ts index c7f9469..70b4aa6 100644 --- a/apps/api/src/scraper/WebScraper/single_url.ts +++ b/apps/api/src/scraper/WebScraper/single_url.ts @@ -146,12 +146,18 @@ export async function scrapWithPlaywright(url: string, waitFor: number = 0): Pro if (contentType && contentType.includes('application/pdf')) { return fetchAndProcessPdf(url); } else { - const data = await response.json(); - const html = data.content; - return html ?? ""; + const textData = await response.text(); + try { + const data = JSON.parse(textData); + const html = data.content; + return html ?? ""; + } catch (jsonError) { + console.error(`[Playwright] Error parsing JSON response for url: ${url} -> ${jsonError}`); + return ""; + } } } catch (error) { - console.error(`[Playwright][c] Error fetching url: ${url} -> ${error}`); + console.error(`[Playwright] Error fetching url: ${url} -> ${error}`); return ""; } }