0

Merge pull request #301 from mendableai/bugfix/issue-291

[Bug] Fixed includeHTML to use cleanedHtml as response
This commit is contained in:
Nicolas 2024-06-18 16:26:55 -04:00 committed by GitHub
commit 32dde257a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 3 additions and 2 deletions

View File

@@ -12,6 +12,7 @@
"build": "tsc", "build": "tsc",
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'", "test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'", "test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
"test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'",
"test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_full_withAuth)'", "test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_full_withAuth)'",
"workers": "nodemon --exec ts-node src/services/queue-worker.ts", "workers": "nodemon --exec ts-node src/services/queue-worker.ts",
"worker:production": "node dist/src/services/queue-worker.js", "worker:production": "node dist/src/services/queue-worker.js",

View File

@@ -401,7 +401,7 @@ export async function scrapSingleUrl(
return { return {
text: await parseMarkdown(cleanedHtml), text: await parseMarkdown(cleanedHtml),
html: scraperResponse.text, html: cleanedHtml,
screenshot: scraperResponse.screenshot, screenshot: scraperResponse.screenshot,
pageStatusCode: scraperResponse.metadata.pageStatusCode, pageStatusCode: scraperResponse.metadata.pageStatusCode,
pageError: scraperResponse.metadata.pageError || undefined pageError: scraperResponse.metadata.pageError || undefined
@@ -428,7 +428,7 @@ export async function scrapSingleUrl(
if (existingHtml && existingHtml.trim().length >= 100) { if (existingHtml && existingHtml.trim().length >= 100) {
let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions); let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
text = await parseMarkdown(cleanedHtml); text = await parseMarkdown(cleanedHtml);
html = existingHtml; html = cleanedHtml;
break; break;
} }