Merge pull request #301 from mendableai/bugfix/issue-291
[Bug] Fixed includeHTML to use cleanedHtml as response
This commit is contained in:
commit
32dde257a5
@@ -12,6 +12,7 @@
|
|||||||
"build": "tsc",
|
"build": "tsc",
|
||||||
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
|
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
|
||||||
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
|
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
|
||||||
|
"test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'",
|
||||||
"test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_full_withAuth)'",
|
"test:prod": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_full_withAuth)'",
|
||||||
"workers": "nodemon --exec ts-node src/services/queue-worker.ts",
|
"workers": "nodemon --exec ts-node src/services/queue-worker.ts",
|
||||||
"worker:production": "node dist/src/services/queue-worker.js",
|
"worker:production": "node dist/src/services/queue-worker.js",
|
||||||
|
@@ -401,7 +401,7 @@ export async function scrapSingleUrl(
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
text: await parseMarkdown(cleanedHtml),
|
text: await parseMarkdown(cleanedHtml),
|
||||||
html: scraperResponse.text,
|
html: cleanedHtml,
|
||||||
screenshot: scraperResponse.screenshot,
|
screenshot: scraperResponse.screenshot,
|
||||||
pageStatusCode: scraperResponse.metadata.pageStatusCode,
|
pageStatusCode: scraperResponse.metadata.pageStatusCode,
|
||||||
pageError: scraperResponse.metadata.pageError || undefined
|
pageError: scraperResponse.metadata.pageError || undefined
|
||||||
@@ -428,7 +428,7 @@ export async function scrapSingleUrl(
|
|||||||
if (existingHtml && existingHtml.trim().length >= 100) {
|
if (existingHtml && existingHtml.trim().length >= 100) {
|
||||||
let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
|
let cleanedHtml = removeUnwantedElements(existingHtml, pageOptions);
|
||||||
text = await parseMarkdown(cleanedHtml);
|
text = await parseMarkdown(cleanedHtml);
|
||||||
html = existingHtml;
|
html = cleanedHtml;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user