0

Nick: fixes

This commit is contained in:
Nicolas 2024-04-17 12:44:23 -07:00
parent 650852cc5a
commit 08ed68ff55
5 changed files with 11 additions and 3 deletions

View File

@@ -82,6 +82,7 @@
"scrapingbee": "^1.7.4", "scrapingbee": "^1.7.4",
"stripe": "^12.2.0", "stripe": "^12.2.0",
"turndown": "^7.1.3", "turndown": "^7.1.3",
"turndown-plugin-gfm": "^1.0.2",
"typesense": "^1.5.4", "typesense": "^1.5.4",
"unstructured-client": "^0.9.4", "unstructured-client": "^0.9.4",
"uuid": "^9.0.1", "uuid": "^9.0.1",

View File

@@ -134,6 +134,9 @@ dependencies:
turndown: turndown:
specifier: ^7.1.3 specifier: ^7.1.3
version: 7.1.3 version: 7.1.3
turndown-plugin-gfm:
specifier: ^1.0.2
version: 1.0.2
typesense: typesense:
specifier: ^1.5.4 specifier: ^1.5.4
version: 1.7.2(@babel/runtime@7.24.0) version: 1.7.2(@babel/runtime@7.24.0)
@@ -5783,6 +5786,10 @@ packages:
resolution: {integrity: sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==} resolution: {integrity: sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==}
dev: false dev: false
/turndown-plugin-gfm@1.0.2:
resolution: {integrity: sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==}
dev: false
/turndown@7.1.3: /turndown@7.1.3:
resolution: {integrity: sha512-Z3/iJ6IWh8VBiACWQJaA5ulPQE5E1QwvBHj00uGzdQxdRnd8fh1DPqNOJqzQDu6DkOstORrtXzf/9adB+vMtEA==} resolution: {integrity: sha512-Z3/iJ6IWh8VBiACWQJaA5ulPQE5E1QwvBHj00uGzdQxdRnd8fh1DPqNOJqzQDu6DkOstORrtXzf/9adB+vMtEA==}
dependencies: dependencies:

View File

@@ -1,5 +1,6 @@
export function parseMarkdown(html: string) { export function parseMarkdown(html: string) {
var TurndownService = require("turndown"); var TurndownService = require("turndown");
var turndownPluginGfm = require("turndown-plugin-gfm");
const turndownService = new TurndownService(); const turndownService = new TurndownService();
turndownService.addRule("inlineLink", { turndownService.addRule("inlineLink", {
@@ -16,7 +17,8 @@ export function parseMarkdown(html: string) {
return "[" + content.trim() + "](" + href + title + ")\n"; return "[" + content.trim() + "](" + href + title + ")\n";
}, },
}); });
var gfm = turndownPluginGfm.gfm;
turndownService.use(gfm);
let markdownContent = turndownService.turndown(html); let markdownContent = turndownService.turndown(html);
// multiple line links // multiple line links

View File

@@ -134,7 +134,6 @@ export async function scrapSingleUrl(
break; break;
} }
let cleanedHtml = removeUnwantedElements(text); let cleanedHtml = removeUnwantedElements(text);
cleanedHtml = await parseTablesToMarkdown(cleanedHtml);
return [await parseMarkdown(cleanedHtml), text]; return [await parseMarkdown(cleanedHtml), text];
}; };

View File

@@ -24,7 +24,6 @@ export const parseTablesToMarkdown = async (html: string): Promise<string> => {
if (isTableEmpty) { if (isTableEmpty) {
markdownTable = ''; markdownTable = '';
} }
console.log({markdownTable})
replacements.push({ start, end, markdownTable }); replacements.push({ start, end, markdownTable });
}); });
} }