0

Nick: fixes

This commit is contained in:
Nicolas 2024-04-17 12:44:23 -07:00
parent 650852cc5a
commit 08ed68ff55
5 changed files with 11 additions and 3 deletions

View File

@@ -82,6 +82,7 @@
"scrapingbee": "^1.7.4",
"stripe": "^12.2.0",
"turndown": "^7.1.3",
"turndown-plugin-gfm": "^1.0.2",
"typesense": "^1.5.4",
"unstructured-client": "^0.9.4",
"uuid": "^9.0.1",

View File

@@ -134,6 +134,9 @@ dependencies:
turndown:
specifier: ^7.1.3
version: 7.1.3
turndown-plugin-gfm:
specifier: ^1.0.2
version: 1.0.2
typesense:
specifier: ^1.5.4
version: 1.7.2(@babel/runtime@7.24.0)
@@ -5783,6 +5786,10 @@ packages:
resolution: {integrity: sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==}
dev: false
/turndown-plugin-gfm@1.0.2:
resolution: {integrity: sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==}
dev: false
/turndown@7.1.3:
resolution: {integrity: sha512-Z3/iJ6IWh8VBiACWQJaA5ulPQE5E1QwvBHj00uGzdQxdRnd8fh1DPqNOJqzQDu6DkOstORrtXzf/9adB+vMtEA==}
dependencies:

View File

@@ -1,5 +1,6 @@
export function parseMarkdown(html: string) {
var TurndownService = require("turndown");
var turndownPluginGfm = require("turndown-plugin-gfm");
const turndownService = new TurndownService();
turndownService.addRule("inlineLink", {
@@ -16,7 +17,8 @@ export function parseMarkdown(html: string) {
return "[" + content.trim() + "](" + href + title + ")\n";
},
});
var gfm = turndownPluginGfm.gfm;
turndownService.use(gfm);
let markdownContent = turndownService.turndown(html);
// multiple line links

View File

@@ -134,7 +134,6 @@ export async function scrapSingleUrl(
break;
}
let cleanedHtml = removeUnwantedElements(text);
cleanedHtml = await parseTablesToMarkdown(cleanedHtml);
return [await parseMarkdown(cleanedHtml), text];
};

View File

@@ -24,7 +24,6 @@ export const parseTablesToMarkdown = async (html: string): Promise<string> => {
if (isTableEmpty) {
markdownTable = '';
}
console.log({markdownTable})
replacements.push({ start, end, markdownTable });
});
}