Nick: fixes
This commit is contained in:
parent
650852cc5a
commit
08ed68ff55
@ -82,6 +82,7 @@
|
||||
"scrapingbee": "^1.7.4",
|
||||
"stripe": "^12.2.0",
|
||||
"turndown": "^7.1.3",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"typesense": "^1.5.4",
|
||||
"unstructured-client": "^0.9.4",
|
||||
"uuid": "^9.0.1",
|
||||
|
@ -134,6 +134,9 @@ dependencies:
|
||||
turndown:
|
||||
specifier: ^7.1.3
|
||||
version: 7.1.3
|
||||
turndown-plugin-gfm:
|
||||
specifier: ^1.0.2
|
||||
version: 1.0.2
|
||||
typesense:
|
||||
specifier: ^1.5.4
|
||||
version: 1.7.2(@babel/runtime@7.24.0)
|
||||
@ -5783,6 +5786,10 @@ packages:
|
||||
resolution: {integrity: sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==}
|
||||
dev: false
|
||||
|
||||
/turndown-plugin-gfm@1.0.2:
|
||||
resolution: {integrity: sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==}
|
||||
dev: false
|
||||
|
||||
/turndown@7.1.3:
|
||||
resolution: {integrity: sha512-Z3/iJ6IWh8VBiACWQJaA5ulPQE5E1QwvBHj00uGzdQxdRnd8fh1DPqNOJqzQDu6DkOstORrtXzf/9adB+vMtEA==}
|
||||
dependencies:
|
||||
|
@ -1,5 +1,6 @@
|
||||
export function parseMarkdown(html: string) {
|
||||
var TurndownService = require("turndown");
|
||||
var turndownPluginGfm = require("turndown-plugin-gfm");
|
||||
|
||||
const turndownService = new TurndownService();
|
||||
turndownService.addRule("inlineLink", {
|
||||
@ -16,7 +17,8 @@ export function parseMarkdown(html: string) {
|
||||
return "[" + content.trim() + "](" + href + title + ")\n";
|
||||
},
|
||||
});
|
||||
|
||||
var gfm = turndownPluginGfm.gfm;
|
||||
turndownService.use(gfm);
|
||||
let markdownContent = turndownService.turndown(html);
|
||||
|
||||
// multiple line links
|
||||
|
@ -134,7 +134,6 @@ export async function scrapSingleUrl(
|
||||
break;
|
||||
}
|
||||
let cleanedHtml = removeUnwantedElements(text);
|
||||
cleanedHtml = await parseTablesToMarkdown(cleanedHtml);
|
||||
return [await parseMarkdown(cleanedHtml), text];
|
||||
};
|
||||
|
||||
|
@ -24,7 +24,6 @@ export const parseTablesToMarkdown = async (html: string): Promise<string> => {
|
||||
if (isTableEmpty) {
|
||||
markdownTable = '';
|
||||
}
|
||||
console.log({markdownTable})
|
||||
replacements.push({ start, end, markdownTable });
|
||||
});
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user