0

Update index.test.ts

This commit is contained in:
Nicolas 2024-04-17 11:04:03 -07:00
parent 60245343c9
commit 2eb81545fa

View File

@@ -1,97 +1,171 @@
import { WebScraperDataProvider } from '../index'; import { WebScraperDataProvider } from "../index";
describe('WebScraperDataProvider', () => { describe("WebScraperDataProvider", () => {
describe('replaceImgPathsWithAbsolutePaths', () => { describe("replaceImgPathsWithAbsolutePaths", () => {
it('should replace image paths with absolute paths', () => { it("should replace image paths with absolute paths", () => {
const webScraperDataProvider = new WebScraperDataProvider(); const webScraperDataProvider = new WebScraperDataProvider();
const documents = [ const documents = [
{ {
metadata: { sourceURL: 'https://example.com/page' }, metadata: { sourceURL: "https://example.com/page" },
content: '![alt text](/image.png)', content: "![alt text](/image.png)",
}, },
{ {
metadata: { sourceURL: 'https://example.com/another-page' }, metadata: { sourceURL: "https://example.com/another-page" },
content: '![another alt text](./another-image.png)', content: "![another alt text](./another-image.png)",
}, },
{ {
metadata: { sourceURL: 'https://example.com/data-image' }, metadata: { sourceURL: "https://example.com/data-image" },
content: '![data image](data:image/png;base64,...)', content: "![data image](data:image/png;base64,...)",
} },
]; ];
const expectedDocuments = [ const expectedDocuments = [
{ {
metadata: { sourceURL: 'https://example.com/page' }, metadata: { sourceURL: "https://example.com/page" },
content: '![alt text](https://example.com/image.png)', content: "![alt text](https://example.com/image.png)",
}, },
{ {
metadata: { sourceURL: 'https://example.com/another-page' }, metadata: { sourceURL: "https://example.com/another-page" },
content: '![another alt text](https://example.com/another-image.png)', content: "![another alt text](https://example.com/another-image.png)",
}, },
{ {
metadata: { sourceURL: 'https://example.com/data-image' }, metadata: { sourceURL: "https://example.com/data-image" },
content: '![data image](data:image/png;base64,...)', content: "![data image](data:image/png;base64,...)",
} },
]; ];
const result = webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); const result =
webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
expect(result).toEqual(expectedDocuments); expect(result).toEqual(expectedDocuments);
}); });
it('should handle absolute URLs without modification', () => { it("should handle absolute URLs without modification", () => {
const webScraperDataProvider = new WebScraperDataProvider(); const webScraperDataProvider = new WebScraperDataProvider();
const documents = [ const documents = [
{ {
metadata: { sourceURL: 'https://example.com/page' }, metadata: { sourceURL: "https://example.com/page" },
content: '![alt text](https://example.com/image.png)', content: "![alt text](https://example.com/image.png)",
}, },
{ {
metadata: { sourceURL: 'https://example.com/another-page' }, metadata: { sourceURL: "https://example.com/another-page" },
content: '![another alt text](http://anotherexample.com/another-image.png)', content:
} "![another alt text](http://anotherexample.com/another-image.png)",
},
]; ];
const expectedDocuments = [ const expectedDocuments = [
{ {
metadata: { sourceURL: 'https://example.com/page' }, metadata: { sourceURL: "https://example.com/page" },
content: '![alt text](https://example.com/image.png)', content: "![alt text](https://example.com/image.png)",
}, },
{ {
metadata: { sourceURL: 'https://example.com/another-page' }, metadata: { sourceURL: "https://example.com/another-page" },
content: '![another alt text](http://anotherexample.com/another-image.png)', content:
} "![another alt text](http://anotherexample.com/another-image.png)",
},
]; ];
const result = webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); const result =
webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
expect(result).toEqual(expectedDocuments); expect(result).toEqual(expectedDocuments);
}); });
it('should not replace non-image content within the documents', () => { it("should not replace non-image content within the documents", () => {
const webScraperDataProvider = new WebScraperDataProvider(); const webScraperDataProvider = new WebScraperDataProvider();
const documents = [ const documents = [
{ {
metadata: { sourceURL: 'https://example.com/page' }, metadata: { sourceURL: "https://example.com/page" },
content: 'This is a test. ![alt text](/image.png) Here is a link: [Example](https://example.com).', content:
"This is a test. ![alt text](/image.png) Here is a link: [Example](https://example.com).",
}, },
{ {
metadata: { sourceURL: 'https://example.com/another-page' }, metadata: { sourceURL: "https://example.com/another-page" },
content: 'Another test. ![another alt text](./another-image.png) Here is some **bold text**.', content:
} "Another test. ![another alt text](./another-image.png) Here is some **bold text**.",
},
]; ];
const expectedDocuments = [ const expectedDocuments = [
{ {
metadata: { sourceURL: 'https://example.com/page' }, metadata: { sourceURL: "https://example.com/page" },
content: 'This is a test. ![alt text](https://example.com/image.png) Here is a link: [Example](https://example.com).', content:
"This is a test. ![alt text](https://example.com/image.png) Here is a link: [Example](https://example.com).",
}, },
{ {
metadata: { sourceURL: 'https://example.com/another-page' }, metadata: { sourceURL: "https://example.com/another-page" },
content: 'Another test. ![another alt text](https://example.com/another-image.png) Here is some **bold text**.', content:
} "Another test. ![another alt text](https://example.com/another-image.png) Here is some **bold text**.",
},
]; ];
const result = webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); const result =
webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
expect(result).toEqual(expectedDocuments);
});
// Checks that replaceImgPathsWithAbsolutePaths rewrites every relative image
// reference when a single document contains more than one image.
it("should replace multiple image paths within the documents", () => {
const webScraperDataProvider = new WebScraperDataProvider();
// Each input document holds two markdown images with relative paths
// (root-relative "/..." in the first, dot-relative "./..." in the second),
// mixed with non-image markdown: a regular link and bold text.
const documents = [
{
metadata: { sourceURL: "https://example.com/page" },
content:
"This is a test. ![alt text](/image1.png) Here is a link: [Example](https://example.com). ![alt text](/image2.png)",
},
{
metadata: { sourceURL: "https://example.com/another-page" },
content:
"Another test. ![another alt text](./another-image1.png) Here is some **bold text**. ![another alt text](./another-image2.png)",
},
];
// Both images in each document are expected to become absolute URLs under
// "https://example.com"; the link and the bold text are expected unchanged.
const expectedDocuments = [
{
metadata: { sourceURL: "https://example.com/page" },
content:
"This is a test. ![alt text](https://example.com/image1.png) Here is a link: [Example](https://example.com). ![alt text](https://example.com/image2.png)",
},
{
metadata: { sourceURL: "https://example.com/another-page" },
content:
"Another test. ![another alt text](https://example.com/another-image1.png) Here is some **bold text**. ![another alt text](https://example.com/another-image2.png)",
},
];
const result =
webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
// Deep equality: metadata must be preserved and content must match exactly.
expect(result).toEqual(expectedDocuments);
});
// Checks image-path rewriting when the source page URL has a nested path
// ("/page/subpage") and the image paths themselves contain subdirectories.
it("should replace image paths within the documents with complex URLs", () => {
const webScraperDataProvider = new WebScraperDataProvider();
// All image paths here are root-relative (start with "/"), some with
// several path segments.
const documents = [
{
metadata: { sourceURL: "https://example.com/page/subpage" },
content:
"This is a test. ![alt text](/image1.png) Here is a link: [Example](https://example.com). ![alt text](/sub/image2.png)",
},
{
metadata: { sourceURL: "https://example.com/another-page/subpage" },
content:
"Another test. ![another alt text](/another-page/another-image1.png) Here is some **bold text**. ![another alt text](/another-page/sub/another-image2.png)",
},
];
// Expected URLs are the image path appended to "https://example.com";
// the path segments of sourceURL ("/page/subpage") do not appear in them.
const expectedDocuments = [
{
metadata: { sourceURL: "https://example.com/page/subpage" },
content:
"This is a test. ![alt text](https://example.com/image1.png) Here is a link: [Example](https://example.com). ![alt text](https://example.com/sub/image2.png)",
},
{
metadata: { sourceURL: "https://example.com/another-page/subpage" },
content:
"Another test. ![another alt text](https://example.com/another-page/another-image1.png) Here is some **bold text**. ![another alt text](https://example.com/another-page/sub/another-image2.png)",
},
];
const result =
webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents);
expect(result).toEqual(expectedDocuments); expect(result).toEqual(expectedDocuments);
}); });
}); });
}); });