From 2eb81545fa50e8aee61c855bc00fe3f1625c41e2 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 17 Apr 2024 11:04:03 -0700 Subject: [PATCH] Update index.test.ts --- .../WebScraper/__tests__/index.test.ts | 166 +++++++++++++----- 1 file changed, 120 insertions(+), 46 deletions(-) diff --git a/apps/api/src/scraper/WebScraper/__tests__/index.test.ts b/apps/api/src/scraper/WebScraper/__tests__/index.test.ts index e060d16..49b3926 100644 --- a/apps/api/src/scraper/WebScraper/__tests__/index.test.ts +++ b/apps/api/src/scraper/WebScraper/__tests__/index.test.ts @@ -1,97 +1,171 @@ -import { WebScraperDataProvider } from '../index'; +import { WebScraperDataProvider } from "../index"; -describe('WebScraperDataProvider', () => { - describe('replaceImgPathsWithAbsolutePaths', () => { - it('should replace image paths with absolute paths', () => { +describe("WebScraperDataProvider", () => { + describe("replaceImgPathsWithAbsolutePaths", () => { + it("should replace image paths with absolute paths", () => { const webScraperDataProvider = new WebScraperDataProvider(); const documents = [ { - metadata: { sourceURL: 'https://example.com/page' }, - content: '![alt text](/image.png)', + metadata: { sourceURL: "https://example.com/page" }, + content: "![alt text](/image.png)", }, { - metadata: { sourceURL: 'https://example.com/another-page' }, - content: '![another alt text](./another-image.png)', + metadata: { sourceURL: "https://example.com/another-page" }, + content: "![another alt text](./another-image.png)", }, { - metadata: { sourceURL: 'https://example.com/data-image' }, - content: '![data image](data:image/png;base64,...)', - } + metadata: { sourceURL: "https://example.com/data-image" }, + content: "![data image](data:image/png;base64,...)", + }, ]; const expectedDocuments = [ { - metadata: { sourceURL: 'https://example.com/page' }, - content: '![alt text](https://example.com/image.png)', + metadata: { sourceURL: "https://example.com/page" }, + content: "![alt text](https://example.com/image.png)", }, { - metadata: { sourceURL: 'https://example.com/another-page' }, - content: '![another alt text](https://example.com/another-image.png)', + metadata: { sourceURL: "https://example.com/another-page" }, + content: "![another alt text](https://example.com/another-image.png)", }, { - metadata: { sourceURL: 'https://example.com/data-image' }, - content: '![data image](data:image/png;base64,...)', - } + metadata: { sourceURL: "https://example.com/data-image" }, + content: "![data image](data:image/png;base64,...)", + }, ]; - const result = webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); + const result = + webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); expect(result).toEqual(expectedDocuments); }); - it('should handle absolute URLs without modification', () => { + it("should handle absolute URLs without modification", () => { const webScraperDataProvider = new WebScraperDataProvider(); const documents = [ { - metadata: { sourceURL: 'https://example.com/page' }, - content: '![alt text](https://example.com/image.png)', + metadata: { sourceURL: "https://example.com/page" }, + content: "![alt text](https://example.com/image.png)", }, { - metadata: { sourceURL: 'https://example.com/another-page' }, - content: '![another alt text](http://anotherexample.com/another-image.png)', - } + metadata: { sourceURL: "https://example.com/another-page" }, + content: + "![another alt text](http://anotherexample.com/another-image.png)", + }, ]; const expectedDocuments = [ { - metadata: { sourceURL: 'https://example.com/page' }, - content: '![alt text](https://example.com/image.png)', + metadata: { sourceURL: "https://example.com/page" }, + content: "![alt text](https://example.com/image.png)", }, { - metadata: { sourceURL: 'https://example.com/another-page' }, - content: '![another alt text](http://anotherexample.com/another-image.png)', - } + metadata: { sourceURL: "https://example.com/another-page" }, + content: + "![another alt text](http://anotherexample.com/another-image.png)", + }, ]; - const result = webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); + const result = + webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); expect(result).toEqual(expectedDocuments); }); - it('should not replace non-image content within the documents', () => { + it("should not replace non-image content within the documents", () => { const webScraperDataProvider = new WebScraperDataProvider(); const documents = [ { - metadata: { sourceURL: 'https://example.com/page' }, - content: 'This is a test. ![alt text](/image.png) Here is a link: [Example](https://example.com).', + metadata: { sourceURL: "https://example.com/page" }, + content: + "This is a test. ![alt text](/image.png) Here is a link: [Example](https://example.com).", }, { - metadata: { sourceURL: 'https://example.com/another-page' }, - content: 'Another test. ![another alt text](./another-image.png) Here is some **bold text**.', - } + metadata: { sourceURL: "https://example.com/another-page" }, + content: + "Another test. ![another alt text](./another-image.png) Here is some **bold text**.", + }, ]; - + const expectedDocuments = [ { - metadata: { sourceURL: 'https://example.com/page' }, - content: 'This is a test. ![alt text](https://example.com/image.png) Here is a link: [Example](https://example.com).', + metadata: { sourceURL: "https://example.com/page" }, + content: + "This is a test. ![alt text](https://example.com/image.png) Here is a link: [Example](https://example.com).", }, { - metadata: { sourceURL: 'https://example.com/another-page' }, - content: 'Another test. ![another alt text](https://example.com/another-image.png) Here is some **bold text**.', - } + metadata: { sourceURL: "https://example.com/another-page" }, + content: + "Another test. ![another alt text](https://example.com/another-image.png) Here is some **bold text**.", + }, ]; - - const result = webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); + + const result = + webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); + expect(result).toEqual(expectedDocuments); + }); + it("should replace multiple image paths within the documents", () => { + const webScraperDataProvider = new WebScraperDataProvider(); + const documents = [ + { + metadata: { sourceURL: "https://example.com/page" }, + content: + "This is a test. ![alt text](/image1.png) Here is a link: [Example](https://example.com). ![alt text](/image2.png)", + }, + { + metadata: { sourceURL: "https://example.com/another-page" }, + content: + "Another test. ![another alt text](./another-image1.png) Here is some **bold text**. ![another alt text](./another-image2.png)", + }, + ]; + + const expectedDocuments = [ + { + metadata: { sourceURL: "https://example.com/page" }, + content: + "This is a test. ![alt text](https://example.com/image1.png) Here is a link: [Example](https://example.com). ![alt text](https://example.com/image2.png)", + }, + { + metadata: { sourceURL: "https://example.com/another-page" }, + content: + "Another test. ![another alt text](https://example.com/another-image1.png) Here is some **bold text**. ![another alt text](https://example.com/another-image2.png)", + }, + ]; + + const result = + webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); + expect(result).toEqual(expectedDocuments); + }); + + it("should replace image paths within the documents with complex URLs", () => { + const webScraperDataProvider = new WebScraperDataProvider(); + const documents = [ + { + metadata: { sourceURL: "https://example.com/page/subpage" }, + content: + "This is a test. ![alt text](/image1.png) Here is a link: [Example](https://example.com). ![alt text](/sub/image2.png)", + }, + { + metadata: { sourceURL: "https://example.com/another-page/subpage" }, + content: + "Another test. ![another alt text](/another-page/another-image1.png) Here is some **bold text**. ![another alt text](/another-page/sub/another-image2.png)", + }, + ]; + + const expectedDocuments = [ + { + metadata: { sourceURL: "https://example.com/page/subpage" }, + content: + "This is a test. ![alt text](https://example.com/image1.png) Here is a link: [Example](https://example.com). ![alt text](https://example.com/sub/image2.png)", + }, + { + metadata: { sourceURL: "https://example.com/another-page/subpage" }, + content: + "Another test. ![another alt text](https://example.com/another-page/another-image1.png) Here is some **bold text**. ![another alt text](https://example.com/another-page/sub/another-image2.png)", + }, + ]; + + const result = + webScraperDataProvider.replaceImgPathsWithAbsolutePaths(documents); expect(result).toEqual(expectedDocuments); }); }); -}); \ No newline at end of file +});