Merge pull request #132 from mendableai/feat/idempotency-key
[Feat] Added idempotency key to crawl route
This commit is contained in:
commit
4ce28593b2
@ -1,6 +1,7 @@
|
|||||||
import request from "supertest";
|
import request from "supertest";
|
||||||
import { app } from "../../index";
|
import { app } from "../../index";
|
||||||
import dotenv from "dotenv";
|
import dotenv from "dotenv";
|
||||||
|
import { v4 as uuidv4 } from "uuid";
|
||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
|
|
||||||
@ -175,6 +176,30 @@ describe("E2E Tests for API Routes", () => {
|
|||||||
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
|
/^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$/
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
// Replaying a crawl request with an idempotency key that was already used must be rejected.
it('should prevent duplicate requests using the same idempotency key', async () => {
  const idempotencyKey = uuidv4();

  // Both attempts send exactly the same request, including the key header.
  const submitCrawl = () =>
    request(TEST_URL)
      .post('/v0/crawl')
      .set("Authorization", `Bearer ${process.env.TEST_API_KEY}`)
      .set("Content-Type", "application/json")
      .set("x-idempotency-key", idempotencyKey)
      .send({ url: 'https://mendable.ai' });

  // The first use of the key is accepted.
  const initialAttempt = await submitCrawl();
  expect(initialAttempt.statusCode).toBe(200);

  // The replay with the same key is answered with a conflict.
  const replayedAttempt = await submitCrawl();
  expect(replayedAttempt.statusCode).toBe(409);
  expect(replayedAttempt.body.error).toBe('Idempotency key already used');
});
|
||||||
|
|
||||||
it("should return a successful response with a valid API key and valid includes option", async () => {
|
it("should return a successful response with a valid API key and valid includes option", async () => {
|
||||||
const crawlResponse = await request(TEST_URL)
|
const crawlResponse = await request(TEST_URL)
|
||||||
|
@ -7,6 +7,8 @@ import { RateLimiterMode } from "../../src/types";
|
|||||||
import { addWebScraperJob } from "../../src/services/queue-jobs";
|
import { addWebScraperJob } from "../../src/services/queue-jobs";
|
||||||
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
|
import { isUrlBlocked } from "../../src/scraper/WebScraper/utils/blocklist";
|
||||||
import { logCrawl } from "../../src/services/logging/crawl_log";
|
import { logCrawl } from "../../src/services/logging/crawl_log";
|
||||||
|
import { validateIdempotencyKey } from "../../src/services/idempotency/validate";
|
||||||
|
import { createIdempotencyKey } from "../../src/services/idempotency/create";
|
||||||
|
|
||||||
export async function crawlController(req: Request, res: Response) {
|
export async function crawlController(req: Request, res: Response) {
|
||||||
try {
|
try {
|
||||||
@ -19,6 +21,19 @@ export async function crawlController(req: Request, res: Response) {
|
|||||||
return res.status(status).json({ error });
|
return res.status(status).json({ error });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Idempotency guard: only applies when the client sent an `x-idempotency-key` header.
if (req.headers["x-idempotency-key"]) {
  const isIdempotencyValid = await validateIdempotencyKey(req);
  if (!isIdempotencyValid) {
    return res.status(409).json({ error: "Idempotency key already used" });
  }
  try {
    // createIdempotencyKey is async: without `await` its rejection would escape
    // this try/catch as an unhandled promise rejection, and the crawl would
    // continue before the key has been stored.
    await createIdempotencyKey(req);
  } catch (error) {
    console.error(error);
    // The thrown value is not guaranteed to be an Error instance (the helper
    // rethrows the client's error object), so read `message` defensively.
    const message =
      typeof error === "object" && error !== null && "message" in error
        ? String((error as { message: unknown }).message)
        : String(error);
    return res.status(500).json({ error: message });
  }
}
|
||||||
|
|
||||||
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
||||||
await checkTeamCredits(team_id, 1);
|
await checkTeamCredits(team_id, 1);
|
||||||
if (!creditsCheckSuccess) {
|
if (!creditsCheckSuccess) {
|
||||||
|
22
apps/api/src/services/idempotency/create.ts
Normal file
22
apps/api/src/services/idempotency/create.ts
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
import { Request } from "express";
|
||||||
|
import { supabase_service } from "../supabase";
|
||||||
|
|
||||||
|
/**
 * Stores the request's `x-idempotency-key` header value in the
 * `idempotency_keys` table.
 *
 * @param req - Incoming Express request; only `req.headers` is read.
 * @returns The key that was inserted.
 * @throws Error when the header is absent, empty, or not a single string value.
 * @throws The error object reported by the Supabase client when the insert fails.
 */
export async function createIdempotencyKey(
  req: Request,
): Promise<string> {
  // Header values are typed `string | string[] | undefined`; narrow at runtime
  // instead of asserting, so only a non-empty string ever reaches the insert.
  const idempotencyKey = req.headers['x-idempotency-key'];
  if (typeof idempotencyKey !== 'string' || idempotencyKey.length === 0) {
    throw new Error("No idempotency key provided in the request headers.");
  }

  const { error } = await supabase_service
    .from("idempotency_keys")
    .insert({ key: idempotencyKey });

  if (error) {
    console.error("Failed to create idempotency key:", error);
    throw error;
  }

  return idempotencyKey;
}
|
32
apps/api/src/services/idempotency/validate.ts
Normal file
32
apps/api/src/services/idempotency/validate.ts
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
import { Request } from "express";
|
||||||
|
import { supabase_service } from "../supabase";
|
||||||
|
import { validate as isUuid } from 'uuid';
|
||||||
|
|
||||||
|
/**
 * Decides whether a request may proceed with respect to its
 * `x-idempotency-key` header.
 *
 * @param req - Incoming Express request; only `req.headers` is read.
 * @returns `true` when no key was sent, or when the key is a valid UUID with no
 *   matching row in `idempotency_keys`; `false` when the key is malformed, is
 *   already stored, or the lookup itself failed.
 */
export async function validateIdempotencyKey(
  req: Request,
): Promise<boolean> {
  const idempotencyKey = req.headers['x-idempotency-key'];
  if (!idempotencyKey) {
    // The key is optional for now: a request without one is not rejected.
    return true;
  }
  // The typeof guard narrows `string | string[]` before the UUID check; a
  // non-string value is treated like any other malformed key.
  if (typeof idempotencyKey !== 'string' || !isUuid(idempotencyKey)) {
    console.error("Invalid idempotency key provided in the request headers.");
    return false;
  }

  const { data, error } = await supabase_service
    .from("idempotency_keys")
    .select("key")
    .eq("key", idempotencyKey);

  if (error) {
    // Fail closed: if the lookup failed we cannot tell whether the key was
    // already used, so do not report it as fresh.
    console.error("Failed to look up idempotency key:", error);
    return false;
  }

  // NOTE(review): this check and the later insert are separate round trips, so
  // two concurrent requests could both pass here — presumably a unique
  // constraint on `key` is the real guard; confirm against the table schema.
  return !data || data.length === 0;
}
|
@ -1,3 +1,4 @@
|
|||||||
|
import { v4 as uuidv4 } from 'uuid';
|
||||||
import FirecrawlApp from '@mendable/firecrawl-js';
|
import FirecrawlApp from '@mendable/firecrawl-js';
|
||||||
import { z } from "zod";
|
import { z } from "zod";
|
||||||
|
|
||||||
@ -8,7 +9,8 @@ const scrapeResult = await app.scrapeUrl('firecrawl.dev');
|
|||||||
console.log(scrapeResult.data.content)
|
console.log(scrapeResult.data.content)
|
||||||
|
|
||||||
// Crawl a website:
|
// Crawl a website:
|
||||||
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false);
|
const idempotencyKey = uuidv4(); // optional
|
||||||
|
const crawlResult = await app.crawlUrl('mendable.ai', {crawlerOptions: {excludes: ['blog/*'], limit: 5}}, false, 2, idempotencyKey);
|
||||||
console.log(crawlResult)
|
console.log(crawlResult)
|
||||||
|
|
||||||
const jobId = await crawlResult['jobId'];
|
const jobId = await crawlResult['jobId'];
|
||||||
|
@ -110,11 +110,12 @@ export default class FirecrawlApp {
|
|||||||
* @param {Params | null} params - Additional parameters for the crawl request.
|
* @param {Params | null} params - Additional parameters for the crawl request.
|
||||||
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
||||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
* @param {number} timeout - Timeout in seconds for job status checks.
|
||||||
|
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
||||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
||||||
*/
|
*/
|
||||||
crawlUrl(url_1) {
|
crawlUrl(url_1) {
|
||||||
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2) {
|
return __awaiter(this, arguments, void 0, function* (url, params = null, waitUntilDone = true, timeout = 2, idempotencyKey) {
|
||||||
const headers = this.prepareHeaders();
|
const headers = this.prepareHeaders(idempotencyKey);
|
||||||
let jsonData = { url };
|
let jsonData = { url };
|
||||||
if (params) {
|
if (params) {
|
||||||
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
jsonData = Object.assign(Object.assign({}, jsonData), params);
|
||||||
@ -172,11 +173,8 @@ export default class FirecrawlApp {
|
|||||||
* Prepares the headers for an API request.
|
* Prepares the headers for an API request.
|
||||||
* @returns {AxiosRequestHeaders} The prepared headers.
|
* @returns {AxiosRequestHeaders} The prepared headers.
|
||||||
*/
|
*/
|
||||||
prepareHeaders() {
|
prepareHeaders(idempotencyKey) {
|
||||||
return {
|
return Object.assign({ 'Content-Type': 'application/json', 'Authorization': `Bearer ${this.apiKey}` }, (idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}));
|
||||||
"Content-Type": "application/json",
|
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Sends a POST request to the specified URL.
|
* Sends a POST request to the specified URL.
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@mendable/firecrawl-js",
|
"name": "@mendable/firecrawl-js",
|
||||||
"version": "0.0.21",
|
"version": "0.0.22",
|
||||||
"description": "JavaScript SDK for Firecrawl API",
|
"description": "JavaScript SDK for Firecrawl API",
|
||||||
"main": "build/index.js",
|
"main": "build/index.js",
|
||||||
"types": "types/index.d.ts",
|
"types": "types/index.d.ts",
|
||||||
|
@ -173,15 +173,17 @@ export default class FirecrawlApp {
|
|||||||
* @param {Params | null} params - Additional parameters for the crawl request.
|
* @param {Params | null} params - Additional parameters for the crawl request.
|
||||||
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
||||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
* @param {number} timeout - Timeout in seconds for job status checks.
|
||||||
|
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
||||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
||||||
*/
|
*/
|
||||||
async crawlUrl(
|
async crawlUrl(
|
||||||
url: string,
|
url: string,
|
||||||
params: Params | null = null,
|
params: Params | null = null,
|
||||||
waitUntilDone: boolean = true,
|
waitUntilDone: boolean = true,
|
||||||
timeout: number = 2
|
timeout: number = 2,
|
||||||
|
idempotencyKey?: string
|
||||||
): Promise<CrawlResponse | any> {
|
): Promise<CrawlResponse | any> {
|
||||||
const headers = this.prepareHeaders();
|
const headers = this.prepareHeaders(idempotencyKey);
|
||||||
let jsonData: Params = { url };
|
let jsonData: Params = { url };
|
||||||
if (params) {
|
if (params) {
|
||||||
jsonData = { ...jsonData, ...params };
|
jsonData = { ...jsonData, ...params };
|
||||||
@ -240,11 +242,12 @@ export default class FirecrawlApp {
|
|||||||
* Prepares the headers for an API request.
|
* Prepares the headers for an API request.
|
||||||
* @returns {AxiosRequestHeaders} The prepared headers.
|
* @returns {AxiosRequestHeaders} The prepared headers.
|
||||||
*/
|
*/
|
||||||
prepareHeaders(): AxiosRequestHeaders {
|
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders {
|
||||||
return {
|
return {
|
||||||
"Content-Type": "application/json",
|
'Content-Type': 'application/json',
|
||||||
Authorization: `Bearer ${this.apiKey}`,
|
'Authorization': `Bearer ${this.apiKey}`,
|
||||||
} as AxiosRequestHeaders;
|
...(idempotencyKey ? { 'x-idempotency-key': idempotencyKey } : {}),
|
||||||
|
} as AxiosRequestHeaders & { 'x-idempotency-key'?: string };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
5
apps/js-sdk/firecrawl/types/index.d.ts
vendored
5
apps/js-sdk/firecrawl/types/index.d.ts
vendored
@ -82,9 +82,10 @@ export default class FirecrawlApp {
|
|||||||
* @param {Params | null} params - Additional parameters for the crawl request.
|
* @param {Params | null} params - Additional parameters for the crawl request.
|
||||||
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
* @param {boolean} waitUntilDone - Whether to wait for the crawl job to complete.
|
||||||
* @param {number} timeout - Timeout in seconds for job status checks.
|
* @param {number} timeout - Timeout in seconds for job status checks.
|
||||||
|
* @param {string} idempotencyKey - Optional idempotency key for the request.
|
||||||
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
* @returns {Promise<CrawlResponse | any>} The response from the crawl operation.
|
||||||
*/
|
*/
|
||||||
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number): Promise<CrawlResponse | any>;
|
crawlUrl(url: string, params?: Params | null, waitUntilDone?: boolean, timeout?: number, idempotencyKey?: string): Promise<CrawlResponse | any>;
|
||||||
/**
|
/**
|
||||||
* Checks the status of a crawl job using the Firecrawl API.
|
* Checks the status of a crawl job using the Firecrawl API.
|
||||||
* @param {string} jobId - The job ID of the crawl operation.
|
* @param {string} jobId - The job ID of the crawl operation.
|
||||||
@ -95,7 +96,7 @@ export default class FirecrawlApp {
|
|||||||
* Prepares the headers for an API request.
|
* Prepares the headers for an API request.
|
||||||
* @returns {AxiosRequestHeaders} The prepared headers.
|
* @returns {AxiosRequestHeaders} The prepared headers.
|
||||||
*/
|
*/
|
||||||
prepareHeaders(): AxiosRequestHeaders;
|
prepareHeaders(idempotencyKey?: string): AxiosRequestHeaders;
|
||||||
/**
|
/**
|
||||||
* Sends a POST request to the specified URL.
|
* Sends a POST request to the specified URL.
|
||||||
* @param {string} url - The URL to send the request to.
|
* @param {string} url - The URL to send the request to.
|
||||||
|
13
apps/js-sdk/package-lock.json
generated
13
apps/js-sdk/package-lock.json
generated
@ -11,6 +11,7 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@mendable/firecrawl-js": "^0.0.19",
|
"@mendable/firecrawl-js": "^0.0.19",
|
||||||
"axios": "^1.6.8",
|
"axios": "^1.6.8",
|
||||||
|
"uuid": "^9.0.1",
|
||||||
"ts-node": "^10.9.2",
|
"ts-node": "^10.9.2",
|
||||||
"typescript": "^5.4.5",
|
"typescript": "^5.4.5",
|
||||||
"zod": "^3.23.8"
|
"zod": "^3.23.8"
|
||||||
@ -771,6 +772,18 @@
|
|||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"zod": "^3.23.3"
|
"zod": "^3.23.3"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"node_modules/uuid": {
|
||||||
|
"version": "9.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
||||||
|
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
||||||
|
"funding": [
|
||||||
|
"https://github.com/sponsors/broofa",
|
||||||
|
"https://github.com/sponsors/ctavan"
|
||||||
|
],
|
||||||
|
"bin": {
|
||||||
|
"uuid": "dist/bin/uuid"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -11,8 +11,9 @@
|
|||||||
"author": "",
|
"author": "",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@mendable/firecrawl-js": "^0.0.19",
|
|
||||||
"axios": "^1.6.8",
|
"axios": "^1.6.8",
|
||||||
|
"uuid": "^9.0.1",
|
||||||
|
"@mendable/firecrawl-js": "^0.0.19",
|
||||||
"ts-node": "^10.9.2",
|
"ts-node": "^10.9.2",
|
||||||
"typescript": "^5.4.5",
|
"typescript": "^5.4.5",
|
||||||
"zod": "^3.23.8"
|
"zod": "^3.23.8"
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from firecrawl import FirecrawlApp
|
import uuid
|
||||||
|
from firecrawl.firecrawl import FirecrawlApp
|
||||||
|
|
||||||
app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
|
app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
|
||||||
|
|
||||||
@ -7,7 +8,8 @@ scrape_result = app.scrape_url('firecrawl.dev')
|
|||||||
print(scrape_result['markdown'])
|
print(scrape_result['markdown'])
|
||||||
|
|
||||||
# Crawl a website:
|
# Crawl a website:
|
||||||
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}})
|
idempotency_key = str(uuid.uuid4()) # optional idempotency key
|
||||||
|
crawl_result = app.crawl_url('mendable.ai', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, idempotency_key)
|
||||||
print(crawl_result)
|
print(crawl_result)
|
||||||
|
|
||||||
# LLM Extraction:
|
# LLM Extraction:
|
||||||
|
@ -127,7 +127,7 @@ class FirecrawlApp:
|
|||||||
else:
|
else:
|
||||||
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
||||||
|
|
||||||
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2):
|
def crawl_url(self, url, params=None, wait_until_done=True, timeout=2, idempotency_key=None):
|
||||||
"""
|
"""
|
||||||
Initiate a crawl job for the specified URL using the Firecrawl API.
|
Initiate a crawl job for the specified URL using the Firecrawl API.
|
||||||
|
|
||||||
@ -136,6 +136,7 @@ class FirecrawlApp:
|
|||||||
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
|
params (Optional[Dict[str, Any]]): Additional parameters for the crawl request.
|
||||||
wait_until_done (bool): Whether to wait until the crawl job is completed.
|
wait_until_done (bool): Whether to wait until the crawl job is completed.
|
||||||
timeout (int): Timeout between status checks when waiting for job completion.
|
timeout (int): Timeout between status checks when waiting for job completion.
|
||||||
|
idempotency_key (Optional[str]): A unique uuid key to ensure idempotency of requests.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Any: The crawl job ID or the crawl results if waiting until completion.
|
Any: The crawl job ID or the crawl results if waiting until completion.
|
||||||
@ -143,7 +144,7 @@ class FirecrawlApp:
|
|||||||
Raises:
|
Raises:
|
||||||
Exception: If the crawl job initiation or monitoring fails.
|
Exception: If the crawl job initiation or monitoring fails.
|
||||||
"""
|
"""
|
||||||
headers = self._prepare_headers()
|
headers = self._prepare_headers(idempotency_key)
|
||||||
json_data = {'url': url}
|
json_data = {'url': url}
|
||||||
if params:
|
if params:
|
||||||
json_data.update(params)
|
json_data.update(params)
|
||||||
@ -177,16 +178,26 @@ class FirecrawlApp:
|
|||||||
else:
|
else:
|
||||||
self._handle_error(response, 'check crawl status')
|
self._handle_error(response, 'check crawl status')
|
||||||
|
|
||||||
def _prepare_headers(self):
|
def _prepare_headers(self, idempotency_key=None):
|
||||||
"""
|
"""
|
||||||
Prepare the headers for API requests.
|
Prepare the headers for API requests.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
idempotency_key (Optional[str]): A unique key to ensure idempotency of requests.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict[str, str]: The headers including content type and authorization.
|
Dict[str, str]: The headers including content type, authorization, and optionally idempotency key.
|
||||||
"""
|
"""
|
||||||
|
if idempotency_key:
|
||||||
|
return {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': f'Bearer {self.api_key}',
|
||||||
|
'x-idempotency-key': idempotency_key
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'Authorization': f'Bearer {self.api_key}'
|
'Authorization': f'Bearer {self.api_key}',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
|
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
|
||||||
|
Loading…
Reference in New Issue
Block a user