From a12f4d96a2130259585ae81a462b670e984b5018 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 17 Apr 2024 00:19:16 -0400 Subject: [PATCH 1/9] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 101fd6c..b35adfe 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ Crawl and convert any website into LLM-ready markdown. Build by [Mendable.ai](ht ## How to use it? -We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.com/playground). You can also self host the backend if you'd like. +We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). You can also self host the backend if you'd like. -- [x] [API](https://firecrawl.com/playground) +- [x] [API](https://firecrawl.dev/playground) - [x] [Python SDK](https://github.com/mendableai/firecrawl/tree/main/apps/python-sdk) - [x] [Langchain Integration 🦜🔗](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/) - [x] [Llama Index Integration 🦙](https://docs.llamaindex.ai/en/stable/) @@ -25,7 +25,7 @@ Self-host. To self-host refer to guide [here](https://github.com/mendableai/fire ### API Key -To use the API, you need to sign up on [Firecrawl](https://firecrawl.com) and get an API key. +To use the API, you need to sign up on [Firecrawl](https://firecrawl.dev) and get an API key. ### Crawling From 23d391bb2b053616ad61f17d1e8aad2c04984935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20Sz=C3=A9pe?= Date: Wed, 17 Apr 2024 07:09:14 +0200 Subject: [PATCH 2/9] Delete .DS_Store --- .DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 820b40ca6d639737ccf8e2f95a9ffc4639ea2ef3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKO>fgc5S>i}aa1AY08)>ZxVAL36{zB3g&=xB959pvpkOCaYjC`lKd?{~$!GXO z{1Sc-Z)P_YB}Kg<1Uu5~+nJpi@4ijEULsPXdUl^^KtvvtvC~I$h447*hIFi_3sh3$ zoN9{mDt=a%8_~}3H!{F`*Q0xMMhR8))BaTjMouWC&s1V$P7B(nQ__@Tj~B2@*kg(? z7NPeF_M8gwW~eol$O!fn;|pjZabx(WwnBznuJTl#eZs#TSPkmLv?rOIlS|z_`XI4a z&Gva+%4~mG2j<_E{G7_TDf2khF?`?RJWa}KIQ*_UTiqKsdtT4m_TGi(x(TbOs>@MS zykYC5)@i)%NAYPgZzhA=k9Af>Nj5JeA(_n)^7eI-&2%%;bv7#{H#7rY-|J5XJIm$a z@zJh-^zdM{>o1QFhUgzYSgrcroxA&wUQ9lwi%fsP+#m}7B#~Pd*YFjci}id8i!9aI z8GP&bb(}-TfMdWha6JR|&EaidAL=3;1CD|Ji~)W>_)x~cVrx)d9cbhV0OZd|5%l>k z!8yLgz+!67?xCL)j-QdqYw3bkz4HoK#@YwT=PD zz-0!uj5*-*|H<#~|Cf`THeuq1o7ZY++^S|9oz%EEE2!HX1h Date: Wed, 17 Apr 2024 07:10:06 +0200 Subject: [PATCH 3/9] Update .gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 2a7c2a5..cbfb076 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,8 @@ +.DS_Store /node_modules/ /dist/ .env *.csv dump.rdb /mongo-data -apps/js-sdk/node_modules/ \ No newline at end of file +apps/js-sdk/node_modules/ From 51f94e9e413df0ba818385d5ae115658f0f56283 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20Sz=C3=A9pe?= Date: Wed, 17 Apr 2024 08:53:01 +0200 Subject: [PATCH 4/9] Delete apps/.DS_Store --- apps/.DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 apps/.DS_Store diff --git a/apps/.DS_Store b/apps/.DS_Store deleted file mode 100644 index 61e35a269e128530319b78e078816f56129b60ae..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKQA@)x5WZ~Fb&4=VVUGo073Ve-@uk%H2dwCW%4}`f%4###Ze@%?pY;#a<<576cACX9Wa z*?biHiHLRNhEXvpooa15ZJxIF&DKd{wr@^P8+Ejs$Fo_**xfrkyX^F%r;`qNn($j<~{v>tGXfq3zuzao5mncJuQ=lp40Ee6y*F*a8# zEuD>KKr`?Q4AA>vq7qsPbBW^Vz(KSCi1ZsF1a0aiD8^K1Da<9}4hoZ@h%%I^7K6!f zTsM`Ur7)K$!-1*hgQ+hw)uAx)bv)k`?!YXGdejVP1{N78%4wDA|Hb$H|6-7yX$CX{ zE5!gSwVk$&DXH4JlpNJs6ZHX=g#2=eA_NCjj)g&`xP~eO_e?SnErq#6TtU%40)hrT JXa@e2fp>Rfl}G>p From 11394ef23600915694fd64779d1ba3f2a9adc323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20Sz=C3=A9pe?= Date: Wed, 17 Apr 2024 08:53:12 +0200 Subject: [PATCH 5/9] Delete apps/api/src/.DS_Store --- apps/api/src/.DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 apps/api/src/.DS_Store diff --git a/apps/api/src/.DS_Store b/apps/api/src/.DS_Store deleted file mode 100644 index bbd93bc18e19db173a03e7b011153a92b1dd3766..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKOHRWu5S>Yb6x2$WRMijN=XX->JxyDdRozyw6tCIE-P_H_RoA;b;$1&>SC#h+FVlwZ>4~0Eb%EKY zUpc=#6w}><|9p8r*F6l@YTEs7j>Yx&==;m zgM$kCgh4TWIxxf|05FF+3g+=259q`I^a+C^ED+XIpr&#T&IjDbD_BfXqR|G)nJ{NGQqD`UVI_)`pUUe3!IZYlNF!Ocmp sb`o>?pD-xG0hNU1!VX}@Bjb+ From d628511b57bea46b8eb142e89e15e39e8fd8f8ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20Sz=C3=A9pe?= Date: Wed, 17 Apr 2024 08:53:23 +0200 Subject: [PATCH 6/9] Delete apps/playwright-service/.DS_Store --- apps/playwright-service/.DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 apps/playwright-service/.DS_Store diff --git a/apps/playwright-service/.DS_Store b/apps/playwright-service/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Wed, 17 Apr 2024 12:59:49 -0300 Subject: [PATCH 7/9] [bugfix] added normalized apikey to craw/status route --- apps/api/src/index.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 7198988..0e89c6a 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -256,11 +256,13 @@ app.get("/v0/crawl/status/:jobId", async (req, res) => { return res.status(401).json({ error: "Unauthorized: Token missing" }); } + const normalizedApi = parseApi(token); // make sure api key is valid, based on the api_keys table in supabase const { data, error } = await supabase_service .from("api_keys") .select("*") - .eq("key", token); + .eq("key", normalizedApi); + if (error || !data || data.length === 0) { return res.status(401).json({ error: "Unauthorized: Invalid token" }); } From 82ed9515f1f2e25578ce31d7b8ae71ecf9241e6e Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 17 Apr 2024 10:52:10 -0700 Subject: [PATCH 8/9] Update index.ts --- apps/api/src/index.ts | 47 +++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 0e89c6a..0663c5c 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -45,14 +45,14 @@ app.get("/test", async (req, res) => { res.send("Hello, world!"); }); -async function authenticateUser(req, res, mode?: string): Promise { +async function authenticateUser(req, res, mode?: string): Promise<{ success: boolean, team_id?: string, error?: string, status?: number }> { const authHeader = req.headers.authorization; if (!authHeader) { - return res.status(401).json({ error: "Unauthorized" }); + return { success: false, error: "Unauthorized", status: 401 }; } const token = authHeader.split(" ")[1]; // Extract the token from "Bearer " if (!token) { - return res.status(401).json({ error: "Unauthorized: Token missing" }); + return { success: false, error: "Unauthorized: Token missing", status: 401 }; } try { @@ -64,13 +64,11 @@ async function authenticateUser(req, res, mode?: string): Promise { ).consume(iptoken); } catch (rateLimiterRes) { console.error(rateLimiterRes); - return res.status(429).json({ - error: "Rate limit exceeded. Too many requests, try again in 1 minute.", - }); + return { success: false, error: "Rate limit exceeded. Too many requests, try again in 1 minute.", status: 429 }; } if (token === "this_is_just_a_preview_token" && mode === "scrape") { - return "preview"; + return { success: true, team_id: "preview" }; } const normalizedApi = parseApi(token); @@ -80,16 +78,19 @@ async function authenticateUser(req, res, mode?: string): Promise { .select("*") .eq("key", normalizedApi); if (error || !data || data.length === 0) { - return res.status(401).json({ error: "Unauthorized: Invalid token" }); + return { success: false, error: "Unauthorized: Invalid token", status: 401 }; } - return data[0].team_id; + return { success: true, team_id: data[0].team_id }; } app.post("/v0/scrape", async (req, res) => { try { // make sure to authenticate user first, Bearer - const team_id = await authenticateUser(req, res, "scrape"); + const { success, team_id, error, status } = await authenticateUser(req, res, "scrape"); + if (!success) { + return res.status(status).json({ error }); + } try { const { success: creditsCheckSuccess, message: creditsCheckMessage } = @@ -155,7 +156,10 @@ app.post("/v0/scrape", async (req, res) => { app.post("/v0/crawl", async (req, res) => { try { - const team_id = await authenticateUser(req, res); + const { success, team_id, error, status } = await authenticateUser(req, res, "scrape"); + if (!success) { + return res.status(status).json({ error }); + } const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1); @@ -247,24 +251,9 @@ app.post("/v0/crawlWebsitePreview", async (req, res) => { app.get("/v0/crawl/status/:jobId", async (req, res) => { try { - const authHeader = req.headers.authorization; - if (!authHeader) { - return res.status(401).json({ error: "Unauthorized" }); - } - const token = authHeader.split(" ")[1]; // Extract the token from "Bearer " - if (!token) { - return res.status(401).json({ error: "Unauthorized: Token missing" }); - } - - const normalizedApi = parseApi(token); - // make sure api key is valid, based on the api_keys table in supabase - const { data, error } = await supabase_service - .from("api_keys") - .select("*") - .eq("key", normalizedApi); - - if (error || !data || data.length === 0) { - return res.status(401).json({ error: "Unauthorized: Invalid token" }); + const { success, team_id, error, status } = await authenticateUser(req, res, "scrape"); + if (!success) { + return res.status(status).json({ error }); } const job = await getWebScraperQueue().getJob(req.params.jobId); if (!job) { From 417921ea339f5252bec5864acfbbee1f05ce6368 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Wed, 17 Apr 2024 10:57:01 -0700 Subject: [PATCH 9/9] Update index.ts --- apps/api/src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index 0663c5c..9358672 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -87,7 +87,7 @@ async function authenticateUser(req, res, mode?: string): Promise<{ success: boo app.post("/v0/scrape", async (req, res) => { try { // make sure to authenticate user first, Bearer - const { success, team_id, error, status } = await authenticateUser(req, res, "scrape"); + const { success, team_id, error, status } = await authenticateUser(req, res, "crawl"); if (!success) { return res.status(status).json({ error }); }