0

Merge branch 'main' into feat/improving-reative-paths

This commit is contained in:
Nicolas 2024-04-17 10:57:49 -07:00
commit 60245343c9
7 changed files with 23 additions and 31 deletions

BIN
.DS_Store vendored

Binary file not shown.

3
.gitignore vendored
View File

@ -1,7 +1,8 @@
.DS_Store
/node_modules/
/dist/
.env
*.csv
dump.rdb
/mongo-data
apps/js-sdk/node_modules/
apps/js-sdk/node_modules/

View File

@ -11,9 +11,9 @@ Crawl and convert any website into LLM-ready markdown. Build by [Mendable.ai](ht
## How to use it?
We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.com/playground). You can also self host the backend if you'd like.
We provide an easy-to-use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). You can also self-host the backend if you'd like.
- [x] [API](https://firecrawl.com/playground)
- [x] [API](https://firecrawl.dev/playground)
- [x] [Python SDK](https://github.com/mendableai/firecrawl/tree/main/apps/python-sdk)
- [x] [Langchain Integration 🦜🔗](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/)
- [x] [Llama Index Integration 🦙](https://docs.llamaindex.ai/en/stable/)
@ -25,7 +25,7 @@ Self-host. To self-host refer to guide [here](https://github.com/mendableai/fire
### API Key
To use the API, you need to sign up on [Firecrawl](https://firecrawl.com) and get an API key.
To use the API, you need to sign up on [Firecrawl](https://firecrawl.dev) and get an API key.
### Crawling

BIN
apps/.DS_Store vendored

Binary file not shown.

BIN
apps/api/src/.DS_Store vendored

Binary file not shown.

View File

@ -45,14 +45,14 @@ app.get("/test", async (req, res) => {
res.send("Hello, world!");
});
async function authenticateUser(req, res, mode?: string): Promise<string> {
async function authenticateUser(req, res, mode?: string): Promise<{ success: boolean, team_id?: string, error?: string, status?: number }> {
const authHeader = req.headers.authorization;
if (!authHeader) {
return res.status(401).json({ error: "Unauthorized" });
return { success: false, error: "Unauthorized", status: 401 };
}
const token = authHeader.split(" ")[1]; // Extract the token from "Bearer <token>"
if (!token) {
return res.status(401).json({ error: "Unauthorized: Token missing" });
return { success: false, error: "Unauthorized: Token missing", status: 401 };
}
try {
@ -64,13 +64,11 @@ async function authenticateUser(req, res, mode?: string): Promise<string> {
).consume(iptoken);
} catch (rateLimiterRes) {
console.error(rateLimiterRes);
return res.status(429).json({
error: "Rate limit exceeded. Too many requests, try again in 1 minute.",
});
return { success: false, error: "Rate limit exceeded. Too many requests, try again in 1 minute.", status: 429 };
}
if (token === "this_is_just_a_preview_token" && mode === "scrape") {
return "preview";
return { success: true, team_id: "preview" };
}
const normalizedApi = parseApi(token);
@ -80,16 +78,19 @@ async function authenticateUser(req, res, mode?: string): Promise<string> {
.select("*")
.eq("key", normalizedApi);
if (error || !data || data.length === 0) {
return res.status(401).json({ error: "Unauthorized: Invalid token" });
return { success: false, error: "Unauthorized: Invalid token", status: 401 };
}
return data[0].team_id;
return { success: true, team_id: data[0].team_id };
}
app.post("/v0/scrape", async (req, res) => {
try {
// make sure to authenticate user first, Bearer <token>
const team_id = await authenticateUser(req, res, "scrape");
const { success, team_id, error, status } = await authenticateUser(req, res, "crawl");
if (!success) {
return res.status(status).json({ error });
}
const crawlerOptions = req.body.crawlerOptions ?? {};
try {
@ -159,7 +160,10 @@ app.post("/v0/scrape", async (req, res) => {
app.post("/v0/crawl", async (req, res) => {
try {
const team_id = await authenticateUser(req, res);
const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
if (!success) {
return res.status(status).json({ error });
}
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1);
@ -251,22 +255,9 @@ app.post("/v0/crawlWebsitePreview", async (req, res) => {
app.get("/v0/crawl/status/:jobId", async (req, res) => {
try {
const authHeader = req.headers.authorization;
if (!authHeader) {
return res.status(401).json({ error: "Unauthorized" });
}
const token = authHeader.split(" ")[1]; // Extract the token from "Bearer <token>"
if (!token) {
return res.status(401).json({ error: "Unauthorized: Token missing" });
}
// make sure api key is valid, based on the api_keys table in supabase
const { data, error } = await supabase_service
.from("api_keys")
.select("*")
.eq("key", token);
if (error || !data || data.length === 0) {
return res.status(401).json({ error: "Unauthorized: Invalid token" });
const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
if (!success) {
return res.status(status).json({ error });
}
const job = await getWebScraperQueue().getJob(req.params.jobId);
if (!job) {

Binary file not shown.