Merge branch 'main' into feat/improving-reative-paths
This commit is contained in:
commit
60245343c9
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,7 +1,8 @@
|
||||
.DS_Store
|
||||
/node_modules/
|
||||
/dist/
|
||||
.env
|
||||
*.csv
|
||||
dump.rdb
|
||||
/mongo-data
|
||||
apps/js-sdk/node_modules/
|
||||
apps/js-sdk/node_modules/
|
||||
|
@ -11,9 +11,9 @@ Crawl and convert any website into LLM-ready markdown. Built by [Mendable.ai](ht
|
||||
|
||||
## How to use it?
|
||||
|
||||
We provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.com/playground). You can also self host the backend if you'd like.
|
||||
We provide an easy-to-use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). You can also self-host the backend if you'd like.
|
||||
|
||||
- [x] [API](https://firecrawl.com/playground)
|
||||
- [x] [API](https://firecrawl.dev/playground)
|
||||
- [x] [Python SDK](https://github.com/mendableai/firecrawl/tree/main/apps/python-sdk)
|
||||
- [x] [Langchain Integration 🦜🔗](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/)
|
||||
- [x] [Llama Index Integration 🦙](https://docs.llamaindex.ai/en/stable/)
|
||||
@ -25,7 +25,7 @@ Self-host. To self-host refer to guide [here](https://github.com/mendableai/fire
|
||||
|
||||
### API Key
|
||||
|
||||
To use the API, you need to sign up on [Firecrawl](https://firecrawl.com) and get an API key.
|
||||
To use the API, you need to sign up on [Firecrawl](https://firecrawl.dev) and get an API key.
|
||||
|
||||
### Crawling
|
||||
|
||||
|
BIN
apps/.DS_Store
vendored
BIN
apps/.DS_Store
vendored
Binary file not shown.
BIN
apps/api/src/.DS_Store
vendored
BIN
apps/api/src/.DS_Store
vendored
Binary file not shown.
@ -45,14 +45,14 @@ app.get("/test", async (req, res) => {
|
||||
res.send("Hello, world!");
|
||||
});
|
||||
|
||||
async function authenticateUser(req, res, mode?: string): Promise<string> {
|
||||
async function authenticateUser(req, res, mode?: string): Promise<{ success: boolean, team_id?: string, error?: string, status?: number }> {
|
||||
const authHeader = req.headers.authorization;
|
||||
if (!authHeader) {
|
||||
return res.status(401).json({ error: "Unauthorized" });
|
||||
return { success: false, error: "Unauthorized", status: 401 };
|
||||
}
|
||||
const token = authHeader.split(" ")[1]; // Extract the token from "Bearer <token>"
|
||||
if (!token) {
|
||||
return res.status(401).json({ error: "Unauthorized: Token missing" });
|
||||
return { success: false, error: "Unauthorized: Token missing", status: 401 };
|
||||
}
|
||||
|
||||
try {
|
||||
@ -64,13 +64,11 @@ async function authenticateUser(req, res, mode?: string): Promise<string> {
|
||||
).consume(iptoken);
|
||||
} catch (rateLimiterRes) {
|
||||
console.error(rateLimiterRes);
|
||||
return res.status(429).json({
|
||||
error: "Rate limit exceeded. Too many requests, try again in 1 minute.",
|
||||
});
|
||||
return { success: false, error: "Rate limit exceeded. Too many requests, try again in 1 minute.", status: 429 };
|
||||
}
|
||||
|
||||
if (token === "this_is_just_a_preview_token" && mode === "scrape") {
|
||||
return "preview";
|
||||
return { success: true, team_id: "preview" };
|
||||
}
|
||||
|
||||
const normalizedApi = parseApi(token);
|
||||
@ -80,16 +78,19 @@ async function authenticateUser(req, res, mode?: string): Promise<string> {
|
||||
.select("*")
|
||||
.eq("key", normalizedApi);
|
||||
if (error || !data || data.length === 0) {
|
||||
return res.status(401).json({ error: "Unauthorized: Invalid token" });
|
||||
return { success: false, error: "Unauthorized: Invalid token", status: 401 };
|
||||
}
|
||||
|
||||
return data[0].team_id;
|
||||
return { success: true, team_id: data[0].team_id };
|
||||
}
|
||||
|
||||
app.post("/v0/scrape", async (req, res) => {
|
||||
try {
|
||||
// make sure to authenticate user first, Bearer <token>
|
||||
const team_id = await authenticateUser(req, res, "scrape");
|
||||
const { success, team_id, error, status } = await authenticateUser(req, res, "crawl");
|
||||
if (!success) {
|
||||
return res.status(status).json({ error });
|
||||
}
|
||||
const crawlerOptions = req.body.crawlerOptions ?? {};
|
||||
|
||||
try {
|
||||
@ -159,7 +160,10 @@ app.post("/v0/scrape", async (req, res) => {
|
||||
|
||||
app.post("/v0/crawl", async (req, res) => {
|
||||
try {
|
||||
const team_id = await authenticateUser(req, res);
|
||||
const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
|
||||
if (!success) {
|
||||
return res.status(status).json({ error });
|
||||
}
|
||||
|
||||
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
|
||||
await checkTeamCredits(team_id, 1);
|
||||
@ -251,22 +255,9 @@ app.post("/v0/crawlWebsitePreview", async (req, res) => {
|
||||
|
||||
app.get("/v0/crawl/status/:jobId", async (req, res) => {
|
||||
try {
|
||||
const authHeader = req.headers.authorization;
|
||||
if (!authHeader) {
|
||||
return res.status(401).json({ error: "Unauthorized" });
|
||||
}
|
||||
const token = authHeader.split(" ")[1]; // Extract the token from "Bearer <token>"
|
||||
if (!token) {
|
||||
return res.status(401).json({ error: "Unauthorized: Token missing" });
|
||||
}
|
||||
|
||||
// make sure api key is valid, based on the api_keys table in supabase
|
||||
const { data, error } = await supabase_service
|
||||
.from("api_keys")
|
||||
.select("*")
|
||||
.eq("key", token);
|
||||
if (error || !data || data.length === 0) {
|
||||
return res.status(401).json({ error: "Unauthorized: Invalid token" });
|
||||
const { success, team_id, error, status } = await authenticateUser(req, res, "scrape");
|
||||
if (!success) {
|
||||
return res.status(status).json({ error });
|
||||
}
|
||||
const job = await getWebScraperQueue().getJob(req.params.jobId);
|
||||
if (!job) {
|
||||
|
BIN
apps/playwright-service/.DS_Store
vendored
BIN
apps/playwright-service/.DS_Store
vendored
Binary file not shown.
Loading…
Reference in New Issue
Block a user