Pujan Neupane committed
Commit · e9f0d54
Parent(s): 992f09e
Project: pushing all the files to Hugging Face
- .gitignore +54 -0
- Ai-Text-Detector/model/merges.txt +0 -0
- Ai-Text-Detector/model/special_tokens_map.json +30 -0
- Ai-Text-Detector/model/tokenizer.json +0 -0
- Ai-Text-Detector/model/tokenizer_config.json +28 -0
- Ai-Text-Detector/model/vocab.json +0 -0
- Ai-Text-Detector/model_weights.pth +3 -0
- Dockerfile +33 -0
- HuggingFace/main.py +18 -0
- HuggingFace/readme.md +61 -0
- Machine-learning/.gitattributes +2 -0
- Machine-learning/README.md +289 -0
- app.py +91 -0
- requirements.txt +6 -0
.gitignore
ADDED
@@ -0,0 +1,54 @@
+# ---- Python Environment ----
+venv/
+.venv/
+env/
+ENV/
+*.pyc
+*.pyo
+*.pyd
+__pycache__/
+**/__pycache__/
+
+# ---- VS Code / IDEs ----
+.vscode/
+.idea/
+*.swp
+
+# ---- Jupyter / IPython ----
+.ipynb_checkpoints/
+*.ipynb
+
+# ---- Model & Data Artifacts ----
+*.pt
+*.h5
+*.ckpt
+*.onnx
+*.joblib
+*.pkl
+
+# ---- Hugging Face Cache ----
+~/.cache/huggingface/
+huggingface_cache/
+
+# ---- Logs and Dumps ----
+*.log
+*.out
+*.err
+
+# ---- Build Artifacts ----
+build/
+dist/
+*.egg-info/
+
+# ---- System Files ----
+.DS_Store
+Thumbs.db
+
+# ---- Environment Configs ----
+.env
+.env.*
+
+# ---- Node Projects (if applicable) ----
+node_modules/
Ai-Text-Detector/model/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
Ai-Text-Detector/model/special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
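As the file above shows, GPT-2 reuses `<|endoftext|>` for every special-token role (BOS, EOS, PAD, UNK). A minimal sketch for confirming what the saved tokenizer actually resolves to, assuming the model directory path used elsewhere in this repo:

```python
# a sketch: inspect the special tokens loaded from the saved tokenizer files
from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("./Ai-Text-Detector/model")
print(tokenizer.special_tokens_map)
# every role should map to "<|endoftext|>"
```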
Ai-Text-Detector/model/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
Ai-Text-Detector/model/tokenizer_config.json
ADDED
@@ -0,0 +1,28 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "max_length": 1024,
+  "model_max_length": 1024,
+  "pad_to_multiple_of": null,
+  "pad_token": "<|endoftext|>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "stride": 0,
+  "tokenizer_class": "GPT2Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<|endoftext|>"
+}
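This config pins `tokenizer_class` to `GPT2Tokenizer` and `model_max_length` to 1024, which is what lets `app.py` rely on `truncation=True`. A small sketch of how those settings surface after loading, under the same path assumption as above:

```python
# a sketch: the saved tokenizer_config.json drives truncation at load time
from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("./Ai-Text-Detector/model")
print(tokenizer.model_max_length)  # 1024, from tokenizer_config.json

# inputs longer than 1024 tokens are cut off when truncation is requested
ids = tokenizer("word " * 5000, truncation=True)["input_ids"]
print(len(ids))  # <= 1024
```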
Ai-Text-Detector/model/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
Ai-Text-Detector/model_weights.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:702042483ae656e9c286660ad82dd9b555d481c800c0d3adbccd22a3505e1c8c
+size 497813466
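This is a Git LFS pointer, not the weights themselves; the ~498 MB blob lives in LFS storage. If the repo is hosted on the Hub, one way to fetch the resolved file is `hf_hub_download`; the repo id below is an assumption borrowed from `HuggingFace/main.py`, not confirmed by this commit:

```python
# a sketch: download the resolved weights blob from the Hub; the repo id is
# an assumption taken from HuggingFace/main.py
import os

from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="Pujan-Dev/test",
    filename="Ai-Text-Detector/model_weights.pth",
    token=os.getenv("HF_TOKEN"),
)
print(path)  # local cache path of the ~498 MB file
```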
Dockerfile
ADDED
@@ -0,0 +1,33 @@
+# Use the latest slim Python 3.11 image
+FROM python:3.11-slim
+
+# Set environment variables
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    git \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create a non-root user for safety
+RUN useradd -ms /bin/bash user
+USER user
+WORKDIR $HOME/app
+
+# Copy app source code
+COPY --chown=user . .
+
+# Install Python dependencies
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir -r requirements.txt
+
+# Expose port
+EXPOSE 7860
+
+# Start the FastAPI app using uvicorn
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
HuggingFace/main.py
ADDED
@@ -0,0 +1,18 @@
+import os
+from huggingface_hub import Repository
+
+
+def download_repo():
+    hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        raise ValueError("HF_TOKEN not found in environment variables.")
+
+    repo_id = "Pujan-Dev/test"
+    local_dir = "../Ai-Text-Detector/"
+
+    # Repository clones the repo into local_dir on construction
+    repo = Repository(local_dir, clone_from=repo_id, token=hf_token)
+    print(f"Repository downloaded to: {local_dir}")
+
+
+if __name__ == "__main__":
+    download_repo()
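Note that `Repository` is deprecated in recent versions of `huggingface_hub`. A sketch of the same download via the HTTP-based `snapshot_download`, reusing the values hard-coded above:

```python
# a sketch using snapshot_download instead of the deprecated Repository class
import os

from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="Pujan-Dev/test",
    local_dir="../Ai-Text-Detector/",
    token=os.getenv("HF_TOKEN"),
)
```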
HuggingFace/readme.md
ADDED
@@ -0,0 +1,61 @@
+### Hugging Face CLI Tool
+
+This CLI tool allows you to **upload** and **download** models from Hugging Face repositories. It requires a **Hugging Face access token (`HF_TOKEN`)** for authentication, especially for private repositories.
+
+### Prerequisites
+
+1. **Install Hugging Face Hub**:
+
+   ```bash
+   pip install huggingface_hub
+   ```
+
+2. **Get HF_TOKEN**:
+   - Log in to [Hugging Face](https://huggingface.co/).
+   - Go to **Settings** → **Access Tokens** → **Create a new token** with `read` and `write` permissions.
+   - Save the token.
+
+### Usage
+
+1. **Set the Token**:
+
+   - **Linux/macOS**:
+     ```bash
+     export HF_TOKEN=your_token_here
+     ```
+   - **Windows (CMD)**:
+     ```bash
+     set HF_TOKEN=your_token_here
+     ```
+
+2. **Download Model**:
+
+   ```bash
+   python main.py --download --repo-id <repo_name> --save-dir <local_save_path>
+   ```
+
+3. **Upload Model**:
+
+   ```bash
+   python main.py --upload --repo-id <repo_name> --model-path <local_model_path>
+   ```
+
+### Example
+
+To download a model:
+
+```bash
+python main.py
+```
+
+### Authentication
+
+Ensure you set `HF_TOKEN` to access private repositories. If it is not set, the script raises an error.
+
+---
+
+### ⚠️ Note
+
+**Run this script from the `HuggingFace` directory so that relative paths resolve correctly.**
+
+---
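The `--download`/`--upload` flags documented above are not implemented in the committed `main.py`, which hard-codes a single download. A minimal argparse front-end matching the documented interface might look like this; `snapshot_download` and `upload_folder` are real `huggingface_hub` helpers, but wiring them this way is an assumption:

```python
# a sketch of the CLI interface described in this readme
import argparse
import os

from huggingface_hub import snapshot_download, upload_folder


def main():
    parser = argparse.ArgumentParser(description="Hugging Face CLI tool")
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument("--download", action="store_true")
    mode.add_argument("--upload", action="store_true")
    parser.add_argument("--repo-id", required=True)
    parser.add_argument("--save-dir", default=".")
    parser.add_argument("--model-path", default=".")
    args = parser.parse_args()

    token = os.getenv("HF_TOKEN")
    if args.download:
        snapshot_download(repo_id=args.repo_id, local_dir=args.save_dir, token=token)
    else:
        upload_folder(repo_id=args.repo_id, folder_path=args.model_path, token=token)


if __name__ == "__main__":
    main()
```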
Machine-learning/.gitattributes
ADDED
@@ -0,0 +1,2 @@
+*.pth filter=lfs diff=lfs merge=lfs -text
+Ai-Text-Detector/model_weights.pth filter=lfs diff=lfs merge=lfs -text
Machine-learning/README.md
ADDED
@@ -0,0 +1,289 @@
+### **FastAPI AI**
+
+This FastAPI app loads a GPT-2 model, tokenizes input text, classifies it, and returns whether the text is AI-generated or human-written.
+
+### **Install Dependencies**
+
+```bash
+pip install -r requirements.txt
+```
+
+This command installs all the dependencies listed in the `requirements.txt` file. It ensures that your environment has the required packages to run the project smoothly.
+
+**NOTE: IF YOU HAVE MADE ANY CHANGES, DON'T FORGET TO RECORD THEM IN `requirements.txt` USING `pip freeze > requirements.txt`**
+
+---
+### **Functions**
+
+1. **`load_model()`**
+   Loads the GPT-2 model and tokenizer from specified paths.
+
+2. **`lifespan()`**
+   Manages the app's lifecycle: loads the model at startup and handles cleanup on shutdown.
+
+3. **`classify_text_sync()`**
+   Synchronously tokenizes input text and classifies it using the GPT-2 model. Returns the classification and perplexity.
+
+4. **`classify_text()`**
+   Asynchronously executes `classify_text_sync()` in a thread pool to ensure non-blocking processing.
+
+5. **`analyze_text()`**
+   **POST** endpoint: accepts text input, classifies it using `classify_text()`, and returns the result with perplexity.
+
+6. **`health_check()`**
+   **GET** endpoint: simple health check to confirm the API is running.
+
+---
+### **Code Overview**
+
+```python
+executor = ThreadPoolExecutor(max_workers=2)
+```
+
+- **`ThreadPoolExecutor(max_workers=2)`** limits the number of concurrent threads (tasks) per worker process to 2 for text classification. This helps control resource usage and prevent overloading the server.
+
+---
+### **Running and Load Balancing**
+
+To run the app in production with load balancing:
+
+```bash
+uvicorn app:app --host 0.0.0.0 --port 8000 --workers 4
+```
+
+This command launches the FastAPI app with **4 worker processes**, allowing it to handle multiple requests concurrently.
+
+### **Concurrency Explained**
+
+1. **`ThreadPoolExecutor(max_workers=20)`**
+
+   - Controls the **number of threads** within a **single worker** process.
+   - Allows up to 20 tasks (text classification requests) to be handled simultaneously per worker, improving responsiveness for I/O-bound tasks.
+
+2. **`--workers 4` in Uvicorn**
+   - Spawns **4 independent worker processes** to handle incoming HTTP requests.
+   - Each worker can independently handle multiple tasks, increasing the app's ability to process concurrent requests in parallel.
+
+### **How They Relate**
+
+- **Uvicorn's `--workers`** defines how many worker processes the server will run.
+- **`ThreadPoolExecutor`** limits how many tasks (threads) each worker can process concurrently.
+
+For example, with **4 workers** and **20 threads per worker**, the server can handle **80 tasks concurrently** (4 × 20). This provides scalable and efficient processing, balancing the load across multiple workers and threads.
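As a sketch of how a thread pool and async handlers fit together (this mirrors the executor-based design described above; the committed `app.py` instead uses `asyncio.to_thread`):

```python
# a minimal sketch: offload blocking work to a thread pool from async code
import asyncio
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=20)  # threads per worker process


def classify_text_sync(text: str) -> str:
    # stand-in for the blocking GPT-2 forward pass
    return "Human-written"


async def classify_text(text: str) -> str:
    loop = asyncio.get_running_loop()
    # run the blocking call in the pool so the event loop stays responsive
    return await loop.run_in_executor(executor, classify_text_sync, text)
```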
+### **Endpoints**
+
+#### 1. **`/analyze`**
+
+- **Method:** `POST`
+- **Description:** Classifies whether the text is AI-generated or human-written.
+- **Request:**
+  ```json
+  { "text": "sample text" }
+  ```
+- **Response:**
+  ```json
+  { "result": "AI-generated", "perplexity": 55.67 }
+  ```
+
+#### 2. **`/health`**
+
+- **Method:** `GET`
+- **Description:** Returns the status of the API.
+- **Response:**
+  ```json
+  { "status": "ok" }
+  ```
+
+---
+### **Running the API**
+
+Start the server with:
+
+```bash
+uvicorn app:app --host 0.0.0.0 --port 8000 --workers 4
+```
+
+---
+### **🧪 Testing the API**
+
+You can test the FastAPI endpoint using `curl` like this:
+
+```bash
+curl -X POST http://127.0.0.1:8000/analyze \
+  -H "Authorization: Bearer HelloThere" \
+  -H "Content-Type: application/json" \
+  -d '{"text": "This is a sample sentence for analysis."}'
+```
+
+- The `-H "Authorization: Bearer HelloThere"` part is used to simulate the **handshake**.
+- FastAPI checks this token against the one loaded from the `.env` file.
+- If the token matches, the request is accepted and processed.
+- Otherwise, it responds with a `403 Forbidden` error.
+
+---
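The same request can be made from Python; a small sketch with the `requests` library, using the sample token from the curl command above:

```python
# a sketch of the curl request above, using the requests library
import requests

response = requests.post(
    "http://127.0.0.1:8000/analyze",
    headers={"Authorization": "Bearer HelloThere"},
    json={"text": "This is a sample sentence for analysis."},
)
response.raise_for_status()
print(response.json())  # e.g. {"result": "...", "perplexity": ...}
```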
+### **API Documentation**
+
+- **Swagger UI:** `http://127.0.0.1:8000/docs`
+- **ReDoc:** `http://127.0.0.1:8000/redoc`
+
+### **🔐 Handshake Mechanism**
+
+In this part, we're implementing a simple handshake to verify that the request is coming from a trusted source (e.g., our NestJS server). Here's how it works:
+
+- We load a secret token from the `.env` file.
+- When a request is made to the FastAPI server, we extract the `Authorization` header and compare it with our expected secret token.
+- If the token does **not** match, we immediately return a **403 Forbidden** response with the message `"Unauthorized"`.
+- If the token **does** match, we allow the request to proceed to the next step.
+
+The verification function looks like this:
+
+```python
+def verify_token(auth: str):
+    if auth != f"Bearer {EXPECTED_TOKEN}":
+        raise HTTPException(status_code=403, detail="Unauthorized")
+```
+
+This provides a basic but effective layer of security to prevent unauthorized access to the API.
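One way to attach `verify_token` to a route is as a FastAPI dependency that reads the `Authorization` header; the wiring below is a sketch, not the committed `app.py` code:

```python
# a sketch: hook verify_token into a route via a Header dependency
import os

from fastapi import Depends, FastAPI, Header, HTTPException

EXPECTED_TOKEN = os.getenv("SECRET_TOKEN", "")

app = FastAPI()


def verify_token(authorization: str = Header(default="")):
    # reject any request whose bearer token does not match the shared secret
    if authorization != f"Bearer {EXPECTED_TOKEN}":
        raise HTTPException(status_code=403, detail="Unauthorized")


@app.post("/analyze", dependencies=[Depends(verify_token)])
async def analyze():
    return {"status": "authorized"}
```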
+### **Implement it with NestJS**
+
+NOTE: Create a microservice in NestJS, implement the call there, and invoke it from `app.controller.ts`.
+
+The sections below show what `fastapi.service.ts` does and how it fits into the project.
+
+### Project Structure
+
+```files
+nestjs-fastapi-bridge/
+├── src/
+│   ├── app.controller.ts
+│   ├── app.module.ts
+│   └── fastapi.service.ts
+├── .env
+```
+
+---
+
+### Step-by-Step Setup
+
+#### 1. `.env`
+
+Create a `.env` file at the root with the following:
+
+```environment
+FASTAPI_BASE_URL=http://localhost:8000
+SECRET_TOKEN="HelloThere"
+```
+#### 2. `fastapi.service.ts`
+
+```typescript
+// src/fastapi.service.ts
+import { Injectable } from "@nestjs/common";
+import { HttpService } from "@nestjs/axios";
+import { ConfigService } from "@nestjs/config";
+import { firstValueFrom } from "rxjs";
+
+@Injectable()
+export class FastAPIService {
+  constructor(
+    private http: HttpService,
+    private config: ConfigService,
+  ) {}
+
+  async analyzeText(text: string) {
+    const url = `${this.config.get("FASTAPI_BASE_URL")}/analyze`;
+    const token = this.config.get("SECRET_TOKEN");
+
+    const response = await firstValueFrom(
+      this.http.post(
+        url,
+        { text },
+        {
+          headers: {
+            Authorization: `Bearer ${token}`,
+          },
+        },
+      ),
+    );
+
+    return response.data;
+  }
+}
+```
+#### 3. `app.module.ts`
+
+```typescript
+// src/app.module.ts
+import { Module } from "@nestjs/common";
+import { ConfigModule } from "@nestjs/config";
+import { HttpModule } from "@nestjs/axios";
+import { AppController } from "./app.controller";
+import { FastAPIService } from "./fastapi.service";
+
+@Module({
+  imports: [ConfigModule.forRoot(), HttpModule],
+  controllers: [AppController],
+  providers: [FastAPIService],
+})
+export class AppModule {}
+```
+
+---
+#### 4. `app.controller.ts`
+
+```typescript
+// src/app.controller.ts
+import { Body, Controller, Post, Get } from '@nestjs/common';
+import { FastAPIService } from './fastapi.service';
+
+@Controller()
+export class AppController {
+  constructor(private readonly fastapiService: FastAPIService) {}
+
+  @Post('analyze-text')
+  async callFastAPI(@Body('text') text: string) {
+    return this.fastapiService.analyzeText(text);
+  }
+
+  @Get()
+  getHello(): string {
+    return 'NestJS is connected to FastAPI';
+  }
+}
+```
+### 🚀 How to Run
+
+Run both the NestJS and FastAPI servers:
+
+- For NestJS:
+  ```bash
+  npm run start
+  ```
+- For FastAPI:
+  ```bash
+  uvicorn app:app --reload
+  ```
+
+Make sure your FastAPI service is running at `http://localhost:8000`.
+
+### Test with CURL
+
+```bash
+curl -X POST http://localhost:3000/analyze-text \
+  -H 'Content-Type: application/json' \
+  -d '{"text": "This is a test input"}'
+```
app.py
ADDED
@@ -0,0 +1,91 @@
+import torch
+from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from contextlib import asynccontextmanager
+import asyncio
+
+# Global model and tokenizer variables
+model, tokenizer = None, None
+
+# Function to load model and tokenizer
+def load_model():
+    model_path = "./Ai-Text-Detector/model"
+    weights_path = "./Ai-Text-Detector/model_weights.pth"
+
+    try:
+        tokenizer = GPT2TokenizerFast.from_pretrained(model_path)
+        config = GPT2Config.from_pretrained(model_path)
+        model = GPT2LMHeadModel(config)
+        model.load_state_dict(torch.load(weights_path, map_location=torch.device("cpu")))
+        model.eval()  # Set model to evaluation mode
+    except Exception as e:
+        raise RuntimeError(f"Error loading model: {str(e)}")
+
+    return model, tokenizer
+
+# Load model on app startup
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    global model, tokenizer
+    model, tokenizer = load_model()
+    yield
+
+# FastAPI app instance (created once, with the startup loader attached)
+app = FastAPI(lifespan=lifespan)
+
+# Input schema
+class TextInput(BaseModel):
+    text: str
+
+# Sync text classification
+def classify_text(sentence: str):
+    inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
+    input_ids = inputs["input_ids"]
+    attention_mask = inputs["attention_mask"]
+
+    # Perplexity = exp(language-model loss); lower values mean the text is more
+    # "predictable" to GPT-2, which this app treats as a signal of AI generation
+    with torch.no_grad():
+        outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
+        loss = outputs.loss
+        perplexity = torch.exp(loss).item()
+
+    if perplexity < 60:
+        result = "AI-generated"
+    elif perplexity < 80:
+        result = "Probably AI-generated"
+    else:
+        result = "Human-written"
+
+    return result, perplexity
+
+# POST route to analyze text
+@app.post("/analyze")
+async def analyze_text(data: TextInput):
+    user_input = data.text.strip()
+    if not user_input:
+        raise HTTPException(status_code=400, detail="Text cannot be empty")
+
+    # Run classification in a worker thread to avoid blocking the event loop
+    result, perplexity = await asyncio.to_thread(classify_text, user_input)
+
+    return {
+        "result": result,
+        "perplexity": round(perplexity, 2),
+    }
+
+# Health check route
+@app.get("/health")
+async def health_check():
+    return {"status": "ok"}
+
+# Simple index route
+@app.get("/")
+def index():
+    return {
+        "message": "FastAPI API is up.",
+        "try": "/docs to test the API.",
+        "status": "OK",
+    }
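A minimal local smoke test for this app, assuming the model directory and weights exist at the paths hard-coded above; FastAPI's `TestClient` runs the lifespan loader when used as a context manager:

```python
# a sketch of a local smoke test for app.py
from fastapi.testclient import TestClient

from app import app

with TestClient(app) as client:  # entering the context runs the lifespan loader
    assert client.get("/health").json() == {"status": "ok"}

    resp = client.post("/analyze", json={"text": "This is a test input"})
    print(resp.json())  # {"result": "...", "perplexity": ...}
```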
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+torch==2.6.0
+transformers==4.51.3
+fastapi==0.103.0
+pydantic==1.10.12
+asyncio==3.4.3
+uvicorn[standard]==0.21.1