Spaces:
Running
on
Zero
Running
on
Zero
File size: 7,003 Bytes
5de8a23 9a42dcd 5de8a23 9a42dcd 5de8a23 c811ef4 5de8a23 c811ef4 5de8a23 66d93c5 5de8a23 66d93c5 5de8a23 66d93c5 5de8a23 66d93c5 5de8a23 66d93c5 5de8a23 c811ef4 66d93c5 5de8a23 66d93c5 5de8a23 66d93c5 5de8a23 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# --- START OF FILE app3.py ---
import sys
import os
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
from dotenv import load_dotenv
# --- Make modules that sit next to this script importable ---
# The directory containing this file is placed at the very front of the
# module search path, so it takes precedence over other entries.
project_root = os.path.split(os.path.abspath(__file__))[0]
sys.path[:0] = [project_root]
# --- Spaces import with a local fallback for Zero-GPU compatibility ---
try:
    import spaces
    print("'spaces' module imported successfully.")
except ImportError:
    print("Warning: 'spaces' module not found. Using dummy decorator for local execution.")

    class DummySpaces:
        """Local stand-in for the ``spaces`` package when it is not installed."""

        def GPU(self, *args, **kwargs):
            """No-op replacement for the ``spaces.GPU`` decorator.

            Supports both spellings used with the real decorator:

            * bare: ``@spaces.GPU``
            * parameterized: ``@spaces.GPU(duration=60)``

            In either case the decorated function is returned unchanged.
            """
            def decorator(func):
                print(f"Note: Dummy @GPU decorator used for function '{func.__name__}'.")
                return func

            # Bare usage: Python passes the function itself as the only
            # positional argument, so decorate it right away. Without this
            # branch the function would be replaced by ``decorator`` and
            # every later call would fail with a TypeError.
            if len(args) == 1 and not kwargs and callable(args[0]):
                return decorator(args[0])
            return decorator

    spaces = DummySpaces()
# --- Step 1: Hugging Face Authentication ---
# load_dotenv() runs before the lookup so that a token supplied through a
# .env file is visible in the environment when it is read.
load_dotenv()
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None or HF_TOKEN == "":
    # Fail at startup instead of continuing without credentials.
    raise ValueError("FATAL: Hugging Face token not found. Please set the HF_TOKEN environment variable.")

print("--- Logging in to Hugging Face Hub ---")
login(token=HF_TOKEN)
# --- Step 2: Initialize Model and Tokenizer (Load Once on Startup) ---
MODEL_NAME = "Gregniuki/ERNIE-4.5-0.3B-PT-Translator-EN-PL-EN"
print(f"--- Loading model from Hugging Face Hub: {MODEL_NAME} ---")

# --- Device Setup (Zero GPU Support) ---
# Use CUDA whenever PyTorch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("GPU detected. Using CUDA." if device.type == "cuda" else "No GPU detected. Using CPU.")

# bfloat16 on the GPU, float32 on the CPU.
dtype = torch.float32 if device.type != "cuda" else torch.bfloat16
print(f"--- Using dtype: {dtype} ---")
# Tokenizer and model are loaded once at import time; any failure aborts
# startup with a RuntimeError that keeps the original exception as its cause.
print(f"--- Loading tokenizer from Hub: {MODEL_NAME} ---")
try:
    # trust_remote_code is left disabled.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    print("--- Tokenizer Loaded Successfully ---")
except Exception as e:
    raise RuntimeError(f"FATAL: Could not load tokenizer from the Hub. Error: {e}") from e

print(f"--- Loading Model with PyTorch from Hub: {MODEL_NAME} ---")
try:
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        # Use the dtype selected (and logged) during device setup instead of
        # a hard-coded bfloat16, so the CPU path really runs in float32.
        torch_dtype=dtype,
        # trust_remote_code is left disabled.
        is_decoder=True,
    ).to(device)
    # Inference only: switch the module to evaluation mode.
    model.eval()
    print("--- Model Loaded Successfully ---")
except Exception as e:
    raise RuntimeError(f"FATAL: Could not load model from the Hub. Error: {e}") from e
# --- Helper function for chunking text ---
def chunk_text(text: str, max_size: int) -> list[str]:
    """Split *text* into chunks of at most *max_size* characters.

    The text is scanned left to right. Within each window of ``max_size``
    characters the cut is made just after the last ``'.'``; when the window
    contains no ``'.'`` the cut falls at the window's end. Every chunk is
    stripped of surrounding whitespace and chunks that end up empty are
    dropped, including the final one.

    Args:
        text: The text to split. An empty string yields an empty list.
        max_size: Maximum number of characters per chunk; must be positive.

    Returns:
        The non-empty, stripped chunks in their original order.

    Raises:
        ValueError: If *text* is non-empty and *max_size* is not positive
            (the scan could never advance).
    """
    if not text:
        return []
    if max_size <= 0:
        raise ValueError(f"max_size must be a positive integer, got {max_size!r}")

    chunks = []
    text_length = len(text)
    start_index = 0
    while start_index < text_length:
        end_index = start_index + max_size
        if end_index >= text_length:
            # The remainder fits in one window: take it all and finish.
            cut = text_length
        else:
            split_pos = text.rfind('.', start_index, end_index)
            # Keep the period with the sentence it terminates.
            cut = split_pos + 1 if split_pos != -1 else end_index
        chunk = text[start_index:cut].strip()
        if chunk:
            chunks.append(chunk)
        start_index = cut
    return chunks
# --- Step 3: Core Translation Function (context carried between chunks) ---
def _strip_echoed_context(result_text: str, context: str) -> str:
    """Drop *context* from the start of *result_text* if it was echoed word for word."""
    context_tokens = context.split()
    if not context_tokens:
        return result_text
    count = len(context_tokens)
    # Split off at most `count` leading words; the remainder keeps its
    # original internal whitespace.
    pieces = result_text.split(None, count)
    if pieces[:count] != context_tokens:
        return result_text
    return pieces[count] if len(pieces) > count else ""


@spaces.GPU
@torch.no_grad()
def translate_with_chunks(input_text: str, chunk_size: int, context_words: int, progress=gr.Progress()) -> str:
    """Run the model over *input_text* chunk by chunk and join the outputs.

    The input is split with ``chunk_text`` when it is longer than
    ``chunk_size`` characters. For every chunk after the first, the last
    ``context_words`` words of the previous output are placed in front of the
    chunk (separated by a space) before the chat template is applied. If the
    model repeats that context verbatim at the start of its answer, the
    repeated words are removed so they do not appear twice in the result.

    Args:
        input_text: Text to process.
        chunk_size: Maximum chunk length in characters; must be positive.
        context_words: Number of trailing words of the previous output to
            carry over as context; ``0`` disables the carry-over.
        progress: Gradio progress tracker used to report status.

    Returns:
        The chunk outputs joined by single spaces, or a fixed notice when the
        input is empty or whitespace-only.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    progress(0, desc="Starting...")
    print("--- Inference function with context preservation and overlap removal started ---")
    if not input_text or not input_text.strip():
        return "Input text is empty. Please enter some text to translate."

    # Numeric UI inputs may arrive as floats; slicing and str.rfind need ints.
    chunk_size = int(chunk_size)
    context_words = int(context_words)
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive number of characters.")

    progress(0.1, desc="Chunking Text...")
    text_chunks = chunk_text(input_text, chunk_size) if len(input_text) > chunk_size else [input_text]
    num_chunks = len(text_chunks)
    print(f"Processing {num_chunks} chunk(s).")

    all_results = []
    # Trailing words of the previous chunk's output, carried into the next prompt.
    translation_context = ""
    for i, chunk in enumerate(text_chunks):
        progress(0.2 + (i / num_chunks) * 0.7, desc=f"Generating for chunk {i+1}/{num_chunks}")

        # A separating space keeps the last context word and the first word
        # of the chunk from being fused into one token sequence.
        context_prompt = f"{translation_context} {chunk}" if translation_context else chunk
        messages = [{"role": "user", "content": context_prompt}]
        prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        # The chat template already rendered the special tokens as text.
        model_inputs = tokenizer([prompt], add_special_tokens=False, return_tensors="pt").to(device)

        print("--- Generating with top_k=50 to allow for more creative output. ---")
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            top_k=50
        )

        # generate() returns prompt + continuation; keep only the continuation.
        input_token_len = model_inputs.input_ids.shape[1]
        output_ids = generated_ids[0][input_token_len:].tolist()
        result_text = tokenizer.decode(output_ids, skip_special_tokens=True).strip()

        # Remove the carried-over words if the model echoed them verbatim.
        result_text = _strip_echoed_context(result_text, translation_context)
        all_results.append(result_text)
        print(f"Chunk {i+1} processed.")

        if context_words > 0:
            # Carry the last `context_words` words of this output forward.
            translation_context = " ".join(result_text.split()[-context_words:])

    progress(0.95, desc="Reassembling Results...")
    # Skip empty outputs so they do not leave doubled spaces in the result.
    full_output = " ".join(result for result in all_results if result)
    progress(1.0, desc="Done!")
    return full_output
# --- Step 4: Create and Launch the Gradio App (with Context Slider) ---
print("\n--- Initializing Gradio Interface ---")
# The three inputs are passed positionally to translate_with_chunks as
# (input_text, chunk_size, context_words).
app = gr.Interface(
    fn=translate_with_chunks,
    inputs=[
        gr.Textbox(lines=15, label="Input Text", placeholder="Enter long text to process here..."),
        gr.Slider(minimum=128, maximum=1536, value=1024, step=64, label="Character Chunk Size"),
        gr.Slider(
            minimum=0,
            maximum=50,
            value=20,
            step=5,
            label="Context Overlap (Words)",
            info="Number of words from the previous translated chunk to use as context for the next one. Set to 0 to disable."
        )
    ],
    outputs=gr.Textbox(lines=15, label="Model Output", interactive=False),
    title="ERNIE 4.5 Context-Aware Translation (PyTorch/Hugging Face)",
    description="Processes long text by splitting it into chunks and preserving context between them. This app runs a PyTorch model from the Hugging Face Hub.",
    allow_flagging="never"
)

if __name__ == "__main__":
    # Only start the server when the file is executed as a script.
    app.queue().launch()