from langchain_community.llms import HuggingFaceEndpoint, HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import sys
import os
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Add project root to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from app.config import HF_API_KEY, LLM_MODEL, EMBEDDING_MODEL, DEFAULT_TEMPERATURE, MAX_TOKENS
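
# The exact contents of app.config are not shown in this file; the imported values are
# assumed to look roughly like the following (hypothetical defaults, for illustration only):
#   LLM_MODEL = "distilgpt2"                                     # some small causal LM
#   EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"   # a sentence-transformers model
#   DEFAULT_TEMPERATURE = 0.7
#   MAX_TOKENS = 512
#   HF_API_KEY = ""                                              # optional Hugging Face token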


def get_llm():
    """Initialize and return the language model."""
    # Set up cache directories with proper permissions
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None

    # Don't rely on an API key being configured (e.g. in the Spaces environment);
    # use it only if present
    api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") or os.getenv("HF_API_KEY", "")

    logger.info(f"Using model: {LLM_MODEL}")

    # Always try local pipeline first (most reliable in Spaces)
    try:
        from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

        logger.info(f"Loading model {LLM_MODEL} as local pipeline")

        # Try multiple fallbacks with increasingly simpler models
        models_to_try = [
            LLM_MODEL,
            "distilgpt2",               # Smaller fallback
            "gpt2",                     # Standard fallback
            "EleutherAI/gpt-neo-125M",  # Another option
        ]

        last_error = None
        for model_name in models_to_try:
            try:
                logger.info(f"Attempting to load model: {model_name}")

                # Try with explicit loading first
                try:
                    # Set trust_remote_code to False to avoid security issues
                    tokenizer = AutoTokenizer.from_pretrained(
                        model_name,
                        use_auth_token=api_key if api_key else None,
                        trust_remote_code=False
                    )
                    model = AutoModelForCausalLM.from_pretrained(
                        model_name,
                        use_auth_token=api_key if api_key else None,
                        trust_remote_code=False,
                        low_cpu_mem_usage=True  # Help with memory issues
                    )

                    # Create pipeline with loaded components
                    pipe = pipeline(
                        "text-generation",
                        model=model,
                        tokenizer=tokenizer,
                        max_length=MAX_TOKENS,
                        temperature=DEFAULT_TEMPERATURE,
                        device=-1  # Use CPU
                    )
                    logger.info(f"Successfully loaded model: {model_name}")
                    return HuggingFacePipeline(pipeline=pipe)
                except Exception as e:
                    logger.warning(f"Error loading {model_name} with explicit model/tokenizer: {e}")
                    last_error = e

                    # Try direct pipeline loading
                    pipe = pipeline(
                        "text-generation",
                        model=model_name,
                        max_length=MAX_TOKENS,
                        temperature=DEFAULT_TEMPERATURE,
                        use_auth_token=api_key if api_key else None,
                        device=-1  # Use CPU
                    )
                    logger.info(f"Successfully loaded model: {model_name} via direct pipeline")
                    return HuggingFacePipeline(pipeline=pipe)
            except Exception as e:
                logger.warning(f"Error loading model {model_name}: {e}")
                last_error = e
                # Continue to the next model
                continue

        # If we get here, all models failed
        logger.error(f"All models failed to load. Last error: {last_error}")
        raise last_error
    except Exception as e:
        logger.warning(f"Error creating local pipeline: {e}")

        # Try the HuggingFaceEndpoint as fallback
        try:
            logger.info("Attempting to use HuggingFaceEndpoint")
            return HuggingFaceEndpoint(
                repo_id="gpt2",
                max_length=MAX_TOKENS,
                temperature=DEFAULT_TEMPERATURE,
                huggingfacehub_api_token=api_key
            )
        except Exception as endpoint_error:
            logger.warning(f"HuggingFaceEndpoint failed: {endpoint_error}")

            # Last resort: mock LLM for fallback
            from langchain_community.llms.fake import FakeListLLM

            logger.warning("Using mock LLM as fallback")
            return FakeListLLM(
                responses=[
                    "I'm running in fallback mode due to model loading issues. I have limited capabilities right now.",
                    "I can't access the language model currently. Please check the Space logs for more information.",
                    "I'm operating with a simplified model. For better performance, try running this app locally with proper models configured."
                ]
            )


def get_embeddings():
    """Initialize and return the embeddings model."""
    # Set up cache directories with proper permissions
    cache_dir = "/app/models"
    if not os.path.exists(cache_dir):
        try:
            os.makedirs(cache_dir, exist_ok=True)
            os.chmod(cache_dir, 0o777)
        except Exception as e:
            logger.warning(f"Could not create cache directory: {e}")
            cache_dir = None

    # Try multiple models with fallbacks
    embedding_models_to_try = [
        EMBEDDING_MODEL,
        "sentence-transformers/all-MiniLM-L6-v2",             # Standard model
        "sentence-transformers/paraphrase-MiniLM-L3-v2",      # Smaller model
        "sentence-transformers/paraphrase-albert-small-v2",   # Even smaller model
    ]

    api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN", "") or os.getenv("HF_API_KEY", "")

    for model_name in embedding_models_to_try:
        # Try to use local embeddings
        try:
            logger.info(f"Loading embeddings model: {model_name}")
            return HuggingFaceEmbeddings(
                model_name=model_name,
                cache_folder=cache_dir,
                encode_kwargs={"normalize_embeddings": True},
                model_kwargs={"device": "cpu"}  # Ensure using CPU
            )
        except Exception as e:
            logger.warning(f"Error initializing embeddings with {model_name}: {e}")
            # Continue to the next model

    # If all models fail, try with direct transformers access
    try:
        from sentence_transformers import SentenceTransformer

        logger.info("Loading embeddings with SentenceTransformer directly")
        model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")

        # Create a custom embeddings class
        class DirectEmbeddings:
            def embed_documents(self, texts):
                return model.encode(texts, normalize_embeddings=True).tolist()

            def embed_query(self, text):
                return model.encode(text, normalize_embeddings=True).tolist()

        return DirectEmbeddings()
    except Exception as e:
        logger.warning(f"Error with direct SentenceTransformer: {e}")

        # Create mock embeddings as last resort
        from langchain_community.embeddings.fake import FakeEmbeddings

        logger.warning("Using mock embeddings as fallback")
        return FakeEmbeddings(size=384)  # Standard size for small embedding models


def get_chat_model():
    """
    Create a chat-like interface using a regular LLM.

    This is necessary because many free HF models don't have chat interfaces.
    """
    llm = get_llm()

    # Create a chat-like prompt template
    chat_template = """
Context: {context}

Chat History:
{chat_history}

User: {question}
AI Assistant:
"""

    prompt = PromptTemplate(
        input_variables=["context", "chat_history", "question"],
        template=chat_template
    )

    # Create a chain
    return LLMChain(llm=llm, prompt=prompt)
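

# A minimal usage sketch (an assumption about how this module is driven, not part of
# the original file): build the chain and embeddings, then call them with the three
# prompt variables. The FakeListLLM/FakeEmbeddings fallbacks above keep this runnable
# even when no real model can be loaded.
if __name__ == "__main__":
    chain = get_chat_model()
    embeddings = get_embeddings()

    # Embed a sample query (returns a list of floats, e.g. 384 dimensions for MiniLM)
    vector = embeddings.embed_query("What does this app do?")
    logger.info(f"Embedded query into a vector of length {len(vector)}")

    # Run the chat chain with empty context/history (hypothetical sample inputs)
    answer = chain.run(
        context="",
        chat_history="",
        question="Hello, what can you help me with?"
    )
    logger.info(f"Model answer: {answer}")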