# syntax=docker/dockerfile:1

# Base image: Hugging Face Text Generation Inference, pinned to an exact
# version for reproducible builds. The base image already defines the
# ENTRYPOINT (text-generation-launcher); our CMD below only supplies args.
FROM ghcr.io/huggingface/text-generation-inference:3.0.2

# Set working directory (created automatically if missing)
WORKDIR /app

# Writable storage for model weights (/data) and runtime caches, created in
# a single layer. World-writable because the runtime UID is not fixed on
# some hosts (e.g. Hugging Face Spaces assigns an arbitrary UID);
# NOTE(review): tighten to a dedicated user + targeted chmod if you control
# the runtime UID.
RUN mkdir -p /data /.cache /.triton \
    && chmod 777 /data /.cache /.triton

# Expose the API port (documentation only; publish with -p at run time)
EXPOSE 8080

# Private-model access: do NOT bake HF_TOKEN into the image via ARG/ENV —
# build args and ENV values are visible in `docker history` and persist in
# image layers. Supply the token at runtime instead:
#   docker run -e HF_TOKEN=<token> <image>
# or, if needed at build time, use a BuildKit secret mount:
#   RUN --mount=type=secret,id=hf_token HF_TOKEN=$(cat /run/secrets/hf_token) …

# Model: Replace with any small or quantized TGI-compatible model
# Example: TheBloke/Mistral-7B-Instruct-v0.1-GPTQ or TinyLlama/TinyLlama-1.1B-Chat
# Exec-form CMD: args are appended to the base image's launcher ENTRYPOINT
# and can be overridden per-run (`docker run <image> --model-id …`).
CMD ["--model-id", "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "--port", "8080"]