"""Minimal Gradio chat UI that proxies prompts to the HF Inference API.

No model weights are loaded locally; every prompt is forwarded to the
hosted endpoint for the Dolphin-Mistral-24B GGUF repo and the generated
text is shown in the browser.
"""

import gradio as gr
from huggingface_hub import InferenceApi

# Public repo, so no auth token is needed.
# NOTE(review): InferenceApi is deprecated in recent huggingface_hub
# releases in favor of InferenceClient — confirm the pinned version
# still ships it before deploying.
api = InferenceApi(
    repo_id="bartowski/cognitivecomputations_Dolphin-Mistral-24B-Venice-Edition-GGUF",
    task="text-generation",
)


def respond(prompt: str) -> str:
    """Send *prompt* to the Inference API and return the generated text.

    Raises:
        RuntimeError: when the API responds with an error payload
            instead of a generation result.
    """
    outputs = api(
        inputs=prompt,
        parameters={"max_new_tokens": 200, "temperature": 0.7},
    )
    # Success looks like [{"generated_text": "..."}]; failures commonly
    # come back as {"error": "..."} — surface those explicitly instead
    # of letting an opaque KeyError/TypeError escape.
    if isinstance(outputs, dict) and "error" in outputs:
        raise RuntimeError(f"Inference API error: {outputs['error']}")
    return outputs[0]["generated_text"]


demo = gr.Interface(
    fn=respond,
    inputs=gr.Textbox(lines=3, placeholder="ask me anything…"),
    outputs="text",
    title="Dolphin-Mistral-24B via Inference API",
    description="Powered by the HF Inference API—no local model load needed.",
)

# Guard the launch so importing this module (e.g. from tests or another
# app) does not start a blocking web server as a side effect.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)