import os
import tempfile

import gradio as gr
import pypandoc
from transformers import pipeline

# ----------------------
# 1. Load the chat model
# ----------------------
# The Tencent Hunyuan model is large; this will download weights the first time
# and fall back to CPU if a GPU isn't available.
chat_pipe = pipeline(
    "text-generation",
    model="tencent/Hunyuan-A13B-Instruct",
    trust_remote_code=True,
    device_map="auto",  # uses GPU if possible
    max_new_tokens=512,
)


# ----------------------
# 2. Chat helper
# ----------------------
def chat_ai(history, user_message):
    """LLM chat wrapper: takes the chat history + new user msg, returns assistant reply.

    Parameters
    ----------
    history : list[tuple[str, str]]
        Prior (user, assistant) message pairs, as stored by ``gr.Chatbot``.
    user_message : str
        The new user message to answer.

    Returns
    -------
    str
        The assistant's reply text, stripped of surrounding whitespace.
    """
    # Re-create a messages list compatible with the model's chat format.
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for user, bot in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": user_message})

    generated = chat_pipe(messages)[0]["generated_text"]

    # BUG FIX: when a chat-formatted (list-of-dicts) prompt is passed, the
    # transformers text-generation pipeline returns the *whole conversation*
    # as a list of role/content dicts, not a flat string — calling .split()
    # on it raised AttributeError. Handle both return shapes.
    if isinstance(generated, list):
        # Last entry is the newly generated assistant turn.
        return generated[-1]["content"].strip()
    # String-returning pipelines may echo the entire conversation; grab only
    # the last line as a best-effort location of the assistant's reply.
    return generated.split("\n")[-1].strip()


# ----------------------
# 3. Document converter
# ----------------------
# Powered by Pandoc via the pypandoc wrapper. Make sure pandoc is available in the
# environment (apt-get install pandoc on Linux, or add it to requirements.txt).
SUPPORTED_TARGETS = ["pdf", "docx", "html", "md", "txt", "rtf", "odt", "epub"]


def convert_document(file_obj, target_format):
    """Convert the uploaded file to *target_format* and return the output path.

    Parameters
    ----------
    file_obj : str | file-like
        The upload from ``gr.File``. Newer Gradio versions pass a plain path
        string; older ones pass a tempfile wrapper with a ``.name`` attribute.
    target_format : str
        One of ``SUPPORTED_TARGETS``.

    Returns
    -------
    str
        Path to the converted file, which Gradio serves as a download.

    Raises
    ------
    gr.Error
        If no file was uploaded, the target format is unsupported, or the
        Pandoc conversion fails.
    """
    if file_obj is None:
        raise gr.Error("Please upload a file.")
    if target_format not in SUPPORTED_TARGETS:
        raise gr.Error(f"Unsupported target format: {target_format}")

    # Gradio has already saved the upload to disk (keeping the original
    # extension, which Pandoc uses to detect the input format), so there is
    # no need to copy the bytes to a second temp file. Also fixes the old
    # file_obj.read() call, which broke when Gradio passed a path string.
    src_path = file_obj if isinstance(file_obj, str) else file_obj.name

    # BUG FIX: the original wrote the output inside a TemporaryDirectory
    # context manager, which deletes the directory — and the converted file —
    # the moment the `with` block exits, so Gradio tried to serve a path that
    # no longer existed. mkdtemp() keeps the directory alive for the download.
    out_dir = tempfile.mkdtemp(prefix="converted_")
    output_path = os.path.join(out_dir, f"converted.{target_format}")

    try:
        pypandoc.convert_file(src_path, to=target_format, outputfile=output_path)
    except (RuntimeError, OSError) as e:
        # RuntimeError: pandoc reported a conversion error;
        # OSError: pandoc binary missing or input unreadable.
        raise gr.Error(f"Conversion failed: {e}")

    return output_path


# ----------------------
# 4. Gradio UI
# ----------------------
with gr.Blocks(title="LLM Chat + Universal Document Converter") as demo:
    gr.Markdown(
        "# 🌐 LLM Chat + 📄 Document Converter\n"
        "Chat with **Hunyuan-A13B** (Tencent) or convert documents between multiple formats using **Pandoc**."
    )
    with gr.Tabs():
        # ----- Tab 1: Chat -----
        with gr.TabItem("Chat"):
            chatbot = gr.Chatbot()
            user_msg = gr.Textbox(placeholder="Ask me anything...", label="Your message")
            send_btn = gr.Button("Send")
            clear_btn = gr.Button("Clear")

            def _send(history, msg):
                """Append the model's reply for *msg* to *history*; clear the textbox."""
                history = history or []
                reply = chat_ai(history, msg)
                history.append((msg, reply))
                return history, ""

            send_btn.click(_send, inputs=[chatbot, user_msg], outputs=[chatbot, user_msg])
            clear_btn.click(lambda: [], None, chatbot)

        # ----- Tab 2: Document Converter -----
        with gr.TabItem("Convert"):
            file_input = gr.File(label="Upload document")
            target = gr.Dropdown(SUPPORTED_TARGETS, label="Convert to", value="pdf")
            convert_btn = gr.Button("Convert")
            result_file = gr.File(label="Download converted file")
            convert_btn.click(convert_document, inputs=[file_input, target], outputs=result_file)

# Guard the launch so importing this module (e.g. from tests) doesn't start a server.
if __name__ == "__main__":
    demo.launch()