Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from fastapi.responses import StreamingResponse, HTMLResponse | |
| from transformers import VitsModel, AutoTokenizer | |
| import torch | |
| import numpy as np | |
| from fastapi.middleware.cors import CORSMiddleware | |
| import io | |
| import soundfile as sf | |
| from pydantic import BaseModel | |
| import string | |
| import unicodedata | |
| from pypinyin import pinyin, Style | |
| import re | |
| from umsc import UgMultiScriptConverter | |
# Initialize the Uyghur script converters. Judging by the variable names,
# 'UAS' is the Arabic-script code and 'ULS' the Latin-script code
# (TODO confirm against the umsc documentation).
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
import os
# Read the Hugging Face access token from the "HF_TOKEN" environment
# variable; the value is None when the variable is not set.
hf_token = os.environ.get("HF_TOKEN")
app = FastAPI()

# CORS policy: allow the apex domain and every subdomain of piyazon.top.
#
# Starlette compares each `allow_origins` entry literally (only a bare "*"
# is special), so an entry such as "https://*.piyazon.top" never matches a
# real Origin header.  Subdomains are therefore expressed through
# `allow_origin_regex`, which is matched against the whole Origin value.
app.add_middleware(
    CORSMiddleware,
    # allow_origins=["*"],  # Allow every origin (testing only)
    allow_origins=["https://piyazon.top"],
    allow_origin_regex=r"https://([a-z0-9-]+\.)+piyazon\.top",
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Punctuation that is blanked out in addition to ``string.punctuation``.
# Add your additional custom punctuation from the training set here.
_FIX_STRING_EXTRA_PUNCTUATION = "–؛;،؟?«»‹›−—¬”“•…"

# Look-alike letters folded into the forms used by the Uyghur alphabet.
# Written as escapes because the glyphs are visually indistinguishable.
_FIX_STRING_LETTER_VARIANTS = {
    '\u0698': '\u062c',  # ژ -> ج
    '\u06a9': '\u0643',  # ک (keheh) -> ك (kaf)
    '\u06cc': '\u0649',  # ی (Farsi yeh) -> ى
    '\u0647': '\u06d5',  # ه (heh) -> ە (ae)
}

# ASCII digits spelled out digit by digit (padded so words never fuse).
_FIX_STRING_DIGIT_WORDS = {
    '0': ' نۆل ',
    '1': ' بىر ',
    '2': ' ئىككى ',
    '3': ' ئۈچ ',
    '4': ' تۆت ',
    '5': ' بەش ',
    '6': ' ئالتە ',
    '7': ' يەتتە ',
    '8': ' سەككىز ',
    '9': ' توققۇز ',
}

# English letter names spelled in Uyghur Arabic script.
_FIX_STRING_LATIN_LETTER_NAMES = {
    'a': ' ئېي ',
    'b': ' بى ',
    'c': ' سى ',
    'd': ' دى ',
    'e': ' ئى ',
    'f': ' ئەف ',
    'g': ' جى ',
    'h': ' ئېچ ',
    'i': ' ئاي ',
    'j': ' جېي ',
    # Spelled with Arabic kaf (U+0643).  The previous spelling used the
    # Persian keheh (U+06A9), i.e. exactly the letter this function
    # normalizes away, so the output contradicted its own normalization.
    'k': ' \u0643\u06d0\u064a ',
    'l': ' ئەل ',
    'm': ' ئەم ',
    'n': ' ئېن ',
    'o': ' ئو ',
    'p': ' پى ',
    'q': ' كىيۇ ',
    'r': ' ئار ',
    's': ' ئەس ',
    't': ' تى ',
    'u': ' يۇ ',
    'v': ' ۋى ',
    'w': ' دابىلىيۇ ',
    'x': ' ئېكىس ',
    'y': ' ۋاي ',
    'z': ' زى ',
}

# One translation table for the whole clean-up.  No replacement text contains
# a character that is itself a key, so a single pass gives the same result as
# applying the four substitution groups one after another.
_FIX_STRING_TABLE = str.maketrans({
    **dict.fromkeys(string.punctuation + _FIX_STRING_EXTRA_PUNCTUATION, ' '),
    **_FIX_STRING_LETTER_VARIANTS,
    **_FIX_STRING_DIGIT_WORDS,
    **_FIX_STRING_LATIN_LETTER_NAMES,
})


def fix_string(batch):
    """Normalize raw text into speakable Uyghur Arabic-script text.

    The text is lower-cased and NFKC-normalized, then in a single pass:
    punctuation becomes a space, look-alike Arabic letters are folded into
    their Uyghur forms, ASCII digits are spelled out one by one, and ASCII
    letters are replaced by their (space padded) English letter names.

    Args:
        batch (str): Text to normalize.

    Returns:
        str: The normalized text. Runs of spaces are not collapsed.
    """
    normalized = unicodedata.normalize('NFKC', batch.lower())
    return normalized.translate(_FIX_STRING_TABLE)
# Uyghur number words in Arabic script.
_UG_DIGIT_WORDS = {
    0: 'نۆل', 1: 'بىر', 2: 'ئىككى', 3: 'ئۈچ', 4: 'تۆت', 5: 'بەش',
    6: 'ئالتە', 7: 'يەتتە', 8: 'سەككىز', 9: 'توققۇز',
}
_UG_ORDINAL_WORDS = {
    1: 'بىرىنجى', 2: 'ئىككىنجى', 3: 'ئۈچىنجى', 4: 'تۆتىنجى', 5: 'بەشىنجى',
    6: 'ئالتىنجى', 7: 'يەتتىنجى', 8: 'سەككىزىنجى', 9: 'توققۇزىنجى',
}
_UG_TENS_WORDS = {
    10: 'ئون', 20: 'يىگىرمە', 30: 'ئوتتۇز', 40: 'قىرىق', 50: 'ئەللىك',
    60: 'ئاتمىش', 70: 'يەتمىش', 80: 'سەكسەن', 90: 'توقسان',
}
# Largest unit first, so the greedy decomposition below works.
_UG_UNIT_WORDS = (
    (1000000000, 'مىليارد'),  # billion
    (1000000, 'مىليون'),      # million
    (1000, 'مىڭ'),            # thousand
    (100, 'يۈز'),             # hundred
)
# Fraction term keyed by the number of significant decimal places.
_UG_FRACTION_WORDS = {
    1: 'ئوندا',         # tenths
    2: 'يۈزدە',         # hundredths
    3: 'مىڭدە',         # thousandths
    4: 'ئون مىڭدە',     # ten-thousandths
    5: 'يۈز مىڭدە',     # hundred-thousandths
    6: 'مىليوندا',      # millionths
    7: 'ئون مىليوندا',  # ten-millionths
    8: 'يۈز مىليوندا',  # hundred-millionths
    9: 'مىليارددا',     # billionths
}
_UG_ORDINAL_SUFFIX = 'ىنجى'
_UG_MAX_DIGITS = 9
_UG_MAX_ORDINAL = 999999999


def _ug_integer_to_words(num):
    """Spell a non-negative integer in Uyghur (a unit count of 1 is implicit)."""
    if num == 0:
        return _UG_DIGIT_WORDS[0]
    words = []
    for value, unit_name in _UG_UNIT_WORDS:
        if num >= value:
            count, num = divmod(num, value)
            if count == 1:  # e.g. 100 -> "يۈز", not "بىر يۈز"
                words.append(unit_name)
            else:
                words.append(_ug_integer_to_words(count) + ' ' + unit_name)
    # After the loop num < 100: spell the tens and ones.
    if num >= 10:
        tens_count, ones = divmod(num, 10)
        words.append(_UG_TENS_WORDS[tens_count * 10])
        if ones:
            words.append(_UG_DIGIT_WORDS[ones])
    elif num > 0:
        words.append(_UG_DIGIT_WORDS[num])
    return ' '.join(words)


def _ug_ordinal_to_words(num):
    """Spell a non-negative integer as an ordinal (no trailing space)."""
    words = _ug_integer_to_words(num).split()
    last_digit = num % 10
    if last_digit:
        # The final word is a digit word: swap in its dedicated ordinal form.
        words[-1] = _UG_ORDINAL_WORDS[last_digit]
    else:
        # Round numbers (and zero) take the plain suffix on the final word.
        words[-1] += _UG_ORDINAL_SUFFIX
    return ' '.join(words)


def number_to_uyghur_arabic_script(number_str):
    """
    Converts a number (integer, decimal, fraction, percentage, or ordinal) up to
    9 digits (integer and decimal part each) to its Uyghur pronunciation in
    Arabic script.

    Supported forms:
      * integer, e.g. '123'
      * decimal, e.g. '0.001' -- the fractional part is read as a whole number
        preceded by a fraction term; trailing zeros are ignored, so '3.0' and
        '3.00' are both read as a bare "whole"
      * single-digit fraction, e.g. '1/4'
      * percentage, e.g. '25%'
      * ordinal, marked by a trailing '_' or '-', e.g. '1968_'

    Commas and spaces inside the input are ignored.

    Args:
        number_str (str): Number as a string.

    Returns:
        str: Uyghur pronunciation in Arabic script. Ordinals always end with
        exactly one space so that a word written directly after the marker
        (as in '1968-يىلى') is not glued to the number; all other results carry
        no surrounding whitespace. Inputs outside the supported range (more
        than 9 digits, multi-digit fractions, negative ordinals) are returned
        unconverted, without the ordinal/percent marker.

    Raises:
        ValueError: If a numeric component cannot be parsed as an integer.
    """
    # Clean the input (remove commas or spaces)
    number_str = number_str.replace(',', '').replace(' ', '')

    # Ordinal: trailing '_' or '-'
    if number_str.endswith(('_', '-')):
        number_str = number_str[:-1]  # Drop the marker
        num = int(number_str)
        if num < 0 or num > _UG_MAX_ORDINAL:
            return number_str
        return _ug_ordinal_to_words(num) + ' '

    # Percentage: remember it and strip the sign
    is_percentage = number_str.endswith('%')
    if is_percentage:
        number_str = number_str[:-1]

    # Fraction: only single-digit numerators and denominators are spelled
    if '/' in number_str:
        numerator, denominator = map(int, number_str.split('/'))
        if numerator in _UG_DIGIT_WORDS and denominator in _UG_DIGIT_WORDS:
            return f"{_UG_DIGIT_WORDS[denominator]}دە {_UG_DIGIT_WORDS[numerator]}"
        return number_str

    # Split into integer and decimal parts
    parts = number_str.split('.')
    integer_part = parts[0]
    decimal_part = parts[1] if len(parts) > 1 else ''

    # Both parts are limited to 9 digits
    if len(integer_part) > _UG_MAX_DIGITS or len(decimal_part) > _UG_MAX_DIGITS:
        return number_str

    pronunciation = _ug_integer_to_words(int(integer_part))

    # Decimal part is read as a whole number with a fraction term
    if decimal_part:
        pronunciation += ' پۈتۈن'
        # Trailing zeros carry no value.  Checking the stripped text (instead
        # of comparing against '0') keeps '3.00' from reaching int('').
        significant = decimal_part.rstrip('0')
        if significant:
            fraction_term = _UG_FRACTION_WORDS.get(len(significant), 'مىليارددا')
            pronunciation += (
                ' ' + fraction_term + ' ' + _ug_integer_to_words(int(significant))
            )

    if is_percentage:
        pronunciation += ' پىرسەنت'
    return pronunciation.strip()
# One numeric token, most specific form first.  Only ASCII digits count.
_UG_NUMBER_TOKEN_RE = re.compile(
    r'[0-9]+/[0-9]+'           # fraction, e.g. "1/4"
    r'|[0-9]+\.[0-9]+'         # decimal, e.g. "3.14"
    r'|[0-9]+[_-](?![0-9])'    # ordinal, e.g. "1968-" (but not the range "5-10")
    r'|[0-9]+'                 # plain integer
)


def process_uyghur_text_with_numbers(text):
    """
    Processes a string containing Uyghur text and numbers, converting the numbers
    to their Uyghur pronunciation in Arabic script while preserving all other text.

    Every '%' is first replaced by the spoken word for "percent", so a
    percentage is read as the number followed by that word.  Numbers are then
    located with a precise pattern instead of greedily absorbing neighbouring
    '.', '-', '/' or '_' characters.  That way a number followed by
    sentence punctuation ("123."), a range ("2020-2021") or a leading sign
    ("-5") is still converted rather than being passed through as raw digits.

    Args:
        text (str): Input string with Uyghur text and numbers
            (e.g., '1/4 كىلو 25% تەملىك').

    Returns:
        str: String with numbers converted to Uyghur pronunciation; text that
        is not part of a number is returned unchanged.
    """
    text = text.replace('%', ' پىرسەنت ')

    def _spell(match):
        token = match.group(0)
        try:
            return number_to_uyghur_arabic_script(token)
        except ValueError:
            # Best effort: keep the original token when it cannot be converted
            return token

    return _UG_NUMBER_TOKEN_RE.sub(_spell, text)
# Characters the Uyghur TTS tokenizer is fed; everything else becomes a space.
_UG_TTS_VOCAB = frozenset([
    " ", "ئ", "ا", "ب", "ت", "ج", "خ", "د", "ر", "ز", "س", "ش", "غ", "ف",
    "ق", "ك", "ل", "م", "ن", "و", "ى", "ي", "پ", "چ", "ڭ", "گ", "ھ", "ۆ",
    "ۇ", "ۈ", "ۋ", "ې", "ە",
])

# Look-alike letters folded into the forms present in the vocabulary.
# Written as escapes because the glyphs are visually indistinguishable.
_UG_TTS_LETTER_VARIANTS = str.maketrans({
    '\u0698': '\u062c',  # ژ -> ج
    '\u06a9': '\u0643',  # ک (keheh) -> ك (kaf)
    '\u06cc': '\u0649',  # ی (Farsi yeh) -> ى
    '\u0647': '\u06d5',  # ه (heh) -> ە (ae)
})


def fix_pauctuations(batch):
    """Reduce text to the character set known to the Uyghur TTS model.

    The text is lower-cased, NFKC-normalized and look-alike Arabic letters
    are folded into their Uyghur forms.  Every remaining character that is
    not in the model vocabulary (punctuation, digits, Latin letters, ...) is
    replaced by a single space, so the output has the same length as the
    normalized input.

    Args:
        batch (str): Text to clean.

    Returns:
        str: Text consisting only of vocabulary characters and spaces.
    """
    batch = unicodedata.normalize('NFKC', batch.lower())
    batch = batch.translate(_UG_TTS_LETTER_VARIANTS)
    # NOTE(review): an earlier comment said sentence-ending marks ('.', '?',
    # '؟') should become TWO spaces, but the code emitted one space for them,
    # same as for any other out-of-vocabulary character.  The one-space
    # behaviour is kept here; confirm which pause length is intended.
    return ''.join(char if char in _UG_TTS_VOCAB else ' ' for char in batch)
def chinese_to_pinyin(mixed_text):
    """
    Replace every run of Chinese characters in a mixed-language string with
    its Pinyin transcription (no tone marks), leaving all other text as it is.

    Args:
        mixed_text (str): Text that may mix Chinese with other languages
            (e.g., English, Uyghur).

    Returns:
        str: The text with each Chinese run replaced by its Pinyin syllables,
        separated by single spaces; non-Chinese text is unchanged.
    """
    def _to_pinyin(match):
        # pinyin() yields one candidate list per character; take the first
        # reading of each and separate the syllables with spaces.
        readings = pinyin(match.group(0), style=Style.NORMAL)
        return ' '.join(candidates[0] for candidates in readings)

    # CJK Unified Ideographs block: U+4E00 .. U+9FFF
    return re.sub(r'[\u4e00-\u9fff]+', _to_pinyin, mixed_text)
# Models and tokenizers are loaded once, at import time, and shared by all
# requests.
#
# Stock MMS Uyghur checkpoint (currently disabled):
# model = VitsModel.from_pretrained("facebook/mms-tts-uig-script_arabic")
# tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-uig-script_arabic")

# Uyghur voice. The Hugging Face token is passed along, presumably because
# the repository is private or gated -- TODO confirm.
uy_model_name = "piyazon/TTS-CV-Radio-RVC-Alikurban-Ug"
model_ug = VitsModel.from_pretrained(uy_model_name, token=hf_token)
tokenizer_ug = AutoTokenizer.from_pretrained(uy_model_name, token=hf_token)
# Alternative Uyghur checkpoint (currently disabled):
# model_ug = VitsModel.from_pretrained("piyazon/qutadgu_bilik")
# tokenizer_ug = AutoTokenizer.from_pretrained("piyazon/qutadgu_bilik")

# Russian voice, loaded without a token.
model_ru = VitsModel.from_pretrained("facebook/mms-tts-rus")
tokenizer_ru = AutoTokenizer.from_pretrained("facebook/mms-tts-rus")
# Pydantic model for request body
class TextInput(BaseModel):
    """JSON body of a text-to-speech request."""
    # Text to synthesize.
    text: str
    # Language selector: "ug" picks the Uyghur voice (with text
    # normalization); any other value is handled by the Russian voice.
    lang: str
| """ | |
| curl -X POST https://piyazon-tts-piyazon.hf.space/generate-tts \ | |
| -H "Content-Type: application/json" \ | |
| -d '{"text": "Hello, world!", "lang":"ug"}' \ | |
| --output output.wav | |
| """ | |
# Register the handler: without a route decorator the function is never
# reachable over HTTP.  The path matches the documented curl example
# (POST /generate-tts).
@app.post("/generate-tts")
async def generate_tts(input: TextInput):
    """Synthesize speech for the submitted text and return it as a WAV file.

    For ``lang == "ug"`` the text is first normalized for the Uyghur model:
    Chinese characters -> Pinyin, Latin script -> Uyghur Arabic script,
    numbers -> spoken words, and finally everything outside the model
    vocabulary is blanked.  Any other ``lang`` value is sent unchanged to
    the Russian model.

    Args:
        input (TextInput): Request body with ``text`` and ``lang``.

    Returns:
        StreamingResponse: ``audio/wav`` attachment named ``output.wav``.

    Raises:
        HTTPException: Status 500 when synthesis fails for any reason; the
            original exception is chained as the cause.
    """
    # NOTE(review): inference below is synchronous, so while one request is
    # being synthesized this coroutine does not yield to the event loop.
    print(input.text)
    try:
        if input.lang == "ug":
            model, tokenizer = model_ug, tokenizer_ug
            text = fix_pauctuations(
                process_uyghur_text_with_numbers(
                    ug_latn_to_arab(chinese_to_pinyin(input.text))
                )
            )
            print(text)
        else:
            model, tokenizer = model_ru, tokenizer_ru
            text = input.text

        # Tokenize input text
        inputs = tokenizer(text, return_tensors="pt")

        # Generate waveform (no gradients needed for inference)
        with torch.no_grad():
            waveform = model(**inputs).waveform

        # Convert waveform to audio file (WAV format), kept in memory
        waveform = waveform.squeeze().numpy()  # Convert tensor to numpy array
        buffer = io.BytesIO()
        sf.write(buffer, waveform, samplerate=model.config.sampling_rate, format="WAV")
        buffer.seek(0)

        # Return audio as streaming response
        return StreamingResponse(
            buffer,
            media_type="audio/wav",
            headers={"Content-Disposition": 'attachment; filename="output.wav"'},
        )
    except Exception as e:
        # Top-level boundary: report the failure to the client and keep the
        # original exception as the cause for server-side tracebacks.
        raise HTTPException(
            status_code=500, detail=f"Error generating audio: {str(e)}"
        ) from e
| # @app.get("/") | |
| # def greet_json(): | |
| # return { | |
| # "Hello": "World!", | |
| # } | |
# Register the landing page.  `response_class=HTMLResponse` makes FastAPI
# send the returned string as text/html instead of JSON-encoding it.
@app.get("/", response_class=HTMLResponse)
def greet_html():
    """Return a minimal HTML landing page that links to the translate site."""
    return """
<html>
<body>
<h1>
URL:
<a href="https://translate.piyazon.top">https://translate.piyazon.top</a>
</h1>
</body>
</html>
"""