# Rajendransp133's picture
# Upload 86 files
# ac901c7 verified
# coding: utf8
"""
Complete collection of Urdu Unicode characters.
Maintainer: Ikram Ali(mrikram1989@gmail.com)
version = 2020.04.07
Source = https://github.com/urduhack/urdu-characters
"""
from typing import FrozenSet, Dict
# Letters of the Urdu alphabet, one single-code-point string per letter.
# Rows follow the traditional grouping of similar letter shapes.
URDU_ALPHABETS: FrozenSet[str] = frozenset(
    {
        "آ", "أ", "ا", "ب", "پ", "ت", "ٹ", "ث",
        "ج", "چ", "ح", "خ",
        "د", "ڈ", "ذ", "ر", "ڑ", "ز", "ژ",
        "س", "ش", "ص", "ض", "ط", "ظ", "ع", "غ",
        "ف", "ق", "ک", "گ", "ل", "م",
        "ن", "ں", "و", "ؤ", "ہ", "ۂ", "ۃ",
        "ھ", "ء", "ی", "ئ", "ے", "ۓ",
    }
)
# The ten Urdu digits, zero through nine (each character of the string
# below becomes one member of the set).
URDU_DIGITS: FrozenSet[str] = frozenset("۰۱۲۳۴۵۶۷۸۹")
# Punctuation marks used in Urdu text: semicolon, comma, decimal
# separator, question mark, full stop and percent sign.
URDU_PUNCTUATIONS: FrozenSet[str] = frozenset({"؛", "،", "٫", "؟", "۔", "٪"})
# Urdu diacritics (aerab). Written as escapes because these are combining
# marks that do not render on their own; each escape is one set member.
URDU_DIACRITICS: FrozenSet[str] = frozenset(
    "\u064e"  # fatha
    "\u064b"  # fathatan
    "\u0670"  # superscript alef
    "\u0650"  # kasra
    "\u064f"  # damma
    "\u064d"  # kasratan
)
# Additional Urdu characters that belong to none of the groups above.
# Spelled as escapes because most of them are invisible format characters
# or combining marks that are hard to review as literal glyphs.
URDU_EXTRA_CHARACTERS: FrozenSet[str] = frozenset(
    (
        # U+0600..U+0603: number/year/footnote/page signs
        "\u0600", "\u0601", "\u0602", "\u0603",
        # U+060D..U+060F: date separator, poetic verse sign, misra sign
        "\u060d", "\u060e", "\u060f",
        # U+0610..U+0615: honorific and small high signs
        "\u0610", "\u0611", "\u0612", "\u0613", "\u0614", "\u0615",
        # Remaining combining marks not listed in URDU_DIACRITICS
        "\u064c", "\u0651", "\u0652", "\u0653", "\u0654",
        "\u0656", "\u0657", "\u0658",
        # U+066C: thousands separator
        "\u066c",
    )
)
# Every Urdu character known to this module: the union of the alphabet,
# digit, punctuation, diacritic and extra-character sets.
URDU_ALL_CHARACTERS: FrozenSet[str] = (
    URDU_ALPHABETS
    | URDU_DIGITS
    | URDU_PUNCTUATIONS
    | URDU_DIACRITICS
    | URDU_EXTRA_CHARACTERS
)
# Identity table: every key is a single Urdu character and its value is that
# same character. Entries are generated from the code points below, in this
# exact order, so dict iteration order is fixed by the tuple.
URDU_ALL_CHARACTERS_UNICODE: Dict[str, str] = {
    chr(code_point): chr(code_point)
    for code_point in (
        # Signs, punctuation, hamza and combining marks
        0x0600, 0x0601, 0x0602, 0x0603,
        0x060C, 0x060D, 0x060E, 0x060F,
        0x0610, 0x0611, 0x0612, 0x0613, 0x0614, 0x0615,
        0x061B, 0x061F, 0x0621,
        0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
        0x0650, 0x0651, 0x0652, 0x0653, 0x0654,
        0x0656, 0x0657, 0x0658,
        0x066A, 0x066B, 0x066C,
        0x0670, 0x06D4,
        # Letters, in alphabet order
        0x0622, 0x0623, 0x0627, 0x0628, 0x067E, 0x062A, 0x0679, 0x062B,
        0x062C, 0x0686, 0x062D, 0x062E,
        0x062F, 0x0688, 0x0630, 0x0631, 0x0691, 0x0632, 0x0698,
        0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 0x0638, 0x0639, 0x063A,
        0x0641, 0x0642, 0x06A9, 0x06AF, 0x0644, 0x0645,
        0x0646, 0x06BA, 0x0648, 0x0624,
        0x06BE, 0x06C1, 0x06C2, 0x06C3,
        0x06CC, 0x0626, 0x06D2, 0x06D3,
        # Digits 0-9
        0x06F0, 0x06F1, 0x06F2, 0x06F3, 0x06F4,
        0x06F5, 0x06F6, 0x06F7, 0x06F8, 0x06F9,
    )
}