Coverage for pycommons / strings / chars.py: 100%
12 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 03:04 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 03:04 +0000
1"""Constants for common characters."""
3from typing import Callable, Final
#: The non-breaking space character (U+00A0).
NBSP: Final[str] = "\xa0"
#: The non-breaking hyphen character (U+2011).
NBDASH: Final[str] = "\u2011"

#: The white space characters that do not break lines. This is a plain
#: string of characters, not a ready-made regular expression pattern.
WHITESPACE: Final[str] = (
    "\t\x0b\x0c \xa0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
    "\u2008\u2009\u200a\u202f\u205f\u3000")

#: The newline (line-breaking) characters. This is a plain string of
#: characters, not a ready-made regular expression pattern.
NEWLINE: Final[str] = "\n\r\x85\u2028\u2029"

#: Every character of :const:`WHITESPACE` and :const:`NEWLINE` combined,
#: listed in ascending code point order. This is a plain string of
#: characters, not a ready-made regular expression pattern.
WHITESPACE_OR_NEWLINE: Final[str] = (
    "\t\n\x0b\x0c\r \x85\xa0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006"
    "\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000")
#: The internal table for converting normal characters to unicode
#: superscripts. It is stored as the bound ``__getitem__`` of a dictionary so
#: that it can be handed directly to :func:`map`; looking up a character that
#: has no entry raises a :class:`KeyError`.
__SUPERSCRIPT: Final[Callable[[str], str]] = {
    # numbers from 0 to 9
    "\x30": "\u2070",  # 0
    "\x31": "\xb9",  # 1
    "\x32": "\xb2",  # 2
    "\x33": "\xb3",  # 3
    "\x34": "\u2074",  # 4
    "\x35": "\u2075",  # 5
    "\x36": "\u2076",  # 6
    "\x37": "\u2077",  # 7
    "\x38": "\u2078",  # 8
    "\x39": "\u2079",  # 9
    # +/-/=/(/)
    "\x2b": "\u207a",  # +
    "\x2d": "\u207b",  # -
    "\x3d": "\u207c",  # =
    "\x28": "\u207d",  # (
    "\x29": "\u207e",  # )
    # upper case letters (there are no entries for X, Y, and Z)
    "\x41": "\u1d2c",  # A
    "\x42": "\u1d2e",  # B
    "\x43": "\ua7f2",  # C
    "\x44": "\u1d30",  # D
    "\x45": "\u1d31",  # E
    "\x46": "\ua7f3",  # F
    "\x47": "\u1d33",  # G
    "\x48": "\u1d34",  # H
    "\x49": "\u1d35",  # I
    "\x4a": "\u1d36",  # J
    "\x4b": "\u1d37",  # K
    "\x4c": "\u1d38",  # L
    "\x4d": "\u1d39",  # M
    "\x4e": "\u1d3a",  # N
    "\x4f": "\u1d3c",  # O
    "\x50": "\u1d3e",  # P
    "\x51": "\ua7f4",  # Q
    "\x52": "\u1d3f",  # R
    "\x53": "\ua7f1",  # S
    "\x54": "\u1d40",  # T
    "\x55": "\u1d41",  # U
    "\x56": "\u2c7d",  # V
    "\x57": "\u1d42",  # W
    # lower case letters
    "\x61": "\u1d43",  # a
    "\x62": "\u1d47",  # b
    "\x63": "\u1d9c",  # c
    "\x64": "\u1d48",  # d
    "\x65": "\u1d49",  # e
    "\x66": "\u1da0",  # f
    "\x67": "\u1d4d",  # g
    "\x68": "\u02b0",  # h
    "\x69": "\u2071",  # i
    "\x6a": "\u02b2",  # j
    "\x6b": "\u1d4f",  # k
    "\x6c": "\u1da9",  # l; alternative: "\u02e1"
    "\x6d": "\u1d50",  # m
    "\x6e": "\u207f",  # n
    "\x6f": "\u1d52",  # o
    "\x70": "\u1d56",  # p
    # U+107A5 lies outside the Basic Multilingual Plane, so it needs the
    # eight-digit "\U" escape: "\u107a5" would be the TWO characters
    # U+107A followed by the digit "5".
    "\x71": "\U000107a5",  # q
    "\x72": "\u02b3",  # r
    "\x73": "\u02e2",  # s
    "\x74": "\u1d57",  # t
    "\x75": "\u1d58",  # u
    "\x76": "\u1d5b",  # v
    "\x77": "\u02b7",  # w
    "\x78": "\u02e3",  # x
    "\x79": "\u02b8",  # y
    "\x7a": "\u1dbb",  # z
    # white space and newline characters are mapped to themselves
    " ": " ",
    "\t": "\t",
    "\n": "\n",
    "\x0b": "\x0b",
    "\x0c": "\x0c",
    "\r": "\r",
    "\x85": "\x85",
    "\xa0": "\xa0",
    "\u1680": "\u1680",
    "\u2000": "\u2000",
    "\u2001": "\u2001",
    "\u2002": "\u2002",
    "\u2003": "\u2003",
    "\u2004": "\u2004",
    "\u2005": "\u2005",
    "\u2006": "\u2006",
    "\u2007": "\u2007",
    "\u2008": "\u2008",
    "\u2009": "\u2009",
    "\u200a": "\u200a",
    "\u2028": "\u2028",
    "\u2029": "\u2029",
    "\u202f": "\u202f",
    "\u205f": "\u205f",
    "\u3000": "\u3000",
}.__getitem__


def superscript(s: str) -> str:
    """
    Transform a string into Unicode-based superscript.

    All characters that can be represented as superscript in unicode will be
    translated to superscript. Notice that only a subset of the latin
    characters can be converted to unicode superscript. If any character
    cannot be translated, it will raise a :class:`KeyError`. White space is
    preserved.

    :param s: the string
    :returns: the string in superscript
    :raises KeyError: if a character cannot be converted
    :raises TypeError: if `s` is not a string

    >>> superscript("a0 =4(e)")
    '\u1d43\u2070 \u207c\u2074\u207d\u1d49\u207e'

    >>> len(superscript("q"))
    1

    >>> try:
    ...     superscript("a0=4(e)Y")
    ... except KeyError as ke:
    ...     print(ke)
    'Y'

    >>> try:
    ...     superscript(None)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__iter__' requires a 'str' object but received a 'NoneType'

    >>> try:
    ...     superscript(1)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__iter__' requires a 'str' object but received a 'int'
    """
    # Calling the unbound ``str.__iter__`` rejects every non-``str`` argument
    # with a ``TypeError`` instead of silently iterating over, e.g., a list.
    return "".join(map(__SUPERSCRIPT, str.__iter__(s)))
#: The internal table for converting normal characters to unicode
#: subscripts. It is stored as the bound ``__getitem__`` of a dictionary so
#: that it can be handed directly to :func:`map`; looking up a character that
#: has no entry raises a :class:`KeyError`.
__SUBSCRIPT: Final[Callable[[str], str]] = {
    # numbers from 0 to 9
    "\x30": "\u2080",  # 0
    "\x31": "\u2081",  # 1
    "\x32": "\u2082",  # 2
    "\x33": "\u2083",  # 3
    "\x34": "\u2084",  # 4
    "\x35": "\u2085",  # 5
    "\x36": "\u2086",  # 6
    "\x37": "\u2087",  # 7
    "\x38": "\u2088",  # 8
    "\x39": "\u2089",  # 9
    # +/-/=/(/)
    "\x2b": "\u208a",  # +
    "\x2d": "\u208b",  # -
    "\x3d": "\u208c",  # =
    "\x28": "\u208d",  # (
    "\x29": "\u208e",  # )
    # lower case letters
    "\x61": "\u2090",  # a
    "\x65": "\u2091",  # e
    "\x68": "\u2095",  # h
    "\x69": "\u1d62",  # i
    "\x6a": "\u2c7c",  # j
    "\x6b": "\u2096",  # k
    "\x6c": "\u2097",  # l
    "\x6d": "\u2098",  # m
    "\x6e": "\u2099",  # n
    "\x6f": "\u2092",  # o
    "\x70": "\u209a",  # p
    "\x72": "\u1d63",  # r
    "\x73": "\u209b",  # s
    "\x74": "\u209c",  # t
    "\x75": "\u1d64",  # u
    "\x76": "\u1d65",  # v
    "\x78": "\u2093",  # x
    # The schwa, i.e., the upside-down "e": U+2094 is the subscript *small*
    # schwa, so the small letter U+0259 maps to it. The capital letter
    # U+018F is accepted as well to stay backward compatible.
    "\u0259": "\u2094",  # small letter schwa
    "\u018f": "\u2094",  # capital letter schwa
    # white space and newline characters are mapped to themselves
    " ": " ",
    "\t": "\t",
    "\n": "\n",
    "\x0b": "\x0b",
    "\x0c": "\x0c",
    "\r": "\r",
    "\x85": "\x85",
    "\xa0": "\xa0",
    "\u1680": "\u1680",
    "\u2000": "\u2000",
    "\u2001": "\u2001",
    "\u2002": "\u2002",
    "\u2003": "\u2003",
    "\u2004": "\u2004",
    "\u2005": "\u2005",
    "\u2006": "\u2006",
    "\u2007": "\u2007",
    "\u2008": "\u2008",
    "\u2009": "\u2009",
    "\u200a": "\u200a",
    "\u2028": "\u2028",
    "\u2029": "\u2029",
    "\u202f": "\u202f",
    "\u205f": "\u205f",
    "\u3000": "\u3000",
}.__getitem__


def subscript(s: str) -> str:
    """
    Transform a string into Unicode-based subscript.

    All characters that can be represented as subscript in unicode will be
    translated to subscript. Notice that only a subset of the latin
    characters can be converted to unicode subscript. If any character
    cannot be translated, it will raise a :class:`KeyError`. White space is
    preserved.

    :param s: the string
    :returns: the string in subscript
    :raises KeyError: if a character cannot be converted
    :raises TypeError: if `s` is not a string

    >>> subscript("a0= 4(e)")
    '\u2090\u2080\u208c \u2084\u208d\u2091\u208e'

    >>> try:
    ...     subscript("a0=4(e)Y")
    ... except KeyError as ke:
    ...     print(ke)
    'Y'

    >>> try:
    ...     subscript(None)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__iter__' requires a 'str' object but received a 'NoneType'

    >>> try:
    ...     subscript(1)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__iter__' requires a 'str' object but received a 'int'
    """
    # Calling the unbound ``str.__iter__`` rejects every non-``str`` argument
    # with a ``TypeError`` instead of silently iterating over, e.g., a list.
    return "".join(map(__SUBSCRIPT, str.__iter__(s)))