Coverage for pycommons/strings/chars.py: 100%

1"""Constants for common characters."""

3from typing import Callable, Final

#: The non-breaking space character (U+00A0).
NBSP: Final[str] = "\xa0"

#: The non-breaking hyphen character (U+2011).
NBDASH: Final[str] = "\u2011"

#: A string holding all white space characters that are not newline
#: characters. This is a plain string of characters (usable, e.g., with
#: ``in`` or :meth:`str.strip`), not a regular expression.
WHITESPACE: Final[str] = (
    "\t\x0b\x0c \xa0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
    "\u2008\u2009\u200a\u202f\u205f\u3000")

#: A string holding all newline characters. Like :const:`WHITESPACE`, this
#: is a plain string of characters, not a regular expression.
NEWLINE: Final[str] = "\n\r\x85\u2028\u2029"

#: A string holding every character of :const:`WHITESPACE` and of
#: :const:`NEWLINE`, ordered by code point.
WHITESPACE_OR_NEWLINE: Final[str] = (
    "\t\n\x0b\x0c\r \x85\xa0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006"
    "\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000")

#: The internal lookup function translating a single normal character to its
#: Unicode superscript form. It is the bound ``__getitem__`` of the mapping
#: table, so it raises a :class:`KeyError` for characters without an entry.
__SUPERSCRIPT: Final[Callable[[str], str]] = {
    # numbers from 0 to 9
    "\x30": "\u2070",  # 0
    "\x31": "\xb9",  # 1
    "\x32": "\xb2",  # 2
    "\x33": "\xb3",  # 3
    "\x34": "\u2074",  # 4
    "\x35": "\u2075",  # 5
    "\x36": "\u2076",  # 6
    "\x37": "\u2077",  # 7
    "\x38": "\u2078",  # 8
    "\x39": "\u2079",  # 9
    # +/-/=/(/)
    "\x2b": "\u207a",  # +
    "\x2d": "\u207b",  # -
    "\x3d": "\u207c",  # =
    "\x28": "\u207d",  # (
    "\x29": "\u207e",  # )
    # upper case letters (this table has no entries for X, Y, and Z)
    "\x41": "\u1d2c",  # A
    "\x42": "\u1d2e",  # B
    "\x43": "\ua7f2",  # C
    "\x44": "\u1d30",  # D
    "\x45": "\u1d31",  # E
    "\x46": "\ua7f3",  # F
    "\x47": "\u1d33",  # G
    "\x48": "\u1d34",  # H
    "\x49": "\u1d35",  # I
    "\x4a": "\u1d36",  # J
    "\x4b": "\u1d37",  # K
    "\x4c": "\u1d38",  # L
    "\x4d": "\u1d39",  # M
    "\x4e": "\u1d3a",  # N
    "\x4f": "\u1d3c",  # O
    "\x50": "\u1d3e",  # P
    "\x51": "\ua7f4",  # Q
    "\x52": "\u1d3f",  # R
    "\x53": "\ua7f1",  # S
    "\x54": "\u1d40",  # T
    "\x55": "\u1d41",  # U
    "\x56": "\u2c7d",  # V
    "\x57": "\u1d42",  # W
    # lower case letters
    "\x61": "\u1d43",  # a
    "\x62": "\u1d47",  # b
    "\x63": "\u1d9c",  # c
    "\x64": "\u1d48",  # d
    "\x65": "\u1d49",  # e
    "\x66": "\u1da0",  # f
    "\x67": "\u1d4d",  # g
    "\x68": "\u02b0",  # h
    "\x69": "\u2071",  # i
    "\x6a": "\u02b2",  # j
    "\x6b": "\u1d4f",  # k
    "\x6c": "\u1da9",  # l; an alternative would be U+02E1
    "\x6d": "\u1d50",  # m
    "\x6e": "\u207f",  # n
    "\x6f": "\u1d52",  # o
    "\x70": "\u1d56",  # p
    # U+107A5 lies outside the Basic Multilingual Plane, so it needs the
    # 8-digit ``\U`` escape; "\u107a5" would be U+107A followed by "5".
    "\x71": "\U000107a5",  # q
    "\x72": "\u02b3",  # r
    "\x73": "\u02e2",  # s
    "\x74": "\u1d57",  # t
    "\x75": "\u1d58",  # u
    "\x76": "\u1d5b",  # v
    "\x77": "\u02b7",  # w
    "\x78": "\u02e3",  # x
    "\x79": "\u02b8",  # y
    "\x7a": "\u1dbb",  # z
    # white space and newline characters are mapped to themselves
    " ": " ",
    "\t": "\t",
    "\n": "\n",
    "\x0b": "\x0b",
    "\x0c": "\x0c",
    "\r": "\r",
    "\x85": "\x85",
    "\xa0": "\xa0",
    "\u1680": "\u1680",
    "\u2000": "\u2000",
    "\u2001": "\u2001",
    "\u2002": "\u2002",
    "\u2003": "\u2003",
    "\u2004": "\u2004",
    "\u2005": "\u2005",
    "\u2006": "\u2006",
    "\u2007": "\u2007",
    "\u2008": "\u2008",
    "\u2009": "\u2009",
    "\u200a": "\u200a",
    "\u2028": "\u2028",
    "\u2029": "\u2029",
    "\u202f": "\u202f",
    "\u205f": "\u205f",
    "\u3000": "\u3000",
}.__getitem__


def superscript(s: str) -> str:
    """
    Transform a string into Unicode-based superscript.

    All characters that can be represented as superscript in unicode will be
    translated to superscript. Notice that only a subset of the latin
    characters can be converted to unicode superscript. If any character
    cannot be translated, it will raise a :class:`KeyError`. White space is
    preserved.

    :param s: the string
    :returns: the string in superscript
    :raises KeyError: if a character cannot be converted
    :raises TypeError: if `s` is not a string

    >>> superscript("a0 =4(e)")
    '\u1d43\u2070 \u207c\u2074\u207d\u1d49\u207e'

    >>> try:
    ...     superscript("a0=4(e)Y")
    ... except KeyError as ke:
    ...     print(ke)
    'Y'

    >>> try:
    ...     superscript(None)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__iter__' requires a 'str' object but received a 'NoneType'

    >>> try:
    ...     superscript(1)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__iter__' requires a 'str' object but received a 'int'
    """
    # Calling the unbound str.__iter__ makes non-string arguments fail with
    # a TypeError instead of being iterated like an arbitrary iterable.
    return "".join(map(__SUPERSCRIPT, str.__iter__(s)))

162

163

#: The internal lookup function translating a single normal character to its
#: Unicode subscript form. It is the bound ``__getitem__`` of the mapping
#: table, so it raises a :class:`KeyError` for characters without an entry.
__SUBSCRIPT: Final[Callable[[str], str]] = {
    # numbers from 0 to 9
    "\x30": "\u2080",  # 0
    "\x31": "\u2081",  # 1
    "\x32": "\u2082",  # 2
    "\x33": "\u2083",  # 3
    "\x34": "\u2084",  # 4
    "\x35": "\u2085",  # 5
    "\x36": "\u2086",  # 6
    "\x37": "\u2087",  # 7
    "\x38": "\u2088",  # 8
    "\x39": "\u2089",  # 9
    # +/-/=/(/)
    "\x2b": "\u208a",  # +
    "\x2d": "\u208b",  # -
    "\x3d": "\u208c",  # =
    "\x28": "\u208d",  # (
    "\x29": "\u208e",  # )
    # lower case letters (only some of them have an entry in this table)
    "\x61": "\u2090",  # a
    "\x65": "\u2091",  # e
    "\x68": "\u2095",  # h
    "\x69": "\u1d62",  # i
    "\x6a": "\u2c7c",  # j
    "\x6b": "\u2096",  # k
    "\x6c": "\u2097",  # l
    "\x6d": "\u2098",  # m
    "\x6e": "\u2099",  # n
    "\x6f": "\u2092",  # o
    "\x70": "\u209a",  # p
    "\x72": "\u1d63",  # r
    "\x73": "\u209b",  # s
    "\x74": "\u209c",  # t
    "\x75": "\u1d64",  # u
    "\x76": "\u1d65",  # v
    "\x78": "\u2093",  # x
    # the letter schwa (an upside-down "e"); U+2094 is the subscript of the
    # small letter U+0259, the capital U+018F is kept for compatibility
    "\u0259": "\u2094",  # small letter schwa
    "\u018f": "\u2094",  # capital letter schwa
    # white space and newline characters are mapped to themselves
    " ": " ",
    "\t": "\t",
    "\n": "\n",
    "\x0b": "\x0b",
    "\x0c": "\x0c",
    "\r": "\r",
    "\x85": "\x85",
    "\xa0": "\xa0",
    "\u1680": "\u1680",
    "\u2000": "\u2000",
    "\u2001": "\u2001",
    "\u2002": "\u2002",
    "\u2003": "\u2003",
    "\u2004": "\u2004",
    "\u2005": "\u2005",
    "\u2006": "\u2006",
    "\u2007": "\u2007",
    "\u2008": "\u2008",
    "\u2009": "\u2009",
    "\u200a": "\u200a",
    "\u2028": "\u2028",
    "\u2029": "\u2029",
    "\u202f": "\u202f",
    "\u205f": "\u205f",
    "\u3000": "\u3000",
}.__getitem__


def subscript(s: str) -> str:
    """
    Transform a string into Unicode-based subscript.

    All characters that can be represented as subscript in unicode will be
    translated to subscript. Notice that only a subset of the latin
    characters can be converted to unicode subscript. If any character
    cannot be translated, it will raise a :class:`KeyError`. White space is
    preserved.

    :param s: the string
    :returns: the string in subscript
    :raises KeyError: if a character cannot be converted
    :raises TypeError: if `s` is not a string

    >>> subscript("a0= 4(e)")
    '\u2090\u2080\u208c \u2084\u208d\u2091\u208e'

    >>> try:
    ...     subscript("a0=4(e)Y")
    ... except KeyError as ke:
    ...     print(ke)
    'Y'

    >>> try:
    ...     subscript(None)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__iter__' requires a 'str' object but received a 'NoneType'

    >>> try:
    ...     subscript(1)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__iter__' requires a 'str' object but received a 'int'
    """
    # Calling the unbound str.__iter__ makes non-string arguments fail with
    # a TypeError instead of being iterated like an arbitrary iterable.
    return "".join(map(__SUBSCRIPT, str.__iter__(s)))

Coverage for pycommons / strings / chars.py: 100%

12 statements