Coverage for bookbuilderpy/format_python.py

1"""A formatter for python code."""

2import io

3import sys

4import token

5import tokenize

6from typing import Iterable

8import regex as reg # type: ignore

9import strip_hints as sh # type: ignore

10import yapf # type: ignore

12from bookbuilderpy.source_tools import (

13 format_empty_lines,

14 select_lines,

15 strip_common_whitespace_prefix,

16)

17from bookbuilderpy.strings import lines_to_str, str_to_lines

18from bookbuilderpy.types import type_error

def __no_empty_after(line: str) -> bool:
    """
    Check whether no empty line is permitted after the given line.

    This is a plain prefix test: the line must begin with one of the
    keywords `def`, `import`, or `from`, each directly followed by a
    single space. The bare keyword without the trailing space does not
    match.

    :param line: the line
    :return: `True` if the line starts with `"def "`, `"import "`, or
        `"from "`, `False` otherwise

    >>> __no_empty_after("def ")
    True
    >>> __no_empty_after("import ")
    True
    >>> __no_empty_after("from ")
    True
    >>> __no_empty_after("def")
    False
    >>> __no_empty_after("import")
    False
    >>> __no_empty_after("from")
    False
    """
    return line.startswith(("def ", "import ", "from "))

def __empty_before(line: str) -> bool:
    """
    Check whether an empty line is needed before this one.

    This is a plain prefix test: the line must begin with `def` or
    `class`, directly followed by a single space. The bare keyword
    without the trailing space does not match.

    :param line: the line
    :return: `True` if the line starts with `"def "` or `"class "`,
        `False` otherwise

    >>> __empty_before("def")
    False
    >>> __empty_before("def ")
    True
    >>> __empty_before("class")
    False
    >>> __empty_before("class ")
    True
    """
    return line.startswith(("def ", "class "))

def __force_no_empty_after(line: str) -> bool:
    """
    Check whether an empty line must never follow the given line.

    The test is true for every line that begins with the character `@`.

    :param line: the line
    :return: `True` if the line starts with `"@"`, `False` otherwise

    >>> __force_no_empty_after("@")
    True
    >>> __force_no_empty_after("def ")
    False
    """
    return line.startswith("@")

#: the internal style for formatting Python code: yapf's PEP8 style with
#: the overrides below applied on top of it
__YAPF_STYLE = yapf.style.CreatePEP8Style()
__YAPF_STYLE["ARITHMETIC_PRECEDENCE_INDICATION"] = True
__YAPF_STYLE["BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION"] = 1
__YAPF_STYLE["BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF"] = False
__YAPF_STYLE["COALESCE_BRACKETS"] = True
__YAPF_STYLE["COLUMN_LIMIT"] = 74
__YAPF_STYLE["EACH_DICT_ENTRY_ON_SEPARATE_LINE"] = False
__YAPF_STYLE["SPLIT_BEFORE_NAMED_ASSIGNS"] = False

def __format_lines(code: str) -> str:
    r"""
    Format Python code lines.

    The code is passed through yapf using the module-internal style
    `__YAPF_STYLE`; trailing whitespace (including trailing newlines) is
    removed from the formatted text.

    :param code: the original code
    :return: the formatted code with trailing whitespace removed

    >>> __format_lines("\ndef a():\n return 7- 45\n\n")
    'def a():\n    return 7 - 45'
    >>> __format_lines("\n\n \nclass b:\n def bb(self): x =3/a()")
    'class b:\n    def bb(self):\n        x = 3 / a()'
    """
    # FormatCode returns a tuple whose first element is the formatted text.
    formatted: str = yapf.yapf_api.FormatCode(
        code, style_config=__YAPF_STYLE)[0]
    return formatted.rstrip()

#: the regex matching a comment that occupies a complete line: a newline,
#: optional spaces/tabs, a `#`, the rest of that line, and its newline
__REGEX_STRIP_LINE_COMMENT: reg.Regex = reg.compile(
    "\\n[ \\t]*?#.*?\\n",
    flags=reg.V1 | reg.MULTILINE)  # pylint: disable=E1101

104

105

def __strip_hints(code: str,
                  strip_comments: bool = False) -> str:
    r"""
    Strip all type hints from the given code string.

    :param code: the code string
    :param strip_comments: if `True`, all comments occupying a complete
        line are removed from the result as well; if `False`, only those
        comment lines that were introduced by the hint stripping are
        removed
    :return: the stripped code string

    >>> __format_lines(__strip_hints(
    ...     "a: int = 7\ndef b(c: int) -> List[int]:\n return [4]"))
    'a = 7\n\ndef b(c):\n    return [4]'
    """
    new_text: str = sh.strip_string_to_string(code,
                                              strip_nl=True,
                                              to_empty=True)

    # If we have single lines with type hints only, the above will turn
    # them into line comments. We need to get rid of those.

    if strip_comments:
        # In the ideal case, we want to strip all comments anyway.
        # Then we do not need to bother with anything complex and can
        # directly use a regular expression getting rid of them.
        # Each match consumes the newline that ends the comment line, so
        # directly adjacent comment lines need several passes: we repeat
        # until the text no longer changes.
        previous = None
        while previous != new_text:
            previous = new_text
            new_text = reg.sub(__REGEX_STRIP_LINE_COMMENT, "\n", new_text)
        return new_text

    # If we should preserve normal comments, all we can do is trying to
    # find these "new" comments in a very pedestrian fashion: we compare
    # the original and the stripped text line by line.
    orig_lines: list[str] = code.splitlines()
    new_lines: list[str] = new_text.splitlines()
    # Walk backwards so that deleting a line does not shift the indices
    # of the lines that still have to be inspected.
    for i in range(min(len(orig_lines), len(new_lines)) - 1, -1, -1):
        before: str = orig_lines[i].strip()
        after: str = new_lines[i].strip()
        # A line that is a comment now, was not a comment originally, and
        # still ends with its original content was created by the
        # stripping step.
        if after.startswith("#") and (not before.startswith("#")) \
                and after.endswith(before):
            del new_lines[i]
    return lines_to_str(new_lines, trailing_newline=False)

145

146

def __strip_docstrings_and_comments(code: str,
                                    strip_docstrings: bool = True,
                                    strip_comments: bool = True) -> str:
    r"""
    Remove all docstrings and comments from a string.

    :param code: the code
    :param strip_docstrings: should we delete docstrings?
    :param strip_comments: should we delete comments?
    :return: the stripped string, without leading newlines
    :raises ValueError: if nothing but newlines remains after stripping

    >>> __strip_docstrings_and_comments("a = 5# bla\n", False, False)
    'a = 5# bla\n'
    >>> __strip_docstrings_and_comments("a = 5# bla\n", False, True)
    'a = 5\n'
    >>> __strip_docstrings_and_comments('def b():\n    \"\"\"bla!\"\"\"', True)
    'def b():\n    '
    >>> __strip_docstrings_and_comments('# 1\na = 5\n# 2\nb = 6\n')
    'a = 5\nb = 6\n'
    """
    # First, we strip line comments that are hard to catch correctly with
    # the tokenization approach later. Each match consumes the newline
    # ending the comment line, so we repeat until nothing changes anymore.
    if strip_comments:
        previous = None
        while previous != code:
            previous = code
            code = reg.sub(__REGEX_STRIP_LINE_COMMENT, "\n", code)
        del previous

    # Now we strip the doc strings and remaining comments by re-emitting
    # the token stream, padding with spaces to keep the column positions.
    prev_toktype: int = token.INDENT
    last_lineno: int = -1
    last_col: int = 0
    # Set to 1 when a docstring was dropped, so that the NEWLINE token
    # directly following it is dropped as well.
    eat_newline: int = 0
    with io.StringIO() as output, io.StringIO(code) as reader:
        for toktype, tok_text, (slineno, scol), (tok_end_line, ecol), _ in \
                tokenize.generate_tokens(reader.readline):
            elineno = tok_end_line
            ttext = tok_text
            eat_newline -= 1
            if slineno > last_lineno:
                # a new line begins: columns are counted from 0 again
                last_col = 0
            if scol > last_col:
                # restore the horizontal gap in front of this token
                output.write(" " * (scol - last_col))
            if (toktype == token.STRING) and \
                    (prev_toktype in (token.INDENT, token.NEWLINE)):
                # a string starting a logical line is treated as docstring
                if strip_docstrings:
                    ttext = ""
                    eat_newline = 1
            elif toktype == tokenize.COMMENT:
                if strip_comments:
                    ttext = ""
            elif toktype == tokenize.NEWLINE and eat_newline >= 0:
                # swallow the line break belonging to a dropped docstring
                ttext = ""
                elineno += 1
            output.write(ttext)
            prev_toktype = toktype
            last_col = ecol
            last_lineno = elineno

        # must be read before the buffer is closed by the `with` block
        result = output.getvalue()

    # remove leading newlines
    result = result.lstrip("\n")
    if not result:
        raise ValueError(f"code {code} becomes empty after docstring "
                         "and comment stripping!")
    return result

219

220

def format_python(code: Iterable[str],
                  strip_docstrings: bool = True,
                  strip_comments: bool = True,
                  strip_hints: bool = True) -> list[str]:
    """
    Format a python code fragment.

    The fragment is repeatedly normalized (empty lines, common whitespace
    prefix), formatted, and stripped of the selected elements until a
    pass no longer makes the text shorter. Text size is compared as the
    tuple `(number of newlines, number of characters)`.

    :param code: the code fragment
    :param strip_docstrings: should we delete docstrings?
    :param strip_comments: should we delete comments?
    :param strip_hints: should we delete type hints?
    :return: the formatted code
    :raises TypeError: presumably, via `type_error`, if an argument has
        the wrong type -- the exact exception class is defined by
        `bookbuilderpy.types.type_error`
    :raises ValueError: if the code becomes empty during processing or
        the final result is rejected
    """
    if not isinstance(code, Iterable):
        raise type_error(code, "code", Iterable)
    if not isinstance(strip_docstrings, bool):
        raise type_error(strip_docstrings, "strip_docstrings", bool)
    if not isinstance(strip_comments, bool):
        raise type_error(strip_comments, "strip_comments", bool)
    if not isinstance(strip_hints, bool):
        raise type_error(strip_hints, "strip_hints", bool)

    # size of the best text so far: (newline count, character count)
    old_len: tuple[int, int] = (sys.maxsize, sys.maxsize)

    shortest: list[str] = list(code)
    rcode: list[str] = shortest
    not_first_run: bool = False
    while True:
        # step 1: normalize empty lines and remove the common indentation
        rcode = strip_common_whitespace_prefix(format_empty_lines(
            lines=rcode,
            empty_before=__empty_before,
            no_empty_after=__no_empty_after,
            force_no_empty_after=__force_no_empty_after,
            max_consecutive_empty_lines=1))
        if len(rcode) <= 0:
            raise ValueError("Code becomes empty.")

        text = lines_to_str(rcode)
        new_len: tuple[int, int] = (text.count("\n"), len(text))
        # after the first complete pass: stop once there is no improvement
        if not_first_run and (old_len <= new_len):
            break
        shortest = rcode
        old_len = new_len

        # step 2: format, strip the selected elements, and re-format only
        # if the stripping changed anything
        text = __format_lines(text)
        ntext = text
        if strip_docstrings or strip_comments:
            ntext = __strip_docstrings_and_comments(
                text, strip_docstrings=strip_docstrings,
                strip_comments=strip_comments).rstrip()
        if strip_hints:
            ntext = __strip_hints(ntext,
                                  strip_comments=strip_comments)
        if ntext != text:
            text = __format_lines(ntext)
        del ntext

        text = text.rstrip()
        new_len = text.count("\n"), len(text)
        if not_first_run and (old_len <= new_len):
            break

        rcode = str_to_lines(text)
        shortest = rcode
        old_len = new_len
        not_first_run = True

    # NOTE(review): besides an empty result, this also rejects a result
    # whose recorded newline count is zero -- confirm that snippets
    # consisting of a single line are really meant to be refused.
    if (len(shortest) <= 0) or (old_len[0] <= 0):
        raise ValueError(f"Text cannot become {shortest}.")

    return shortest

292

293

def preprocess_python(code: list[str],
                      lines: list[int] | None = None,
                      labels: Iterable[str] | None = None,
                      args: set[str] | None = None) -> str:
    r"""
    Preprocess Python code.

    First, we select all lines of the code we want to keep.
    If labels are defined, then lines can be kept as ranges or as single
    lines.
    Otherwise, all lines are selected in this step.

    Then, if line numbers are provided, we select the lines based on the
    line numbers from the lines we have preserved.

    Finally, the Python formatter is applied.

    The entries of `args` switch processing steps *off*: `"format"`
    disables formatting altogether, `"doc"` keeps docstrings,
    `"comments"` keeps comments, and `"hints"` keeps type hints. If
    `args` is `None` or empty, all steps are applied.

    :param code: the code loaded from a file
    :param lines: the lines to keep, or `None` if we keep all
    :param labels: a list of labels marking start and end of code snippets
        to include
    :param args: the arguments for the code formatter
    :return: the formatted code string
    """
    keep_lines = select_lines(code=code, labels=labels, lines=lines)

    # set up arguments: by default, everything is formatted and stripped
    strip_docstrings: bool = True
    strip_comments: bool = True
    strip_hints: bool = True
    do_format: bool = True
    if args:
        # the presence of a flag turns the corresponding step off
        do_format = "format" not in args
        strip_docstrings = "doc" not in args
        strip_comments = "comments" not in args
        strip_hints = "hints" not in args

    if do_format:
        return lines_to_str(format_python(keep_lines,
                                          strip_docstrings=strip_docstrings,
                                          strip_comments=strip_comments,
                                          strip_hints=strip_hints),
                            trailing_newline=True)
    return lines_to_str(keep_lines, trailing_newline=True)

Coverage for bookbuilderpy/format_python.py: 93%

143 statements