# autopep8.py
  1. #!/usr/bin/env python
  2. # Copyright (C) 2010-2011 Hideo Hattori
  3. # Copyright (C) 2011-2013 Hideo Hattori, Steven Myint
  4. # Copyright (C) 2013-2016 Hideo Hattori, Steven Myint, Bill Wendling
  5. #
  6. # Permission is hereby granted, free of charge, to any person obtaining
  7. # a copy of this software and associated documentation files (the
  8. # "Software"), to deal in the Software without restriction, including
  9. # without limitation the rights to use, copy, modify, merge, publish,
  10. # distribute, sublicense, and/or sell copies of the Software, and to
  11. # permit persons to whom the Software is furnished to do so, subject to
  12. # the following conditions:
  13. #
  14. # The above copyright notice and this permission notice shall be
  15. # included in all copies or substantial portions of the Software.
  16. #
  17. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20. # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  21. # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  22. # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  23. # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  24. # SOFTWARE.
  25. # Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
  26. # Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
  27. #
  28. # Permission is hereby granted, free of charge, to any person
  29. # obtaining a copy of this software and associated documentation files
  30. # (the "Software"), to deal in the Software without restriction,
  31. # including without limitation the rights to use, copy, modify, merge,
  32. # publish, distribute, sublicense, and/or sell copies of the Software,
  33. # and to permit persons to whom the Software is furnished to do so,
  34. # subject to the following conditions:
  35. #
  36. # The above copyright notice and this permission notice shall be
  37. # included in all copies or substantial portions of the Software.
  38. #
  39. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  40. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  41. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  42. # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  43. # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  44. # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  45. # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  46. # SOFTWARE.
  47. """Automatically formats Python code to conform to the PEP 8 style guide.
  48. Fixes that only need be done once can be added by adding a function of the form
  49. "fix_<code>(source)" to this module. They should return the fixed source code.
  50. These fixes are picked up by apply_global_fixes().
  51. Fixes that depend on pycodestyle should be added as methods to FixPEP8. See the
  52. class documentation for more information.
  53. """
  54. from __future__ import absolute_import
  55. from __future__ import division
  56. from __future__ import print_function
  57. from __future__ import unicode_literals
  58. import argparse
  59. import codecs
  60. import collections
  61. import copy
  62. import difflib
  63. import fnmatch
  64. import importlib
  65. import inspect
  66. import io
  67. import itertools
  68. import keyword
  69. import locale
  70. import os
  71. import re
  72. import signal
  73. import sys
  74. import textwrap
  75. import token
  76. import tokenize
  77. import warnings
  78. import ast
  79. from configparser import ConfigParser as SafeConfigParser, Error
  80. import pycodestyle
  81. __version__ = '2.3.2'
  82. CR = '\r'
  83. LF = '\n'
  84. CRLF = '\r\n'
  85. PYTHON_SHEBANG_REGEX = re.compile(r'^#!.*\bpython[23]?\b\s*$')
  86. LAMBDA_REGEX = re.compile(r'([\w.]+)\s=\slambda\s*([)(=\w,\s.]*):')
  87. COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+([^][)(}{]+?)\s+(in|is)\s')
  88. COMPARE_NEGATIVE_REGEX_THROUGH = re.compile(r'\b(not\s+in|is\s+not)\s')
  89. BARE_EXCEPT_REGEX = re.compile(r'except\s*:')
  90. STARTSWITH_DEF_REGEX = re.compile(r'^(async\s+def|def)\s.*\):')
  91. DOCSTRING_START_REGEX = re.compile(r'^u?r?(?P<kind>["\']{3})')
  92. ENABLE_REGEX = re.compile(r'# *(fmt|autopep8): *on')
  93. DISABLE_REGEX = re.compile(r'# *(fmt|autopep8): *off')
  94. ENCODING_MAGIC_COMMENT = re.compile(
  95. r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)'
  96. )
  97. COMPARE_TYPE_REGEX = re.compile(
  98. r'([=!]=)\s+type(?:\s*\(\s*([^)]*[^ )])\s*\))'
  99. r'|\btype(?:\s*\(\s*([^)]*[^ )])\s*\))\s+([=!]=)'
  100. )
  101. TYPE_REGEX = re.compile(r'(type\s*\(\s*[^)]*?[^\s)]\s*\))')
  102. EXIT_CODE_OK = 0
  103. EXIT_CODE_ERROR = 1
  104. EXIT_CODE_EXISTS_DIFF = 2
  105. EXIT_CODE_ARGPARSE_ERROR = 99
  106. # For generating line shortening candidates.
  107. SHORTEN_OPERATOR_GROUPS = frozenset([
  108. frozenset([',']),
  109. frozenset(['%']),
  110. frozenset([',', '(', '[', '{']),
  111. frozenset(['%', '(', '[', '{']),
  112. frozenset([',', '(', '[', '{', '%', '+', '-', '*', '/', '//']),
  113. frozenset(['%', '+', '-', '*', '/', '//']),
  114. ])
  115. DEFAULT_IGNORE = 'E226,E24,W50,W690' # TODO: use pycodestyle.DEFAULT_IGNORE
  116. DEFAULT_INDENT_SIZE = 4
  117. # these fixes conflict with each other, if the `--ignore` setting causes both
  118. # to be enabled, disable both of them
  119. CONFLICTING_CODES = ('W503', 'W504')
  120. if sys.platform == 'win32': # pragma: no cover
  121. DEFAULT_CONFIG = os.path.expanduser(r'~\.pycodestyle')
  122. else:
  123. DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
  124. os.path.expanduser('~/.config'),
  125. 'pycodestyle')
  126. # fallback, use .pep8
  127. if not os.path.exists(DEFAULT_CONFIG): # pragma: no cover
  128. if sys.platform == 'win32':
  129. DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
  130. else:
  131. DEFAULT_CONFIG = os.path.join(os.path.expanduser('~/.config'), 'pep8')
  132. PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8', '.flake8')
  133. MAX_PYTHON_FILE_DETECTION_BYTES = 1024
  134. IS_SUPPORT_TOKEN_FSTRING = False
  135. if sys.version_info >= (3, 12): # pgrama: no cover
  136. IS_SUPPORT_TOKEN_FSTRING = True
  137. def _custom_formatwarning(message, category, _, __, line=None):
  138. return f"{category.__name__}: {message}\n"
  139. def open_with_encoding(filename, mode='r', encoding=None, limit_byte_check=-1):
  140. """Return opened file with a specific encoding."""
  141. if not encoding:
  142. encoding = detect_encoding(filename, limit_byte_check=limit_byte_check)
  143. return io.open(filename, mode=mode, encoding=encoding,
  144. newline='') # Preserve line endings
  145. def _detect_encoding_from_file(filename: str):
  146. try:
  147. with open(filename) as input_file:
  148. for idx, line in enumerate(input_file):
  149. if idx == 0 and line[0] == '\ufeff':
  150. return "utf-8-sig"
  151. if idx >= 2:
  152. break
  153. match = ENCODING_MAGIC_COMMENT.search(line)
  154. if match:
  155. return match.groups()[0]
  156. except Exception:
  157. pass
  158. # Python3's default encoding
  159. return 'utf-8'
  160. def detect_encoding(filename, limit_byte_check=-1):
  161. """Return file encoding."""
  162. encoding = _detect_encoding_from_file(filename)
  163. if encoding == "utf-8-sig":
  164. return encoding
  165. try:
  166. with open_with_encoding(filename, encoding=encoding) as test_file:
  167. test_file.read(limit_byte_check)
  168. return encoding
  169. except (LookupError, SyntaxError, UnicodeDecodeError):
  170. return 'latin-1'
  171. def readlines_from_file(filename):
  172. """Return contents of file."""
  173. with open_with_encoding(filename) as input_file:
  174. return input_file.readlines()
  175. def extended_blank_lines(logical_line,
  176. blank_lines,
  177. blank_before,
  178. indent_level,
  179. previous_logical):
  180. """Check for missing blank lines after class declaration."""
  181. if previous_logical.startswith(('def ', 'async def ')):
  182. if blank_lines and pycodestyle.DOCSTRING_REGEX.match(logical_line):
  183. yield (0, 'E303 too many blank lines ({})'.format(blank_lines))
  184. elif pycodestyle.DOCSTRING_REGEX.match(previous_logical):
  185. # Missing blank line between class docstring and method declaration.
  186. if (
  187. indent_level and
  188. not blank_lines and
  189. not blank_before and
  190. logical_line.startswith(('def ', 'async def ')) and
  191. '(self' in logical_line
  192. ):
  193. yield (0, 'E301 expected 1 blank line, found 0')
  194. def continued_indentation(logical_line, tokens, indent_level, hang_closing,
  195. indent_char, noqa):
  196. """Override pycodestyle's function to provide indentation information."""
  197. first_row = tokens[0][2][0]
  198. nrows = 1 + tokens[-1][2][0] - first_row
  199. if noqa or nrows == 1:
  200. return
  201. # indent_next tells us whether the next block is indented. Assuming
  202. # that it is indented by 4 spaces, then we should not allow 4-space
  203. # indents on the final continuation line. In turn, some other
  204. # indents are allowed to have an extra 4 spaces.
  205. indent_next = logical_line.endswith(':')
  206. row = depth = 0
  207. valid_hangs = (
  208. (DEFAULT_INDENT_SIZE,)
  209. if indent_char != '\t' else (DEFAULT_INDENT_SIZE,
  210. 2 * DEFAULT_INDENT_SIZE)
  211. )
  212. # Remember how many brackets were opened on each line.
  213. parens = [0] * nrows
  214. # Relative indents of physical lines.
  215. rel_indent = [0] * nrows
  216. # For each depth, collect a list of opening rows.
  217. open_rows = [[0]]
  218. # For each depth, memorize the hanging indentation.
  219. hangs = [None]
  220. # Visual indents.
  221. indent_chances = {}
  222. last_indent = tokens[0][2]
  223. indent = [last_indent[1]]
  224. last_token_multiline = None
  225. line = None
  226. last_line = ''
  227. last_line_begins_with_multiline = False
  228. for token_type, text, start, end, line in tokens:
  229. newline = row < start[0] - first_row
  230. if newline:
  231. row = start[0] - first_row
  232. newline = (not last_token_multiline and
  233. token_type not in (tokenize.NL, tokenize.NEWLINE))
  234. last_line_begins_with_multiline = last_token_multiline
  235. if newline:
  236. # This is the beginning of a continuation line.
  237. last_indent = start
  238. # Record the initial indent.
  239. rel_indent[row] = pycodestyle.expand_indent(line) - indent_level
  240. # Identify closing bracket.
  241. close_bracket = (token_type == tokenize.OP and text in ']})')
  242. # Is the indent relative to an opening bracket line?
  243. for open_row in reversed(open_rows[depth]):
  244. hang = rel_indent[row] - rel_indent[open_row]
  245. hanging_indent = hang in valid_hangs
  246. if hanging_indent:
  247. break
  248. if hangs[depth]:
  249. hanging_indent = (hang == hangs[depth])
  250. visual_indent = (not close_bracket and hang > 0 and
  251. indent_chances.get(start[1]))
  252. if close_bracket and indent[depth]:
  253. # Closing bracket for visual indent.
  254. if start[1] != indent[depth]:
  255. yield (start, 'E124 {}'.format(indent[depth]))
  256. elif close_bracket and not hang:
  257. # closing bracket matches indentation of opening bracket's line
  258. if hang_closing:
  259. yield (start, 'E133 {}'.format(indent[depth]))
  260. elif indent[depth] and start[1] < indent[depth]:
  261. if visual_indent is not True:
  262. # Visual indent is broken.
  263. yield (start, 'E128 {}'.format(indent[depth]))
  264. elif (hanging_indent or
  265. (indent_next and
  266. rel_indent[row] == 2 * DEFAULT_INDENT_SIZE)):
  267. # Hanging indent is verified.
  268. if close_bracket and not hang_closing:
  269. yield (start, 'E123 {}'.format(indent_level +
  270. rel_indent[open_row]))
  271. hangs[depth] = hang
  272. elif visual_indent is True:
  273. # Visual indent is verified.
  274. indent[depth] = start[1]
  275. elif visual_indent in (text, str):
  276. # Ignore token lined up with matching one from a previous line.
  277. pass
  278. else:
  279. one_indented = (indent_level + rel_indent[open_row] +
  280. DEFAULT_INDENT_SIZE)
  281. # Indent is broken.
  282. if hang <= 0:
  283. error = ('E122', one_indented)
  284. elif indent[depth]:
  285. error = ('E127', indent[depth])
  286. elif not close_bracket and hangs[depth]:
  287. error = ('E131', one_indented)
  288. elif hang > DEFAULT_INDENT_SIZE:
  289. error = ('E126', one_indented)
  290. else:
  291. hangs[depth] = hang
  292. error = ('E121', one_indented)
  293. yield (start, '{} {}'.format(*error))
  294. # Look for visual indenting.
  295. if (
  296. parens[row] and
  297. token_type not in (tokenize.NL, tokenize.COMMENT) and
  298. not indent[depth]
  299. ):
  300. indent[depth] = start[1]
  301. indent_chances[start[1]] = True
  302. # Deal with implicit string concatenation.
  303. elif (token_type in (tokenize.STRING, tokenize.COMMENT) or
  304. text in ('u', 'ur', 'b', 'br')):
  305. indent_chances[start[1]] = str
  306. # Special case for the "if" statement because len("if (") is equal to
  307. # 4.
  308. elif not indent_chances and not row and not depth and text == 'if':
  309. indent_chances[end[1] + 1] = True
  310. elif text == ':' and line[end[1]:].isspace():
  311. open_rows[depth].append(row)
  312. # Keep track of bracket depth.
  313. if token_type == tokenize.OP:
  314. if text in '([{':
  315. depth += 1
  316. indent.append(0)
  317. hangs.append(None)
  318. if len(open_rows) == depth:
  319. open_rows.append([])
  320. open_rows[depth].append(row)
  321. parens[row] += 1
  322. elif text in ')]}' and depth > 0:
  323. # Parent indents should not be more than this one.
  324. prev_indent = indent.pop() or last_indent[1]
  325. hangs.pop()
  326. for d in range(depth):
  327. if indent[d] > prev_indent:
  328. indent[d] = 0
  329. for ind in list(indent_chances):
  330. if ind >= prev_indent:
  331. del indent_chances[ind]
  332. del open_rows[depth + 1:]
  333. depth -= 1
  334. if depth:
  335. indent_chances[indent[depth]] = True
  336. for idx in range(row, -1, -1):
  337. if parens[idx]:
  338. parens[idx] -= 1
  339. break
  340. assert len(indent) == depth + 1
  341. if (
  342. start[1] not in indent_chances and
  343. # This is for purposes of speeding up E121 (GitHub #90).
  344. not last_line.rstrip().endswith(',')
  345. ):
  346. # Allow to line up tokens.
  347. indent_chances[start[1]] = text
  348. last_token_multiline = (start[0] != end[0])
  349. if last_token_multiline:
  350. rel_indent[end[0] - first_row] = rel_indent[row]
  351. last_line = line
  352. if (
  353. indent_next and
  354. not last_line_begins_with_multiline and
  355. pycodestyle.expand_indent(line) == indent_level + DEFAULT_INDENT_SIZE
  356. ):
  357. pos = (start[0], indent[0] + 4)
  358. desired_indent = indent_level + 2 * DEFAULT_INDENT_SIZE
  359. if visual_indent:
  360. yield (pos, 'E129 {}'.format(desired_indent))
  361. else:
  362. yield (pos, 'E125 {}'.format(desired_indent))
# NOTE: need reload with runpy and call twice
# see: https://github.com/hhatto/autopep8/issues/625
importlib.reload(pycodestyle)
# Replace pycodestyle's stock logical-line check: remove its own
# ``continued_indentation`` entry from the check registry, then register the
# ``extended_blank_lines`` and ``continued_indentation`` callables that are in
# scope here. The replacement ``continued_indentation`` reports messages of the
# form "<code> <indent>", which the indentation fixers parse back out of
# ``result['info']``.
del pycodestyle._checks['logical_line'][pycodestyle.continued_indentation]
pycodestyle.register_check(extended_blank_lines)
pycodestyle.register_check(continued_indentation)
  369. class FixPEP8(object):
  370. """Fix invalid code.
  371. Fixer methods are prefixed "fix_". The _fix_source() method looks for these
  372. automatically.
  373. The fixer method can take either one or two arguments (in addition to
  374. self). The first argument is "result", which is the error information from
  375. pycodestyle. The second argument, "logical", is required only for
  376. logical-line fixes.
  377. The fixer method can return the list of modified lines or None. An empty
  378. list would mean that no changes were made. None would mean that only the
  379. line reported in the pycodestyle error was modified. Note that the modified
  380. line numbers that are returned are indexed at 1. This typically would
  381. correspond with the line number reported in the pycodestyle error
  382. information.
  383. [fixed method list]
  384. - e111,e114,e115,e116
  385. - e121,e122,e123,e124,e125,e126,e127,e128,e129
  386. - e201,e202,e203
  387. - e211
  388. - e221,e222,e223,e224,e225
  389. - e231
  390. - e251,e252
  391. - e261,e262
  392. - e271,e272,e273,e274,e275
  393. - e301,e302,e303,e304,e305,e306
  394. - e401,e402
  395. - e502
  396. - e701,e702,e703,e704
  397. - e711,e712,e713,e714
  398. - e721,e722
  399. - e731
  400. - w291
    - w503,w504
  402. """
  403. def __init__(self, filename,
  404. options,
  405. contents=None,
  406. long_line_ignore_cache=None):
  407. self.filename = filename
  408. if contents is None:
  409. self.source = readlines_from_file(filename)
  410. else:
  411. sio = io.StringIO(contents)
  412. self.source = sio.readlines()
  413. self.options = options
  414. self.indent_word = _get_indentword(''.join(self.source))
  415. self.original_source = copy.copy(self.source)
  416. # collect imports line
  417. self.imports = {}
  418. for i, line in enumerate(self.source):
  419. if (line.find("import ") == 0 or line.find("from ") == 0) and \
  420. line not in self.imports:
  421. # collect only import statements that first appeared
  422. self.imports[line] = i
  423. self.long_line_ignore_cache = (
  424. set() if long_line_ignore_cache is None
  425. else long_line_ignore_cache)
  426. # Many fixers are the same even though pycodestyle categorizes them
  427. # differently.
  428. self.fix_e115 = self.fix_e112
  429. self.fix_e121 = self._fix_reindent
  430. self.fix_e122 = self._fix_reindent
  431. self.fix_e123 = self._fix_reindent
  432. self.fix_e124 = self._fix_reindent
  433. self.fix_e126 = self._fix_reindent
  434. self.fix_e127 = self._fix_reindent
  435. self.fix_e128 = self._fix_reindent
  436. self.fix_e129 = self._fix_reindent
  437. self.fix_e133 = self.fix_e131
  438. self.fix_e202 = self.fix_e201
  439. self.fix_e203 = self.fix_e201
  440. self.fix_e204 = self.fix_e201
  441. self.fix_e211 = self.fix_e201
  442. self.fix_e221 = self.fix_e271
  443. self.fix_e222 = self.fix_e271
  444. self.fix_e223 = self.fix_e271
  445. self.fix_e226 = self.fix_e225
  446. self.fix_e227 = self.fix_e225
  447. self.fix_e228 = self.fix_e225
  448. self.fix_e241 = self.fix_e271
  449. self.fix_e242 = self.fix_e224
  450. self.fix_e252 = self.fix_e225
  451. self.fix_e261 = self.fix_e262
  452. self.fix_e272 = self.fix_e271
  453. self.fix_e273 = self.fix_e271
  454. self.fix_e274 = self.fix_e271
  455. self.fix_e275 = self.fix_e271
  456. self.fix_e306 = self.fix_e301
  457. self.fix_e501 = (
  458. self.fix_long_line_logically if
  459. options and (options.aggressive >= 2 or options.experimental) else
  460. self.fix_long_line_physically)
  461. self.fix_e703 = self.fix_e702
  462. self.fix_w292 = self.fix_w291
  463. self.fix_w293 = self.fix_w291
  464. def _check_affected_anothers(self, result) -> bool:
  465. """Check if the fix affects the number of lines of another remark."""
  466. line_index = result['line'] - 1
  467. target = self.source[line_index]
  468. original_target = self.original_source[line_index]
  469. return target != original_target
def _fix_source(self, results):
    """Apply the matching ``fix_<id>`` method to every reported result.

    Results are processed in the order given by ``_priority_key``. A result
    is skipped when its line was already touched by an earlier fix in this
    pass, or when the line no longer matches the original snapshot.

    NOTE(review): the nesting below was reconstructed from a listing that had
    lost its indentation; verify against the upstream file.
    """
    try:
        (logical_start, logical_end) = _find_logical(self.source)
        logical_support = True
    except (SyntaxError, tokenize.TokenError):  # pragma: no cover
        # Source could not be split into logical lines; logical fixers will
        # be called with ``logical=None``.
        logical_support = False

    # 1-indexed line numbers that have already been modified in this pass.
    completed_lines = set()
    for result in sorted(results, key=_priority_key):
        if result['line'] in completed_lines:
            continue

        fixed_methodname = 'fix_' + result['id'].lower()
        if hasattr(self, fixed_methodname):
            fix = getattr(self, fixed_methodname)

            line_index = result['line'] - 1
            original_line = self.source[line_index]

            # A fixer with more than two parameters is treated as one that
            # also wants the logical line (presumably the count includes
            # ``self`` -- depends on _get_parameters, defined elsewhere).
            is_logical_fix = len(_get_parameters(fix)) > 2
            if is_logical_fix:
                logical = None
                if logical_support:
                    logical = _get_logical(self.source,
                                           result,
                                           logical_start,
                                           logical_end)
                    # Skip when any line of this logical line was already
                    # modified.
                    if logical and set(range(
                        logical[0][0] + 1,
                        logical[1][0] + 1)).intersection(
                            completed_lines):
                        continue

                if self._check_affected_anothers(result):
                    continue
                modified_lines = fix(result, logical)
            else:
                if self._check_affected_anothers(result):
                    continue
                modified_lines = fix(result)

            if modified_lines is None:
                # Force logical fixes to report what they modified.
                assert not is_logical_fix

                # None plus an unchanged line means nothing was fixed.
                if self.source[line_index] == original_line:
                    modified_lines = []

            if modified_lines:
                completed_lines.update(modified_lines)
            elif modified_lines == []:  # Empty list means no fix
                if self.options.verbose >= 2:
                    print(
                        '---> Not fixing {error} on line {line}'.format(
                            error=result['id'], line=result['line']),
                        file=sys.stderr)
            else:  # We assume one-line fix when None.
                completed_lines.add(result['line'])
        else:
            # No fixer exists for this error code.
            if self.options.verbose >= 3:
                print(
                    "---> '{}' is not defined.".format(fixed_methodname),
                    file=sys.stderr)

                info = result['info'].strip()
                print('---> {}:{}:{}:{}'.format(self.filename,
                                                result['line'],
                                                result['column'],
                                                info),
                      file=sys.stderr)
  531. def fix(self):
  532. """Return a version of the source code with PEP 8 violations fixed."""
  533. pep8_options = {
  534. 'ignore': self.options.ignore,
  535. 'select': self.options.select,
  536. 'max_line_length': self.options.max_line_length,
  537. 'hang_closing': self.options.hang_closing,
  538. }
  539. results = _execute_pep8(pep8_options, self.source)
  540. if self.options.verbose:
  541. progress = {}
  542. for r in results:
  543. if r['id'] not in progress:
  544. progress[r['id']] = set()
  545. progress[r['id']].add(r['line'])
  546. print('---> {n} issue(s) to fix {progress}'.format(
  547. n=len(results), progress=progress), file=sys.stderr)
  548. if self.options.line_range:
  549. start, end = self.options.line_range
  550. results = [r for r in results
  551. if start <= r['line'] <= end]
  552. self._fix_source(filter_results(source=''.join(self.source),
  553. results=results,
  554. aggressive=self.options.aggressive))
  555. if self.options.line_range:
  556. # If number of lines has changed then change line_range.
  557. count = sum(sline.count('\n')
  558. for sline in self.source[start - 1:end])
  559. self.options.line_range[1] = start + count - 1
  560. return ''.join(self.source)
  561. def _fix_reindent(self, result):
  562. """Fix a badly indented line.
  563. This is done by adding or removing from its initial indent only.
  564. """
  565. num_indent_spaces = int(result['info'].split()[1])
  566. line_index = result['line'] - 1
  567. target = self.source[line_index]
  568. self.source[line_index] = ' ' * num_indent_spaces + target.lstrip()
  569. def fix_e112(self, result):
  570. """Fix under-indented comments."""
  571. line_index = result['line'] - 1
  572. target = self.source[line_index]
  573. if not target.lstrip().startswith('#'):
  574. # Don't screw with invalid syntax.
  575. return []
  576. self.source[line_index] = self.indent_word + target
  577. def fix_e113(self, result):
  578. """Fix unexpected indentation."""
  579. line_index = result['line'] - 1
  580. target = self.source[line_index]
  581. indent = _get_indentation(target)
  582. stripped = target.lstrip()
  583. self.source[line_index] = indent[1:] + stripped
  584. def fix_e116(self, result):
  585. """Fix over-indented comments."""
  586. line_index = result['line'] - 1
  587. target = self.source[line_index]
  588. indent = _get_indentation(target)
  589. stripped = target.lstrip()
  590. if not stripped.startswith('#'):
  591. # Don't screw with invalid syntax.
  592. return []
  593. self.source[line_index] = indent[1:] + stripped
  594. def fix_e117(self, result):
  595. """Fix over-indented."""
  596. line_index = result['line'] - 1
  597. target = self.source[line_index]
  598. indent = _get_indentation(target)
  599. if indent == '\t':
  600. return []
  601. stripped = target.lstrip()
  602. self.source[line_index] = indent[1:] + stripped
  603. def fix_e125(self, result):
  604. """Fix indentation undistinguish from the next logical line."""
  605. num_indent_spaces = int(result['info'].split()[1])
  606. line_index = result['line'] - 1
  607. target = self.source[line_index]
  608. spaces_to_add = num_indent_spaces - len(_get_indentation(target))
  609. indent = len(_get_indentation(target))
  610. modified_lines = []
  611. while len(_get_indentation(self.source[line_index])) >= indent:
  612. self.source[line_index] = (' ' * spaces_to_add +
  613. self.source[line_index])
  614. modified_lines.append(1 + line_index) # Line indexed at 1.
  615. line_index -= 1
  616. return modified_lines
  617. def fix_e131(self, result):
  618. """Fix indentation undistinguish from the next logical line."""
  619. num_indent_spaces = int(result['info'].split()[1])
  620. line_index = result['line'] - 1
  621. target = self.source[line_index]
  622. spaces_to_add = num_indent_spaces - len(_get_indentation(target))
  623. indent_length = len(_get_indentation(target))
  624. spaces_to_add = num_indent_spaces - indent_length
  625. if num_indent_spaces == 0 and indent_length == 0:
  626. spaces_to_add = 4
  627. if spaces_to_add >= 0:
  628. self.source[line_index] = (' ' * spaces_to_add +
  629. self.source[line_index])
  630. else:
  631. offset = abs(spaces_to_add)
  632. self.source[line_index] = self.source[line_index][offset:]
  633. def fix_e201(self, result):
  634. """Remove extraneous whitespace."""
  635. line_index = result['line'] - 1
  636. target = self.source[line_index]
  637. offset = result['column'] - 1
  638. fixed = fix_whitespace(target,
  639. offset=offset,
  640. replacement='')
  641. self.source[line_index] = fixed
  642. def fix_e224(self, result):
  643. """Remove extraneous whitespace around operator."""
  644. target = self.source[result['line'] - 1]
  645. offset = result['column'] - 1
  646. fixed = target[:offset] + target[offset:].replace('\t', ' ')
  647. self.source[result['line'] - 1] = fixed
def fix_e225(self, result):
    """Fix missing whitespace around operator.

    Inserts one space at the reported column, then runs pycodestyle's
    missing-whitespace check on the edited line and inserts a further space
    at each position it reports under the same error code.

    Returns [] (no fix) when the first insertion would change the line's
    indentation or its non-space characters; otherwise returns None.
    """
    target = self.source[result['line'] - 1]
    offset = result['column'] - 1
    fixed = target[:offset] + ' ' + target[offset:]

    # Only proceed if non-whitespace characters match.
    # And make sure we don't break the indentation.
    if (
        fixed.replace(' ', '') == target.replace(' ', '') and
        _get_indentation(fixed) == _get_indentation(target)
    ):
        self.source[result['line'] - 1] = fixed
        error_code = result.get('id', 0)
        # The single-space fix is already stored; the early returns below
        # just skip the follow-up pass when the line cannot be analysed
        # on its own.
        try:
            ts = generate_tokens(fixed)
        except (SyntaxError, tokenize.TokenError):
            return
        if not check_syntax(fixed.lstrip()):
            return
        try:
            _missing_whitespace = (
                pycodestyle.missing_whitespace_around_operator
            )
        except AttributeError:
            # pycodestyle >= 2.11.0
            _missing_whitespace = pycodestyle.missing_whitespace
        errors = list(_missing_whitespace(fixed, ts))
        # Go right-to-left so earlier offsets stay valid after each insert.
        for e in reversed(errors):
            if error_code != e[1].split()[0]:
                continue
            offset = e[0][1]
            fixed = fixed[:offset] + ' ' + fixed[offset:]
        self.source[result['line'] - 1] = fixed
    else:
        return []
  683. def fix_e231(self, result):
  684. """Add missing whitespace."""
  685. line_index = result['line'] - 1
  686. target = self.source[line_index]
  687. offset = result['column']
  688. fixed = target[:offset].rstrip() + ' ' + target[offset:].lstrip()
  689. self.source[line_index] = fixed
  690. def fix_e251(self, result):
  691. """Remove whitespace around parameter '=' sign."""
  692. line_index = result['line'] - 1
  693. target = self.source[line_index]
  694. # This is necessary since pycodestyle sometimes reports columns that
  695. # goes past the end of the physical line. This happens in cases like,
  696. # foo(bar\n=None)
  697. c = min(result['column'] - 1,
  698. len(target) - 1)
  699. if target[c].strip():
  700. fixed = target
  701. else:
  702. fixed = target[:c].rstrip() + target[c:].lstrip()
  703. # There could be an escaped newline
  704. #
  705. # def foo(a=\
  706. # 1)
  707. if fixed.endswith(('=\\\n', '=\\\r\n', '=\\\r')):
  708. self.source[line_index] = fixed.rstrip('\n\r \t\\')
  709. self.source[line_index + 1] = self.source[line_index + 1].lstrip()
  710. return [line_index + 1, line_index + 2] # Line indexed at 1
  711. self.source[result['line'] - 1] = fixed
  712. def fix_e262(self, result):
  713. """Fix spacing after inline comment hash."""
  714. target = self.source[result['line'] - 1]
  715. offset = result['column']
  716. code = target[:offset].rstrip(' \t#')
  717. comment = target[offset:].lstrip(' \t#')
  718. fixed = code + (' # ' + comment if comment.strip() else '\n')
  719. self.source[result['line'] - 1] = fixed
  720. def fix_e265(self, result):
  721. """Fix spacing after block comment hash."""
  722. target = self.source[result['line'] - 1]
  723. indent = _get_indentation(target)
  724. line = target.lstrip(' \t')
  725. pos = next((index for index, c in enumerate(line) if c != '#'))
  726. hashes = line[:pos]
  727. comment = line[pos:].lstrip(' \t')
  728. # Ignore special comments, even in the middle of the file.
  729. if comment.startswith('!'):
  730. return
  731. fixed = indent + hashes + (' ' + comment if comment.strip() else '\n')
  732. self.source[result['line'] - 1] = fixed
  733. def fix_e266(self, result):
  734. """Fix too many block comment hashes."""
  735. target = self.source[result['line'] - 1]
  736. # Leave stylistic outlined blocks alone.
  737. if target.strip().endswith('#'):
  738. return
  739. indentation = _get_indentation(target)
  740. fixed = indentation + '# ' + target.lstrip('# \t')
  741. self.source[result['line'] - 1] = fixed
  742. def fix_e271(self, result):
  743. """Fix extraneous whitespace around keywords."""
  744. line_index = result['line'] - 1
  745. target = self.source[line_index]
  746. offset = result['column'] - 1
  747. fixed = fix_whitespace(target,
  748. offset=offset,
  749. replacement=' ')
  750. if fixed == target:
  751. return []
  752. else:
  753. self.source[line_index] = fixed
  754. def fix_e301(self, result):
  755. """Add missing blank line."""
  756. cr = '\n'
  757. self.source[result['line'] - 1] = cr + self.source[result['line'] - 1]
  758. def fix_e302(self, result):
  759. """Add missing 2 blank lines."""
  760. add_linenum = 2 - int(result['info'].split()[-1])
  761. offset = 1
  762. if self.source[result['line'] - 2].strip() == "\\":
  763. offset = 2
  764. cr = '\n' * add_linenum
  765. self.source[result['line'] - offset] = (
  766. cr + self.source[result['line'] - offset]
  767. )
  768. def fix_e303(self, result):
  769. """Remove extra blank lines."""
  770. delete_linenum = int(result['info'].split('(')[1].split(')')[0]) - 2
  771. delete_linenum = max(1, delete_linenum)
  772. # We need to count because pycodestyle reports an offset line number if
  773. # there are comments.
  774. cnt = 0
  775. line = result['line'] - 2
  776. modified_lines = []
  777. while cnt < delete_linenum and line >= 0:
  778. if not self.source[line].strip():
  779. self.source[line] = ''
  780. modified_lines.append(1 + line) # Line indexed at 1
  781. cnt += 1
  782. line -= 1
  783. return modified_lines
  784. def fix_e304(self, result):
  785. """Remove blank line following function decorator."""
  786. line = result['line'] - 2
  787. if not self.source[line].strip():
  788. self.source[line] = ''
  789. def fix_e305(self, result):
  790. """Add missing 2 blank lines after end of function or class."""
  791. add_delete_linenum = 2 - int(result['info'].split()[-1])
  792. cnt = 0
  793. offset = result['line'] - 2
  794. modified_lines = []
  795. if add_delete_linenum < 0:
  796. # delete cr
  797. add_delete_linenum = abs(add_delete_linenum)
  798. while cnt < add_delete_linenum and offset >= 0:
  799. if not self.source[offset].strip():
  800. self.source[offset] = ''
  801. modified_lines.append(1 + offset) # Line indexed at 1
  802. cnt += 1
  803. offset -= 1
  804. else:
  805. # add cr
  806. cr = '\n'
  807. # check comment line
  808. while True:
  809. if offset < 0:
  810. break
  811. line = self.source[offset].lstrip()
  812. if not line:
  813. break
  814. if line[0] != '#':
  815. break
  816. offset -= 1
  817. offset += 1
  818. self.source[offset] = cr + self.source[offset]
  819. modified_lines.append(1 + offset) # Line indexed at 1.
  820. return modified_lines
  821. def fix_e401(self, result):
  822. """Put imports on separate lines."""
  823. line_index = result['line'] - 1
  824. target = self.source[line_index]
  825. offset = result['column'] - 1
  826. if not target.lstrip().startswith('import'):
  827. return []
  828. indentation = re.split(pattern=r'\bimport\b',
  829. string=target, maxsplit=1)[0]
  830. fixed = (target[:offset].rstrip('\t ,') + '\n' +
  831. indentation + 'import ' + target[offset:].lstrip('\t ,'))
  832. self.source[line_index] = fixed
def fix_e402(self, result):
    """Move a module-level import that is not at the top of the file.

    The statement is prepended to the line chosen by
    get_module_imports_on_top_of_file() and its original lines are emptied.
    When the same import text was already seen on a different line it is
    not moved, only removed.

    NOTE(review): the nesting below was reconstructed from a listing that had
    lost its indentation; verify against the upstream file.
    """
    (line_index, offset, target) = get_index_offset_contents(result,
                                                             self.source)
    # Grow the window one line at a time until the joined text tokenizes,
    # so that an import spanning several physical lines is taken whole.
    # NOTE(review): if no window of up to 99 lines tokenizes, the loop ends
    # with i == 99 and that window is used as-is.
    for i in range(1, 100):
        line = "".join(self.source[line_index:line_index+i])
        try:
            generate_tokens("".join(line))
        except (SyntaxError, tokenize.TokenError):
            continue
        break
    # self.imports maps import text to the index of its first appearance.
    if not (target in self.imports and self.imports[target] != line_index):
        mod_offset = get_module_imports_on_top_of_file(self.source,
                                                       line_index)
        self.source[mod_offset] = line + self.source[mod_offset]
    # Empty the original lines; ``offset`` is reused here as a plain loop
    # counter and no longer holds the reported column.
    for offset in range(i):
        self.source[line_index+offset] = ''
  849. def fix_long_line_logically(self, result, logical):
  850. """Try to make lines fit within --max-line-length characters."""
  851. if (
  852. not logical or
  853. len(logical[2]) == 1 or
  854. self.source[result['line'] - 1].lstrip().startswith('#')
  855. ):
  856. return self.fix_long_line_physically(result)
  857. start_line_index = logical[0][0]
  858. end_line_index = logical[1][0]
  859. logical_lines = logical[2]
  860. previous_line = get_item(self.source, start_line_index - 1, default='')
  861. next_line = get_item(self.source, end_line_index + 1, default='')
  862. single_line = join_logical_line(''.join(logical_lines))
  863. try:
  864. fixed = self.fix_long_line(
  865. target=single_line,
  866. previous_line=previous_line,
  867. next_line=next_line,
  868. original=''.join(logical_lines))
  869. except (SyntaxError, tokenize.TokenError):
  870. return self.fix_long_line_physically(result)
  871. if fixed:
  872. for line_index in range(start_line_index, end_line_index + 1):
  873. self.source[line_index] = ''
  874. self.source[start_line_index] = fixed
  875. return range(start_line_index + 1, end_line_index + 1)
  876. return []
  877. def fix_long_line_physically(self, result):
  878. """Try to make lines fit within --max-line-length characters."""
  879. line_index = result['line'] - 1
  880. target = self.source[line_index]
  881. previous_line = get_item(self.source, line_index - 1, default='')
  882. next_line = get_item(self.source, line_index + 1, default='')
  883. try:
  884. fixed = self.fix_long_line(
  885. target=target,
  886. previous_line=previous_line,
  887. next_line=next_line,
  888. original=target)
  889. except (SyntaxError, tokenize.TokenError):
  890. return []
  891. if fixed:
  892. self.source[line_index] = fixed
  893. return [line_index + 1]
  894. return []
  895. def fix_long_line(self, target, previous_line,
  896. next_line, original):
  897. cache_entry = (target, previous_line, next_line)
  898. if cache_entry in self.long_line_ignore_cache:
  899. return []
  900. if target.lstrip().startswith('#'):
  901. if self.options.aggressive:
  902. # Wrap commented lines.
  903. return shorten_comment(
  904. line=target,
  905. max_line_length=self.options.max_line_length,
  906. last_comment=not next_line.lstrip().startswith('#'))
  907. return []
  908. fixed = get_fixed_long_line(
  909. target=target,
  910. previous_line=previous_line,
  911. original=original,
  912. indent_word=self.indent_word,
  913. max_line_length=self.options.max_line_length,
  914. aggressive=self.options.aggressive,
  915. experimental=self.options.experimental,
  916. verbose=self.options.verbose)
  917. if fixed and not code_almost_equal(original, fixed):
  918. return fixed
  919. self.long_line_ignore_cache.add(cache_entry)
  920. return None
  921. def fix_e502(self, result):
  922. """Remove extraneous escape of newline."""
  923. (line_index, _, target) = get_index_offset_contents(result,
  924. self.source)
  925. self.source[line_index] = target.rstrip('\n\r \t\\') + '\n'
  926. def fix_e701(self, result):
  927. """Put colon-separated compound statement on separate lines."""
  928. line_index = result['line'] - 1
  929. target = self.source[line_index]
  930. c = result['column']
  931. fixed_source = (target[:c] + '\n' +
  932. _get_indentation(target) + self.indent_word +
  933. target[c:].lstrip('\n\r \t\\'))
  934. self.source[result['line'] - 1] = fixed_source
  935. return [result['line'], result['line'] + 1]
def fix_e702(self, result, logical):
    """Put semicolon-separated compound statement on separate lines.

    Returns the 1-indexed numbers of the modified lines, or [] when no
    logical line is available or the fix is deliberately skipped.
    """
    if not logical:
        return []  # pragma: no cover
    logical_lines = logical[2]

    # Avoid applying this when indented.
    # https://docs.python.org/reference/compound_stmts.html
    for line in logical_lines:
        if (
            result['id'] == 'E702'
            and ':' in line
            and pycodestyle.STARTSWITH_INDENT_STATEMENT_REGEX.match(line)
        ):
            if self.options.verbose:
                print(
                    '---> avoid fixing {error} with '
                    'other compound statements'.format(error=result['id']),
                    file=sys.stderr
                )
            return []

    line_index = result['line'] - 1
    target = self.source[line_index]

    if target.rstrip().endswith('\\'):
        # Normalize '1; \\\n2' into '1; 2'.
        self.source[line_index] = target.rstrip('\n \r\t\\')
        self.source[line_index + 1] = self.source[line_index + 1].lstrip()
        return [line_index + 1, line_index + 2]

    # A trailing semicolon is simply dropped.
    if target.rstrip().endswith(';'):
        self.source[line_index] = target.rstrip('\n \r\t;') + '\n'
        return [line_index + 1]

    # Split at the reported column: ``first`` stays on this line, ``second``
    # becomes a new line with the indentation of the logical line's start.
    offset = result['column'] - 1
    first = target[:offset].rstrip(';').rstrip()
    second = (_get_indentation(logical_lines[0]) +
              target[offset:].lstrip(';').lstrip())

    # Find inline comment.
    inline_comment = None
    if target[offset:].lstrip(';').lstrip()[:2] == '# ':
        inline_comment = target[offset:].lstrip(';')

    if inline_comment:
        # Only a comment follows the semicolon: keep it on the same line.
        self.source[line_index] = first + inline_comment
    else:
        self.source[line_index] = first + '\n' + second
    return [line_index + 1]
  979. def fix_e704(self, result):
  980. """Fix multiple statements on one line def"""
  981. (line_index, _, target) = get_index_offset_contents(result,
  982. self.source)
  983. match = STARTSWITH_DEF_REGEX.match(target)
  984. if match:
  985. self.source[line_index] = '{}\n{}{}'.format(
  986. match.group(0),
  987. _get_indentation(target) + self.indent_word,
  988. target[match.end(0):].lstrip())
  989. def fix_e711(self, result):
  990. """Fix comparison with None."""
  991. (line_index, offset, target) = get_index_offset_contents(result,
  992. self.source)
  993. right_offset = offset + 2
  994. if right_offset >= len(target):
  995. return []
  996. left = target[:offset].rstrip()
  997. center = target[offset:right_offset]
  998. right = target[right_offset:].lstrip()
  999. if center.strip() == '==':
  1000. new_center = 'is'
  1001. elif center.strip() == '!=':
  1002. new_center = 'is not'
  1003. else:
  1004. return []
  1005. self.source[line_index] = ' '.join([left, new_center, right])
  1006. def fix_e712(self, result):
  1007. """Fix (trivial case of) comparison with boolean."""
  1008. (line_index, offset, target) = get_index_offset_contents(result,
  1009. self.source)
  1010. # Handle very easy "not" special cases.
  1011. if re.match(r'^\s*if [\w."\'\[\]]+ == False:$', target):
  1012. self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) == False:',
  1013. r'if not \1:', target, count=1)
  1014. elif re.match(r'^\s*if [\w."\'\[\]]+ != True:$', target):
  1015. self.source[line_index] = re.sub(r'if ([\w."\'\[\]]+) != True:',
  1016. r'if not \1:', target, count=1)
  1017. else:
  1018. right_offset = offset + 2
  1019. if right_offset >= len(target):
  1020. return []
  1021. left = target[:offset].rstrip()
  1022. center = target[offset:right_offset]
  1023. right = target[right_offset:].lstrip()
  1024. # Handle simple cases only.
  1025. new_right = None
  1026. if center.strip() == '==':
  1027. if re.match(r'\bTrue\b', right):
  1028. new_right = re.sub(r'\bTrue\b *', '', right, count=1)
  1029. elif center.strip() == '!=':
  1030. if re.match(r'\bFalse\b', right):
  1031. new_right = re.sub(r'\bFalse\b *', '', right, count=1)
  1032. if new_right is None:
  1033. return []
  1034. if new_right[0].isalnum():
  1035. new_right = ' ' + new_right
  1036. self.source[line_index] = left + new_right
def fix_e713(self, result):
    """Fix (trivial case of) non-membership check.

    Rewrites ``not <expr> in`` at or after the reported offset into
    ``<expr> not in``. The line is changed only when COMPARE_NEGATIVE_REGEX
    matches and its third group is 'in'.

    NOTE(review): the nesting below was reconstructed from a listing that had
    lost its indentation; verify against the upstream file.
    """
    (line_index, offset, target) = get_index_offset_contents(result,
                                                             self.source)

    # to convert once 'not in' -> 'in'
    # Only the text from the reported offset onward is searched;
    # ``before_target`` is glued back in front at the end.
    before_target = target[:offset]
    target = target[offset:]
    match_notin = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
    notin_pos_start, notin_pos_end = 0, 0
    if match_notin:
        notin_pos_start = match_notin.start(1)
        notin_pos_end = match_notin.end()
        target = '{}{} {}'.format(
            target[:notin_pos_start], 'in', target[notin_pos_end:])

    # fix 'not in'
    match = COMPARE_NEGATIVE_REGEX.search(target)
    if match:
        if match.group(3) == 'in':
            pos_start = match.start(1)
            # Reorder the matched groups: group 2, then group 1, then
            # group 3.
            new_target = '{5}{0}{1} {2} {3} {4}'.format(
                target[:pos_start], match.group(2), match.group(1),
                match.group(3), target[match.end():], before_target)
            if match_notin:
                # revert 'in' -> 'not in'
                pos_start = notin_pos_start + offset
                pos_end = notin_pos_end + offset - 4     # len('not ')
                new_target = '{}{} {}'.format(
                    new_target[:pos_start], 'not in', new_target[pos_end:])
            self.source[line_index] = new_target
def fix_e714(self, result):
    """Fix object identity should be 'is not' case.

    Rewrites ``not <expr> is`` at or after the reported offset into
    ``<expr> is not``. The line is changed only when COMPARE_NEGATIVE_REGEX
    matches and its third group starts with 'is'.

    NOTE(review): the nesting below was reconstructed from a listing that had
    lost its indentation; verify against the upstream file.
    """
    (line_index, offset, target) = get_index_offset_contents(result,
                                                             self.source)

    # to convert once 'is not' -> 'is'
    # Only the text from the reported offset onward is searched;
    # ``before_target`` is glued back in front at the end.
    before_target = target[:offset]
    target = target[offset:]
    match_isnot = COMPARE_NEGATIVE_REGEX_THROUGH.search(target)
    isnot_pos_start, isnot_pos_end = 0, 0
    if match_isnot:
        isnot_pos_start = match_isnot.start(1)
        isnot_pos_end = match_isnot.end()
        # NOTE(review): the placeholder written here is 'in' although the
        # comment above speaks of 'is' -- confirm this is intended.
        target = '{}{} {}'.format(
            target[:isnot_pos_start], 'in', target[isnot_pos_end:])

    match = COMPARE_NEGATIVE_REGEX.search(target)
    if match:
        if match.group(3).startswith('is'):
            pos_start = match.start(1)
            # Reorder the matched groups: group 2, then group 3, then
            # group 1.
            new_target = '{5}{0}{1} {2} {3} {4}'.format(
                target[:pos_start], match.group(2), match.group(3),
                match.group(1), target[match.end():], before_target)
            if match_isnot:
                # revert 'is' -> 'is not'
                pos_start = isnot_pos_start + offset
                pos_end = isnot_pos_end + offset - 4     # len('not ')
                new_target = '{}{} {}'.format(
                    new_target[:pos_start], 'is not', new_target[pos_end:])
            self.source[line_index] = new_target
def fix_e721(self, result):
    """Fix a type comparison by rewriting it as an isinstance() call.

    The reported line is replaced by
    ``<indent><prefix> isinstance(<obj>, <type>)<suffix>``, with
    ``not isinstance`` when the comparison operator was '!='. Nothing is
    changed when COMPARE_TYPE_REGEX does not match the line.

    NOTE(review): the nesting below was reconstructed from a listing that had
    lost its indentation; verify against the upstream file.
    """
    (line_index, _, target) = get_index_offset_contents(result,
                                                        self.source)
    match = COMPARE_TYPE_REGEX.search(target)
    if match:
        # NOTE: match objects
        #  * type(a) == type(b)  -> (None, None, 'a', '==')
        #  * str == type(b)      -> ('==', 'b', None, None)
        #  * type(b) == str      -> (None, None, 'b', '==')
        #  * type("") != type(b) -> (None, None, '""', '!=')
        start = match.start()
        end = match.end()
        _prefix = ""
        _suffix = ""
        # Group 2 is set only for the "<type> == type(obj)" shape.
        first_match_type_obj = match.groups()[1]
        if first_match_type_obj is None:
            _target_obj = match.groups()[2]
        else:
            _target_obj = match.groups()[1]
            _suffix = target[end:]

        isinstance_stmt = " isinstance"
        is_not_condition = (
            match.groups()[0] == "!=" or match.groups()[3] == "!="
        )
        if is_not_condition:
            isinstance_stmt = " not isinstance"

        # Default pairing; refined below once the prefix/suffix are parsed.
        _type_comp = f"{_target_obj}, {target[:start]}"

        # Preserve the line's leading whitespace.
        indent_match = re.match(r'^\s+', target)
        indent = ""
        if indent_match:
            indent = indent_match.group()

        _prefix_tmp = target[:start].split()
        if len(_prefix_tmp) >= 1:
            _type_comp = f"{_target_obj}, {target[:start]}"
            if first_match_type_obj is not None:
                # "<prefix> <type> == type(obj)": the last word before the
                # match is the type being compared against.
                _prefix = " ".join(_prefix_tmp[:-1])
                _type_comp = f"{_target_obj}, {_prefix_tmp[-1]}"
            else:
                _prefix = " ".join(_prefix_tmp)

                # The right-hand operand lives in the text after the match.
                _suffix_tmp = target[end:]
                _suffix_type_match = TYPE_REGEX.search(_suffix_tmp)
                if _suffix_type_match:
                    if len(_suffix_tmp.split()) >= 1:
                        type_match_end = _suffix_type_match.end()
                        _suffix = _suffix_tmp[type_match_end:]
                    cmp_b = _suffix_type_match.groups()[0]
                    _type_comp = f"{_target_obj}, {cmp_b}"
                else:
                    # Take the next token (up to whitespace or ':') as the
                    # type and keep whatever follows as the suffix.
                    _else_suffix_match = re.match(
                        r"^\s*([^\s:]+)(.*)$",
                        _suffix_tmp,
                    )
                    if _else_suffix_match:
                        _else_suffix = _else_suffix_match.group(1)
                        _else_suffix_other = _else_suffix_match.group(2)
                        _type_comp = f"{_target_obj}, {_else_suffix}"
                        _else_suffix_end = _suffix_tmp[_else_suffix_match.end():]
                        _suffix = f"{_else_suffix_other}{_else_suffix_end}"
        # `else` route is not care

        fix_line = (
            f"{indent}{_prefix}{isinstance_stmt}({_type_comp}){_suffix}"
        )
        self.source[line_index] = fix_line
  1158. def fix_e722(self, result):
  1159. """fix bare except"""
  1160. (line_index, _, target) = get_index_offset_contents(result,
  1161. self.source)
  1162. match = BARE_EXCEPT_REGEX.search(target)
  1163. if match:
  1164. self.source[line_index] = '{}{}{}'.format(
  1165. target[:result['column'] - 1], "except BaseException:",
  1166. target[match.end():])
  1167. def fix_e731(self, result):
  1168. """Fix do not assign a lambda expression check."""
  1169. (line_index, _, target) = get_index_offset_contents(result,
  1170. self.source)
  1171. match = LAMBDA_REGEX.search(target)
  1172. if match:
  1173. end = match.end()
  1174. self.source[line_index] = '{}def {}({}): return {}'.format(
  1175. target[:match.start(0)], match.group(1), match.group(2),
  1176. target[end:].lstrip())
  1177. def fix_w291(self, result):
  1178. """Remove trailing whitespace."""
  1179. fixed_line = self.source[result['line'] - 1].rstrip()
  1180. self.source[result['line'] - 1] = fixed_line + '\n'
  1181. def fix_w391(self, _):
  1182. """Remove trailing blank lines."""
  1183. blank_count = 0
  1184. for line in reversed(self.source):
  1185. line = line.rstrip()
  1186. if line:
  1187. break
  1188. else:
  1189. blank_count += 1
  1190. original_length = len(self.source)
  1191. self.source = self.source[:original_length - blank_count]
  1192. return range(1, 1 + original_length)
    def fix_w503(self, result):
        """Move a leading binary operator to the end of the previous code line.

        The first whitespace-separated word of the reported line must be a
        binary operator (per ``_is_binary_operator``); otherwise nothing is
        changed.  The operator is removed from the reported line and inserted
        into the nearest earlier line that is not a comment-only line, either
        just before that line's newline or, when that line carries a trailing
        comment, before the comment.
        """
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        one_string_token = target.split()[0]
        try:
            ts = generate_tokens(one_string_token)
        except (SyntaxError, tokenize.TokenError):
            return
        if not _is_binary_operator(ts[0][0], one_string_token):
            return
        # find comment
        comment_index = 0
        found_not_comment_only_line = False
        comment_only_linenum = 0
        for i in range(5):
            # NOTE: try to parse code in 5 times
            # Each pass widens the window by one more line above the
            # reported line until the joined text tokenizes.
            if (line_index - i) < 0:
                break
            from_index = line_index - i - 1
            if from_index < 0 or len(self.source) <= from_index:
                break
            to_index = line_index + 1
            strip_line = self.source[from_index].lstrip()
            # Count the comment-only lines sitting directly above the
            # reported line; they are skipped when picking the target line.
            if (
                not found_not_comment_only_line and
                strip_line and strip_line[0] == '#'
            ):
                comment_only_linenum += 1
                continue
            found_not_comment_only_line = True
            try:
                ts = generate_tokens("".join(self.source[from_index:to_index]))
            except (SyntaxError, tokenize.TokenError):
                continue
            newline_count = 0
            newline_index = []
            for index, t in enumerate(ts):
                if t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_index.append(index)
                    newline_count += 1
            # Only the tokens from the third-from-last newline onward are
            # inspected when the window holds more than two newlines.
            if newline_count > 2:
                tts = ts[newline_index[-3]:]
            else:
                tts = ts
            old = []
            for t in tts:
                if t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_count -= 1
                if newline_count <= 1:
                    break
                # A comment that follows a non-NL token is a trailing
                # comment; remember the end column of the token before it.
                if tokenize.COMMENT == t[0] and old and old[0] != tokenize.NL:
                    comment_index = old[3][1]
                    break
                old = t
            # The first window that tokenizes is the only one examined.
            break
        i = target.index(one_string_token)
        fix_target_line = line_index - 1 - comment_only_linenum
        # Remove the operator (and the whitespace after it) from the
        # reported line.
        self.source[line_index] = '{}{}'.format(
            target[:i], target[i + len(one_string_token):].lstrip())
        nl = find_newline(self.source[fix_target_line:line_index])
        before_line = self.source[fix_target_line]
        bl = before_line.index(nl)
        if comment_index:
            # Insert the operator between the code and its trailing comment.
            self.source[fix_target_line] = '{} {} {}'.format(
                before_line[:comment_index], one_string_token,
                before_line[comment_index + 1:])
        else:
            if before_line[:bl].endswith("#"):
                # special case
                # see: https://github.com/hhatto/autopep8/issues/503
                self.source[fix_target_line] = '{}{} {}'.format(
                    before_line[:bl-2], one_string_token, before_line[bl-2:])
            else:
                # Append the operator just before the line's newline.
                self.source[fix_target_line] = '{} {}{}'.format(
                    before_line[:bl], one_string_token, before_line[bl:])
    def fix_w504(self, result):
        """Move a trailing binary operator to the start of the next line.

        The reported line, joined with up to four following lines, is
        tokenized to locate the last binary operator that lies entirely on the
        first row.  That operator is removed from the reported line and
        inserted after the leading whitespace of the following line.  Nothing
        is changed when no such operator is found.
        """
        (line_index, _, target) = get_index_offset_contents(result,
                                                            self.source)
        # NOTE: is not collect pointed out in pycodestyle==2.4.0
        comment_index = 0
        operator_position = None    # (start_position, end_position)
        for i in range(1, 6):
            # Grow the window one line at a time until it tokenizes.
            to_index = line_index + i
            try:
                ts = generate_tokens("".join(self.source[line_index:to_index]))
            except (SyntaxError, tokenize.TokenError):
                continue
            newline_count = 0
            newline_index = []
            for index, t in enumerate(ts):
                # Later matches overwrite earlier ones, so the last operator
                # on row 1 wins.
                if _is_binary_operator(t[0], t[1]):
                    if t[2][0] == 1 and t[3][0] == 1:
                        operator_position = (t[2][1], t[3][1])
                elif t[0] == tokenize.NAME and t[1] in ("and", "or"):
                    if t[2][0] == 1 and t[3][0] == 1:
                        operator_position = (t[2][1], t[3][1])
                elif t[0] in (tokenize.NEWLINE, tokenize.NL):
                    newline_index.append(index)
                    newline_count += 1
            if newline_count > 2:
                tts = ts[:newline_index[-3]]
            else:
                tts = ts
            old = []
            for t in tts:
                # Record where the token before the first comment ends.
                if tokenize.COMMENT == t[0] and old:
                    comment_row, comment_index = old[3]
                    break
                old = t
            # The first window that tokenizes is the only one examined.
            break
        if not operator_position:
            return
        target_operator = target[operator_position[0]:operator_position[1]]

        # comment_row is only bound when comment_index was assigned above, so
        # the short-circuit keeps this test safe.
        if comment_index and comment_row == 1:
            # Keep the trailing comment on the reported line.
            self.source[line_index] = '{}{}'.format(
                target[:operator_position[0]].rstrip(),
                target[comment_index:])
        else:
            # NOTE(review): the text after the operator is appended twice
            # (once lstripped, once verbatim); this only yields a clean line
            # when nothing but whitespace follows the operator -- confirm.
            self.source[line_index] = '{}{}{}'.format(
                target[:operator_position[0]].rstrip(),
                target[operator_position[1]:].lstrip(),
                target[operator_position[1]:])

        # Insert the operator after the next line's leading whitespace.
        next_line = self.source[line_index + 1]
        next_line_indent = 0
        m = re.match(r'\s*', next_line)
        if m:
            next_line_indent = m.span()[1]
        self.source[line_index + 1] = '{}{} {}'.format(
            next_line[:next_line_indent], target_operator,
            next_line[next_line_indent:])
  1323. def fix_w605(self, result):
  1324. (line_index, offset, target) = get_index_offset_contents(result,
  1325. self.source)
  1326. self.source[line_index] = '{}\\{}'.format(
  1327. target[:offset + 1], target[offset + 1:])
def get_module_imports_on_top_of_file(source, import_line_index):
    """Return the line index at which a module-level import belongs.

    ``source`` is scanned from the top.  Docstring lines, blank lines, lines
    starting with ``#`` and lines starting with ``try``/``except``/``else``/
    ``finally`` are skipped, as is the line at ``import_line_index`` itself.
    The index of the first other line is returned; for a ``from __future__``
    import the returned index points past that statement.  ``0`` is returned
    when the scan runs off the end of ``source``.

    example:
      > 0: import sys
        1: import os
        2:
        3: def function():
    """
    def is_string_literal(line):
        # Skip an optional u/U/b/B prefix, then an optional r/R prefix, and
        # test whether a quote character follows.
        if line[0] in 'uUbB':
            line = line[1:]
        if line and line[0] in 'rR':
            line = line[1:]
        return line and (line[0] == '"' or line[0] == "'")

    def is_future_import(line):
        # Raises SyntaxError when ``line`` is not (yet) a complete statement.
        nodes = ast.parse(line)
        for n in nodes.body:
            if isinstance(n, ast.ImportFrom) and n.module == '__future__':
                return True
        return False

    def has_future_import(source):
        # Accumulate physical lines until the text parses, so an import
        # spanning several lines is handled.  ``offset`` counts the entries
        # fully consumed before the one on which parsing succeeded.
        offset = 0
        line = ''
        for _, next_line in source:
            for line_part in next_line.strip().splitlines(True):
                line = line + line_part
                try:
                    return is_future_import(line), offset
                except SyntaxError:
                    continue
            offset += 1
        return False, offset

    allowed_try_keywords = ('try', 'except', 'else', 'finally')
    in_docstring = False
    docstring_kind = '"""'
    source_stream = iter(enumerate(source))
    for cnt, line in source_stream:
        if not in_docstring:
            m = DOCSTRING_START_REGEX.match(line.lstrip())
            if m is not None:
                in_docstring = True
                docstring_kind = m.group('kind')
                # NOTE(review): the offsets come from the lstripped text but
                # slice the unstripped line; they agree only when the line
                # has no leading whitespace -- confirm intent.
                remain = line[m.end(): m.endpos].rstrip()
                if remain[-3:] == docstring_kind:  # one line doc
                    in_docstring = False
                continue
        if in_docstring:
            if line.rstrip()[-3:] == docstring_kind:
                in_docstring = False
            continue

        if not line.rstrip():
            continue
        elif line.startswith('#'):
            continue

        if line.startswith('import '):
            if cnt == import_line_index:
                continue
            return cnt
        elif line.startswith('from '):
            if cnt == import_line_index:
                continue
            # The helper keeps consuming from the same iterator, starting
            # with the current line.
            hit, offset = has_future_import(
                itertools.chain([(cnt, line)], source_stream)
            )
            if hit:
                # move to the back
                return cnt + offset + 1
            return cnt
        elif pycodestyle.DUNDER_REGEX.match(line):
            return cnt
        elif any(line.startswith(kw) for kw in allowed_try_keywords):
            continue
        elif is_string_literal(line):
            return cnt
        else:
            return cnt
    return 0
  1405. def get_index_offset_contents(result, source):
  1406. """Return (line_index, column_offset, line_contents)."""
  1407. line_index = result['line'] - 1
  1408. return (line_index,
  1409. result['column'] - 1,
  1410. source[line_index])
  1411. def get_fixed_long_line(target, previous_line, original,
  1412. indent_word=' ', max_line_length=79,
  1413. aggressive=0, experimental=False, verbose=False):
  1414. """Break up long line and return result.
  1415. Do this by generating multiple reformatted candidates and then
  1416. ranking the candidates to heuristically select the best option.
  1417. """
  1418. indent = _get_indentation(target)
  1419. source = target[len(indent):]
  1420. assert source.lstrip() == source
  1421. assert not target.lstrip().startswith('#')
  1422. # Check for partial multiline.
  1423. tokens = list(generate_tokens(source))
  1424. candidates = shorten_line(
  1425. tokens, source, indent,
  1426. indent_word,
  1427. max_line_length,
  1428. aggressive=aggressive,
  1429. experimental=experimental,
  1430. previous_line=previous_line)
  1431. # Also sort alphabetically as a tie breaker (for determinism).
  1432. candidates = sorted(
  1433. sorted(set(candidates).union([target, original])),
  1434. key=lambda x: line_shortening_rank(
  1435. x,
  1436. indent_word,
  1437. max_line_length,
  1438. experimental=experimental))
  1439. if verbose >= 4:
  1440. print(('-' * 79 + '\n').join([''] + candidates + ['']),
  1441. file=wrap_output(sys.stderr, 'utf-8'))
  1442. if candidates:
  1443. best_candidate = candidates[0]
  1444. # Don't allow things to get longer.
  1445. if longest_line_length(best_candidate) > longest_line_length(original):
  1446. return None
  1447. return best_candidate
  1448. def longest_line_length(code):
  1449. """Return length of longest line."""
  1450. if len(code) == 0:
  1451. return 0
  1452. return max(len(line) for line in code.splitlines())
  1453. def join_logical_line(logical_line):
  1454. """Return single line based on logical line input."""
  1455. indentation = _get_indentation(logical_line)
  1456. return indentation + untokenize_without_newlines(
  1457. generate_tokens(logical_line.lstrip())) + '\n'
  1458. def untokenize_without_newlines(tokens):
  1459. """Return source code based on tokens."""
  1460. text = ''
  1461. last_row = 0
  1462. last_column = -1
  1463. for t in tokens:
  1464. token_string = t[1]
  1465. (start_row, start_column) = t[2]
  1466. (end_row, end_column) = t[3]
  1467. if start_row > last_row:
  1468. last_column = 0
  1469. if (
  1470. (start_column > last_column or token_string == '\n') and
  1471. not text.endswith(' ')
  1472. ):
  1473. text += ' '
  1474. if token_string != '\n':
  1475. text += token_string
  1476. last_row = end_row
  1477. last_column = end_column
  1478. return text.rstrip()
  1479. def _find_logical(source_lines):
  1480. # Make a variable which is the index of all the starts of lines.
  1481. logical_start = []
  1482. logical_end = []
  1483. last_newline = True
  1484. parens = 0
  1485. for t in generate_tokens(''.join(source_lines)):
  1486. if t[0] in [tokenize.COMMENT, tokenize.DEDENT,
  1487. tokenize.INDENT, tokenize.NL,
  1488. tokenize.ENDMARKER]:
  1489. continue
  1490. if not parens and t[0] in [tokenize.NEWLINE, tokenize.SEMI]:
  1491. last_newline = True
  1492. logical_end.append((t[3][0] - 1, t[2][1]))
  1493. continue
  1494. if last_newline and not parens:
  1495. logical_start.append((t[2][0] - 1, t[2][1]))
  1496. last_newline = False
  1497. if t[0] == tokenize.OP:
  1498. if t[1] in '([{':
  1499. parens += 1
  1500. elif t[1] in '}])':
  1501. parens -= 1
  1502. return (logical_start, logical_end)
  1503. def _get_logical(source_lines, result, logical_start, logical_end):
  1504. """Return the logical line corresponding to the result.
  1505. Assumes input is already E702-clean.
  1506. """
  1507. row = result['line'] - 1
  1508. col = result['column'] - 1
  1509. ls = None
  1510. le = None
  1511. for i in range(0, len(logical_start), 1):
  1512. assert logical_end
  1513. x = logical_end[i]
  1514. if x[0] > row or (x[0] == row and x[1] > col):
  1515. le = x
  1516. ls = logical_start[i]
  1517. break
  1518. if ls is None:
  1519. return None
  1520. original = source_lines[ls[0]:le[0] + 1]
  1521. return ls, le, original
  1522. def get_item(items, index, default=None):
  1523. if 0 <= index < len(items):
  1524. return items[index]
  1525. return default
  1526. def reindent(source, indent_size, leave_tabs=False):
  1527. """Reindent all lines."""
  1528. reindenter = Reindenter(source, leave_tabs)
  1529. return reindenter.run(indent_size)
  1530. def code_almost_equal(a, b):
  1531. """Return True if code is similar.
  1532. Ignore whitespace when comparing specific line.
  1533. """
  1534. split_a = split_and_strip_non_empty_lines(a)
  1535. split_b = split_and_strip_non_empty_lines(b)
  1536. if len(split_a) != len(split_b):
  1537. return False
  1538. for (index, _) in enumerate(split_a):
  1539. if ''.join(split_a[index].split()) != ''.join(split_b[index].split()):
  1540. return False
  1541. return True
  1542. def split_and_strip_non_empty_lines(text):
  1543. """Return lines split by newline.
  1544. Ignore empty lines.
  1545. """
  1546. return [line.strip() for line in text.splitlines() if line.strip()]
  1547. def find_newline(source):
  1548. """Return type of newline used in source.
  1549. Input is a list of lines.
  1550. """
  1551. assert not isinstance(source, str)
  1552. counter = collections.defaultdict(int)
  1553. for line in source:
  1554. if line.endswith(CRLF):
  1555. counter[CRLF] += 1
  1556. elif line.endswith(CR):
  1557. counter[CR] += 1
  1558. elif line.endswith(LF):
  1559. counter[LF] += 1
  1560. return (sorted(counter, key=counter.get, reverse=True) or [LF])[0]
  1561. def _get_indentword(source):
  1562. """Return indentation type."""
  1563. indent_word = ' ' # Default in case source has no indentation
  1564. try:
  1565. for t in generate_tokens(source):
  1566. if t[0] == token.INDENT:
  1567. indent_word = t[1]
  1568. break
  1569. except (SyntaxError, tokenize.TokenError):
  1570. pass
  1571. return indent_word
  1572. def _get_indentation(line):
  1573. """Return leading whitespace."""
  1574. if line.strip():
  1575. non_whitespace_index = len(line) - len(line.lstrip())
  1576. return line[:non_whitespace_index]
  1577. return ''
  1578. def get_diff_text(old, new, filename):
  1579. """Return text of unified diff between old and new."""
  1580. newline = '\n'
  1581. diff = difflib.unified_diff(
  1582. old, new,
  1583. 'original/' + filename,
  1584. 'fixed/' + filename,
  1585. lineterm=newline)
  1586. text = ''
  1587. for line in diff:
  1588. text += line
  1589. # Work around missing newline (http://bugs.python.org/issue2142).
  1590. if text and not line.endswith(newline):
  1591. text += newline + r'\ No newline at end of file' + newline
  1592. return text
  1593. def _priority_key(pep8_result):
  1594. """Key for sorting PEP8 results.
  1595. Global fixes should be done first. This is important for things like
  1596. indentation.
  1597. """
  1598. priority = [
  1599. # Fix multiline colon-based before semicolon based.
  1600. 'e701',
  1601. # Break multiline statements early.
  1602. 'e702',
  1603. # Things that make lines longer.
  1604. 'e225', 'e231',
  1605. # Remove extraneous whitespace before breaking lines.
  1606. 'e201',
  1607. # Shorten whitespace in comment before resorting to wrapping.
  1608. 'e262'
  1609. ]
  1610. middle_index = 10000
  1611. lowest_priority = [
  1612. # We need to shorten lines last since the logical fixer can get in a
  1613. # loop, which causes us to exit early.
  1614. 'e501',
  1615. ]
  1616. key = pep8_result['id'].lower()
  1617. try:
  1618. return priority.index(key)
  1619. except ValueError:
  1620. try:
  1621. return middle_index + lowest_priority.index(key) + 1
  1622. except ValueError:
  1623. return middle_index
  1624. def shorten_line(tokens, source, indentation, indent_word, max_line_length,
  1625. aggressive=0, experimental=False, previous_line=''):
  1626. """Separate line at OPERATOR.
  1627. Multiple candidates will be yielded.
  1628. """
  1629. for candidate in _shorten_line(tokens=tokens,
  1630. source=source,
  1631. indentation=indentation,
  1632. indent_word=indent_word,
  1633. aggressive=aggressive,
  1634. previous_line=previous_line):
  1635. yield candidate
  1636. if aggressive:
  1637. for key_token_strings in SHORTEN_OPERATOR_GROUPS:
  1638. shortened = _shorten_line_at_tokens(
  1639. tokens=tokens,
  1640. source=source,
  1641. indentation=indentation,
  1642. indent_word=indent_word,
  1643. key_token_strings=key_token_strings,
  1644. aggressive=aggressive)
  1645. if shortened is not None and shortened != source:
  1646. yield shortened
  1647. if experimental:
  1648. for shortened in _shorten_line_at_tokens_new(
  1649. tokens=tokens,
  1650. source=source,
  1651. indentation=indentation,
  1652. max_line_length=max_line_length):
  1653. yield shortened
def _shorten_line(tokens, source, indentation, indent_word,
                  aggressive=0, previous_line=''):
    """Separate line at OPERATOR.

    The input is expected to be free of newlines except for inside multiline
    strings and at the end.

    Multiple candidates will be yielded.  Two kinds are produced: an inline
    comment moved onto its own line above the code, and the line split in two
    after an operator token other than '='.
    """
    in_string = False
    for (token_type,
         token_string,
         start_offset,
         end_offset) in token_offsets(tokens):

        # Tokens between FSTRING_START and FSTRING_END are never split
        # points (only checked when IS_SUPPORT_TOKEN_FSTRING is set).
        if IS_SUPPORT_TOKEN_FSTRING:
            if token_type == tokenize.FSTRING_START:
                in_string = True
            elif token_type == tokenize.FSTRING_END:
                in_string = False
        if in_string:
            continue

        if (
            token_type == tokenize.COMMENT and
            not is_probably_part_of_multiline(previous_line) and
            not is_probably_part_of_multiline(source) and
            not source[start_offset + 1:].strip().lower().startswith(
                ('noqa', 'pragma:', 'pylint:'))
        ):
            # Move inline comments to previous line.
            first = source[:start_offset]
            second = source[start_offset:]
            yield (indentation + second.strip() + '\n' +
                   indentation + first.strip() + '\n')
        elif token_type == token.OP and token_string != '=':
            # Don't break on '=' after keyword as this violates PEP 8.

            assert token_type != token.INDENT

            first = source[:end_offset]

            # Pick the indentation of the continuation line.
            second_indent = indentation
            if (first.rstrip().endswith('(') and
                    source[end_offset:].lstrip().startswith(')')):
                # Empty brackets: keep the same indentation.
                pass
            elif first.rstrip().endswith('('):
                second_indent += indent_word
            elif '(' in first:
                # Align with the character after the first '('.
                second_indent += ' ' * (1 + first.find('('))
            else:
                second_indent += indent_word

            second = (second_indent + source[end_offset:].lstrip())
            if (
                not second.strip() or
                second.lstrip().startswith('#')
            ):
                continue

            # Do not begin a line with a comma
            if second.lstrip().startswith(','):
                continue
            # Do not end a line with a dot
            if first.rstrip().endswith('.'):
                continue
            # After an arithmetic operator the break gets an explicit
            # backslash continuation.
            if token_string in '+-*/':
                fixed = first + ' \\' + '\n' + second
            else:
                fixed = first + '\n' + second

            # Only fix if syntax is okay.
            if check_syntax(normalize_multiline(fixed)
                            if aggressive else fixed):
                yield indentation + fixed
  1719. def _is_binary_operator(token_type, text):
  1720. return ((token_type == tokenize.OP or text in ['and', 'or']) and
  1721. text not in '()[]{},:.;@=%~')
  1722. # A convenient way to handle tokens.
  1723. Token = collections.namedtuple('Token', ['token_type', 'token_string',
  1724. 'spos', 'epos', 'line'])
class ReformattedLines(object):

    """The reflowed lines of atoms.

    Each part of the line is represented as an "atom." They can be moved
    around when need be to get the optimal formatting.

    Internally ``self._lines`` is one flat list that mixes the added items
    with ``_Indent``, ``_Space`` and ``_LineBreak`` markers; ``emit()``
    concatenates what each entry emits.

    """

    ###########################################################################
    # Private Classes

    class _Indent(object):

        """Represent an indentation in the atom stream."""

        def __init__(self, indent_amt):
            self._indent_amt = indent_amt

        def emit(self):
            """Return ``indent_amt`` spaces."""
            return ' ' * self._indent_amt

        @property
        def size(self):
            """Width of the indentation."""
            return self._indent_amt

    class _Space(object):

        """Represent a space in the atom stream."""

        def emit(self):
            """Return a single space."""
            return ' '

        @property
        def size(self):
            """A space is one column wide."""
            return 1

    class _LineBreak(object):

        """Represent a line break in the atom stream."""

        def emit(self):
            """Return a newline."""
            return '\n'

        @property
        def size(self):
            """A line break takes no width."""
            return 0

    def __init__(self, max_line_length):
        """Start with no items, bracket depth 0 and no previous items."""
        self._max_line_length = max_line_length
        self._lines = []
        self._bracket_depth = 0
        self._prev_item = None
        self._prev_prev_item = None
        self._in_fstring = False

    def __repr__(self):
        return self.emit()

    ###########################################################################
    # Public Methods

    def add(self, obj, indent_amt, break_after_open_bracket):
        """Add *obj*: an ``Atom`` is added directly, anything else is
        handled as a container."""
        if isinstance(obj, Atom):
            self._add_item(obj, indent_amt)
            return

        self._add_container(obj, indent_amt, break_after_open_bracket)

    def add_comment(self, item):
        """Append comment *item*, preceded by up to two spaces.

        ``_Space`` entries already at the end of the stream count towards
        the two.
        """
        num_spaces = 2
        if len(self._lines) > 1:
            if isinstance(self._lines[-1], self._Space):
                num_spaces -= 1
            if len(self._lines) > 2:
                if isinstance(self._lines[-2], self._Space):
                    num_spaces -= 1

        while num_spaces > 0:
            self._lines.append(self._Space())
            num_spaces -= 1
        self._lines.append(item)

    def add_indent(self, indent_amt):
        """Append an indentation of *indent_amt* columns."""
        self._lines.append(self._Indent(indent_amt))

    def add_line_break(self, indent):
        """Append a line break followed by an indent as wide as *indent*."""
        self._lines.append(self._LineBreak())
        self.add_indent(len(indent))

    def add_line_break_at(self, index, indent_amt):
        """Insert a line break plus indent at position *index*."""
        self._lines.insert(index, self._LineBreak())
        self._lines.insert(index + 1, self._Indent(indent_amt))

    def add_space_if_needed(self, curr_text, equal=False):
        """Append a space before *curr_text* when the spacing rules ask for it.

        Nothing is added when the stream is empty or already ends in a line
        break, indent or space.  *equal* requests spaces around '='.
        """
        if (
            not self._lines or isinstance(
                self._lines[-1], (self._LineBreak, self._Indent, self._Space))
        ):
            return

        prev_text = str(self._prev_item)
        prev_prev_text = (
            str(self._prev_prev_item) if self._prev_prev_item else '')

        if (
            # The previous item was a keyword or identifier and the current
            # item isn't an operator that doesn't require a space.
            ((self._prev_item.is_keyword or self._prev_item.is_string or
              self._prev_item.is_name or self._prev_item.is_number) and
             (curr_text[0] not in '([{.,:}])' or
              (curr_text[0] == '=' and equal))) or

            # Don't place spaces around a '.', unless it's in an 'import'
            # statement.
            ((prev_prev_text != 'from' and prev_text[-1] != '.' and
              curr_text != 'import') and

             # Don't place a space before a colon.
             curr_text[0] != ':' and

             # Don't split up ending brackets by spaces.
             ((prev_text[-1] in '}])' and curr_text[0] not in '.,}])') or

              # Put a space after a colon or comma.
              prev_text[-1] in ':,' or

              # Put space around '=' if asked to.
              (equal and prev_text == '=') or

              # Put spaces around non-unary arithmetic operators.
              ((self._prev_prev_item and
                (prev_text not in '+-' and
                 (self._prev_prev_item.is_name or
                  self._prev_prev_item.is_number or
                  self._prev_prev_item.is_string)) and
                prev_text in ('+', '-', '%', '*', '/', '//', '**', 'in')))))
        ):
            self._lines.append(self._Space())

    def previous_item(self):
        """Return the previous non-whitespace item."""
        return self._prev_item

    def fits_on_current_line(self, item_extent):
        """Return True if *item_extent* more columns stay within the
        maximum line length."""
        return self.current_size() + item_extent <= self._max_line_length

    def current_size(self):
        """Return the summed ``size`` of the entries since the last line break.

        NOTE(review): ``_Indent`` entries report their indent amount as
        ``size``, so the indentation appears to be included in the total --
        confirm against callers.
        """
        size = 0
        for item in reversed(self._lines):
            size += item.size
            if isinstance(item, self._LineBreak):
                break

        return size

    def line_empty(self):
        """Return a truthy value if the stream ends in a line break or indent.

        An empty stream yields the (falsy) empty list itself, not ``False``.
        """
        return (self._lines and
                isinstance(self._lines[-1],
                           (self._LineBreak, self._Indent)))

    def emit(self):
        """Return the reflowed text, newline-terminated.

        Trailing whitespace is stripped before every line break and at the
        end.
        """
        string = ''
        for item in self._lines:
            if isinstance(item, self._LineBreak):
                string = string.rstrip()
            string += item.emit()

        return string.rstrip() + '\n'

    ###########################################################################
    # Private Methods

    def _add_item(self, item, indent_amt):
        """Add an item to the line.

        Reflow the line to get the best formatting after the item is
        inserted. The bracket depth indicates if the item is being
        inserted inside of a container or not.

        """
        # Brackets seen while inside an f-string do not change the bracket
        # depth (see the end of this method).
        if item.is_fstring_start:
            self._in_fstring = True
        elif self._prev_item and self._prev_item.is_fstring_end:
            self._in_fstring = False

        if self._prev_item and self._prev_item.is_string and item.is_string:
            # Place consecutive string literals on separate lines.
            self._lines.append(self._LineBreak())
            self._lines.append(self._Indent(indent_amt))

        item_text = str(item)
        if self._lines and self._bracket_depth:
            # Adding the item into a container.
            self._prevent_default_initializer_splitting(item, indent_amt)

            if item_text in '.,)]}':
                self._split_after_delimiter(item, indent_amt)

        elif self._lines and not self.line_empty():
            # Adding the item outside of a container.
            if self.fits_on_current_line(len(item_text)):
                self._enforce_space(item)

            else:
                # Line break for the new item.
                self._lines.append(self._LineBreak())
                self._lines.append(self._Indent(indent_amt))

        self._lines.append(item)
        self._prev_item, self._prev_prev_item = item, self._prev_item

        if item_text in '([{' and not self._in_fstring:
            self._bracket_depth += 1

        elif item_text in '}])' and not self._in_fstring:
            self._bracket_depth -= 1
            assert self._bracket_depth >= 0

    def _add_container(self, container, indent_amt, break_after_open_bracket):
        """Reflow *container* into the stream, choosing its continuation indent
        and whether it starts on a new line."""
        actual_indent = indent_amt + 1

        if (
            str(self._prev_item) != '=' and
            not self.line_empty() and
            not self.fits_on_current_line(
                container.size + self._bracket_depth + 2)
        ):

            if str(container)[0] == '(' and self._prev_item.is_name:
                # Don't split before the opening bracket of a call.
                break_after_open_bracket = True
                actual_indent = indent_amt + 4
            elif (
                break_after_open_bracket or
                str(self._prev_item) not in '([{'
            ):
                # If the container doesn't fit on the current line and the
                # current line isn't empty, place the container on the next
                # line.
                self._lines.append(self._LineBreak())
                self._lines.append(self._Indent(indent_amt))
                break_after_open_bracket = False
        else:
            # The container stays on the current line; continuation lines
            # are aligned one column past the current size.
            actual_indent = self.current_size() + 1
            break_after_open_bracket = False

        if isinstance(container, (ListComprehension, IfExpression)):
            actual_indent = indent_amt

        # Increase the continued indentation only if recursing on a
        # container.
        container.reflow(self, ' ' * actual_indent,
                         break_after_open_bracket=break_after_open_bracket)

    def _prevent_default_initializer_splitting(self, item, indent_amt):
        """Prevent splitting between a default initializer.

        When there is a default initializer, it's best to keep it all on
        the same line. It's nicer and more readable, even if it goes
        over the maximum allowable line length. This goes back along the
        current line to determine if we have a default initializer, and,
        if so, to remove extraneous whitespaces and add a line
        break/indent before it if needed.

        """
        if str(item) == '=':
            # This is the assignment in the initializer. Just remove spaces for
            # now.
            self._delete_whitespace()
            return

        # Only continue when the previous item was '=' and something came
        # before it.
        if (not self._prev_item or not self._prev_prev_item or
                str(self._prev_item) != '='):
            return

        self._delete_whitespace()
        prev_prev_index = self._lines.index(self._prev_prev_item)

        if (
            isinstance(self._lines[prev_prev_index - 1], self._Indent) or
            self.fits_on_current_line(item.size + 1)
        ):
            # The default initializer is already the only item on this line.
            # Don't insert a newline here.
            return

        # Replace the space with a newline/indent combo.
        if isinstance(self._lines[prev_prev_index - 1], self._Space):
            del self._lines[prev_prev_index - 1]

        # The index is looked up again because the deletion above may have
        # shifted it.
        self.add_line_break_at(self._lines.index(self._prev_prev_item),
                               indent_amt)

    def _split_after_delimiter(self, item, indent_amt):
        """Split the line only after a delimiter."""
        self._delete_whitespace()

        if self.fits_on_current_line(item.size):
            return

        # Scan backwards for the space at which the line can be broken; give
        # up on reaching the start of the current line.
        last_space = None
        for current_item in reversed(self._lines):
            if (
                last_space and
                (not isinstance(current_item, Atom) or
                 not current_item.is_colon)
            ):
                break
            else:
                last_space = None
            if isinstance(current_item, self._Space):
                last_space = current_item
            if isinstance(current_item, (self._LineBreak, self._Indent)):
                return

        if not last_space:
            return

        self.add_line_break_at(self._lines.index(last_space), indent_amt)

    def _enforce_space(self, item):
        """Enforce a space in certain situations.

        There are cases where we will want a space where normally we
        wouldn't put one. This just enforces the addition of a space.

        """
        if isinstance(self._lines[-1],
                      (self._Space, self._LineBreak, self._Indent)):
            return

        if not self._prev_item:
            return

        item_text = str(item)
        prev_text = str(self._prev_item)

        # Prefer a space around a '.' in an import statement, and between the
        # 'import' and '('.
        if (
            (item_text == '.' and prev_text == 'from') or
            (item_text == 'import' and prev_text == '.') or
            (item_text == '(' and prev_text == 'import')
        ):
            self._lines.append(self._Space())

    def _delete_whitespace(self):
        """Delete all whitespace from the end of the line."""
        # NOTE(review): indexes [-1] without an emptiness check; this relies
        # on a non-whitespace entry being present -- confirm with callers.
        while isinstance(self._lines[-1], (self._Space, self._LineBreak,
                                           self._Indent)):
            del self._lines[-1]
  1998. class Atom(object):
  1999. """The smallest unbreakable unit that can be reflowed."""
  2000. def __init__(self, atom):
  2001. self._atom = atom
  2002. def __repr__(self):
  2003. return self._atom.token_string
  2004. def __len__(self):
  2005. return self.size
  2006. def reflow(
  2007. self, reflowed_lines, continued_indent, extent,
  2008. break_after_open_bracket=False,
  2009. is_list_comp_or_if_expr=False,
  2010. next_is_dot=False
  2011. ):
  2012. if self._atom.token_type == tokenize.COMMENT:
  2013. reflowed_lines.add_comment(self)
  2014. return
  2015. total_size = extent if extent else self.size
  2016. if self._atom.token_string not in ',:([{}])':
  2017. # Some atoms will need an extra 1-sized space token after them.
  2018. total_size += 1
  2019. prev_item = reflowed_lines.previous_item()
  2020. if (
  2021. not is_list_comp_or_if_expr and
  2022. not reflowed_lines.fits_on_current_line(total_size) and
  2023. not (next_is_dot and
  2024. reflowed_lines.fits_on_current_line(self.size + 1)) and
  2025. not reflowed_lines.line_empty() and
  2026. not self.is_colon and
  2027. not (prev_item and prev_item.is_name and
  2028. str(self) == '(')
  2029. ):
  2030. # Start a new line if there is already something on the line and
  2031. # adding this atom would make it go over the max line length.
  2032. reflowed_lines.add_line_break(continued_indent)
  2033. else:
  2034. reflowed_lines.add_space_if_needed(str(self))
  2035. reflowed_lines.add(self, len(continued_indent),
  2036. break_after_open_bracket)
  2037. def emit(self):
  2038. return self.__repr__()
  2039. @property
  2040. def is_keyword(self):
  2041. return keyword.iskeyword(self._atom.token_string)
  2042. @property
  2043. def is_string(self):
  2044. return self._atom.token_type == tokenize.STRING
  2045. @property
  2046. def is_fstring_start(self):
  2047. if not IS_SUPPORT_TOKEN_FSTRING:
  2048. return False
  2049. return self._atom.token_type == tokenize.FSTRING_START
  2050. @property
  2051. def is_fstring_end(self):
  2052. if not IS_SUPPORT_TOKEN_FSTRING:
  2053. return False
  2054. return self._atom.token_type == tokenize.FSTRING_END
  2055. @property
  2056. def is_name(self):
  2057. return self._atom.token_type == tokenize.NAME
  2058. @property
  2059. def is_number(self):
  2060. return self._atom.token_type == tokenize.NUMBER
  2061. @property
  2062. def is_comma(self):
  2063. return self._atom.token_string == ','
  2064. @property
  2065. def is_colon(self):
  2066. return self._atom.token_string == ':'
  2067. @property
  2068. def size(self):
  2069. return len(self._atom.token_string)
class Container(object):

    """Base class for all container types.

    A container holds an ordered list of items.  The methods below treat
    each item either as an ``Atom`` or as another container, and rely on
    the item exposing ``is_comma``, ``is_colon``, ``is_keyword``,
    ``is_name``, ``is_string`` and ``size``.

    """

    def __init__(self, items):
        self._items = items

    def __repr__(self):
        """Return the items joined into a single line of source text.

        Commas and colons are always followed by one space.  Any other
        item is preceded by a space when the previous item was a keyword,
        or when neither the text so far ends with a bracket, comma, dot,
        colon or space nor the item starts with a bracket, comma, dot or
        colon.

        """
        string = ''
        last_was_keyword = False

        for item in self._items:
            if item.is_comma:
                string += ', '
            elif item.is_colon:
                string += ': '
            else:
                item_string = str(item)
                if (
                    string and
                    (last_was_keyword or
                     (not string.endswith(tuple('([{,.:}]) ')) and
                      not item_string.startswith(tuple('([{,.:}])'))))
                ):
                    string += ' '
                string += item_string

            last_was_keyword = item.is_keyword
        return string

    def __iter__(self):
        for element in self._items:
            yield element

    def __getitem__(self, idx):
        return self._items[idx]

    def reflow(self, reflowed_lines, continued_indent,
               break_after_open_bracket=False):
        """Add every item of this container to ``reflowed_lines``.

        Atoms reflow themselves; any other item is added whole through
        ``reflowed_lines.add``.  Line breaks are inserted after a comma
        that follows a nested container, after the opening bracket when
        ``break_after_open_bracket`` is set, and before an upcoming
        element that would not fit on the current line.

        """
        # True while the previously handled item was a nested container
        # other than a list comprehension or if-expression.
        last_was_container = False
        for (index, item) in enumerate(self._items):
            next_item = get_item(self._items, index + 1)

            if isinstance(item, Atom):
                is_list_comp_or_if_expr = (
                    isinstance(self, (ListComprehension, IfExpression)))
                item.reflow(reflowed_lines, continued_indent,
                            self._get_extent(index),
                            is_list_comp_or_if_expr=is_list_comp_or_if_expr,
                            next_is_dot=(next_item and
                                         str(next_item) == '.'))
                # Put whatever follows "<container>," on its own line.
                if last_was_container and item.is_comma:
                    reflowed_lines.add_line_break(continued_indent)
                last_was_container = False
            else:  # isinstance(item, Container)
                reflowed_lines.add(item, len(continued_indent),
                                   break_after_open_bracket)
                last_was_container = not isinstance(item, (ListComprehension,
                                                           IfExpression))

            if (
                break_after_open_bracket and index == 0 and
                # Prefer to keep empty containers together instead of
                # separating them.
                str(item) == self.open_bracket and
                (not next_item or str(next_item) != self.close_bracket) and
                (len(self._items) != 3 or not isinstance(next_item, Atom))
            ):
                reflowed_lines.add_line_break(continued_indent)
                # Only break after the opening bracket once.
                break_after_open_bracket = False
            else:
                next_next_item = get_item(self._items, index + 2)
                # Break before the next element when it is an atom followed
                # by a non-atom (or is 'not') and its full extent, plus two
                # columns, does not fit on the current line.
                if (
                    str(item) not in ['.', '%', 'in'] and
                    next_item and not isinstance(next_item, Container) and
                    str(next_item) != ':' and
                    next_next_item and (not isinstance(next_next_item, Atom) or
                                        str(next_item) == 'not') and
                    not reflowed_lines.line_empty() and
                    not reflowed_lines.fits_on_current_line(
                        self._get_extent(index + 1) + 2)
                ):
                    reflowed_lines.add_line_break(continued_indent)

    def _get_extent(self, index):
        """The extent of the full element.

        E.g., the length of a function call or keyword.

        Sizes are summed starting at ``index`` until a list comprehension
        or if-expression is met, or an atom that is not '.', '=', ':',
        'not', a name or a string.

        """
        extent = 0
        prev_item = get_item(self._items, index - 1)
        seen_dot = prev_item and str(prev_item) == '.'
        while index < len(self._items):
            item = get_item(self._items, index)
            index += 1

            if isinstance(item, (ListComprehension, IfExpression)):
                break

            if isinstance(item, Container):
                if prev_item and prev_item.is_name:
                    # A container right after a name: once a dot has been
                    # seen only one column is counted for it, otherwise its
                    # full size.
                    if seen_dot:
                        extent += 1
                    else:
                        extent += item.size

                    prev_item = item
                    continue
            elif (str(item) not in ['.', '=', ':', 'not'] and
                  not item.is_name and not item.is_string):
                break

            if str(item) == '.':
                seen_dot = True

            extent += item.size
            prev_item = item

        return extent

    @property
    def is_string(self):
        return False

    @property
    def size(self):
        # Length of the single-line rendering produced by __repr__.
        return len(self.__repr__())

    @property
    def is_keyword(self):
        return False

    @property
    def is_name(self):
        return False

    @property
    def is_comma(self):
        return False

    @property
    def is_colon(self):
        return False

    @property
    def open_bracket(self):
        # The base class has no bracket of its own.
        return None

    @property
    def close_bracket(self):
        # The base class has no bracket of its own.
        return None
  2195. class Tuple(Container):
  2196. """A high-level representation of a tuple."""
  2197. @property
  2198. def open_bracket(self):
  2199. return '('
  2200. @property
  2201. def close_bracket(self):
  2202. return ')'
  2203. class List(Container):
  2204. """A high-level representation of a list."""
  2205. @property
  2206. def open_bracket(self):
  2207. return '['
  2208. @property
  2209. def close_bracket(self):
  2210. return ']'
  2211. class DictOrSet(Container):
  2212. """A high-level representation of a dictionary or set."""
  2213. @property
  2214. def open_bracket(self):
  2215. return '{'
  2216. @property
  2217. def close_bracket(self):
  2218. return '}'
  2219. class ListComprehension(Container):
  2220. """A high-level representation of a list comprehension."""
  2221. @property
  2222. def size(self):
  2223. length = 0
  2224. for item in self._items:
  2225. if isinstance(item, IfExpression):
  2226. break
  2227. length += item.size
  2228. return length
  2229. class IfExpression(Container):
  2230. """A high-level representation of an if-expression."""
  2231. def _parse_container(tokens, index, for_or_if=None):
  2232. """Parse a high-level container, such as a list, tuple, etc."""
  2233. # Store the opening bracket.
  2234. items = [Atom(Token(*tokens[index]))]
  2235. index += 1
  2236. num_tokens = len(tokens)
  2237. while index < num_tokens:
  2238. tok = Token(*tokens[index])
  2239. if tok.token_string in ',)]}':
  2240. # First check if we're at the end of a list comprehension or
  2241. # if-expression. Don't add the ending token as part of the list
  2242. # comprehension or if-expression, because they aren't part of those
  2243. # constructs.
  2244. if for_or_if == 'for':
  2245. return (ListComprehension(items), index - 1)
  2246. elif for_or_if == 'if':
  2247. return (IfExpression(items), index - 1)
  2248. # We've reached the end of a container.
  2249. items.append(Atom(tok))
  2250. # If not, then we are at the end of a container.
  2251. if tok.token_string == ')':
  2252. # The end of a tuple.
  2253. return (Tuple(items), index)
  2254. elif tok.token_string == ']':
  2255. # The end of a list.
  2256. return (List(items), index)
  2257. elif tok.token_string == '}':
  2258. # The end of a dictionary or set.
  2259. return (DictOrSet(items), index)
  2260. elif tok.token_string in '([{':
  2261. # A sub-container is being defined.
  2262. (container, index) = _parse_container(tokens, index)
  2263. items.append(container)
  2264. elif tok.token_string == 'for':
  2265. (container, index) = _parse_container(tokens, index, 'for')
  2266. items.append(container)
  2267. elif tok.token_string == 'if':
  2268. (container, index) = _parse_container(tokens, index, 'if')
  2269. items.append(container)
  2270. else:
  2271. items.append(Atom(tok))
  2272. index += 1
  2273. return (None, None)
  2274. def _parse_tokens(tokens):
  2275. """Parse the tokens.
  2276. This converts the tokens into a form where we can manipulate them
  2277. more easily.
  2278. """
  2279. index = 0
  2280. parsed_tokens = []
  2281. num_tokens = len(tokens)
  2282. while index < num_tokens:
  2283. tok = Token(*tokens[index])
  2284. assert tok.token_type != token.INDENT
  2285. if tok.token_type == tokenize.NEWLINE:
  2286. # There's only one newline and it's at the end.
  2287. break
  2288. if tok.token_string in '([{':
  2289. (container, index) = _parse_container(tokens, index)
  2290. if not container:
  2291. return None
  2292. parsed_tokens.append(container)
  2293. else:
  2294. parsed_tokens.append(Atom(tok))
  2295. index += 1
  2296. return parsed_tokens
  2297. def _reflow_lines(parsed_tokens, indentation, max_line_length,
  2298. start_on_prefix_line):
  2299. """Reflow the lines so that it looks nice."""
  2300. if str(parsed_tokens[0]) == 'def':
  2301. # A function definition gets indented a bit more.
  2302. continued_indent = indentation + ' ' * 2 * DEFAULT_INDENT_SIZE
  2303. else:
  2304. continued_indent = indentation + ' ' * DEFAULT_INDENT_SIZE
  2305. break_after_open_bracket = not start_on_prefix_line
  2306. lines = ReformattedLines(max_line_length)
  2307. lines.add_indent(len(indentation.lstrip('\r\n')))
  2308. if not start_on_prefix_line:
  2309. # If splitting after the opening bracket will cause the first element
  2310. # to be aligned weirdly, don't try it.
  2311. first_token = get_item(parsed_tokens, 0)
  2312. second_token = get_item(parsed_tokens, 1)
  2313. if (
  2314. first_token and second_token and
  2315. str(second_token)[0] == '(' and
  2316. len(indentation) + len(first_token) + 1 == len(continued_indent)
  2317. ):
  2318. return None
  2319. for item in parsed_tokens:
  2320. lines.add_space_if_needed(str(item), equal=True)
  2321. save_continued_indent = continued_indent
  2322. if start_on_prefix_line and isinstance(item, Container):
  2323. start_on_prefix_line = False
  2324. continued_indent = ' ' * (lines.current_size() + 1)
  2325. item.reflow(lines, continued_indent, break_after_open_bracket)
  2326. continued_indent = save_continued_indent
  2327. return lines.emit()
  2328. def _shorten_line_at_tokens_new(tokens, source, indentation,
  2329. max_line_length):
  2330. """Shorten the line taking its length into account.
  2331. The input is expected to be free of newlines except for inside
  2332. multiline strings and at the end.
  2333. """
  2334. # Yield the original source so to see if it's a better choice than the
  2335. # shortened candidate lines we generate here.
  2336. yield indentation + source
  2337. parsed_tokens = _parse_tokens(tokens)
  2338. if parsed_tokens:
  2339. # Perform two reflows. The first one starts on the same line as the
  2340. # prefix. The second starts on the line after the prefix.
  2341. fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
  2342. start_on_prefix_line=True)
  2343. if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
  2344. yield fixed
  2345. fixed = _reflow_lines(parsed_tokens, indentation, max_line_length,
  2346. start_on_prefix_line=False)
  2347. if fixed and check_syntax(normalize_multiline(fixed.lstrip())):
  2348. yield fixed
  2349. def _shorten_line_at_tokens(tokens, source, indentation, indent_word,
  2350. key_token_strings, aggressive):
  2351. """Separate line by breaking at tokens in key_token_strings.
  2352. The input is expected to be free of newlines except for inside
  2353. multiline strings and at the end.
  2354. """
  2355. offsets = []
  2356. for (index, _t) in enumerate(token_offsets(tokens)):
  2357. (token_type,
  2358. token_string,
  2359. start_offset,
  2360. end_offset) = _t
  2361. assert token_type != token.INDENT
  2362. if token_string in key_token_strings:
  2363. # Do not break in containers with zero or one items.
  2364. unwanted_next_token = {
  2365. '(': ')',
  2366. '[': ']',
  2367. '{': '}'}.get(token_string)
  2368. if unwanted_next_token:
  2369. if (
  2370. get_item(tokens,
  2371. index + 1,
  2372. default=[None, None])[1] == unwanted_next_token or
  2373. get_item(tokens,
  2374. index + 2,
  2375. default=[None, None])[1] == unwanted_next_token
  2376. ):
  2377. continue
  2378. if (
  2379. index > 2 and token_string == '(' and
  2380. tokens[index - 1][1] in ',(%['
  2381. ):
  2382. # Don't split after a tuple start, or before a tuple start if
  2383. # the tuple is in a list.
  2384. continue
  2385. if end_offset < len(source) - 1:
  2386. # Don't split right before newline.
  2387. offsets.append(end_offset)
  2388. else:
  2389. # Break at adjacent strings. These were probably meant to be on
  2390. # separate lines in the first place.
  2391. previous_token = get_item(tokens, index - 1)
  2392. if (
  2393. token_type == tokenize.STRING and
  2394. previous_token and previous_token[0] == tokenize.STRING
  2395. ):
  2396. offsets.append(start_offset)
  2397. current_indent = None
  2398. fixed = None
  2399. for line in split_at_offsets(source, offsets):
  2400. if fixed:
  2401. fixed += '\n' + current_indent + line
  2402. for symbol in '([{':
  2403. if line.endswith(symbol):
  2404. current_indent += indent_word
  2405. else:
  2406. # First line.
  2407. fixed = line
  2408. assert not current_indent
  2409. current_indent = indent_word
  2410. assert fixed is not None
  2411. if check_syntax(normalize_multiline(fixed)
  2412. if aggressive > 1 else fixed):
  2413. return indentation + fixed
  2414. return None
  2415. def token_offsets(tokens):
  2416. """Yield tokens and offsets."""
  2417. end_offset = 0
  2418. previous_end_row = 0
  2419. previous_end_column = 0
  2420. for t in tokens:
  2421. token_type = t[0]
  2422. token_string = t[1]
  2423. (start_row, start_column) = t[2]
  2424. (end_row, end_column) = t[3]
  2425. # Account for the whitespace between tokens.
  2426. end_offset += start_column
  2427. if previous_end_row == start_row:
  2428. end_offset -= previous_end_column
  2429. # Record the start offset of the token.
  2430. start_offset = end_offset
  2431. # Account for the length of the token itself.
  2432. end_offset += len(token_string)
  2433. yield (token_type,
  2434. token_string,
  2435. start_offset,
  2436. end_offset)
  2437. previous_end_row = end_row
  2438. previous_end_column = end_column
  2439. def normalize_multiline(line):
  2440. """Normalize multiline-related code that will cause syntax error.
  2441. This is for purposes of checking syntax.
  2442. """
  2443. if line.startswith(('def ', 'async def ')) and line.rstrip().endswith(':'):
  2444. return line + ' pass'
  2445. elif line.startswith('return '):
  2446. return 'def _(): ' + line
  2447. elif line.startswith('@'):
  2448. return line + 'def _(): pass'
  2449. elif line.startswith('class '):
  2450. return line + ' pass'
  2451. elif line.startswith(('if ', 'elif ', 'for ', 'while ')):
  2452. return line + ' pass'
  2453. return line
  2454. def fix_whitespace(line, offset, replacement):
  2455. """Replace whitespace at offset and return fixed line."""
  2456. # Replace escaped newlines too
  2457. left = line[:offset].rstrip('\n\r \t\\')
  2458. right = line[offset:].lstrip('\n\r \t\\')
  2459. if right.startswith('#'):
  2460. return line
  2461. return left + replacement + right
  2462. def _execute_pep8(pep8_options, source):
  2463. """Execute pycodestyle via python method calls."""
  2464. class QuietReport(pycodestyle.BaseReport):
  2465. """Version of checker that does not print."""
  2466. def __init__(self, options):
  2467. super(QuietReport, self).__init__(options)
  2468. self.__full_error_results = []
  2469. def error(self, line_number, offset, text, check):
  2470. """Collect errors."""
  2471. code = super(QuietReport, self).error(line_number,
  2472. offset,
  2473. text,
  2474. check)
  2475. if code:
  2476. self.__full_error_results.append(
  2477. {'id': code,
  2478. 'line': line_number,
  2479. 'column': offset + 1,
  2480. 'info': text})
  2481. def full_error_results(self):
  2482. """Return error results in detail.
  2483. Results are in the form of a list of dictionaries. Each
  2484. dictionary contains 'id', 'line', 'column', and 'info'.
  2485. """
  2486. return self.__full_error_results
  2487. checker = pycodestyle.Checker('', lines=source, reporter=QuietReport,
  2488. **pep8_options)
  2489. checker.check_all()
  2490. return checker.report.full_error_results()
  2491. def _remove_leading_and_normalize(line, with_rstrip=True):
  2492. # ignore FF in first lstrip()
  2493. if with_rstrip:
  2494. return line.lstrip(' \t\v').rstrip(CR + LF) + '\n'
  2495. return line.lstrip(' \t\v')
class Reindenter(object):

    """Reindents badly-indented code to uniformly use four-space indentation.

    Released to the public domain, by Tim Peters, 03 October 2000.

    """

    def __init__(self, input_text, leave_tabs=False):
        """Split ``input_text`` into normalized lines ready for ``run``.

        Lines inside multiline strings are kept verbatim.  Every other
        line has its leading whitespace replaced by its indentation (with
        tabs expanded unless ``leave_tabs`` is set) and, except for the
        last line, its line ending normalized.

        """
        sio = io.StringIO(input_text)
        source_lines = sio.readlines()

        self.string_content_line_numbers = multiline_string_lines(input_text)

        # File lines, rstripped & tab-expanded. Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it is a newline.
        self.lines = []
        for line_number, line in enumerate(source_lines, start=1):
            # Do not modify if inside a multiline string.
            if line_number in self.string_content_line_numbers:
                self.lines.append(line)
            else:
                # Only expand leading tabs.
                # The last line keeps its original ending (or lack of one).
                with_rstrip = line_number != len(source_lines)
                if leave_tabs:
                    self.lines.append(
                        _get_indentation(line) +
                        _remove_leading_and_normalize(line, with_rstrip)
                    )
                else:
                    self.lines.append(
                        _get_indentation(line).expandtabs() +
                        _remove_leading_and_normalize(line, with_rstrip)
                    )

        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line
        self.input_text = input_text

    def run(self, indent_size=DEFAULT_INDENT_SIZE):
        """Return the source text re-indented to ``indent_size`` per level.

        The original text is returned unchanged when ``indent_size`` is
        less than 1 or when the lines cannot be tokenized.

        """
        if indent_size < 1:
            return self.input_text

        try:
            stats = _reindent_stats(tokenize.generate_tokens(self.getline))
        except (SyntaxError, tokenize.TokenError):
            return self.input_text
        # Local alias for the normalized lines (index 0 is the dummy).
        lines = self.lines
        # Sentinel.
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats) - 1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i + 1][0]
            have = _leading_space_count(lines[thisstmt])
            # Comment lines carry level -1, which makes `want` negative.
            want = thislevel * indent_size
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line. If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in range(i + 1, len(stats) - 1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == _leading_space_count(lines[jline]):
                                    want = jlevel * indent_size
                                break
                    # Maybe it's a hanging comment like this one,
                    if want < 0:
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in range(i - 1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = (have + _leading_space_count(
                                        after[jline - 1]) -
                                        _leading_space_count(lines[jline]))
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                # Shift every line of the statement by `diff` columns.
                for line_number, line in enumerate(lines[thisstmt:nextstmt],
                                                   start=thisstmt):
                    if line_number in self.string_content_line_numbers:
                        # Never touch the contents of multiline strings.
                        after.append(line)
                    elif diff > 0:
                        if line == '\n':
                            after.append(line)
                        else:
                            after.append(' ' * diff + line)
                    else:
                        # Remove at most the leading spaces the line has.
                        remove = min(_leading_space_count(line), -diff)
                        after.append(line[remove:])

        return ''.join(after)

    def getline(self):
        """Line-getter for tokenize."""
        # An empty string tells tokenize that the input is exhausted.
        if self.index >= len(self.lines):
            line = ''
        else:
            line = self.lines[self.index]
            self.index += 1
        return line
  2613. def _reindent_stats(tokens):
  2614. """Return list of (lineno, indentlevel) pairs.
  2615. One for each stmt and comment line. indentlevel is -1 for comment
  2616. lines, as a signal that tokenize doesn't know what to do about them;
  2617. indeed, they're our headache!
  2618. """
  2619. find_stmt = 1 # Next token begins a fresh stmt?
  2620. level = 0 # Current indent level.
  2621. stats = []
  2622. for t in tokens:
  2623. token_type = t[0]
  2624. sline = t[2][0]
  2625. line = t[4]
  2626. if token_type == tokenize.NEWLINE:
  2627. # A program statement, or ENDMARKER, will eventually follow,
  2628. # after some (possibly empty) run of tokens of the form
  2629. # (NL | COMMENT)* (INDENT | DEDENT+)?
  2630. find_stmt = 1
  2631. elif token_type == tokenize.INDENT:
  2632. find_stmt = 1
  2633. level += 1
  2634. elif token_type == tokenize.DEDENT:
  2635. find_stmt = 1
  2636. level -= 1
  2637. elif token_type == tokenize.COMMENT:
  2638. if find_stmt:
  2639. stats.append((sline, -1))
  2640. # But we're still looking for a new stmt, so leave
  2641. # find_stmt alone.
  2642. elif token_type == tokenize.NL:
  2643. pass
  2644. elif find_stmt:
  2645. # This is the first "real token" following a NEWLINE, so it
  2646. # must be the first token of the next program statement, or an
  2647. # ENDMARKER.
  2648. find_stmt = 0
  2649. if line: # Not endmarker.
  2650. stats.append((sline, level))
  2651. return stats
  2652. def _leading_space_count(line):
  2653. """Return number of leading spaces in line."""
  2654. i = 0
  2655. while i < len(line) and line[i] == ' ':
  2656. i += 1
  2657. return i
  2658. def check_syntax(code):
  2659. """Return True if syntax is okay."""
  2660. try:
  2661. return compile(code, '<string>', 'exec', dont_inherit=True)
  2662. except (SyntaxError, TypeError, ValueError):
  2663. return False
  2664. def find_with_line_numbers(pattern, contents):
  2665. """A wrapper around 're.finditer' to find line numbers.
  2666. Returns a list of line numbers where pattern was found in contents.
  2667. """
  2668. matches = list(re.finditer(pattern, contents))
  2669. if not matches:
  2670. return []
  2671. end = matches[-1].start()
  2672. # -1 so a failed `rfind` maps to the first line.
  2673. newline_offsets = {
  2674. -1: 0
  2675. }
  2676. for line_num, m in enumerate(re.finditer(r'\n', contents), 1):
  2677. offset = m.start()
  2678. if offset > end:
  2679. break
  2680. newline_offsets[offset] = line_num
  2681. def get_line_num(match, contents):
  2682. """Get the line number of string in a files contents.
  2683. Failing to find the newline is OK, -1 maps to 0
  2684. """
  2685. newline_offset = contents.rfind('\n', 0, match.start())
  2686. return newline_offsets[newline_offset]
  2687. return [get_line_num(match, contents) + 1 for match in matches]
  2688. def get_disabled_ranges(source):
  2689. """Returns a list of tuples representing the disabled ranges.
  2690. If disabled and no re-enable will disable for rest of file.
  2691. """
  2692. enable_line_nums = find_with_line_numbers(ENABLE_REGEX, source)
  2693. disable_line_nums = find_with_line_numbers(DISABLE_REGEX, source)
  2694. total_lines = len(re.findall("\n", source)) + 1
  2695. enable_commands = {}
  2696. for num in enable_line_nums:
  2697. enable_commands[num] = True
  2698. for num in disable_line_nums:
  2699. enable_commands[num] = False
  2700. disabled_ranges = []
  2701. currently_enabled = True
  2702. disabled_start = None
  2703. for line, commanded_enabled in sorted(enable_commands.items()):
  2704. if commanded_enabled is False and currently_enabled is True:
  2705. disabled_start = line
  2706. currently_enabled = False
  2707. elif commanded_enabled is True and currently_enabled is False:
  2708. disabled_ranges.append((disabled_start, line))
  2709. currently_enabled = True
  2710. if currently_enabled is False:
  2711. disabled_ranges.append((disabled_start, total_lines))
  2712. return disabled_ranges
  2713. def filter_disabled_results(result, disabled_ranges):
  2714. """Filter out reports based on tuple of disabled ranges.
  2715. """
  2716. line = result['line']
  2717. for disabled_range in disabled_ranges:
  2718. if disabled_range[0] <= line <= disabled_range[1]:
  2719. return False
  2720. return True
  2721. def filter_results(source, results, aggressive):
  2722. """Filter out spurious reports from pycodestyle.
  2723. If aggressive is True, we allow possibly unsafe fixes (E711, E712).
  2724. """
  2725. non_docstring_string_line_numbers = multiline_string_lines(
  2726. source, include_docstrings=False)
  2727. all_string_line_numbers = multiline_string_lines(
  2728. source, include_docstrings=True)
  2729. commented_out_code_line_numbers = commented_out_code_lines(source)
  2730. # Filter out the disabled ranges
  2731. disabled_ranges = get_disabled_ranges(source)
  2732. if disabled_ranges:
  2733. results = [
  2734. result for result in results if filter_disabled_results(
  2735. result,
  2736. disabled_ranges,
  2737. )
  2738. ]
  2739. has_e901 = any(result['id'].lower() == 'e901' for result in results)
  2740. for r in results:
  2741. issue_id = r['id'].lower()
  2742. if r['line'] in non_docstring_string_line_numbers:
  2743. if issue_id.startswith(('e1', 'e501', 'w191')):
  2744. continue
  2745. if r['line'] in all_string_line_numbers:
  2746. if issue_id in ['e501']:
  2747. continue
  2748. # We must offset by 1 for lines that contain the trailing contents of
  2749. # multiline strings.
  2750. if not aggressive and (r['line'] + 1) in all_string_line_numbers:
  2751. # Do not modify multiline strings in non-aggressive mode. Remove
  2752. # trailing whitespace could break doctests.
  2753. if issue_id.startswith(('w29', 'w39')):
  2754. continue
  2755. if aggressive <= 0:
  2756. if issue_id.startswith(('e711', 'e72', 'w6')):
  2757. continue
  2758. if aggressive <= 1:
  2759. if issue_id.startswith(('e712', 'e713', 'e714')):
  2760. continue
  2761. if aggressive <= 2:
  2762. if issue_id.startswith(('e704')):
  2763. continue
  2764. if r['line'] in commented_out_code_line_numbers:
  2765. if issue_id.startswith(('e261', 'e262', 'e501')):
  2766. continue
  2767. # Do not touch indentation if there is a token error caused by
  2768. # incomplete multi-line statement. Otherwise, we risk screwing up the
  2769. # indentation.
  2770. if has_e901:
  2771. if issue_id.startswith(('e1', 'e7')):
  2772. continue
  2773. yield r
  2774. def multiline_string_lines(source, include_docstrings=False):
  2775. """Return line numbers that are within multiline strings.
  2776. The line numbers are indexed at 1.
  2777. Docstrings are ignored.
  2778. """
  2779. line_numbers = set()
  2780. previous_token_type = ''
  2781. _check_target_tokens = [tokenize.STRING]
  2782. if IS_SUPPORT_TOKEN_FSTRING:
  2783. _check_target_tokens.extend([
  2784. tokenize.FSTRING_START,
  2785. tokenize.FSTRING_MIDDLE,
  2786. tokenize.FSTRING_END,
  2787. ])
  2788. try:
  2789. for t in generate_tokens(source):
  2790. token_type = t[0]
  2791. start_row = t[2][0]
  2792. end_row = t[3][0]
  2793. if token_type in _check_target_tokens and start_row != end_row:
  2794. if (
  2795. include_docstrings or
  2796. previous_token_type != tokenize.INDENT
  2797. ):
  2798. # We increment by one since we want the contents of the
  2799. # string.
  2800. line_numbers |= set(range(1 + start_row, 1 + end_row))
  2801. previous_token_type = token_type
  2802. except (SyntaxError, tokenize.TokenError):
  2803. pass
  2804. return line_numbers
  2805. def commented_out_code_lines(source):
  2806. """Return line numbers of comments that are likely code.
  2807. Commented-out code is bad practice, but modifying it just adds even
  2808. more clutter.
  2809. """
  2810. line_numbers = []
  2811. try:
  2812. for t in generate_tokens(source):
  2813. token_type = t[0]
  2814. token_string = t[1]
  2815. start_row = t[2][0]
  2816. line = t[4]
  2817. # Ignore inline comments.
  2818. if not line.lstrip().startswith('#'):
  2819. continue
  2820. if token_type == tokenize.COMMENT:
  2821. stripped_line = token_string.lstrip('#').strip()
  2822. with warnings.catch_warnings():
  2823. # ignore SyntaxWarning in Python3.8+
  2824. # refs:
  2825. # https://bugs.python.org/issue15248
  2826. # https://docs.python.org/3.8/whatsnew/3.8.html#other-language-changes
  2827. warnings.filterwarnings("ignore", category=SyntaxWarning)
  2828. if (
  2829. ' ' in stripped_line and
  2830. '#' not in stripped_line and
  2831. check_syntax(stripped_line)
  2832. ):
  2833. line_numbers.append(start_row)
  2834. except (SyntaxError, tokenize.TokenError):
  2835. pass
  2836. return line_numbers
  2837. def shorten_comment(line, max_line_length, last_comment=False):
  2838. """Return trimmed or split long comment line.
  2839. If there are no comments immediately following it, do a text wrap.
  2840. Doing this wrapping on all comments in general would lead to jagged
  2841. comment text.
  2842. """
  2843. assert len(line) > max_line_length
  2844. line = line.rstrip()
  2845. # PEP 8 recommends 72 characters for comment text.
  2846. indentation = _get_indentation(line) + '# '
  2847. max_line_length = min(max_line_length,
  2848. len(indentation) + 72)
  2849. MIN_CHARACTER_REPEAT = 5
  2850. if (
  2851. len(line) - len(line.rstrip(line[-1])) >= MIN_CHARACTER_REPEAT and
  2852. not line[-1].isalnum()
  2853. ):
  2854. # Trim comments that end with things like ---------
  2855. return line[:max_line_length] + '\n'
  2856. elif last_comment and re.match(r'\s*#+\s*\w+', line):
  2857. split_lines = textwrap.wrap(line.lstrip(' \t#'),
  2858. initial_indent=indentation,
  2859. subsequent_indent=indentation,
  2860. width=max_line_length,
  2861. break_long_words=False,
  2862. break_on_hyphens=False)
  2863. return '\n'.join(split_lines) + '\n'
  2864. return line + '\n'
  2865. def normalize_line_endings(lines, newline):
  2866. """Return fixed line endings.
  2867. All lines will be modified to use the most common line ending.
  2868. """
  2869. line = [line.rstrip('\n\r') + newline for line in lines]
  2870. if line and lines[-1] == lines[-1].rstrip('\n\r'):
  2871. line[-1] = line[-1].rstrip('\n\r')
  2872. return line
  2873. def mutual_startswith(a, b):
  2874. return b.startswith(a) or a.startswith(b)
  2875. def code_match(code, select, ignore):
  2876. if ignore:
  2877. assert not isinstance(ignore, str)
  2878. for ignored_code in [c.strip() for c in ignore]:
  2879. if mutual_startswith(code.lower(), ignored_code.lower()):
  2880. return False
  2881. if select:
  2882. assert not isinstance(select, str)
  2883. for selected_code in [c.strip() for c in select]:
  2884. if mutual_startswith(code.lower(), selected_code.lower()):
  2885. return True
  2886. return False
  2887. return True
  2888. def fix_code(source, options=None, encoding=None, apply_config=False):
  2889. """Return fixed source code.
  2890. "encoding" will be used to decode "source" if it is a byte string.
  2891. """
  2892. options = _get_options(options, apply_config)
  2893. # normalize
  2894. options.ignore = [opt.upper() for opt in options.ignore]
  2895. options.select = [opt.upper() for opt in options.select]
  2896. # check ignore args
  2897. # NOTE: If W50x is not included, add W50x because the code
  2898. # correction result is indefinite.
  2899. ignore_opt = options.ignore
  2900. if not {"W50", "W503", "W504"} & set(ignore_opt):
  2901. options.ignore.append("W50")
  2902. if not isinstance(source, str):
  2903. source = source.decode(encoding or get_encoding())
  2904. sio = io.StringIO(source)
  2905. return fix_lines(sio.readlines(), options=options)
  2906. def _get_options(raw_options, apply_config):
  2907. """Return parsed options."""
  2908. if not raw_options:
  2909. return parse_args([''], apply_config=apply_config)
  2910. if isinstance(raw_options, dict):
  2911. options = parse_args([''], apply_config=apply_config)
  2912. for name, value in raw_options.items():
  2913. if not hasattr(options, name):
  2914. raise ValueError("No such option '{}'".format(name))
  2915. # Check for very basic type errors.
  2916. expected_type = type(getattr(options, name))
  2917. if not isinstance(expected_type, (str, )):
  2918. if isinstance(value, (str, )):
  2919. raise ValueError(
  2920. "Option '{}' should not be a string".format(name))
  2921. setattr(options, name, value)
  2922. else:
  2923. options = raw_options
  2924. return options
  2925. def fix_lines(source_lines, options, filename=''):
  2926. """Return fixed source code."""
  2927. # Transform everything to line feed. Then change them back to original
  2928. # before returning fixed source code.
  2929. original_newline = find_newline(source_lines)
  2930. tmp_source = ''.join(normalize_line_endings(source_lines, '\n'))
  2931. # Keep a history to break out of cycles.
  2932. previous_hashes = set()
  2933. if options.line_range:
  2934. # Disable "apply_local_fixes()" for now due to issue #175.
  2935. fixed_source = tmp_source
  2936. else:
  2937. # Apply global fixes only once (for efficiency).
  2938. fixed_source = apply_global_fixes(tmp_source,
  2939. options,
  2940. filename=filename)
  2941. passes = 0
  2942. long_line_ignore_cache = set()
  2943. while hash(fixed_source) not in previous_hashes:
  2944. if options.pep8_passes >= 0 and passes > options.pep8_passes:
  2945. break
  2946. passes += 1
  2947. previous_hashes.add(hash(fixed_source))
  2948. tmp_source = copy.copy(fixed_source)
  2949. fix = FixPEP8(
  2950. filename,
  2951. options,
  2952. contents=tmp_source,
  2953. long_line_ignore_cache=long_line_ignore_cache)
  2954. fixed_source = fix.fix()
  2955. sio = io.StringIO(fixed_source)
  2956. return ''.join(normalize_line_endings(sio.readlines(), original_newline))
  2957. def fix_file(filename, options=None, output=None, apply_config=False):
  2958. if not options:
  2959. options = parse_args([filename], apply_config=apply_config)
  2960. original_source = readlines_from_file(filename)
  2961. fixed_source = original_source
  2962. if options.in_place or options.diff or output:
  2963. encoding = detect_encoding(filename)
  2964. if output:
  2965. output = LineEndingWrapper(wrap_output(output, encoding=encoding))
  2966. fixed_source = fix_lines(fixed_source, options, filename=filename)
  2967. if options.diff:
  2968. new = io.StringIO(fixed_source)
  2969. new = new.readlines()
  2970. diff = get_diff_text(original_source, new, filename)
  2971. if output:
  2972. output.write(diff)
  2973. output.flush()
  2974. elif options.jobs > 1:
  2975. diff = diff.encode(encoding)
  2976. return diff
  2977. elif options.in_place:
  2978. original = "".join(original_source).splitlines()
  2979. fixed = fixed_source.splitlines()
  2980. original_source_last_line = (
  2981. original_source[-1].split("\n")[-1] if original_source else ""
  2982. )
  2983. fixed_source_last_line = fixed_source.split("\n")[-1]
  2984. if original != fixed or (
  2985. original_source_last_line != fixed_source_last_line
  2986. ):
  2987. with open_with_encoding(filename, 'w', encoding=encoding) as fp:
  2988. fp.write(fixed_source)
  2989. return fixed_source
  2990. return None
  2991. else:
  2992. if output:
  2993. output.write(fixed_source)
  2994. output.flush()
  2995. return fixed_source
  2996. def global_fixes():
  2997. """Yield multiple (code, function) tuples."""
  2998. for function in list(globals().values()):
  2999. if inspect.isfunction(function):
  3000. arguments = _get_parameters(function)
  3001. if arguments[:1] != ['source']:
  3002. continue
  3003. code = extract_code_from_function(function)
  3004. if code:
  3005. yield (code, function)
  3006. def _get_parameters(function):
  3007. # pylint: disable=deprecated-method
  3008. if sys.version_info.major >= 3:
  3009. # We need to match "getargspec()", which includes "self" as the first
  3010. # value for methods.
  3011. # https://bugs.python.org/issue17481#msg209469
  3012. if inspect.ismethod(function):
  3013. function = function.__func__
  3014. return list(inspect.signature(function).parameters)
  3015. else:
  3016. return inspect.getargspec(function)[0]
  3017. def apply_global_fixes(source, options, where='global', filename='',
  3018. codes=None):
  3019. """Run global fixes on source code.
  3020. These are fixes that only need be done once (unlike those in
  3021. FixPEP8, which are dependent on pycodestyle).
  3022. """
  3023. if codes is None:
  3024. codes = []
  3025. if any(code_match(code, select=options.select, ignore=options.ignore)
  3026. for code in ['E101', 'E111']):
  3027. source = reindent(
  3028. source,
  3029. indent_size=options.indent_size,
  3030. leave_tabs=not (
  3031. code_match(
  3032. 'W191',
  3033. select=options.select,
  3034. ignore=options.ignore
  3035. )
  3036. )
  3037. )
  3038. for (code, function) in global_fixes():
  3039. if code_match(code, select=options.select, ignore=options.ignore):
  3040. if options.verbose:
  3041. print('---> Applying {} fix for {}'.format(where,
  3042. code.upper()),
  3043. file=sys.stderr)
  3044. source = function(source,
  3045. aggressive=options.aggressive)
  3046. return source
  3047. def extract_code_from_function(function):
  3048. """Return code handled by function."""
  3049. if not function.__name__.startswith('fix_'):
  3050. return None
  3051. code = re.sub('^fix_', '', function.__name__)
  3052. if not code:
  3053. return None
  3054. try:
  3055. int(code[1:])
  3056. except ValueError:
  3057. return None
  3058. return code
  3059. def _get_package_version():
  3060. packages = ["pycodestyle: {}".format(pycodestyle.__version__)]
  3061. return ", ".join(packages)
  3062. def create_parser():
  3063. """Return command-line parser."""
  3064. parser = argparse.ArgumentParser(description=docstring_summary(__doc__),
  3065. prog='autopep8')
  3066. parser.add_argument('--version', action='version',
  3067. version='%(prog)s {} ({})'.format(
  3068. __version__, _get_package_version()))
  3069. parser.add_argument('-v', '--verbose', action='count',
  3070. default=0,
  3071. help='print verbose messages; '
  3072. 'multiple -v result in more verbose messages')
  3073. parser.add_argument('-d', '--diff', action='store_true',
  3074. help='print the diff for the fixed source')
  3075. parser.add_argument('-i', '--in-place', action='store_true',
  3076. help='make changes to files in place')
  3077. parser.add_argument('--global-config', metavar='filename',
  3078. default=DEFAULT_CONFIG,
  3079. help='path to a global pep8 config file; if this file '
  3080. 'does not exist then this is ignored '
  3081. '(default: {})'.format(DEFAULT_CONFIG))
  3082. parser.add_argument('--ignore-local-config', action='store_true',
  3083. help="don't look for and apply local config files; "
  3084. 'if not passed, defaults are updated with any '
  3085. "config files in the project's root directory")
  3086. parser.add_argument('-r', '--recursive', action='store_true',
  3087. help='run recursively over directories; '
  3088. 'must be used with --in-place or --diff')
  3089. parser.add_argument('-j', '--jobs', type=int, metavar='n', default=1,
  3090. help='number of parallel jobs; '
  3091. 'match CPU count if value is less than 1')
  3092. parser.add_argument('-p', '--pep8-passes', metavar='n',
  3093. default=-1, type=int,
  3094. help='maximum number of additional pep8 passes '
  3095. '(default: infinite)')
  3096. parser.add_argument('-a', '--aggressive', action='count', default=0,
  3097. help='enable non-whitespace changes; '
  3098. 'multiple -a result in more aggressive changes')
  3099. parser.add_argument('--experimental', action='store_true',
  3100. help='enable experimental fixes')
  3101. parser.add_argument('--exclude', metavar='globs',
  3102. help='exclude file/directory names that match these '
  3103. 'comma-separated globs')
  3104. parser.add_argument('--list-fixes', action='store_true',
  3105. help='list codes for fixes; '
  3106. 'used by --ignore and --select')
  3107. parser.add_argument('--ignore', metavar='errors', default='',
  3108. help='do not fix these errors/warnings '
  3109. '(default: {})'.format(DEFAULT_IGNORE))
  3110. parser.add_argument('--select', metavar='errors', default='',
  3111. help='fix only these errors/warnings (e.g. E4,W)')
  3112. parser.add_argument('--max-line-length', metavar='n', default=79, type=int,
  3113. help='set maximum allowed line length '
  3114. '(default: %(default)s)')
  3115. parser.add_argument('--line-range', '--range', metavar='line',
  3116. default=None, type=int, nargs=2,
  3117. help='only fix errors found within this inclusive '
  3118. 'range of line numbers (e.g. 1 99); '
  3119. 'line numbers are indexed at 1')
  3120. parser.add_argument('--indent-size', default=DEFAULT_INDENT_SIZE,
  3121. type=int, help=argparse.SUPPRESS)
  3122. parser.add_argument('--hang-closing', action='store_true',
  3123. help='hang-closing option passed to pycodestyle')
  3124. parser.add_argument('--exit-code', action='store_true',
  3125. help='change to behavior of exit code.'
  3126. ' default behavior of return value, 0 is no '
  3127. 'differences, 1 is error exit. return 2 when'
  3128. ' add this option. 2 is exists differences.')
  3129. parser.add_argument('files', nargs='*',
  3130. help="files to format or '-' for standard in")
  3131. return parser
  3132. def _expand_codes(codes, ignore_codes):
  3133. """expand to individual E/W codes"""
  3134. ret = set()
  3135. is_conflict = False
  3136. if all(
  3137. any(
  3138. conflicting_code.startswith(code)
  3139. for code in codes
  3140. )
  3141. for conflicting_code in CONFLICTING_CODES
  3142. ):
  3143. is_conflict = True
  3144. is_ignore_w503 = "W503" in ignore_codes
  3145. is_ignore_w504 = "W504" in ignore_codes
  3146. for code in codes:
  3147. if code == "W":
  3148. if is_ignore_w503 and is_ignore_w504:
  3149. ret.update({"W1", "W2", "W3", "W505", "W6"})
  3150. elif is_ignore_w503:
  3151. ret.update({"W1", "W2", "W3", "W504", "W505", "W6"})
  3152. else:
  3153. ret.update({"W1", "W2", "W3", "W503", "W505", "W6"})
  3154. elif code in ("W5", "W50"):
  3155. if is_ignore_w503 and is_ignore_w504:
  3156. ret.update({"W505"})
  3157. elif is_ignore_w503:
  3158. ret.update({"W504", "W505"})
  3159. else:
  3160. ret.update({"W503", "W505"})
  3161. elif not (code in ("W503", "W504") and is_conflict):
  3162. ret.add(code)
  3163. return ret
  3164. def _parser_error_with_code(
  3165. parser: argparse.ArgumentParser, code: int, msg: str,
  3166. ) -> None:
  3167. """wrap parser.error with exit code"""
  3168. parser.print_usage(sys.stderr)
  3169. parser.exit(code, f"{msg}\n")
  3170. def parse_args(arguments, apply_config=False):
  3171. """Parse command-line options."""
  3172. parser = create_parser()
  3173. args = parser.parse_args(arguments)
  3174. if not args.files and not args.list_fixes:
  3175. _parser_error_with_code(
  3176. parser, EXIT_CODE_ARGPARSE_ERROR, 'incorrect number of arguments',
  3177. )
  3178. args.files = [decode_filename(name) for name in args.files]
  3179. if apply_config:
  3180. parser = read_config(args, parser)
  3181. # prioritize settings when exist pyproject.toml's tool.autopep8 section
  3182. try:
  3183. parser_with_pyproject_toml = read_pyproject_toml(args, parser)
  3184. except Exception:
  3185. parser_with_pyproject_toml = None
  3186. if parser_with_pyproject_toml:
  3187. parser = parser_with_pyproject_toml
  3188. args = parser.parse_args(arguments)
  3189. args.files = [decode_filename(name) for name in args.files]
  3190. if '-' in args.files:
  3191. if len(args.files) > 1:
  3192. _parser_error_with_code(
  3193. parser,
  3194. EXIT_CODE_ARGPARSE_ERROR,
  3195. 'cannot mix stdin and regular files',
  3196. )
  3197. if args.diff:
  3198. _parser_error_with_code(
  3199. parser,
  3200. EXIT_CODE_ARGPARSE_ERROR,
  3201. '--diff cannot be used with standard input',
  3202. )
  3203. if args.in_place:
  3204. _parser_error_with_code(
  3205. parser,
  3206. EXIT_CODE_ARGPARSE_ERROR,
  3207. '--in-place cannot be used with standard input',
  3208. )
  3209. if args.recursive:
  3210. _parser_error_with_code(
  3211. parser,
  3212. EXIT_CODE_ARGPARSE_ERROR,
  3213. '--recursive cannot be used with standard input',
  3214. )
  3215. if len(args.files) > 1 and not (args.in_place or args.diff):
  3216. _parser_error_with_code(
  3217. parser,
  3218. EXIT_CODE_ARGPARSE_ERROR,
  3219. 'autopep8 only takes one filename as argument '
  3220. 'unless the "--in-place" or "--diff" args are used',
  3221. )
  3222. if args.recursive and not (args.in_place or args.diff):
  3223. _parser_error_with_code(
  3224. parser,
  3225. EXIT_CODE_ARGPARSE_ERROR,
  3226. '--recursive must be used with --in-place or --diff',
  3227. )
  3228. if args.in_place and args.diff:
  3229. _parser_error_with_code(
  3230. parser,
  3231. EXIT_CODE_ARGPARSE_ERROR,
  3232. '--in-place and --diff are mutually exclusive',
  3233. )
  3234. if args.max_line_length <= 0:
  3235. _parser_error_with_code(
  3236. parser,
  3237. EXIT_CODE_ARGPARSE_ERROR,
  3238. '--max-line-length must be greater than 0',
  3239. )
  3240. if args.indent_size <= 0:
  3241. _parser_error_with_code(
  3242. parser,
  3243. EXIT_CODE_ARGPARSE_ERROR,
  3244. '--indent-size must be greater than 0',
  3245. )
  3246. if args.select:
  3247. args.select = _expand_codes(
  3248. _split_comma_separated(args.select),
  3249. (_split_comma_separated(args.ignore) if args.ignore else [])
  3250. )
  3251. if args.ignore:
  3252. args.ignore = _split_comma_separated(args.ignore)
  3253. if all(
  3254. not any(
  3255. conflicting_code.startswith(ignore_code)
  3256. for ignore_code in args.ignore
  3257. )
  3258. for conflicting_code in CONFLICTING_CODES
  3259. ):
  3260. args.ignore.update(CONFLICTING_CODES)
  3261. elif not args.select:
  3262. if args.aggressive:
  3263. # Enable everything by default if aggressive.
  3264. args.select = {'E', 'W1', 'W2', 'W3', 'W6'}
  3265. else:
  3266. args.ignore = _split_comma_separated(DEFAULT_IGNORE)
  3267. if args.exclude:
  3268. args.exclude = _split_comma_separated(args.exclude)
  3269. else:
  3270. args.exclude = {}
  3271. if args.jobs < 1:
  3272. # Do not import multiprocessing globally in case it is not supported
  3273. # on the platform.
  3274. import multiprocessing
  3275. args.jobs = multiprocessing.cpu_count()
  3276. if args.jobs > 1 and not (args.in_place or args.diff):
  3277. _parser_error_with_code(
  3278. parser,
  3279. EXIT_CODE_ARGPARSE_ERROR,
  3280. 'parallel jobs requires --in-place',
  3281. )
  3282. if args.line_range:
  3283. if args.line_range[0] <= 0:
  3284. _parser_error_with_code(
  3285. parser,
  3286. EXIT_CODE_ARGPARSE_ERROR,
  3287. '--range must be positive numbers',
  3288. )
  3289. if args.line_range[0] > args.line_range[1]:
  3290. _parser_error_with_code(
  3291. parser,
  3292. EXIT_CODE_ARGPARSE_ERROR,
  3293. 'First value of --range should be less than or equal '
  3294. 'to the second',
  3295. )
  3296. original_formatwarning = warnings.formatwarning
  3297. warnings.formatwarning = _custom_formatwarning
  3298. if args.experimental:
  3299. warnings.warn(
  3300. "`experimental` option is deprecated and will be "
  3301. "removed in a future version.",
  3302. DeprecationWarning,
  3303. )
  3304. warnings.formatwarning = original_formatwarning
  3305. return args
  3306. def _get_normalize_options(args, config, section, option_list):
  3307. for (k, v) in config.items(section):
  3308. norm_opt = k.lstrip('-').replace('-', '_')
  3309. if not option_list.get(norm_opt):
  3310. continue
  3311. opt_type = option_list[norm_opt]
  3312. if opt_type is int:
  3313. if v.strip() == "auto":
  3314. # skip to special case
  3315. if args.verbose:
  3316. print(f"ignore config: {k}={v}")
  3317. continue
  3318. value = config.getint(section, k)
  3319. elif opt_type is bool:
  3320. value = config.getboolean(section, k)
  3321. else:
  3322. value = config.get(section, k)
  3323. yield norm_opt, k, value
  3324. def read_config(args, parser):
  3325. """Read both user configuration and local configuration."""
  3326. config = SafeConfigParser()
  3327. try:
  3328. if args.verbose and os.path.exists(args.global_config):
  3329. print("read config path: {}".format(args.global_config))
  3330. config.read(args.global_config)
  3331. if not args.ignore_local_config:
  3332. parent = tail = args.files and os.path.abspath(
  3333. os.path.commonprefix(args.files))
  3334. while tail:
  3335. if config.read([os.path.join(parent, fn)
  3336. for fn in PROJECT_CONFIG]):
  3337. if args.verbose:
  3338. for fn in PROJECT_CONFIG:
  3339. config_file = os.path.join(parent, fn)
  3340. if not os.path.exists(config_file):
  3341. continue
  3342. print(
  3343. "read config path: {}".format(
  3344. os.path.join(parent, fn)
  3345. )
  3346. )
  3347. break
  3348. (parent, tail) = os.path.split(parent)
  3349. defaults = {}
  3350. option_list = {o.dest: o.type or type(o.default)
  3351. for o in parser._actions}
  3352. for section in ['pep8', 'pycodestyle', 'flake8']:
  3353. if not config.has_section(section):
  3354. continue
  3355. for norm_opt, k, value in _get_normalize_options(
  3356. args, config, section, option_list
  3357. ):
  3358. if args.verbose:
  3359. print("enable config: section={}, key={}, value={}".format(
  3360. section, k, value))
  3361. defaults[norm_opt] = value
  3362. parser.set_defaults(**defaults)
  3363. except Error:
  3364. # Ignore for now.
  3365. pass
  3366. return parser
  3367. def read_pyproject_toml(args, parser):
  3368. """Read pyproject.toml and load configuration."""
  3369. if sys.version_info >= (3, 11):
  3370. import tomllib
  3371. else:
  3372. import tomli as tomllib
  3373. config = None
  3374. if os.path.exists(args.global_config):
  3375. with open(args.global_config, "rb") as fp:
  3376. config = tomllib.load(fp)
  3377. if not args.ignore_local_config:
  3378. parent = tail = args.files and os.path.abspath(
  3379. os.path.commonprefix(args.files))
  3380. while tail:
  3381. pyproject_toml = os.path.join(parent, "pyproject.toml")
  3382. if os.path.exists(pyproject_toml):
  3383. with open(pyproject_toml, "rb") as fp:
  3384. config = tomllib.load(fp)
  3385. break
  3386. (parent, tail) = os.path.split(parent)
  3387. if not config:
  3388. return None
  3389. if config.get("tool", {}).get("autopep8") is None:
  3390. return None
  3391. config = config.get("tool", {}).get("autopep8")
  3392. defaults = {}
  3393. option_list = {o.dest: o.type or type(o.default)
  3394. for o in parser._actions}
  3395. TUPLED_OPTIONS = ("ignore", "select")
  3396. for (k, v) in config.items():
  3397. norm_opt = k.lstrip('-').replace('-', '_')
  3398. if not option_list.get(norm_opt):
  3399. continue
  3400. if type(v) in (list, tuple) and norm_opt in TUPLED_OPTIONS:
  3401. value = ",".join(v)
  3402. else:
  3403. value = v
  3404. if args.verbose:
  3405. print("enable pyproject.toml config: "
  3406. "key={}, value={}".format(k, value))
  3407. defaults[norm_opt] = value
  3408. if defaults:
  3409. # set value when exists key-value in defaults dict
  3410. parser.set_defaults(**defaults)
  3411. return parser
  3412. def _split_comma_separated(string):
  3413. """Return a set of strings."""
  3414. return {text.strip() for text in string.split(',') if text.strip()}
  3415. def decode_filename(filename):
  3416. """Return Unicode filename."""
  3417. if isinstance(filename, str):
  3418. return filename
  3419. return filename.decode(sys.getfilesystemencoding())
  3420. def supported_fixes():
  3421. """Yield pep8 error codes that autopep8 fixes.
  3422. Each item we yield is a tuple of the code followed by its
  3423. description.
  3424. """
  3425. yield ('E101', docstring_summary(reindent.__doc__))
  3426. instance = FixPEP8(filename=None, options=None, contents='')
  3427. for attribute in dir(instance):
  3428. code = re.match('fix_([ew][0-9][0-9][0-9])', attribute)
  3429. if code:
  3430. yield (
  3431. code.group(1).upper(),
  3432. re.sub(r'\s+', ' ',
  3433. docstring_summary(getattr(instance, attribute).__doc__))
  3434. )
  3435. for (code, function) in sorted(global_fixes()):
  3436. yield (code.upper() + (4 - len(code)) * ' ',
  3437. re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))
  3438. def docstring_summary(docstring):
  3439. """Return summary of docstring."""
  3440. return docstring.split('\n')[0] if docstring else ''
  3441. def line_shortening_rank(candidate, indent_word, max_line_length,
  3442. experimental=False):
  3443. """Return rank of candidate.
  3444. This is for sorting candidates.
  3445. """
  3446. if not candidate.strip():
  3447. return 0
  3448. rank = 0
  3449. lines = candidate.rstrip().split('\n')
  3450. offset = 0
  3451. if (
  3452. not lines[0].lstrip().startswith('#') and
  3453. lines[0].rstrip()[-1] not in '([{'
  3454. ):
  3455. for (opening, closing) in ('()', '[]', '{}'):
  3456. # Don't penalize empty containers that aren't split up. Things like
  3457. # this "foo(\n )" aren't particularly good.
  3458. opening_loc = lines[0].find(opening)
  3459. closing_loc = lines[0].find(closing)
  3460. if opening_loc >= 0:
  3461. if closing_loc < 0 or closing_loc != opening_loc + 1:
  3462. offset = max(offset, 1 + opening_loc)
  3463. current_longest = max(offset + len(x.strip()) for x in lines)
  3464. rank += 4 * max(0, current_longest - max_line_length)
  3465. rank += len(lines)
  3466. # Too much variation in line length is ugly.
  3467. rank += 2 * standard_deviation(len(line) for line in lines)
  3468. bad_staring_symbol = {
  3469. '(': ')',
  3470. '[': ']',
  3471. '{': '}'}.get(lines[0][-1])
  3472. if len(lines) > 1:
  3473. if (
  3474. bad_staring_symbol and
  3475. lines[1].lstrip().startswith(bad_staring_symbol)
  3476. ):
  3477. rank += 20
  3478. for lineno, current_line in enumerate(lines):
  3479. current_line = current_line.strip()
  3480. if current_line.startswith('#'):
  3481. continue
  3482. for bad_start in ['.', '%', '+', '-', '/']:
  3483. if current_line.startswith(bad_start):
  3484. rank += 100
  3485. # Do not tolerate operators on their own line.
  3486. if current_line == bad_start:
  3487. rank += 1000
  3488. if (
  3489. current_line.endswith(('.', '%', '+', '-', '/')) and
  3490. "': " in current_line
  3491. ):
  3492. rank += 1000
  3493. if current_line.endswith(('(', '[', '{', '.')):
  3494. # Avoid lonely opening. They result in longer lines.
  3495. if len(current_line) <= len(indent_word):
  3496. rank += 100
  3497. # Avoid the ugliness of ", (\n".
  3498. if (
  3499. current_line.endswith('(') and
  3500. current_line[:-1].rstrip().endswith(',')
  3501. ):
  3502. rank += 100
  3503. # Avoid the ugliness of "something[\n" and something[index][\n.
  3504. if (
  3505. current_line.endswith('[') and
  3506. len(current_line) > 1 and
  3507. (current_line[-2].isalnum() or current_line[-2] in ']')
  3508. ):
  3509. rank += 300
  3510. # Also avoid the ugliness of "foo.\nbar"
  3511. if current_line.endswith('.'):
  3512. rank += 100
  3513. if has_arithmetic_operator(current_line):
  3514. rank += 100
  3515. # Avoid breaking at unary operators.
  3516. if re.match(r'.*[(\[{]\s*[\-\+~]$', current_line.rstrip('\\ ')):
  3517. rank += 1000
  3518. if re.match(r'.*lambda\s*\*$', current_line.rstrip('\\ ')):
  3519. rank += 1000
  3520. if current_line.endswith(('%', '(', '[', '{')):
  3521. rank -= 20
  3522. # Try to break list comprehensions at the "for".
  3523. if current_line.startswith('for '):
  3524. rank -= 50
  3525. if current_line.endswith('\\'):
  3526. # If a line ends in \-newline, it may be part of a
  3527. # multiline string. In that case, we would like to know
  3528. # how long that line is without the \-newline. If it's
  3529. # longer than the maximum, or has comments, then we assume
  3530. # that the \-newline is an okay candidate and only
  3531. # penalize it a bit.
  3532. total_len = len(current_line)
  3533. lineno += 1
  3534. while lineno < len(lines):
  3535. total_len += len(lines[lineno])
  3536. if lines[lineno].lstrip().startswith('#'):
  3537. total_len = max_line_length
  3538. break
  3539. if not lines[lineno].endswith('\\'):
  3540. break
  3541. lineno += 1
  3542. if total_len < max_line_length:
  3543. rank += 10
  3544. else:
  3545. rank += 100 if experimental else 1
  3546. # Prefer breaking at commas rather than colon.
  3547. if ',' in current_line and current_line.endswith(':'):
  3548. rank += 10
  3549. # Avoid splitting dictionaries between key and value.
  3550. if current_line.endswith(':'):
  3551. rank += 100
  3552. rank += 10 * count_unbalanced_brackets(current_line)
  3553. return max(0, rank)
  3554. def standard_deviation(numbers):
  3555. """Return standard deviation."""
  3556. numbers = list(numbers)
  3557. if not numbers:
  3558. return 0
  3559. mean = sum(numbers) / len(numbers)
  3560. return (sum((n - mean) ** 2 for n in numbers) /
  3561. len(numbers)) ** .5
  3562. def has_arithmetic_operator(line):
  3563. """Return True if line contains any arithmetic operators."""
  3564. for operator in pycodestyle.ARITHMETIC_OP:
  3565. if operator in line:
  3566. return True
  3567. return False
  3568. def count_unbalanced_brackets(line):
  3569. """Return number of unmatched open/close brackets."""
  3570. count = 0
  3571. for opening, closing in ['()', '[]', '{}']:
  3572. count += abs(line.count(opening) - line.count(closing))
  3573. return count
  3574. def split_at_offsets(line, offsets):
  3575. """Split line at offsets.
  3576. Return list of strings.
  3577. """
  3578. result = []
  3579. previous_offset = 0
  3580. current_offset = 0
  3581. for current_offset in sorted(offsets):
  3582. if current_offset < len(line) and previous_offset != current_offset:
  3583. result.append(line[previous_offset:current_offset].strip())
  3584. previous_offset = current_offset
  3585. result.append(line[current_offset:])
  3586. return result
  3587. class LineEndingWrapper(object):
  3588. r"""Replace line endings to work with sys.stdout.
  3589. It seems that sys.stdout expects only '\n' as the line ending, no matter
  3590. the platform. Otherwise, we get repeated line endings.
  3591. """
  3592. def __init__(self, output):
  3593. self.__output = output
  3594. def write(self, s):
  3595. self.__output.write(s.replace('\r\n', '\n').replace('\r', '\n'))
  3596. def flush(self):
  3597. self.__output.flush()
  3598. def match_file(filename, exclude):
  3599. """Return True if file is okay for modifying/recursing."""
  3600. base_name = os.path.basename(filename)
  3601. if base_name.startswith('.'):
  3602. return False
  3603. for pattern in exclude:
  3604. if fnmatch.fnmatch(base_name, pattern):
  3605. return False
  3606. if fnmatch.fnmatch(filename, pattern):
  3607. return False
  3608. if not os.path.isdir(filename) and not is_python_file(filename):
  3609. return False
  3610. return True
  3611. def find_files(filenames, recursive, exclude):
  3612. """Yield filenames."""
  3613. while filenames:
  3614. name = filenames.pop(0)
  3615. if recursive and os.path.isdir(name):
  3616. for root, directories, children in os.walk(name):
  3617. filenames += [os.path.join(root, f) for f in children
  3618. if match_file(os.path.join(root, f),
  3619. exclude)]
  3620. directories[:] = [d for d in directories
  3621. if match_file(os.path.join(root, d),
  3622. exclude)]
  3623. else:
  3624. is_exclude_match = False
  3625. for pattern in exclude:
  3626. if fnmatch.fnmatch(name, pattern):
  3627. is_exclude_match = True
  3628. break
  3629. if not is_exclude_match:
  3630. yield name
  3631. def _fix_file(parameters):
  3632. """Helper function for optionally running fix_file() in parallel."""
  3633. if parameters[1].verbose:
  3634. print('[file:{}]'.format(parameters[0]), file=sys.stderr)
  3635. try:
  3636. return fix_file(*parameters)
  3637. except IOError as error:
  3638. print(str(error), file=sys.stderr)
  3639. raise error
  3640. def fix_multiple_files(filenames, options, output=None):
  3641. """Fix list of files.
  3642. Optionally fix files recursively.
  3643. """
  3644. results = []
  3645. filenames = find_files(filenames, options.recursive, options.exclude)
  3646. if options.jobs > 1:
  3647. import multiprocessing
  3648. pool = multiprocessing.Pool(options.jobs)
  3649. rets = []
  3650. for name in filenames:
  3651. ret = pool.apply_async(_fix_file, ((name, options),))
  3652. rets.append(ret)
  3653. pool.close()
  3654. pool.join()
  3655. if options.diff:
  3656. for r in rets:
  3657. sys.stdout.write(r.get().decode())
  3658. sys.stdout.flush()
  3659. results.extend([x.get() for x in rets if x is not None])
  3660. else:
  3661. for name in filenames:
  3662. ret = _fix_file((name, options, output))
  3663. if ret is None:
  3664. continue
  3665. if options.diff:
  3666. if ret != '':
  3667. results.append(ret)
  3668. elif options.in_place:
  3669. results.append(ret)
  3670. else:
  3671. original_source = readlines_from_file(name)
  3672. if "".join(original_source).splitlines() != ret.splitlines():
  3673. results.append(ret)
  3674. return results
  3675. def is_python_file(filename):
  3676. """Return True if filename is Python file."""
  3677. if filename.endswith('.py'):
  3678. return True
  3679. try:
  3680. with open_with_encoding(
  3681. filename,
  3682. limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES) as f:
  3683. text = f.read(MAX_PYTHON_FILE_DETECTION_BYTES)
  3684. if not text:
  3685. return False
  3686. first_line = text.splitlines()[0]
  3687. except (IOError, IndexError):
  3688. return False
  3689. if not PYTHON_SHEBANG_REGEX.match(first_line):
  3690. return False
  3691. return True
  3692. def is_probably_part_of_multiline(line):
  3693. """Return True if line is likely part of a multiline string.
  3694. When multiline strings are involved, pep8 reports the error as being
  3695. at the start of the multiline string, which doesn't work for us.
  3696. """
  3697. return (
  3698. '"""' in line or
  3699. "'''" in line or
  3700. line.rstrip().endswith('\\')
  3701. )
  3702. def wrap_output(output, encoding):
  3703. """Return output with specified encoding."""
  3704. return codecs.getwriter(encoding)(output.buffer
  3705. if hasattr(output, 'buffer')
  3706. else output)
  3707. def get_encoding():
  3708. """Return preferred encoding."""
  3709. return locale.getpreferredencoding() or sys.getdefaultencoding()
  3710. def main(argv=None, apply_config=True):
  3711. """Command-line entry."""
  3712. if argv is None:
  3713. argv = sys.argv
  3714. try:
  3715. # Exit on broken pipe.
  3716. signal.signal(signal.SIGPIPE, signal.SIG_DFL)
  3717. except AttributeError: # pragma: no cover
  3718. # SIGPIPE is not available on Windows.
  3719. pass
  3720. try:
  3721. args = parse_args(argv[1:], apply_config=apply_config)
  3722. if args.list_fixes:
  3723. for code, description in sorted(supported_fixes()):
  3724. print('{code} - {description}'.format(
  3725. code=code, description=description))
  3726. return EXIT_CODE_OK
  3727. if args.files == ['-']:
  3728. assert not args.in_place
  3729. encoding = sys.stdin.encoding or get_encoding()
  3730. read_stdin = sys.stdin.read()
  3731. fixed_stdin = fix_code(read_stdin, args, encoding=encoding)
  3732. # LineEndingWrapper is unnecessary here due to the symmetry between
  3733. # standard in and standard out.
  3734. wrap_output(sys.stdout, encoding=encoding).write(fixed_stdin)
  3735. if hash(read_stdin) != hash(fixed_stdin):
  3736. if args.exit_code:
  3737. return EXIT_CODE_EXISTS_DIFF
  3738. else:
  3739. if args.in_place or args.diff:
  3740. args.files = list(set(args.files))
  3741. else:
  3742. assert len(args.files) == 1
  3743. assert not args.recursive
  3744. results = fix_multiple_files(args.files, args, sys.stdout)
  3745. if args.diff:
  3746. ret = any([len(ret) != 0 for ret in results])
  3747. else:
  3748. # with in-place option
  3749. ret = any([ret is not None for ret in results])
  3750. if args.exit_code and ret:
  3751. return EXIT_CODE_EXISTS_DIFF
  3752. except IOError:
  3753. return EXIT_CODE_ERROR
  3754. except KeyboardInterrupt:
  3755. return EXIT_CODE_ERROR # pragma: no cover
  3756. class CachedTokenizer(object):
  3757. """A one-element cache around tokenize.generate_tokens().
  3758. Original code written by Ned Batchelder, in coverage.py.
  3759. """
  3760. def __init__(self):
  3761. self.last_text = None
  3762. self.last_tokens = None
  3763. def generate_tokens(self, text):
  3764. """A stand-in for tokenize.generate_tokens()."""
  3765. if text != self.last_text:
  3766. string_io = io.StringIO(text)
  3767. self.last_tokens = list(
  3768. tokenize.generate_tokens(string_io.readline)
  3769. )
  3770. self.last_text = text
  3771. return self.last_tokens
# Module-wide tokenizer instance.  Its one-entry cache means the same source
# text requested twice in a row is tokenized only once.
_cached_tokenizer = CachedTokenizer()
# Module-level alias for the shared instance's bound method; it takes the
# source text itself rather than a readline callable.
generate_tokens = _cached_tokenizer.generate_tokens
# Run the command-line interface when executed as a script; the value
# returned by main() is passed to sys.exit() as the exit status.
if __name__ == '__main__':
    sys.exit(main())