| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720 |
- # -*- coding: utf-8 -*-
- # Version 0.9.0, commit bf58e37e6b35e3cc9a0bd740f752abfd32b6e6f8
- """Pretty-print tabular data."""
- from collections import namedtuple
- from collections.abc import Iterable, Sized
- from html import escape as htmlescape
- from itertools import chain, zip_longest as izip_longest
- from functools import reduce, partial
- import io
- import re
- import math
- import textwrap
- import dataclasses
- try:
- import wcwidth # optional wide-character (CJK) support
- except ImportError:
- wcwidth = None
- def _is_file(f):
- return isinstance(f, io.IOBase)
# Names exported by ``from <module> import *``.
__all__ = ["tabulate", "tabulate_formats", "simple_separated_format"]

try:
    from .version import version as __version__  # noqa: F401
except ImportError:
    # No importable version module (running __init__.py as a script,
    # AppVeyor pytests): __version__ is simply left undefined.
    pass

# Minimum extra space in headers.
MIN_PADDING = 2

# Whether or not to preserve leading/trailing whitespace in data.
PRESERVE_WHITESPACE = False

_DEFAULT_FLOATFMT = "g"
_DEFAULT_INTFMT = ""
_DEFAULT_MISSINGVAL = ""

# Placeholder alignment: it will be overwritten by "left", "center" or
# "decimal" depending on the formatter.
_DEFAULT_ALIGN = "default"

# If True, enable wide-character (CJK) support; on exactly when the optional
# wcwidth import succeeded.
WIDE_CHARS_MODE = wcwidth is not None

# Constant that can be used as part of passed rows to generate a separating
# line. It is purposely an unprintable character, very unlikely to be used in
# a table.
SEPARATING_LINE = "\x01"

# Pieces of a horizontal rule and of a row of cells, respectively.
Line = namedtuple("Line", "begin hline sep end")
DataRow = namedtuple("DataRow", "begin sep end")
- # A table structure is supposed to be:
- #
- # --- lineabove ---------
- # headerrow
- # --- linebelowheader ---
- # datarow
- # --- linebetweenrows ---
- # ... (more datarows) ...
- # --- linebetweenrows ---
- # last datarow
- # --- linebelow ---------
- #
- # TableFormat's line* elements can be
- #
- # - either None, if the element is not used,
- # - or a Line tuple,
- # - or a function: [col_widths], [col_alignments] -> string.
- #
- # TableFormat's *row elements can be
- #
- # - either None, if the element is not used,
- # - or a DataRow tuple,
- # - or a function: [cell_values], [col_widths], [col_alignments] -> string.
- #
- # padding (an integer) is the amount of white space around data values.
- #
- # with_header_hide:
- #
- # - either None, to display all table elements unconditionally,
- # - or a list of elements not to be displayed if the table has column headers.
- #
# Field order matters: formats may be built positionally.
TableFormat = namedtuple(
    "TableFormat",
    "lineabove linebelowheader linebetweenrows linebelow "
    "headerrow datarow padding with_header_hide",
)
def _is_separating_line(row):
    """Tell whether *row* marks a separating line.

    Only objects whose exact type is ``list`` or ``str`` qualify, and then
    only when their first or second item equals ``SEPARATING_LINE``.
    """
    kind = type(row)
    if not (kind == list or kind == str):
        return False
    size = len(row)
    return (size >= 1 and row[0] == SEPARATING_LINE) or (
        size >= 2 and row[1] == SEPARATING_LINE
    )
- def _pipe_segment_with_colons(align, colwidth):
- """Return a segment of a horizontal line with optional colons which
- indicate column's alignment (as in `pipe` output format)."""
- w = colwidth
- if align in ["right", "decimal"]:
- return ("-" * (w - 1)) + ":"
- elif align == "center":
- return ":" + ("-" * (w - 2)) + ":"
- elif align == "left":
- return ":" + ("-" * (w - 1))
- else:
- return "-" * w
def _pipe_line_with_colons(colwidths, colaligns):
    """Build a full `pipe`-format horizontal line.

    Each column contributes a dash segment carrying optional alignment
    colons; segments are separated and enclosed by ``|``.
    """
    # colaligns may be empty, e.g. when printing an empty data frame
    # (github issue #15); fall back to plain dashes for every column.
    aligns = colaligns if colaligns else [""] * len(colwidths)
    pieces = (
        _pipe_segment_with_colons(align, width)
        for align, width in zip(aligns, colwidths)
    )
    return "|{}|".format("|".join(pieces))
- def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns):
- alignment = {
- "left": "",
- "right": 'align="right"| ',
- "center": 'align="center"| ',
- "decimal": 'align="right"| ',
- }
- # hard-coded padding _around_ align attribute and value together
- # rather than padding parameter which affects only the value
- values_with_attrs = [
- " " + alignment.get(a, "") + c + " " for c, a in zip(cell_values, colaligns)
- ]
- colsep = separator * 2
- return (separator + colsep.join(values_with_attrs)).rstrip()
- def _textile_row_with_attrs(cell_values, colwidths, colaligns):
- cell_values[0] += " "
- alignment = {"left": "<.", "right": ">.", "center": "=.", "decimal": ">."}
- values = (alignment.get(a, "") + v for a, v in zip(colaligns, cell_values))
- return "|" + "|".join(values) + "|"
- def _html_begin_table_without_header(colwidths_ignore, colaligns_ignore):
- # this table header will be suppressed if there is a header row
- return "<table>\n<tbody>"
- def _html_row_with_attrs(celltag, unsafe, cell_values, colwidths, colaligns):
- alignment = {
- "left": "",
- "right": ' style="text-align: right;"',
- "center": ' style="text-align: center;"',
- "decimal": ' style="text-align: right;"',
- }
- if unsafe:
- values_with_attrs = [
- "<{0}{1}>{2}</{0}>".format(celltag, alignment.get(a, ""), c)
- for c, a in zip(cell_values, colaligns)
- ]
- else:
- values_with_attrs = [
- "<{0}{1}>{2}</{0}>".format(celltag, alignment.get(a, ""), htmlescape(c))
- for c, a in zip(cell_values, colaligns)
- ]
- rowhtml = "<tr>{}</tr>".format("".join(values_with_attrs).rstrip())
- if celltag == "th": # it's a header row, create a new table header
- rowhtml = f"<table>\n<thead>\n{rowhtml}\n</thead>\n<tbody>"
- return rowhtml
- def _moin_row_with_attrs(celltag, cell_values, colwidths, colaligns, header=""):
- alignment = {
- "left": "",
- "right": '<style="text-align: right;">',
- "center": '<style="text-align: center;">',
- "decimal": '<style="text-align: right;">',
- }
- values_with_attrs = [
- "{}{} {} ".format(celltag, alignment.get(a, ""), header + c + header)
- for c, a in zip(cell_values, colaligns)
- ]
- return "".join(values_with_attrs) + "||"
- def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False, longtable=False):
- alignment = {"left": "l", "right": "r", "center": "c", "decimal": "r"}
- tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns])
- return "\n".join(
- [
- ("\\begin{tabular}{" if not longtable else "\\begin{longtable}{")
- + tabular_columns_fmt
- + "}",
- "\\toprule" if booktabs else "\\hline",
- ]
- )
- def _asciidoc_row(is_header, *args):
- """handle header and data rows for asciidoc format"""
- def make_header_line(is_header, colwidths, colaligns):
- # generate the column specifiers
- alignment = {"left": "<", "right": ">", "center": "^", "decimal": ">"}
- # use the column widths generated by tabulate for the asciidoc column width specifiers
- asciidoc_alignments = zip(
- colwidths, [alignment[colalign] for colalign in colaligns]
- )
- asciidoc_column_specifiers = [
- "{:d}{}".format(width, align) for width, align in asciidoc_alignments
- ]
- header_list = ['cols="' + (",".join(asciidoc_column_specifiers)) + '"']
- # generate the list of options (currently only "header")
- options_list = []
- if is_header:
- options_list.append("header")
- if options_list:
- header_list += ['options="' + ",".join(options_list) + '"']
- # generate the list of entries in the table header field
- return "[{}]\n|====".format(",".join(header_list))
- if len(args) == 2:
- # two arguments are passed if called in the context of aboveline
- # print the table header with column widths and optional header tag
- return make_header_line(False, *args)
- elif len(args) == 3:
- # three arguments are passed if called in the context of dataline or headerline
- # print the table line and make the aboveline if it is a header
- cell_values, colwidths, colaligns = args
- data_line = "|" + "|".join(cell_values)
- if is_header:
- return make_header_line(True, colwidths, colaligns) + "\n" + data_line
- else:
- return data_line
- else:
- raise ValueError(
- " _asciidoc_row() requires two (colwidths, colaligns) "
- + "or three (cell_values, colwidths, colaligns) arguments) "
- )
# Maps each character that must be escaped in LaTeX to its replacement text.
LATEX_ESCAPE_RULES = {
    "&": "\\&",
    "%": "\\%",
    "$": "\\$",
    "#": "\\#",
    "_": "\\_",
    "^": "\\^{}",
    "{": "\\{",
    "}": "\\}",
    "~": "\\textasciitilde{}",
    "\\": "\\textbackslash{}",
    "<": "\\ensuremath{<}",
    ">": "\\ensuremath{>}",
}
def _latex_row(cell_values, colwidths, colaligns, escrules=LATEX_ESCAPE_RULES):
    """Render one LaTeX table row.

    Every character of every cell is replaced according to *escrules*
    (characters without a rule are kept as they are); the cells are then
    joined with ``&`` and the row is terminated with ``\\\\``.
    """
    escaped_cells = [
        "".join(escrules.get(char, char) for char in cell) for cell in cell_values
    ]
    return _build_simple_row(escaped_cells, DataRow("", "&", "\\\\"))
- def _rst_escape_first_column(rows, headers):
- def escape_empty(val):
- if isinstance(val, (str, bytes)) and not val.strip():
- return ".."
- else:
- return val
- new_headers = list(headers)
- new_rows = []
- if headers:
- new_headers[0] = escape_empty(headers[0])
- for row in rows:
- new_row = list(row)
- if new_row:
- new_row[0] = escape_empty(row[0])
- new_rows.append(new_row)
- return new_rows, new_headers
# Built-in table formats, keyed by format name.
_table_formats = {
    "simple": TableFormat(
        lineabove=Line("", "-", "  ", ""),
        linebelowheader=Line("", "-", "  ", ""),
        linebetweenrows=None,
        linebelow=Line("", "-", "  ", ""),
        headerrow=DataRow("", "  ", ""),
        datarow=DataRow("", "  ", ""),
        padding=0,
        with_header_hide=["lineabove", "linebelow"],
    ),
    "plain": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("", "  ", ""),
        datarow=DataRow("", "  ", ""),
        padding=0,
        with_header_hide=None,
    ),
    "grid": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("+", "=", "+", "+"),
        linebetweenrows=Line("+", "-", "+", "+"),
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None,
    ),
    "simple_grid": TableFormat(
        lineabove=Line("┌", "─", "┬", "┐"),
        linebelowheader=Line("├", "─", "┼", "┤"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("└", "─", "┴", "┘"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "rounded_grid": TableFormat(
        lineabove=Line("╭", "─", "┬", "╮"),
        linebelowheader=Line("├", "─", "┼", "┤"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("╰", "─", "┴", "╯"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "heavy_grid": TableFormat(
        lineabove=Line("┏", "━", "┳", "┓"),
        linebelowheader=Line("┣", "━", "╋", "┫"),
        linebetweenrows=Line("┣", "━", "╋", "┫"),
        linebelow=Line("┗", "━", "┻", "┛"),
        headerrow=DataRow("┃", "┃", "┃"),
        datarow=DataRow("┃", "┃", "┃"),
        padding=1,
        with_header_hide=None,
    ),
    "mixed_grid": TableFormat(
        lineabove=Line("┍", "━", "┯", "┑"),
        linebelowheader=Line("┝", "━", "┿", "┥"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("┕", "━", "┷", "┙"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
    "double_grid": TableFormat(
        lineabove=Line("╔", "═", "╦", "╗"),
        linebelowheader=Line("╠", "═", "╬", "╣"),
        linebetweenrows=Line("╠", "═", "╬", "╣"),
        linebelow=Line("╚", "═", "╩", "╝"),
        headerrow=DataRow("║", "║", "║"),
        datarow=DataRow("║", "║", "║"),
        padding=1,
        with_header_hide=None,
    ),
    "fancy_grid": TableFormat(
        lineabove=Line("╒", "═", "╤", "╕"),
        linebelowheader=Line("╞", "═", "╪", "╡"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("╘", "═", "╧", "╛"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None,
    ),
}

# Each "*outline" format is its "*grid" counterpart without the lines between
# data rows ("grid" -> "outline", "simple_grid" -> "simple_outline", ...).
# The comprehension is fully built before update() runs, so the outlines are
# appended right after the grids, in the same relative order.
_table_formats.update(
    {
        name.replace("grid", "outline"): fmt._replace(linebetweenrows=None)
        for name, fmt in _table_formats.items()
        if name.endswith("grid")
    }
)

_table_formats.update(
    {
        "github": TableFormat(
            lineabove=Line("|", "-", "|", "|"),
            linebelowheader=Line("|", "-", "|", "|"),
            linebetweenrows=None,
            linebelow=None,
            headerrow=DataRow("|", "|", "|"),
            datarow=DataRow("|", "|", "|"),
            padding=1,
            with_header_hide=["lineabove"],
        ),
        "pipe": TableFormat(
            lineabove=_pipe_line_with_colons,
            linebelowheader=_pipe_line_with_colons,
            linebetweenrows=None,
            linebelow=None,
            headerrow=DataRow("|", "|", "|"),
            datarow=DataRow("|", "|", "|"),
            padding=1,
            with_header_hide=["lineabove"],
        ),
        "orgtbl": TableFormat(
            lineabove=None,
            linebelowheader=Line("|", "-", "+", "|"),
            linebetweenrows=None,
            linebelow=None,
            headerrow=DataRow("|", "|", "|"),
            datarow=DataRow("|", "|", "|"),
            padding=1,
            with_header_hide=None,
        ),
        "jira": TableFormat(
            lineabove=None,
            linebelowheader=None,
            linebetweenrows=None,
            linebelow=None,
            headerrow=DataRow("||", "||", "||"),
            datarow=DataRow("|", "|", "|"),
            padding=1,
            with_header_hide=None,
        ),
        "presto": TableFormat(
            lineabove=None,
            linebelowheader=Line("", "-", "+", ""),
            linebetweenrows=None,
            linebelow=None,
            headerrow=DataRow("", "|", ""),
            datarow=DataRow("", "|", ""),
            padding=1,
            with_header_hide=None,
        ),
        "pretty": TableFormat(
            lineabove=Line("+", "-", "+", "+"),
            linebelowheader=Line("+", "-", "+", "+"),
            linebetweenrows=None,
            linebelow=Line("+", "-", "+", "+"),
            headerrow=DataRow("|", "|", "|"),
            datarow=DataRow("|", "|", "|"),
            padding=1,
            with_header_hide=None,
        ),
        "psql": TableFormat(
            lineabove=Line("+", "-", "+", "+"),
            linebelowheader=Line("|", "-", "+", "|"),
            linebetweenrows=None,
            linebelow=Line("+", "-", "+", "+"),
            headerrow=DataRow("|", "|", "|"),
            datarow=DataRow("|", "|", "|"),
            padding=1,
            with_header_hide=None,
        ),
        "rst": TableFormat(
            lineabove=Line("", "=", "  ", ""),
            linebelowheader=Line("", "=", "  ", ""),
            linebetweenrows=None,
            linebelow=Line("", "=", "  ", ""),
            headerrow=DataRow("", "  ", ""),
            datarow=DataRow("", "  ", ""),
            padding=0,
            with_header_hide=None,
        ),
        "mediawiki": TableFormat(
            lineabove=Line(
                '{| class="wikitable" style="text-align: left;"',
                "",
                "",
                "\n|+ <!-- caption -->\n|-",
            ),
            linebelowheader=Line("|-", "", "", ""),
            linebetweenrows=Line("|-", "", "", ""),
            linebelow=Line("|}", "", "", ""),
            headerrow=partial(_mediawiki_row_with_attrs, "!"),
            datarow=partial(_mediawiki_row_with_attrs, "|"),
            padding=0,
            with_header_hide=None,
        ),
        "moinmoin": TableFormat(
            lineabove=None,
            linebelowheader=None,
            linebetweenrows=None,
            linebelow=None,
            headerrow=partial(_moin_row_with_attrs, "||", header="'''"),
            datarow=partial(_moin_row_with_attrs, "||"),
            padding=1,
            with_header_hide=None,
        ),
        "youtrack": TableFormat(
            lineabove=None,
            linebelowheader=None,
            linebetweenrows=None,
            linebelow=None,
            headerrow=DataRow("|| ", " || ", " || "),
            datarow=DataRow("| ", " | ", " |"),
            padding=1,
            with_header_hide=None,
        ),
        "html": TableFormat(
            lineabove=_html_begin_table_without_header,
            linebelowheader="",
            linebetweenrows=None,
            linebelow=Line("</tbody>\n</table>", "", "", ""),
            headerrow=partial(_html_row_with_attrs, "th", False),
            datarow=partial(_html_row_with_attrs, "td", False),
            padding=0,
            with_header_hide=["lineabove"],
        ),
        "unsafehtml": TableFormat(
            lineabove=_html_begin_table_without_header,
            linebelowheader="",
            linebetweenrows=None,
            linebelow=Line("</tbody>\n</table>", "", "", ""),
            headerrow=partial(_html_row_with_attrs, "th", True),
            datarow=partial(_html_row_with_attrs, "td", True),
            padding=0,
            with_header_hide=["lineabove"],
        ),
        "latex": TableFormat(
            lineabove=_latex_line_begin_tabular,
            linebelowheader=Line("\\hline", "", "", ""),
            linebetweenrows=None,
            linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
            headerrow=_latex_row,
            datarow=_latex_row,
            padding=1,
            with_header_hide=None,
        ),
        "latex_raw": TableFormat(
            lineabove=_latex_line_begin_tabular,
            linebelowheader=Line("\\hline", "", "", ""),
            linebetweenrows=None,
            linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
            headerrow=partial(_latex_row, escrules={}),
            datarow=partial(_latex_row, escrules={}),
            padding=1,
            with_header_hide=None,
        ),
        "latex_booktabs": TableFormat(
            lineabove=partial(_latex_line_begin_tabular, booktabs=True),
            linebelowheader=Line("\\midrule", "", "", ""),
            linebetweenrows=None,
            linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", ""),
            headerrow=_latex_row,
            datarow=_latex_row,
            padding=1,
            with_header_hide=None,
        ),
        "latex_longtable": TableFormat(
            lineabove=partial(_latex_line_begin_tabular, longtable=True),
            linebelowheader=Line("\\hline\n\\endhead", "", "", ""),
            linebetweenrows=None,
            linebelow=Line("\\hline\n\\end{longtable}", "", "", ""),
            headerrow=_latex_row,
            datarow=_latex_row,
            padding=1,
            with_header_hide=None,
        ),
        "tsv": TableFormat(
            lineabove=None,
            linebelowheader=None,
            linebetweenrows=None,
            linebelow=None,
            headerrow=DataRow("", "\t", ""),
            datarow=DataRow("", "\t", ""),
            padding=0,
            with_header_hide=None,
        ),
        "textile": TableFormat(
            lineabove=None,
            linebelowheader=None,
            linebetweenrows=None,
            linebelow=None,
            headerrow=DataRow("|_. ", "|_.", "|"),
            datarow=_textile_row_with_attrs,
            padding=1,
            with_header_hide=None,
        ),
        "asciidoc": TableFormat(
            lineabove=partial(_asciidoc_row, False),
            linebelowheader=None,
            linebetweenrows=None,
            linebelow=Line("|====", "", "", ""),
            headerrow=partial(_asciidoc_row, True),
            datarow=partial(_asciidoc_row, False),
            padding=1,
            with_header_hide=["lineabove"],
        ),
    }
)

# Alphabetically sorted names of all built-in formats.
tabulate_formats = sorted(_table_formats)
# The table formats for which multiline cells will be folded into subsequent
# table rows. The key is the original format specified at the API. The value
# is the format that will be used to represent the original format; at the
# moment every listed format represents itself.
multiline_formats = {
    name: name
    for name in (
        "plain",
        "simple",
        "grid",
        "simple_grid",
        "rounded_grid",
        "heavy_grid",
        "mixed_grid",
        "double_grid",
        "fancy_grid",
        "pipe",
        "orgtbl",
        "jira",
        "presto",
        "pretty",
        "psql",
        "rst",
    )
}
- # TODO: Add multiline support for the remaining table formats:
- # - mediawiki: Replace \n with <br>
- # - moinmoin: TBD
- # - youtrack: TBD
- # - html: Replace \n with <br>
- # - latex*: Use "makecell" package: In header, replace X\nY with
- # \thead{X\\Y} and in data row, replace X\nY with \makecell{X\\Y}
- # - tsv: TBD
- # - textile: Replace \n with <br/> (must be well-formed XML)
- _multiline_codes = re.compile(r"\r|\n|\r\n")
- _multiline_codes_bytes = re.compile(b"\r|\n|\r\n")
- # Handle ANSI escape sequences for both control sequence introducer (CSI) and
- # operating system command (OSC). Both of these begin with 0x1b (or octal 033),
- # which will be shown below as ESC.
- #
- # CSI ANSI escape codes have the following format, defined in section 5.4 of ECMA-48:
- #
- # CSI: ESC followed by the '[' character (0x5b)
- # Parameter Bytes: 0..n bytes in the range 0x30-0x3f
- # Intermediate Bytes: 0..n bytes in the range 0x20-0x2f
- # Final Byte: a single byte in the range 0x40-0x7e
- #
- # Also include the terminal hyperlink sequences as described here:
- # https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda
- #
- # OSC 8 ; params ; uri ST display_text OSC 8 ;; ST
- #
- # Example: \x1b]8;;https://example.com\x1b\x5ctext to show\x1b]8;;\x1b\x5c
- #
- # Where:
- # OSC: ESC followed by the ']' character (0x5d)
- # params: 0..n optional key value pairs separated by ':' (e.g. foo=bar:baz=qux:abc=123)
- # URI: the actual URI with protocol scheme (e.g. https://, file://, ftp://)
- # ST: ESC followed by the '\' character (0x5c)
- _esc = r"\x1b"
- _csi = rf"{_esc}\["
- _osc = rf"{_esc}\]"
- _st = rf"{_esc}\\"
- _ansi_escape_pat = rf"""
- (
- # terminal colors, etc
- {_csi} # CSI
- [\x30-\x3f]* # parameter bytes
- [\x20-\x2f]* # intermediate bytes
- [\x40-\x7e] # final byte
- |
- # terminal hyperlinks
- {_osc}8; # OSC opening
- (\w+=\w+:?)* # key=value params list (submatch 2)
- ; # delimiter
- ([^{_esc}]+) # URI - anything but ESC (submatch 3)
- {_st} # ST
- ([^{_esc}]+) # link text - anything but ESC (submatch 4)
- {_osc}8;;{_st} # "closing" OSC sequence
- )
- """
- _ansi_codes = re.compile(_ansi_escape_pat, re.VERBOSE)
- _ansi_codes_bytes = re.compile(_ansi_escape_pat.encode("utf8"), re.VERBOSE)
- _ansi_color_reset_code = "\033[0m"
- _float_with_thousands_separators = re.compile(
- r"^(([+-]?[0-9]{1,3})(?:,([0-9]{3}))*)?(?(1)\.[0-9]*|\.[0-9]+)?$"
- )
def simple_separated_format(separator):
    """Build a TableFormat whose cells are joined by ``separator`` only.

    The resulting format draws no horizontal lines and uses no padding; header
    rows and data rows are laid out identically.

    >>> tsv = simple_separated_format("\\t") ; \
        tabulate([["foo", 1], ["spam", 23]], tablefmt=tsv) == 'foo \\t 1\\nspam\\t23'
    True

    """
    # Header and data rows share the same layout: no begin/end markers,
    # just the separator between cells.
    row_layout = DataRow("", separator, "")
    no_line = None
    return TableFormat(
        no_line,
        no_line,
        no_line,
        no_line,
        headerrow=row_layout,
        datarow=row_layout,
        padding=0,
        with_header_hide=None,
    )
def _isnumber_with_thousands_separator(string):
    """Tell whether ``string`` is a number written with "," thousands separators.

    Bytestrings are decoded first when possible; anything that cannot be
    decoded is handed to the regular expression as is.

    >>> _isnumber_with_thousands_separator(".")
    False
    >>> _isnumber_with_thousands_separator("1")
    True
    >>> _isnumber_with_thousands_separator("1.")
    True
    >>> _isnumber_with_thousands_separator(".1")
    True
    >>> _isnumber_with_thousands_separator("1000")
    False
    >>> _isnumber_with_thousands_separator("1,000")
    True
    >>> _isnumber_with_thousands_separator("1,0000")
    False
    >>> _isnumber_with_thousands_separator("1,000.1234")
    True
    >>> _isnumber_with_thousands_separator(b"1,000.1234")
    True
    >>> _isnumber_with_thousands_separator("+1,000.1234")
    True
    >>> _isnumber_with_thousands_separator("-1,000.1234")
    True
    """
    try:
        decoded = string.decode()
    except (UnicodeDecodeError, AttributeError):
        # not a bytestring (or not decodable): match the value unchanged
        decoded = string
    return _float_with_thousands_separators.match(decoded) is not None
- def _isconvertible(conv, string):
- try:
- conv(string)
- return True
- except (ValueError, TypeError):
- return False
def _isnumber(string):
    """Tell whether ``string`` can be read as a float.

    Text that only becomes infinite or NaN after conversion (for example an
    overflowing exponent) is rejected unless it is literally spelled
    "inf", "-inf" or "nan" (case-insensitively).

    >>> _isnumber("123.45")
    True
    >>> _isnumber("123")
    True
    >>> _isnumber("spam")
    False
    >>> _isnumber("123e45678")
    False
    >>> _isnumber("inf")
    True
    """
    if not _isconvertible(float, string):
        return False
    if isinstance(string, (str, bytes)):
        as_float = float(string)
        if math.isinf(as_float) or math.isnan(as_float):
            # accept only the explicit spellings, not overflowed literals
            return string.lower() in ["inf", "-inf", "nan"]
    return True
- def _isint(string, inttype=int):
- """
- >>> _isint("123")
- True
- >>> _isint("123.45")
- False
- """
- return (
- type(string) is inttype
- or isinstance(string, (bytes, str))
- and _isconvertible(inttype, string)
- )
- def _isbool(string):
- """
- >>> _isbool(True)
- True
- >>> _isbool("False")
- True
- >>> _isbool(1)
- False
- """
- return type(string) is bool or (
- isinstance(string, (bytes, str)) and string in ("True", "False")
- )
def _type(string, has_invisible=True, numparse=True):
    """Return the least generic type (type(None), bool, int, float, bytes, str)
    that fits ``string``.

    When ``has_invisible`` is true, ANSI escape sequences are stripped from
    str/bytes values before classification. When ``numparse`` is false, the
    int and float classifications are never returned.

    >>> _type(None) is type(None)
    True
    >>> _type("foo") is type("")
    True
    >>> _type("1") is type(1)
    True
    >>> _type('\x1b[31m42\x1b[0m') is type(42)
    True

    """
    if has_invisible and isinstance(string, (str, bytes)):
        string = _strip_ansi(string)

    if string is None:
        return type(None)
    if hasattr(string, "isoformat"):  # datetime.datetime, date, and time
        return str
    if _isbool(string):
        return bool
    if _isint(string) and numparse:
        return int
    if _isnumber(string) and numparse:
        return float
    if isinstance(string, bytes):
        return bytes
    return str
def _afterpoint(string):
    """Count the symbols after the decimal point; -1 when there is none.

    For numbers without a "." the position of the exponent marker "e" is
    used instead. Non-numbers and integers yield -1.

    >>> _afterpoint("123.45")
    2
    >>> _afterpoint("1001")
    -1
    >>> _afterpoint("eggs")
    -1
    >>> _afterpoint("123e45")
    2
    >>> _afterpoint("123,456.78")
    2

    """
    looks_numeric = _isnumber(string) or _isnumber_with_thousands_separator(string)
    if not looks_numeric or _isint(string):
        return -1  # not a number, or an integer

    marker = string.rfind(".")
    if marker < 0:
        marker = string.lower().rfind("e")
    if marker < 0:
        return -1  # no point
    return len(string) - marker - 1
- def _padleft(width, s):
- """Flush right.
- >>> _padleft(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430'
- True
- """
- fmt = "{0:>%ds}" % width
- return fmt.format(s)
- def _padright(width, s):
- """Flush left.
- >>> _padright(6, '\u044f\u0439\u0446\u0430') == '\u044f\u0439\u0446\u0430 '
- True
- """
- fmt = "{0:<%ds}" % width
- return fmt.format(s)
- def _padboth(width, s):
- """Center string.
- >>> _padboth(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430 '
- True
- """
- fmt = "{0:^%ds}" % width
- return fmt.format(s)
- def _padnone(ignore_width, s):
- return s
def _strip_ansi(s):
    r"""Remove ANSI escape sequences, both CSI (color codes, etc) and OSC hyperlinks.

    CSI sequences are dropped entirely; an OSC hyperlink is replaced by its
    link text (submatch 4 of the pattern). Showing the URI instead is not
    supported. Any value that is not a str is treated as a bytestring.

    >>> repr(_strip_ansi('\x1B]8;;https://example.com\x1B\\This is a link\x1B]8;;\x1B\\'))
    "'This is a link'"

    >>> repr(_strip_ansi('\x1b[31mred\x1b[0m text'))
    "'red text'"

    """
    pattern = _ansi_codes if isinstance(s, str) else _ansi_codes_bytes
    return pattern.sub(r"\4", s)
def _visible_width(s):
    """Visible width of a printed string; ANSI escape sequences are not counted.

    Values that are neither str nor bytes are measured through ``str(s)``.

    >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world")
    (5, 5)

    """
    # optional wide-character support
    use_wcwidth = wcwidth is not None and WIDE_CHARS_MODE
    measure = wcwidth.wcswidth if use_wcwidth else len
    text = _strip_ansi(s) if isinstance(s, (str, bytes)) else str(s)
    return measure(text)
def _is_multiline(s):
    """Tell whether ``s`` contains a carriage return or a line feed.

    Any value that is not a str is searched with the bytes pattern.
    """
    pattern = _multiline_codes if isinstance(s, str) else _multiline_codes_bytes
    return pattern.search(s) is not None
- def _multiline_width(multiline_s, line_width_fn=len):
- """Visible width of a potentially multiline content."""
- return max(map(line_width_fn, re.split("[\r\n]", multiline_s)))
- def _choose_width_fn(has_invisible, enable_widechars, is_multiline):
- """Return a function to calculate visible cell width."""
- if has_invisible:
- line_width_fn = _visible_width
- elif enable_widechars: # optional wide-character support if available
- line_width_fn = wcwidth.wcswidth
- else:
- line_width_fn = len
- if is_multiline:
- width_fn = lambda s: _multiline_width(s, line_width_fn) # noqa
- else:
- width_fn = line_width_fn
- return width_fn
def _align_column_choose_padfn(strings, alignment, has_invisible):
    """Pick the padding function for ``alignment`` and pre-process the cells.

    Returns ``(strings, padfn)``. For "right", "center" and the fallback
    (left) alignments the cells are stripped unless PRESERVE_WHITESPACE is
    set. For "decimal" the cells get trailing spaces so their decimal points
    line up once flushed right. A falsy alignment leaves the cells untouched
    and selects the no-op padder.
    """
    if alignment == "right":
        padfn = _padleft
    elif alignment == "center":
        padfn = _padboth
    elif alignment == "decimal":
        if has_invisible:
            decimals = [_afterpoint(_strip_ansi(s)) for s in strings]
        else:
            decimals = [_afterpoint(s) for s in strings]
        widest_fraction = max(decimals)
        aligned = [
            s + (widest_fraction - decs) * " " for s, decs in zip(strings, decimals)
        ]
        return aligned, _padleft
    elif not alignment:
        return strings, _padnone
    else:
        padfn = _padright

    if not PRESERVE_WHITESPACE:
        strings = [s.strip() for s in strings]
    return strings, padfn
- def _align_column_choose_width_fn(has_invisible, enable_widechars, is_multiline):
- if has_invisible:
- line_width_fn = _visible_width
- elif enable_widechars: # optional wide-character support if available
- line_width_fn = wcwidth.wcswidth
- else:
- line_width_fn = len
- if is_multiline:
- width_fn = lambda s: _align_column_multiline_width(s, line_width_fn) # noqa
- else:
- width_fn = line_width_fn
- return width_fn
- def _align_column_multiline_width(multiline_s, line_width_fn=len):
- """Visible width of a potentially multiline content."""
- return list(map(line_width_fn, re.split("[\r\n]", multiline_s)))
- def _flat_list(nested_list):
- ret = []
- for item in nested_list:
- if isinstance(item, list):
- for subitem in item:
- ret.append(subitem)
- else:
- ret.append(item)
- return ret
def _align_column(
    strings,
    alignment,
    minwidth=0,
    has_invisible=True,
    enable_widechars=False,
    is_multiline=False,
):
    """[string] -> [padded_string]

    Pads every cell of a column to the column's widest visible width (but at
    least ``minwidth``). When invisible codes or wide characters are in play,
    the target passed to the padding function is corrected by the difference
    between a cell's visible width and its ``len``.
    """
    strings, padfn = _align_column_choose_padfn(strings, alignment, has_invisible)
    width_fn = _align_column_choose_width_fn(
        has_invisible, enable_widechars, is_multiline
    )

    s_widths = [width_fn(s) for s in strings]
    maxwidth = max(max(_flat_list(s_widths)), minwidth)
    # wcswidth and _visible_width don't count invisible characters, so the
    # padding target has to be widened by (len - visible width) in that case
    needs_correction = enable_widechars or has_invisible

    # TODO: refactor column alignment in single-line and multiline modes
    if is_multiline:
        if not needs_correction:
            return [
                "\n".join(padfn(maxwidth, line) for line in ms.splitlines())
                for ms in strings
            ]
        line_lens = [[len(line) for line in re.split("[\r\n]", ms)] for ms in strings]
        targets = [
            [maxwidth - (w - l) for w, l in zip(widths, lens)]
            for widths, lens in zip(s_widths, line_lens)
        ]
        return [
            "\n".join(
                padfn(target, line)
                for line, target in zip((ms.splitlines() or ms), line_targets)
            )
            for ms, line_targets in zip(strings, targets)
        ]

    # single-line cell values
    if not needs_correction:
        return [padfn(maxwidth, s) for s in strings]
    cell_lens = [len(s) for s in strings]
    targets = [maxwidth - (w - l) for w, l in zip(s_widths, cell_lens)]
    return [padfn(target, s) for s, target in zip(strings, targets)]
- def _more_generic(type1, type2):
- types = {
- type(None): 0, # noqa
- bool: 1,
- int: 2,
- float: 3,
- bytes: 4,
- str: 5,
- }
- invtypes = {
- 5: str,
- 4: bytes,
- 3: float,
- 2: int,
- 1: bool,
- 0: type(None),
- }
- moregeneric = max(types.get(type1, 5), types.get(type2, 5))
- return invtypes[moregeneric]
def _column_type(strings, has_invisible=True, numparse=True):
    """The least generic type all column values are convertible to.

    Starts from bool and widens with the type of every value in turn.

    >>> _column_type([True, False]) is bool
    True
    >>> _column_type(["1", "2"]) is int
    True
    >>> _column_type(["1", "2.3"]) is float
    True
    >>> _column_type(["1", "2.3", "four"]) is str
    True
    >>> _column_type(["four", '\u043f\u044f\u0442\u044c']) is str
    True
    >>> _column_type([None, "brux"]) is str
    True
    >>> _column_type([1, 2, None]) is int
    True
    >>> import datetime as dt
    >>> _column_type([dt.datetime(1991,2,19), dt.time(17,35)]) is str
    True

    """
    cell_types = [_type(s, has_invisible, numparse) for s in strings]
    column_type = bool
    for cell_type in cell_types:
        column_type = _more_generic(column_type, cell_type)
    return column_type
- def _format(val, valtype, floatfmt, intfmt, missingval="", has_invisible=True):
- """Format a value according to its type.
- Unicode is supported:
- >>> hrow = ['\u0431\u0443\u043a\u0432\u0430', '\u0446\u0438\u0444\u0440\u0430'] ; \
- tbl = [['\u0430\u0437', 2], ['\u0431\u0443\u043a\u0438', 4]] ; \
- good_result = '\\u0431\\u0443\\u043a\\u0432\\u0430 \\u0446\\u0438\\u0444\\u0440\\u0430\\n------- -------\\n\\u0430\\u0437 2\\n\\u0431\\u0443\\u043a\\u0438 4' ; \
- tabulate(tbl, headers=hrow) == good_result
- True
- """ # noqa
- if val is None:
- return missingval
- if valtype is str:
- return f"{val}"
- elif valtype is int:
- return format(val, intfmt)
- elif valtype is bytes:
- try:
- return str(val, "ascii")
- except (TypeError, UnicodeDecodeError):
- return str(val)
- elif valtype is float:
- is_a_colored_number = has_invisible and isinstance(val, (str, bytes))
- if is_a_colored_number:
- raw_val = _strip_ansi(val)
- formatted_val = format(float(raw_val), floatfmt)
- return val.replace(raw_val, formatted_val)
- else:
- return format(float(val), floatfmt)
- else:
- return f"{val}"
- def _align_header(
- header, alignment, width, visible_width, is_multiline=False, width_fn=None
- ):
- "Pad string header to width chars given known visible_width of the header."
- if is_multiline:
- header_lines = re.split(_multiline_codes, header)
- padded_lines = [
- _align_header(h, alignment, width, width_fn(h)) for h in header_lines
- ]
- return "\n".join(padded_lines)
- # else: not multiline
- ninvisible = len(header) - visible_width
- width += ninvisible
- if alignment == "left":
- return _padright(width, header)
- elif alignment == "center":
- return _padboth(width, header)
- elif not alignment:
- return f"{header}"
- else:
- return _padleft(width, header)
- def _remove_separating_lines(rows):
- if type(rows) == list:
- separating_lines = []
- sans_rows = []
- for index, row in enumerate(rows):
- if _is_separating_line(row):
- separating_lines.append(index)
- else:
- sans_rows.append(row)
- return sans_rows, separating_lines
- else:
- return rows, None
- def _reinsert_separating_lines(rows, separating_lines):
- if separating_lines:
- for index in separating_lines:
- rows.insert(index, SEPARATING_LINE)
- def _prepend_row_index(rows, index):
- """Add a left-most index column."""
- if index is None or index is False:
- return rows
- if isinstance(index, Sized) and len(index) != len(rows):
- raise ValueError(
- "index must be as long as the number of data rows: "
- + "len(index)={} len(rows)={}".format(len(index), len(rows))
- )
- sans_rows, separating_lines = _remove_separating_lines(rows)
- new_rows = []
- index_iter = iter(index)
- for row in sans_rows:
- index_v = next(index_iter)
- new_rows.append([index_v] + list(row))
- rows = new_rows
- _reinsert_separating_lines(rows, separating_lines)
- return rows
- def _bool(val):
- "A wrapper around standard bool() which doesn't throw on NumPy arrays"
- try:
- return bool(val)
- except ValueError: # val is likely to be a numpy array with many elements
- return False
def _normalize_tabular_data(tabular_data, headers, showindex="default"):
    """Transform a supported data type to a list of lists, and a list of headers.

    Supported tabular data types:

    * list-of-lists or another iterable of iterables

    * list of named tuples (usually used with headers="keys")

    * list of dicts (usually used with headers="keys")

    * list of OrderedDicts (usually used with headers="keys")

    * list of dataclasses (Python 3.7+ only, usually used with headers="keys")

    * 2D NumPy arrays

    * NumPy record arrays (usually used with headers="keys")

    * dict of iterables (usually used with headers="keys")

    * pandas.DataFrame (usually used with headers="keys")

    The first row can be used as headers if headers="firstrow",
    column indices can be used as headers if headers="keys".

    If showindex="default", show row indices of the pandas.DataFrame.
    If showindex="always", show row indices for all types of data.
    If showindex="never", don't show row indices for all types of data.
    If showindex is an iterable, show its values as row indices.

    Returns a ``(rows, headers)`` pair: ``rows`` is a list whose data rows are
    lists (separating lines are passed through as they are) and ``headers``
    is a list of strings, left-padded with "" when there are fewer headers
    than columns in the first row.

    Raises ValueError when ``tabular_data`` has ``keys``/``values`` but is
    neither dict-like nor DataFrame-like, and when a list of dicts is given
    headers that are neither a dict nor one of the keywords.

    """

    # Some containers raise ValueError from bool(); turn such headers into a
    # plain list so the comparisons and truth tests below work.
    # The is_headers2bool_broken flag is only assigned, never read here.
    try:
        bool(headers)
        is_headers2bool_broken = False  # noqa
    except ValueError:  # numpy.ndarray, pandas.core.index.Index, ...
        is_headers2bool_broken = True  # noqa
        headers = list(headers)

    index = None
    if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"):
        # dict-like and pandas.DataFrame?
        if hasattr(tabular_data.values, "__call__"):
            # likely a conventional dict
            keys = tabular_data.keys()
            rows = list(
                izip_longest(*tabular_data.values())
            )  # columns have to be transposed
        elif hasattr(tabular_data, "index"):
            # values is a property, has .index => it's likely a pandas.DataFrame (pandas 0.11.0)
            keys = list(tabular_data)
            if (
                showindex in ["default", "always", True]
                and tabular_data.index.name is not None
            ):
                # the index column will be shown, so its name leads the keys
                if isinstance(tabular_data.index.name, list):
                    keys[:0] = tabular_data.index.name
                else:
                    keys[:0] = [tabular_data.index.name]
            vals = tabular_data.values  # values matrix doesn't need to be transposed
            # for DataFrames add an index per default
            index = list(tabular_data.index)
            rows = [list(row) for row in vals]
        else:
            raise ValueError("tabular data doesn't appear to be a dict or a DataFrame")

        if headers == "keys":
            headers = list(map(str, keys))  # headers should be strings

    else:  # it's a usual iterable of iterables, or a NumPy array, or an iterable of dataclasses
        rows = list(tabular_data)

        if headers == "keys" and not rows:
            # an empty table (issue #81)
            headers = []
        elif (
            headers == "keys"
            and hasattr(tabular_data, "dtype")
            and getattr(tabular_data.dtype, "names")
        ):
            # numpy record array
            headers = tabular_data.dtype.names
        elif (
            headers == "keys"
            and len(rows) > 0
            and isinstance(rows[0], tuple)
            and hasattr(rows[0], "_fields")
        ):
            # namedtuple
            headers = list(map(str, rows[0]._fields))
        elif len(rows) > 0 and hasattr(rows[0], "keys") and hasattr(rows[0], "values"):
            # dict-like object
            uniq_keys = set()  # implements hashed lookup
            keys = []  # storage for set
            if headers == "firstrow":
                # the first dict supplies header texts and is dropped from the data
                firstdict = rows[0] if len(rows) > 0 else {}
                keys.extend(firstdict.keys())
                uniq_keys.update(keys)
                rows = rows[1:]
            for row in rows:
                for k in row.keys():
                    # Save unique items in input order
                    if k not in uniq_keys:
                        keys.append(k)
                        uniq_keys.add(k)
            if headers == "keys":
                headers = keys
            elif isinstance(headers, dict):
                # a dict of headers for a list of dicts
                headers = [headers.get(k, k) for k in keys]
                headers = list(map(str, headers))
            elif headers == "firstrow":
                if len(rows) > 0:
                    headers = [firstdict.get(k, k) for k in keys]
                    headers = list(map(str, headers))
                else:
                    headers = []
            elif headers:
                raise ValueError(
                    "headers for a list of dicts is not a dict or a keyword"
                )
            # keys missing from a row become None cells
            rows = [[row.get(k) for k in keys] for row in rows]

        elif (
            headers == "keys"
            and hasattr(tabular_data, "description")
            and hasattr(tabular_data, "fetchone")
            and hasattr(tabular_data, "rowcount")
        ):
            # Python Database API cursor object (PEP 0249)
            # print tabulate(cursor, headers='keys')
            headers = [column[0] for column in tabular_data.description]

        elif (
            dataclasses is not None
            and len(rows) > 0
            and dataclasses.is_dataclass(rows[0])
        ):
            # Python 3.7+'s dataclass
            # field names are taken from the first row and applied to every row
            field_names = [field.name for field in dataclasses.fields(rows[0])]
            if headers == "keys":
                headers = field_names
            rows = [[getattr(row, f) for f in field_names] for row in rows]

        elif headers == "keys" and len(rows) > 0:
            # keys are column indices
            headers = list(map(str, range(len(rows[0]))))

    # take headers from the first row if necessary
    if headers == "firstrow" and len(rows) > 0:
        if index is not None:
            # the first index value belongs to the header row
            headers = [index[0]] + list(rows[0])
            index = index[1:]
        else:
            headers = rows[0]
        headers = list(map(str, headers))  # headers should be strings
        rows = rows[1:]
    elif headers == "firstrow":
        headers = []

    headers = list(map(str, headers))
    #    rows = list(map(list, rows))
    # separating lines are kept as they are; every other row becomes a list
    rows = list(map(lambda r: r if _is_separating_line(r) else list(r), rows))

    # add or remove an index column
    showindex_is_a_str = type(showindex) in [str, bytes]
    if showindex == "default" and index is not None:
        rows = _prepend_row_index(rows, index)
    elif isinstance(showindex, Sized) and not showindex_is_a_str:
        rows = _prepend_row_index(rows, list(showindex))
    elif isinstance(showindex, Iterable) and not showindex_is_a_str:
        rows = _prepend_row_index(rows, showindex)
    elif showindex == "always" or (_bool(showindex) and not showindex_is_a_str):
        if index is None:
            index = list(range(len(rows)))
        rows = _prepend_row_index(rows, index)
    elif showindex == "never" or (not _bool(showindex) and not showindex_is_a_str):
        pass

    # pad with empty headers for initial columns if necessary
    if headers and len(rows) > 0:
        nhs = len(headers)
        ncols = len(rows[0])
        if nhs < ncols:
            headers = [""] * (ncols - nhs) + headers

    return rows, headers
- def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True):
- numparses = _expand_iterable(numparses, len(list_of_lists[0]), True)
- result = []
- for row in list_of_lists:
- new_row = []
- for cell, width, numparse in zip(row, colwidths, numparses):
- if _isnumber(cell) and numparse:
- new_row.append(cell)
- continue
- if width is not None:
- wrapper = _CustomTextWrap(width=width)
- # Cast based on our internal type handling
- # Any future custom formatting of types (such as datetimes)
- # may need to be more explicit than just `str` of the object
- casted_cell = (
- str(cell) if _isnumber(cell) else _type(cell, numparse)(cell)
- )
- wrapped = wrapper.wrap(casted_cell)
- new_row.append("\n".join(wrapped))
- else:
- new_row.append(cell)
- result.append(new_row)
- return result
- def _to_str(s, encoding="utf8", errors="ignore"):
- """
- A type safe wrapper for converting a bytestring to str. This is essentially just
- a wrapper around .decode() intended for use with things like map(), but with some
- specific behavior:
- 1. if the given parameter is not a bytestring, it is returned unmodified
- 2. decode() is called for the given parameter and assumes utf8 encoding, but the
- default error behavior is changed from 'strict' to 'ignore'
- >>> repr(_to_str(b'foo'))
- "'foo'"
- >>> repr(_to_str('foo'))
- "'foo'"
- >>> repr(_to_str(42))
- "'42'"
- """
- if isinstance(s, bytes):
- return s.decode(encoding=encoding, errors=errors)
- return str(s)
- def tabulate(
- tabular_data,
- headers=(),
- tablefmt="simple",
- floatfmt=_DEFAULT_FLOATFMT,
- intfmt=_DEFAULT_INTFMT,
- numalign=_DEFAULT_ALIGN,
- stralign=_DEFAULT_ALIGN,
- missingval=_DEFAULT_MISSINGVAL,
- showindex="default",
- disable_numparse=False,
- colalign=None,
- maxcolwidths=None,
- rowalign=None,
- maxheadercolwidths=None,
- ):
- """Format a fixed width table for pretty printing.
- >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]]))
- --- ---------
- 1 2.34
- -56 8.999
- 2 10001
- --- ---------
- The first required argument (`tabular_data`) can be a
- list-of-lists (or another iterable of iterables), a list of named
- tuples, a dictionary of iterables, an iterable of dictionaries,
- an iterable of dataclasses (Python 3.7+), a two-dimensional NumPy array,
- NumPy record array, or a Pandas' dataframe.
- Table headers
- -------------
- To print nice column headers, supply the second argument (`headers`):
- - `headers` can be an explicit list of column headers
- - if `headers="firstrow"`, then the first row of data is used
- - if `headers="keys"`, then dictionary keys or column indices are used
- Otherwise a headerless table is produced.
- If the number of headers is less than the number of columns, they
- are supposed to be names of the last columns. This is consistent
- with the plain-text format of R and Pandas' dataframes.
- >>> print(tabulate([["sex","age"],["Alice","F",24],["Bob","M",19]],
- ... headers="firstrow"))
- sex age
- ----- ----- -----
- Alice F 24
- Bob M 19
- By default, pandas.DataFrame data have an additional column called
- row index. To add a similar column to all other types of data,
- use `showindex="always"` or `showindex=True`. To suppress row indices
- for all types of data, pass `showindex="never" or `showindex=False`.
- To add a custom row index column, pass `showindex=some_iterable`.
- >>> print(tabulate([["F",24],["M",19]], showindex="always"))
- - - --
- 0 F 24
- 1 M 19
- - - --
- Column alignment
- ----------------
- `tabulate` tries to detect column types automatically, and aligns
- the values properly. By default it aligns decimal points of the
- numbers (or flushes integer numbers to the right), and flushes
- everything else to the left. Possible column alignments
- (`numalign`, `stralign`) are: "right", "center", "left", "decimal"
- (only for `numalign`), and None (to disable alignment).
- Table formats
- -------------
- `intfmt` is a format specification used for columns which
- contain numeric data without a decimal point. This can also be
- a list or tuple of format strings, one per column.
- `floatfmt` is a format specification used for columns which
- contain numeric data with a decimal point. This can also be
- a list or tuple of format strings, one per column.
- `None` values are replaced with a `missingval` string (like
- `floatfmt`, this can also be a list of values for different
- columns):
- >>> print(tabulate([["spam", 1, None],
- ... ["eggs", 42, 3.14],
- ... ["other", None, 2.7]], missingval="?"))
- ----- -- ----
- spam 1 ?
- eggs 42 3.14
- other ? 2.7
- ----- -- ----
- Various plain-text table formats (`tablefmt`) are supported:
- 'plain', 'simple', 'grid', 'pipe', 'orgtbl', 'rst', 'mediawiki',
- 'latex', 'latex_raw', 'latex_booktabs', 'latex_longtable' and tsv.
- Variable `tabulate_formats`contains the list of currently supported formats.
- "plain" format doesn't use any pseudographics to draw tables,
- it separates columns with a double space:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "plain"))
- strings numbers
- spam 41.9999
- eggs 451
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="plain"))
- spam 41.9999
- eggs 451
- "simple" format is like Pandoc simple_tables:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "simple"))
- strings numbers
- --------- ---------
- spam 41.9999
- eggs 451
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="simple"))
- ---- --------
- spam 41.9999
- eggs 451
- ---- --------
- "grid" is similar to tables produced by Emacs table.el package or
- Pandoc grid_tables:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "grid"))
- +-----------+-----------+
- | strings | numbers |
- +===========+===========+
- | spam | 41.9999 |
- +-----------+-----------+
- | eggs | 451 |
- +-----------+-----------+
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="grid"))
- +------+----------+
- | spam | 41.9999 |
- +------+----------+
- | eggs | 451 |
- +------+----------+
- "simple_grid" draws a grid using single-line box-drawing
- characters:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "simple_grid"))
- ┌───────────┬───────────┐
- │ strings │ numbers │
- ├───────────┼───────────┤
- │ spam │ 41.9999 │
- ├───────────┼───────────┤
- │ eggs │ 451 │
- └───────────┴───────────┘
- "rounded_grid" draws a grid using single-line box-drawing
- characters with rounded corners:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "rounded_grid"))
- ╭───────────┬───────────╮
- │ strings │ numbers │
- ├───────────┼───────────┤
- │ spam │ 41.9999 │
- ├───────────┼───────────┤
- │ eggs │ 451 │
- ╰───────────┴───────────╯
- "heavy_grid" draws a grid using bold (thick) single-line box-drawing
- characters:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "heavy_grid"))
- ┏━━━━━━━━━━━┳━━━━━━━━━━━┓
- ┃ strings ┃ numbers ┃
- ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
- ┃ spam ┃ 41.9999 ┃
- ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
- ┃ eggs ┃ 451 ┃
- ┗━━━━━━━━━━━┻━━━━━━━━━━━┛
- "mixed_grid" draws a grid using a mix of light (thin) and heavy (thick) lines
- box-drawing characters:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "mixed_grid"))
- ┍━━━━━━━━━━━┯━━━━━━━━━━━┑
- │ strings │ numbers │
- ┝━━━━━━━━━━━┿━━━━━━━━━━━┥
- │ spam │ 41.9999 │
- ├───────────┼───────────┤
- │ eggs │ 451 │
- ┕━━━━━━━━━━━┷━━━━━━━━━━━┙
- "double_grid" draws a grid using double-line box-drawing
- characters:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "double_grid"))
- ╔═══════════╦═══════════╗
- ║ strings ║ numbers ║
- ╠═══════════╬═══════════╣
- ║ spam ║ 41.9999 ║
- ╠═══════════╬═══════════╣
- ║ eggs ║ 451 ║
- ╚═══════════╩═══════════╝
- "fancy_grid" draws a grid using a mix of single and
- double-line box-drawing characters:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "fancy_grid"))
- ╒═══════════╤═══════════╕
- │ strings │ numbers │
- ╞═══════════╪═══════════╡
- │ spam │ 41.9999 │
- ├───────────┼───────────┤
- │ eggs │ 451 │
- ╘═══════════╧═══════════╛
- "outline" is the same as the "grid" format but doesn't draw lines between rows:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "outline"))
- +-----------+-----------+
- | strings | numbers |
- +===========+===========+
- | spam | 41.9999 |
- | eggs | 451 |
- +-----------+-----------+
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="outline"))
- +------+----------+
- | spam | 41.9999 |
- | eggs | 451 |
- +------+----------+
- "simple_outline" is the same as the "simple_grid" format but doesn't draw lines between rows:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "simple_outline"))
- ┌───────────┬───────────┐
- │ strings │ numbers │
- ├───────────┼───────────┤
- │ spam │ 41.9999 │
- │ eggs │ 451 │
- └───────────┴───────────┘
- "rounded_outline" is the same as the "rounded_grid" format but doesn't draw lines between rows:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "rounded_outline"))
- ╭───────────┬───────────╮
- │ strings │ numbers │
- ├───────────┼───────────┤
- │ spam │ 41.9999 │
- │ eggs │ 451 │
- ╰───────────┴───────────╯
- "heavy_outline" is the same as the "heavy_grid" format but doesn't draw lines between rows:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "heavy_outline"))
- ┏━━━━━━━━━━━┳━━━━━━━━━━━┓
- ┃ strings ┃ numbers ┃
- ┣━━━━━━━━━━━╋━━━━━━━━━━━┫
- ┃ spam ┃ 41.9999 ┃
- ┃ eggs ┃ 451 ┃
- ┗━━━━━━━━━━━┻━━━━━━━━━━━┛
- "mixed_outline" is the same as the "mixed_grid" format but doesn't draw lines between rows:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "mixed_outline"))
- ┍━━━━━━━━━━━┯━━━━━━━━━━━┑
- │ strings │ numbers │
- ┝━━━━━━━━━━━┿━━━━━━━━━━━┥
- │ spam │ 41.9999 │
- │ eggs │ 451 │
- ┕━━━━━━━━━━━┷━━━━━━━━━━━┙
- "double_outline" is the same as the "double_grid" format but doesn't draw lines between rows:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "double_outline"))
- ╔═══════════╦═══════════╗
- ║ strings ║ numbers ║
- ╠═══════════╬═══════════╣
- ║ spam ║ 41.9999 ║
- ║ eggs ║ 451 ║
- ╚═══════════╩═══════════╝
- "fancy_outline" is the same as the "fancy_grid" format but doesn't draw lines between rows:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "fancy_outline"))
- ╒═══════════╤═══════════╕
- │ strings │ numbers │
- ╞═══════════╪═══════════╡
- │ spam │ 41.9999 │
- │ eggs │ 451 │
- ╘═══════════╧═══════════╛
- "pipe" is like tables in PHP Markdown Extra extension or Pandoc
- pipe_tables:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "pipe"))
- | strings | numbers |
- |:----------|----------:|
- | spam | 41.9999 |
- | eggs | 451 |
- "presto" is like tables produce by the Presto CLI:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "presto"))
- strings | numbers
- -----------+-----------
- spam | 41.9999
- eggs | 451
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="pipe"))
- |:-----|---------:|
- | spam | 41.9999 |
- | eggs | 451 |
- "orgtbl" is like tables in Emacs org-mode and orgtbl-mode. They
- are slightly different from "pipe" format by not using colons to
- define column alignment, and using a "+" sign to indicate line
- intersections:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "orgtbl"))
- | strings | numbers |
- |-----------+-----------|
- | spam | 41.9999 |
- | eggs | 451 |
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="orgtbl"))
- | spam | 41.9999 |
- | eggs | 451 |
- "rst" is like a simple table format from reStructuredText; please
- note that reStructuredText accepts also "grid" tables:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "rst"))
- ========= =========
- strings numbers
- ========= =========
- spam 41.9999
- eggs 451
- ========= =========
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="rst"))
- ==== ========
- spam 41.9999
- eggs 451
- ==== ========
- "mediawiki" produces a table markup used in Wikipedia and on other
- MediaWiki-based sites:
- >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
- ... headers="firstrow", tablefmt="mediawiki"))
- {| class="wikitable" style="text-align: left;"
- |+ <!-- caption -->
- |-
- ! strings !! align="right"| numbers
- |-
- | spam || align="right"| 41.9999
- |-
- | eggs || align="right"| 451
- |}
- "html" produces HTML markup as an html.escape'd str
- with a ._repr_html_ method so that Jupyter Lab and Notebook display the HTML
- and a .str property so that the raw HTML remains accessible
- the unsafehtml table format can be used if an unescaped HTML format is required:
- >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
- ... headers="firstrow", tablefmt="html"))
- <table>
- <thead>
- <tr><th>strings </th><th style="text-align: right;"> numbers</th></tr>
- </thead>
- <tbody>
- <tr><td>spam </td><td style="text-align: right;"> 41.9999</td></tr>
- <tr><td>eggs </td><td style="text-align: right;"> 451 </td></tr>
- </tbody>
- </table>
- "latex" produces a tabular environment of LaTeX document markup:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex"))
- \\begin{tabular}{lr}
- \\hline
- spam & 41.9999 \\\\
- eggs & 451 \\\\
- \\hline
- \\end{tabular}
- "latex_raw" is similar to "latex", but doesn't escape special characters,
- such as backslash and underscore, so LaTeX commands may be embedded into
- cells' values:
- >>> print(tabulate([["spam$_9$", 41.9999], ["\\\\emph{eggs}", "451.0"]], tablefmt="latex_raw"))
- \\begin{tabular}{lr}
- \\hline
- spam$_9$ & 41.9999 \\\\
- \\emph{eggs} & 451 \\\\
- \\hline
- \\end{tabular}
- "latex_booktabs" produces a tabular environment of LaTeX document markup
- using the booktabs.sty package:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_booktabs"))
- \\begin{tabular}{lr}
- \\toprule
- spam & 41.9999 \\\\
- eggs & 451 \\\\
- \\bottomrule
- \\end{tabular}
- "latex_longtable" produces a tabular environment that can stretch along
- multiple pages, using the longtable package for LaTeX.
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_longtable"))
- \\begin{longtable}{lr}
- \\hline
- spam & 41.9999 \\\\
- eggs & 451 \\\\
- \\hline
- \\end{longtable}
- Number parsing
- --------------
- By default, anything which can be parsed as a number is a number.
- This ensures numbers represented as strings are aligned properly.
- This can lead to weird results for particular strings such as
- specific git SHAs e.g. "42992e1" will be parsed into the number
- 429920 and aligned as such.
- To completely disable number parsing (and alignment), use
- `disable_numparse=True`. For more fine-grained control, a list of column
- indices can be used to disable number parsing only on those columns
- e.g. `disable_numparse=[0, 2]` would disable number parsing only on the
- first and third columns.
- Column Widths and Auto Line Wrapping
- ------------------------------------
- Tabulate will, by default, set the width of each column to the length of the
- longest element in that column. However, in situations where fields are expected
- to reasonably be too long to look good as a single line, tabulate can help automate
- word wrapping long fields for you. Use the parameter `maxcolwidths` to provide a
- list of maximal column widths:
- >>> print(tabulate( \
- [('1', 'John Smith', \
- 'This is a rather long description that might look better if it is wrapped a bit')], \
- headers=("Issue Id", "Author", "Description"), \
- maxcolwidths=[None, None, 30], \
- tablefmt="grid" \
- ))
- +------------+------------+-------------------------------+
- | Issue Id | Author | Description |
- +============+============+===============================+
- | 1 | John Smith | This is a rather long |
- | | | description that might look |
- | | | better if it is wrapped a bit |
- +------------+------------+-------------------------------+
- Header column width can be specified in a similar way using `maxheadercolwidths`
- """
- if tabular_data is None:
- tabular_data = []
- list_of_lists, headers = _normalize_tabular_data(
- tabular_data, headers, showindex=showindex
- )
- list_of_lists, separating_lines = _remove_separating_lines(list_of_lists)
- if maxcolwidths is not None:
- num_cols = len(list_of_lists[0])
- if isinstance(maxcolwidths, int): # Expand scalar for all columns
- maxcolwidths = _expand_iterable(maxcolwidths, num_cols, maxcolwidths)
- else: # Ignore col width for any 'trailing' columns
- maxcolwidths = _expand_iterable(maxcolwidths, num_cols, None)
- numparses = _expand_numparse(disable_numparse, num_cols)
- list_of_lists = _wrap_text_to_colwidths(
- list_of_lists, maxcolwidths, numparses=numparses
- )
- if maxheadercolwidths is not None:
- num_cols = len(list_of_lists[0])
- if isinstance(maxheadercolwidths, int): # Expand scalar for all columns
- maxheadercolwidths = _expand_iterable(
- maxheadercolwidths, num_cols, maxheadercolwidths
- )
- else: # Ignore col width for any 'trailing' columns
- maxheadercolwidths = _expand_iterable(maxheadercolwidths, num_cols, None)
- numparses = _expand_numparse(disable_numparse, num_cols)
- headers = _wrap_text_to_colwidths(
- [headers], maxheadercolwidths, numparses=numparses
- )[0]
- # empty values in the first column of RST tables should be escaped (issue #82)
- # "" should be escaped as "\\ " or ".."
- if tablefmt == "rst":
- list_of_lists, headers = _rst_escape_first_column(list_of_lists, headers)
- # PrettyTable formatting does not use any extra padding.
- # Numbers are not parsed and are treated the same as strings for alignment.
- # Check if pretty is the format being used and override the defaults so it
- # does not impact other formats.
- min_padding = MIN_PADDING
- if tablefmt == "pretty":
- min_padding = 0
- disable_numparse = True
- numalign = "center" if numalign == _DEFAULT_ALIGN else numalign
- stralign = "center" if stralign == _DEFAULT_ALIGN else stralign
- else:
- numalign = "decimal" if numalign == _DEFAULT_ALIGN else numalign
- stralign = "left" if stralign == _DEFAULT_ALIGN else stralign
- # optimization: look for ANSI control codes once,
- # enable smart width functions only if a control code is found
- #
- # convert the headers and rows into a single, tab-delimited string ensuring
- # that any bytestrings are decoded safely (i.e. errors ignored)
- plain_text = "\t".join(
- chain(
- # headers
- map(_to_str, headers),
- # rows: chain the rows together into a single iterable after mapping
- # the bytestring conversion to each cell value
- chain.from_iterable(map(_to_str, row) for row in list_of_lists),
- )
- )
- has_invisible = _ansi_codes.search(plain_text) is not None
- enable_widechars = wcwidth is not None and WIDE_CHARS_MODE
- if (
- not isinstance(tablefmt, TableFormat)
- and tablefmt in multiline_formats
- and _is_multiline(plain_text)
- ):
- tablefmt = multiline_formats.get(tablefmt, tablefmt)
- is_multiline = True
- else:
- is_multiline = False
- width_fn = _choose_width_fn(has_invisible, enable_widechars, is_multiline)
- # format rows and columns, convert numeric values to strings
- cols = list(izip_longest(*list_of_lists))
- numparses = _expand_numparse(disable_numparse, len(cols))
- coltypes = [_column_type(col, numparse=np) for col, np in zip(cols, numparses)]
- if isinstance(floatfmt, str): # old version
- float_formats = len(cols) * [
- floatfmt
- ] # just duplicate the string to use in each column
- else: # if floatfmt is list, tuple etc we have one per column
- float_formats = list(floatfmt)
- if len(float_formats) < len(cols):
- float_formats.extend((len(cols) - len(float_formats)) * [_DEFAULT_FLOATFMT])
- if isinstance(intfmt, str): # old version
- int_formats = len(cols) * [
- intfmt
- ] # just duplicate the string to use in each column
- else: # if intfmt is list, tuple etc we have one per column
- int_formats = list(intfmt)
- if len(int_formats) < len(cols):
- int_formats.extend((len(cols) - len(int_formats)) * [_DEFAULT_INTFMT])
- if isinstance(missingval, str):
- missing_vals = len(cols) * [missingval]
- else:
- missing_vals = list(missingval)
- if len(missing_vals) < len(cols):
- missing_vals.extend((len(cols) - len(missing_vals)) * [_DEFAULT_MISSINGVAL])
- cols = [
- [_format(v, ct, fl_fmt, int_fmt, miss_v, has_invisible) for v in c]
- for c, ct, fl_fmt, int_fmt, miss_v in zip(
- cols, coltypes, float_formats, int_formats, missing_vals
- )
- ]
- # align columns
- aligns = [numalign if ct in [int, float] else stralign for ct in coltypes]
- if colalign is not None:
- assert isinstance(colalign, Iterable)
- for idx, align in enumerate(colalign):
- aligns[idx] = align
- minwidths = (
- [width_fn(h) + min_padding for h in headers] if headers else [0] * len(cols)
- )
- cols = [
- _align_column(c, a, minw, has_invisible, enable_widechars, is_multiline)
- for c, a, minw in zip(cols, aligns, minwidths)
- ]
- if headers:
- # align headers and add headers
- t_cols = cols or [[""]] * len(headers)
- t_aligns = aligns or [stralign] * len(headers)
- minwidths = [
- max(minw, max(width_fn(cl) for cl in c))
- for minw, c in zip(minwidths, t_cols)
- ]
- headers = [
- _align_header(h, a, minw, width_fn(h), is_multiline, width_fn)
- for h, a, minw in zip(headers, t_aligns, minwidths)
- ]
- rows = list(zip(*cols))
- else:
- minwidths = [max(width_fn(cl) for cl in c) for c in cols]
- rows = list(zip(*cols))
- if not isinstance(tablefmt, TableFormat):
- tablefmt = _table_formats.get(tablefmt, _table_formats["simple"])
- ra_default = rowalign if isinstance(rowalign, str) else None
- rowaligns = _expand_iterable(rowalign, len(rows), ra_default)
- _reinsert_separating_lines(rows, separating_lines)
- return _format_table(
- tablefmt, headers, rows, minwidths, aligns, is_multiline, rowaligns=rowaligns
- )
- def _expand_numparse(disable_numparse, column_count):
- """
- Return a list of bools of length `column_count` which indicates whether
- number parsing should be used on each column.
- If `disable_numparse` is a list of indices, each of those indices are False,
- and everything else is True.
- If `disable_numparse` is a bool, then the returned list is all the same.
- """
- if isinstance(disable_numparse, Iterable):
- numparses = [True] * column_count
- for index in disable_numparse:
- numparses[index] = False
- return numparses
- else:
- return [not disable_numparse] * column_count
- def _expand_iterable(original, num_desired, default):
- """
- Expands the `original` argument to return a return a list of
- length `num_desired`. If `original` is shorter than `num_desired`, it will
- be padded with the value in `default`.
- If `original` is not a list to begin with (i.e. scalar value) a list of
- length `num_desired` completely populated with `default will be returned
- """
- if isinstance(original, Iterable) and not isinstance(original, str):
- return original + [default] * (num_desired - len(original))
- else:
- return [default] * num_desired
- def _pad_row(cells, padding):
- if cells:
- pad = " " * padding
- padded_cells = [pad + cell + pad for cell in cells]
- return padded_cells
- else:
- return cells
- def _build_simple_row(padded_cells, rowfmt):
- "Format row according to DataRow format without padding."
- begin, sep, end = rowfmt
- return (begin + sep.join(padded_cells) + end).rstrip()
- def _build_row(padded_cells, colwidths, colaligns, rowfmt):
- "Return a string which represents a row of data cells."
- if not rowfmt:
- return None
- if hasattr(rowfmt, "__call__"):
- return rowfmt(padded_cells, colwidths, colaligns)
- else:
- return _build_simple_row(padded_cells, rowfmt)
def _append_basic_row(lines, padded_cells, colwidths, colaligns, rowfmt, rowalign=None):
    """Render a single-line row with `_build_row` and append it to `lines`.

    `rowalign` is accepted but ignored; it exists only so the signature is
    compatible with `_append_multiline_row`. Returns the same `lines` list.
    """
    rendered = _build_row(padded_cells, colwidths, colaligns, rowfmt)
    lines.append(rendered)
    return lines
- def _align_cell_veritically(text_lines, num_lines, column_width, row_alignment):
- delta_lines = num_lines - len(text_lines)
- blank = [" " * column_width]
- if row_alignment == "bottom":
- return blank * delta_lines + text_lines
- elif row_alignment == "center":
- top_delta = delta_lines // 2
- bottom_delta = delta_lines - top_delta
- return top_delta * blank + text_lines + bottom_delta * blank
- else:
- return text_lines + blank * delta_lines
def _append_multiline_row(
    lines, padded_multiline_cells, padded_widths, colaligns, rowfmt, pad, rowalign=None
):
    """Append one logical row whose cells may contain several text lines.

    Every cell is split into its text lines, all cells are brought to the
    same height with `_align_cell_veritically` (using `rowalign`), and the
    row is then emitted one physical line at a time: each line is padded
    with `_pad_row` and appended through `_append_basic_row`.
    Returns the same `lines` list.
    """
    # Widths without the padding that is re-added per physical line below.
    inner_widths = [width - 2 * pad for width in padded_widths]
    split_cells = [cell.splitlines() for cell in padded_multiline_cells]
    # Height of the row: the largest number of text lines in any cell.
    height = max(len(parts) for parts in split_cells)
    aligned_cells = [
        _align_cell_veritically(parts, height, width, rowalign)
        for parts, width in zip(split_cells, inner_widths)
    ]
    for line_index in range(height):
        fragments = [cell[line_index] for cell in aligned_cells]
        padded_fragments = _pad_row(fragments, pad)
        _append_basic_row(lines, padded_fragments, inner_widths, colaligns, rowfmt)
    return lines
- def _build_line(colwidths, colaligns, linefmt):
- "Return a string which represents a horizontal line."
- if not linefmt:
- return None
- if hasattr(linefmt, "__call__"):
- return linefmt(colwidths, colaligns)
- else:
- begin, fill, sep, end = linefmt
- cells = [fill * w for w in colwidths]
- return _build_simple_row(cells, (begin, sep, end))
def _append_line(lines, colwidths, colaligns, linefmt):
    """Build a horizontal line with `_build_line` and append it to `lines`.

    Returns the same `lines` list.
    """
    rendered = _build_line(colwidths, colaligns, linefmt)
    lines.append(rendered)
    return lines
class JupyterHTMLStr(str):
    """A `str` subclass carrying a `_repr_html_` method, so that Jupyter
    displays the HTML table instead of its source text."""

    def _repr_html_(self):
        """Return the string itself as the HTML representation."""
        return self

    @property
    def str(self):
        """Expose the raw string through a `.str` property so that it stays accessible."""
        return self
def _format_table(fmt, headers, rows, colwidths, colaligns, is_multiline, rowaligns):
    """Produce a plain-text representation of the table.

    Lines are emitted in this order: `fmt.lineabove`, the header row followed
    by `fmt.linebelowheader` (only when there are headers), the data rows, and
    `fmt.linebelow`. Each horizontal line is skipped when the format does not
    define it or when its name is listed in `fmt.with_header_hide` (which is
    consulted only if `headers` is non-empty).

    Returns the lines joined with newlines, wrapped in `JupyterHTMLStr` when
    `fmt.lineabove` is `_html_begin_table_without_header`. Returns "" when
    both `headers` and `rows` are empty.
    """
    lines = []
    # Names of the horizontal lines to suppress; only applies when headers exist.
    hidden = fmt.with_header_hide if (headers and fmt.with_header_hide) else []
    pad = fmt.padding
    headerrow = fmt.headerrow

    # Column widths including the padding on both sides of each cell.
    padded_widths = [(w + 2 * pad) for w in colwidths]
    if is_multiline:
        pad_row = lambda row, _: row  # noqa do it later, in _append_multiline_row
        append_row = partial(_append_multiline_row, pad=pad)
    else:
        pad_row = _pad_row
        append_row = _append_basic_row

    padded_headers = pad_row(headers, pad)
    padded_rows = [pad_row(row, pad) for row in rows]

    if fmt.lineabove and "lineabove" not in hidden:
        _append_line(lines, padded_widths, colaligns, fmt.lineabove)

    if padded_headers:
        append_row(lines, padded_headers, padded_widths, colaligns, headerrow)
        if fmt.linebelowheader and "linebelowheader" not in hidden:
            _append_line(lines, padded_widths, colaligns, fmt.linebelowheader)

    if padded_rows and fmt.linebetweenrows and "linebetweenrows" not in hidden:
        # initial rows with a line below
        for row, ralign in zip(padded_rows[:-1], rowaligns):
            append_row(
                lines, row, padded_widths, colaligns, fmt.datarow, rowalign=ralign
            )
            _append_line(lines, padded_widths, colaligns, fmt.linebetweenrows)
        # the last row without a line below
        append_row(
            lines,
            padded_rows[-1],
            padded_widths,
            colaligns,
            fmt.datarow,
            rowalign=rowaligns[-1],
        )
    else:
        # Line drawn in place of a separating-line row: the first line format
        # that is defined, falling back to an empty Line.
        separating_line = (
            fmt.linebetweenrows
            or fmt.linebelowheader
            or fmt.linebelow
            or fmt.lineabove
            or Line("", "", "", "")
        )
        for row in padded_rows:
            # test to see if either the 1st column or the 2nd column (account for showindex) has
            # the SEPARATING_LINE flag
            if _is_separating_line(row):
                _append_line(lines, padded_widths, colaligns, separating_line)
            else:
                # NOTE(review): `rowaligns` is not forwarded in this branch, so
                # rows are appended with the default rowalign=None -- confirm
                # whether that is intentional.
                append_row(lines, row, padded_widths, colaligns, fmt.datarow)

    if fmt.linebelow and "linebelow" not in hidden:
        _append_line(lines, padded_widths, colaligns, fmt.linebelow)

    if headers or rows:
        output = "\n".join(lines)
        if fmt.lineabove == _html_begin_table_without_header:
            return JupyterHTMLStr(output)
        else:
            return output
    else:  # a completely empty table
        return ""
class _CustomTextWrap(textwrap.TextWrapper):
    """A custom implementation of CPython's textwrap.TextWrapper. This supports
    wide characters (Korean, Japanese, Chinese), including mixed strings, and
    keeps track of ANSI color codes across wrapped lines.

    For the most part, the `_handle_long_word` and `_wrap_chunks` functions were
    copy pasted out of the CPython baseline, and updated with our custom length
    and line appending logic.
    """

    def __init__(self, *args, **kwargs):
        # ANSI codes that are still in effect at the end of the last line added
        self._active_codes = []
        self.max_lines = None  # For python2 compatibility
        textwrap.TextWrapper.__init__(self, *args, **kwargs)

    @staticmethod
    def _len(item):
        """Custom len that gets console column width for wide
        and non-wide characters as well as ignores color codes"""
        stripped = _strip_ansi(item)
        if wcwidth:
            return wcwidth.wcswidth(stripped)
        else:
            return len(stripped)

    def _update_lines(self, lines, new_line):
        """Adds a new line to the list of lines the text is being wrapped into

        This function will also track any ANSI color codes in this string as well
        as add any colors from previous lines in order to preserve the same
        formatting as a single unwrapped string.
        """
        color_codes = [match.group() for match in _ansi_codes.finditer(new_line)]

        # Add color codes from earlier in the unwrapped line, and then track any new ones we add.
        new_line = "".join(self._active_codes) + new_line

        for code in color_codes:
            if code != _ansi_color_reset_code:
                self._active_codes.append(code)
            else:  # A single reset code resets everything
                self._active_codes = []

        # Always ensure each line is color terminated if any colors are
        # still active, otherwise colors will bleed into other cells on the console
        if len(self._active_codes) > 0:
            new_line = new_line + _ansi_color_reset_code

        lines.append(new_line)

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            # Tabulate Custom: Build the string up piece-by-piece in order to
            # take each character's width into account
            chunk = reversed_chunks[-1]
            i = 1
            # Bound the scan by the chunk length so the loop terminates even
            # when the whole chunk fits into `space_left`.
            while i <= len(chunk) and self._len(chunk[:i]) <= space_left:
                i = i + 1
            split = i - 1
            # A character wider than the available space (e.g. a wide
            # character in a one-column cell) would otherwise never be
            # consumed and _wrap_chunks() would loop forever. When the
            # current line is still empty, take one character anyway and
            # exceed the width as little as possible.
            if split == 0 and not cur_line:
                split = 1
            cur_line.append(chunk[:split])
            reversed_chunks[-1] = chunk[split:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if self._len(indent) + self._len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:
            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - self._len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == "" and lines:
                del chunks[-1]

            while chunks:
                chunk_len = self._len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and self._len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = sum(map(self._len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == "":
                cur_len -= self._len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                if (
                    self.max_lines is None
                    or len(lines) + 1 < self.max_lines
                    or (
                        not chunks
                        or self.drop_whitespace
                        and len(chunks) == 1
                        and not chunks[0].strip()
                    )
                    and cur_len <= width
                ):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    self._update_lines(lines, indent + "".join(cur_line))
                else:
                    # Last permitted line: trim chunks until the placeholder
                    # fits, then stop wrapping.
                    while cur_line:
                        if (
                            cur_line[-1].strip()
                            and cur_len + self._len(self.placeholder) <= width
                        ):
                            cur_line.append(self.placeholder)
                            self._update_lines(lines, indent + "".join(cur_line))
                            break
                        cur_len -= self._len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (
                                self._len(prev_line) + self._len(self.placeholder)
                                <= self.width
                            ):
                                lines[-1] = prev_line + self.placeholder
                                break
                        self._update_lines(lines, indent + self.placeholder.lstrip())
                    break

        return lines
def _main():
    """\
    Usage: tabulate [options] [FILE ...]

    Pretty-print tabular data.
    See also https://github.com/astanin/python-tabulate

    FILE                      a filename of the file with tabular data;
                              if "-" or missing, read data from stdin.

    Options:

    -h, --help                show this message
    -1, --header              use the first row of data as a table header
    -o FILE, --output FILE    print table to FILE (default: stdout)
    -s REGEXP, --sep REGEXP   use a custom column separator (default: whitespace)
    -F FPFMT, --float FPFMT   floating point number format (default: g)
    -I INTFMT, --int INTFMT   integer point number format (default: "")
    -C COLALIGN, --colalign COLALIGN
                              whitespace-separated column alignments,
                              one per column (e.g. "left center")
    -f FMT, --format FMT      set output table format; supported formats:
                              plain, simple, grid, fancy_grid, pipe, orgtbl,
                              rst, mediawiki, html, latex, latex_raw,
                              latex_booktabs, latex_longtable, tsv
                              (default: simple)
    """
    # The docstring above doubles as the usage message printed to the user.
    import contextlib
    import getopt
    import sys
    import textwrap

    usage = textwrap.dedent(_main.__doc__)
    try:
        # Every option that takes a value needs ":" (short) or "=" (long).
        # -A/--align were declared but never handled before; they are kept
        # and treated as aliases of -C/--colalign.
        opts, args = getopt.getopt(
            sys.argv[1:],
            "h1o:s:F:I:A:C:f:",
            [
                "help",
                "header",
                "output=",
                "sep=",
                "float=",
                "int=",
                "align=",
                "colalign=",
                "format=",
            ],
        )
    except getopt.GetoptError as e:
        print(e)
        print(usage)
        sys.exit(2)

    headers = []
    floatfmt = _DEFAULT_FLOATFMT
    intfmt = _DEFAULT_INTFMT
    colalign = None
    tablefmt = "simple"
    sep = r"\s+"
    outfile = "-"
    for opt, value in opts:
        if opt in ["-1", "--header"]:
            headers = "firstrow"
        elif opt in ["-o", "--output"]:
            outfile = value
        elif opt in ["-F", "--float"]:
            floatfmt = value
        elif opt in ["-I", "--int"]:
            intfmt = value
        elif opt in ["-C", "--colalign", "-A", "--align"]:
            colalign = value.split()
        elif opt in ["-f", "--format"]:
            if value not in tabulate_formats:
                print("%s is not a supported table format" % value)
                print(usage)
                sys.exit(3)
            tablefmt = value
        elif opt in ["-s", "--sep"]:
            sep = value
        elif opt in ["-h", "--help"]:
            print(usage)
            sys.exit(0)

    files = [sys.stdin] if not args else args
    # Only a file opened here is registered for closing; sys.stdout is left
    # open for the rest of the process.
    with contextlib.ExitStack() as stack:
        if outfile == "-":
            out = sys.stdout
        else:
            out = stack.enter_context(open(outfile, "w"))
        options = dict(
            headers=headers,
            tablefmt=tablefmt,
            sep=sep,
            floatfmt=floatfmt,
            intfmt=intfmt,
            file=out,
            colalign=colalign,
        )
        for f in files:
            if f == "-":
                f = sys.stdin
            if _is_file(f):
                _pprint_file(f, **options)
            else:
                with open(f) as fobj:
                    _pprint_file(fobj, **options)
def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, intfmt, file, colalign):
    """Read tabular text from `fobject` and print it as a formatted table.

    Every line that is not blank is stripped of trailing whitespace and split
    into cells with the regular expression `sep`. The resulting rows are
    rendered by `tabulate` with the given formatting options and printed to
    `file`.
    """
    table = []
    for raw_line in fobject.readlines():
        if not raw_line.strip():
            continue  # skip blank lines
        table.append(re.split(sep, raw_line.rstrip()))
    rendered = tabulate(
        table,
        headers,
        tablefmt,
        floatfmt=floatfmt,
        intfmt=intfmt,
        colalign=colalign,
    )
    print(rendered, file=file)
# Run the command-line interface only when this module is executed as a script.
if __name__ == "__main__":
    _main()
|