wc.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. #!/usr/bin/env python3
  2. import sys, os
  3. import argparse
  4. import stringzilla
  5. from stringzilla import File, Str
  6. def parse_arguments():
  7. parser = argparse.ArgumentParser(
  8. description="Print newline, word, and byte counts for each FILE, and a total line if more than one FILE is \
  9. specified. A word is a non-zero-length sequence of acters delimited by white space."
  10. )
  11. parser.add_argument("files", nargs="*", default=["-"], help="Files to process")
  12. parser.add_argument(
  13. "-c", "--bytes", action="store_true", help="print the byte counts"
  14. )
  15. parser.add_argument(
  16. "-m", "--chars", action="store_true", help="print the character counts"
  17. )
  18. parser.add_argument(
  19. "-l", "--lines", action="store_true", help="print the newline counts"
  20. )
  21. parser.add_argument(
  22. "-L",
  23. "--max-line-length",
  24. action="store_true",
  25. help="print the maximum display width",
  26. )
  27. parser.add_argument(
  28. "-w", "--words", action="store_true", help="print the word counts"
  29. )
  30. parser.add_argument(
  31. "--files0-from",
  32. metavar="filename",
  33. help="Read input from the files specified by NUL-terminated names in file F;"
  34. " If F is - then read names from standard input",
  35. )
  36. parser.add_argument("--version", action="version", version=stringzilla.__version__)
  37. return parser.parse_args()
  38. def wc(file_path, args):
  39. if file_path == "-": # read from stdin
  40. content = sys.stdin.read()
  41. mapped_bytes = Str(content)
  42. else:
  43. try:
  44. mapped_file = File(file_path)
  45. mapped_bytes = Str(mapped_file)
  46. except RuntimeError: # File gives a RuntimeError if the file does not exist
  47. return f"No such file: {file_path}", False
  48. counts = {}
  49. if args.lines:
  50. counts["line_count"] = mapped_bytes.count("\n")
  51. if args.words:
  52. counts["word_count"] = mapped_bytes.count(" ") + 1
  53. if args.chars:
  54. counts["char_count"] = mapped_bytes.__len__()
  55. if args.max_line_length:
  56. max_line_length = max(len(line) for line in mapped_bytes.split("\n"))
  57. counts["max_line_length"] = max_line_length
  58. if args.bytes:
  59. if args.chars:
  60. counts["byte_count"] = counts["char_count"]
  61. else:
  62. counts["byte_count"] = mapped_bytes.__len__()
  63. return counts, True
  64. def format_output(counts, args, just):
  65. selected_counts = []
  66. if args.lines:
  67. selected_counts.append(counts["line_count"])
  68. if args.words:
  69. selected_counts.append(counts["word_count"])
  70. if args.chars:
  71. selected_counts.append(counts["char_count"])
  72. if args.bytes:
  73. selected_counts.append(counts["byte_count"])
  74. if args.max_line_length:
  75. selected_counts.append(counts.get("max_line_length", 0))
  76. return " ".join(str(count).rjust(just) for count in selected_counts)
  77. def get_files_from(fn):
  78. f = open(fn, "r")
  79. s = f.read()
  80. f.close()
  81. return [x for x in s.split("\0") if os.path.isfile(x)]
  82. def main():
  83. args = parse_arguments()
  84. total_counts = {
  85. "line_count": 0,
  86. "word_count": 0,
  87. "char_count": 0,
  88. "max_line_length": 0,
  89. "byte_count": 0,
  90. }
  91. if not any([args.lines, args.words, args.chars, args.bytes, args.max_line_length]):
  92. args.lines = 1
  93. args.words = 1
  94. args.bytes = 1
  95. # wc uses the file size to determine column width when printing
  96. if args.files0_from:
  97. if args.files[0] == "-":
  98. args.files = get_files_from(args.files0_from)
  99. if len(args.files) == 0:
  100. # print(" No filenames found in ", args.files0_from)
  101. exit(0)
  102. just = max(len(str(os.stat(fn).st_size)) for fn in args.files)
  103. for file_path in args.files:
  104. counts, success = wc(file_path, args)
  105. if success:
  106. for key in total_counts.keys():
  107. total_counts[key] += counts.get(key, 0)
  108. output = format_output(counts, args, just) + f" {file_path}"
  109. print(output)
  110. else:
  111. print(counts)
  112. if len(args.files) > 1:
  113. total_output = format_output(total_counts, args, just) + " total"
  114. print(total_output)
  115. if __name__ == "__main__":
  116. main()