regexremove.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
  1. """
  2. Module containing a preprocessor that removes cells if they match
  3. one or more regular expressions.
  4. """
  5. # Copyright (c) IPython Development Team.
  6. # Distributed under the terms of the Modified BSD License.
  7. from __future__ import annotations
  8. import re
  9. from traitlets import List, Unicode
  10. from .base import Preprocessor
  11. class RegexRemovePreprocessor(Preprocessor):
  12. """
  13. Removes cells from a notebook that match one or more regular expression.
  14. For each cell, the preprocessor checks whether its contents match
  15. the regular expressions in the ``patterns`` traitlet which is a list
  16. of unicode strings. If the contents match any of the patterns, the cell
  17. is removed from the notebook.
  18. To modify the list of matched patterns,
  19. modify the patterns traitlet. For example, execute the following command
  20. to convert a notebook to html and remove cells containing only whitespace::
  21. jupyter nbconvert --RegexRemovePreprocessor.patterns="['\\s*\\Z']" mynotebook.ipynb
  22. The command line argument
  23. sets the list of patterns to ``'\\s*\\Z'`` which matches an arbitrary number
  24. of whitespace characters followed by the end of the string.
  25. See https://regex101.com/ for an interactive guide to regular expressions
  26. (make sure to select the python flavor). See
  27. https://docs.python.org/library/re.html for the official regular expression
  28. documentation in python.
  29. """
  30. patterns = List(Unicode()).tag(config=True)
  31. def check_conditions(self, cell):
  32. """
  33. Checks that a cell matches the pattern.
  34. Returns: Boolean.
  35. True means cell should *not* be removed.
  36. """
  37. # Compile all the patterns into one: each pattern is first wrapped
  38. # by a non-capturing group to ensure the correct order of precedence
  39. # and the patterns are joined with a logical or
  40. pattern = re.compile("|".join("(?:%s)" % pattern for pattern in self.patterns))
  41. # Filter out cells that meet the pattern and have no outputs
  42. return not pattern.match(cell.source)
  43. def preprocess(self, nb, resources):
  44. """
  45. Preprocessing to apply to each notebook. See base.py for details.
  46. """
  47. # Skip preprocessing if the list of patterns is empty
  48. if not self.patterns:
  49. return nb, resources
  50. # Filter out cells that meet the conditions
  51. nb.cells = [cell for cell in nb.cells if self.check_conditions(cell)]
  52. return nb, resources