filemanager.py 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225
  1. """A contents manager that uses the local file system for storage."""
  2. # Copyright (c) Jupyter Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. from __future__ import annotations
  5. import asyncio
  6. import errno
  7. import math
  8. import mimetypes
  9. import os
  10. import platform
  11. import shutil
  12. import stat
  13. import subprocess
  14. import sys
  15. import typing as t
  16. import warnings
  17. from datetime import datetime
  18. from pathlib import Path
  19. import nbformat
  20. from anyio.to_thread import run_sync
  21. from jupyter_core.paths import exists, is_file_hidden, is_hidden
  22. from send2trash import send2trash
  23. from tornado import web
  24. from traitlets import Bool, Int, TraitError, Unicode, default, validate
  25. from jupyter_server import _tz as tz
  26. from jupyter_server.base.handlers import AuthenticatedFileHandler
  27. from jupyter_server.transutils import _i18n
  28. from jupyter_server.utils import to_api_path
  29. from .filecheckpoints import AsyncFileCheckpoints, FileCheckpoints
  30. from .fileio import AsyncFileManagerMixin, FileManagerMixin
  31. from .manager import AsyncContentsManager, ContentsManager, copy_pat
  32. try:
  33. from os.path import samefile
  34. except ImportError:
  35. # windows
  36. from jupyter_server.utils import samefile_simple as samefile # type:ignore[assignment]
  37. _script_exporter = None
  38. class FileContentsManager(FileManagerMixin, ContentsManager):
  39. """A file contents manager."""
    # Configurable root directory; _validate_root_dir() makes it absolute and
    # requires it to be an existing directory.
    root_dir = Unicode(config=True)

    # Configurable limit (in MB) that check_folder_size() enforces before a folder copy.
    max_copy_folder_size_mb = Int(500, config=True, help="The max folder size that can be copied")
  42. @default("root_dir")
  43. def _default_root_dir(self):
  44. if not self.parent:
  45. return os.getcwd()
  46. return self.parent.root_dir
  47. @validate("root_dir")
  48. def _validate_root_dir(self, proposal):
  49. value = proposal["value"]
  50. if not os.path.isabs(value):
  51. # If we receive a non-absolute path, make it absolute.
  52. value = os.path.abspath(value)
  53. if not os.path.isdir(value):
  54. raise TraitError("%r is not a directory" % value)
  55. return value
  56. @default("preferred_dir")
  57. def _default_preferred_dir(self):
  58. if not self.parent:
  59. return ""
  60. try:
  61. value = self.parent.preferred_dir
  62. if value == self.parent.root_dir:
  63. value = None
  64. except AttributeError:
  65. pass
  66. else:
  67. if value is not None:
  68. warnings.warn(
  69. "ServerApp.preferred_dir config is deprecated in jupyter-server 2.0. Use FileContentsManager.preferred_dir instead",
  70. FutureWarning,
  71. stacklevel=3,
  72. )
  73. try:
  74. path = Path(value)
  75. return path.relative_to(self.root_dir).as_posix()
  76. except ValueError:
  77. raise TraitError("%s is outside root contents directory" % value) from None
  78. return ""
  79. @validate("preferred_dir")
  80. def _validate_preferred_dir(self, proposal):
  81. # It should be safe to pass an API path through this method:
  82. proposal["value"] = to_api_path(proposal["value"], self.root_dir)
  83. return super()._validate_preferred_dir(proposal)
    @default("checkpoints_class")
    def _checkpoints_class_default(self):
        """Default value for the ``checkpoints_class`` trait: ``FileCheckpoints``."""
        return FileCheckpoints
    # Read by delete_file(): when true, paths are handed to send2trash instead
    # of being unlinked / removed.
    delete_to_trash = Bool(
        True,
        config=True,
        help="""If True (default), deleting files will send them to the
platform's trash/recycle bin, where they can be recovered. If False,
deleting files really deletes them.""",
    )

    # Read by delete_file(): when true, the "directory not empty" refusals are skipped.
    always_delete_dir = Bool(
        False,
        config=True,
        help="""If True, deleting a non-empty directory will always be allowed.
WARNING this may result in files being permanently removed; e.g. on Windows,
if the data size is too big for the trash/recycle bin the directory will be permanently
deleted. If False (default), the non-empty directory will be sent to the trash only
if safe. And if ``delete_to_trash`` is True, the directory won't be deleted.""",
    )
    @default("files_handler_class")
    def _files_handler_class_default(self):
        """Default value for the ``files_handler_class`` trait: ``AuthenticatedFileHandler``."""
        return AuthenticatedFileHandler
    @default("files_handler_params")
    def _files_handler_params_default(self):
        """Default value for the ``files_handler_params`` trait: ``{"path": self.root_dir}``."""
        return {"path": self.root_dir}
  109. def is_hidden(self, path):
  110. """Does the API style path correspond to a hidden directory or file?
  111. Parameters
  112. ----------
  113. path : str
  114. The path to check. This is an API path (`/` separated,
  115. relative to root_dir).
  116. Returns
  117. -------
  118. hidden : bool
  119. Whether the path exists and is hidden.
  120. """
  121. path = path.strip("/")
  122. os_path = self._get_os_path(path=path)
  123. return is_hidden(os_path, self.root_dir)
  124. def is_writable(self, path):
  125. """Does the API style path correspond to a writable directory or file?
  126. Parameters
  127. ----------
  128. path : str
  129. The path to check. This is an API path (`/` separated,
  130. relative to root_dir).
  131. Returns
  132. -------
  133. hidden : bool
  134. Whether the path exists and is writable.
  135. """
  136. path = path.strip("/")
  137. os_path = self._get_os_path(path=path)
  138. try:
  139. return os.access(os_path, os.W_OK)
  140. except OSError:
  141. self.log.error("Failed to check write permissions on %s", os_path)
  142. return False
  143. def file_exists(self, path):
  144. """Returns True if the file exists, else returns False.
  145. API-style wrapper for os.path.isfile
  146. Parameters
  147. ----------
  148. path : str
  149. The relative path to the file (with '/' as separator)
  150. Returns
  151. -------
  152. exists : bool
  153. Whether the file exists.
  154. """
  155. path = path.strip("/")
  156. os_path = self._get_os_path(path)
  157. return os.path.isfile(os_path)
  158. def dir_exists(self, path):
  159. """Does the API-style path refer to an extant directory?
  160. API-style wrapper for os.path.isdir
  161. Parameters
  162. ----------
  163. path : str
  164. The path to check. This is an API path (`/` separated,
  165. relative to root_dir).
  166. Returns
  167. -------
  168. exists : bool
  169. Whether the path is indeed a directory.
  170. """
  171. path = path.strip("/")
  172. os_path = self._get_os_path(path=path)
  173. return os.path.isdir(os_path)
  174. def exists(self, path):
  175. """Returns True if the path exists, else returns False.
  176. API-style wrapper for os.path.exists
  177. Parameters
  178. ----------
  179. path : str
  180. The API path to the file (with '/' as separator)
  181. Returns
  182. -------
  183. exists : bool
  184. Whether the target exists.
  185. """
  186. path = path.strip("/")
  187. os_path = self._get_os_path(path=path)
  188. return exists(os_path)
  189. def _base_model(self, path):
  190. """Build the common base of a contents model"""
  191. os_path = self._get_os_path(path)
  192. info = os.lstat(os_path)
  193. four_o_four = "file or directory does not exist: %r" % path
  194. if not self.allow_hidden and is_hidden(os_path, self.root_dir):
  195. self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path)
  196. raise web.HTTPError(404, four_o_four)
  197. try:
  198. # size of file
  199. size = info.st_size
  200. except (ValueError, OSError):
  201. self.log.warning("Unable to get size.")
  202. size = None
  203. try:
  204. last_modified = tz.utcfromtimestamp(info.st_mtime)
  205. except (ValueError, OSError):
  206. # Files can rarely have an invalid timestamp
  207. # https://github.com/jupyter/notebook/issues/2539
  208. # https://github.com/jupyter/notebook/issues/2757
  209. # Use the Unix epoch as a fallback so we don't crash.
  210. self.log.warning("Invalid mtime %s for %s", info.st_mtime, os_path)
  211. last_modified = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC)
  212. try:
  213. created = tz.utcfromtimestamp(info.st_ctime)
  214. except (ValueError, OSError): # See above
  215. self.log.warning("Invalid ctime %s for %s", info.st_ctime, os_path)
  216. created = datetime(1970, 1, 1, 0, 0, tzinfo=tz.UTC)
  217. # Create the base model.
  218. model = {}
  219. model["name"] = path.rsplit("/", 1)[-1]
  220. model["path"] = path
  221. model["last_modified"] = last_modified
  222. model["created"] = created
  223. model["content"] = None
  224. model["format"] = None
  225. model["mimetype"] = None
  226. model["size"] = size
  227. model["writable"] = self.is_writable(path)
  228. model["hash"] = None
  229. model["hash_algorithm"] = None
  230. return model
    def _dir_model(self, path, content=True):
        """Build a model for a directory

        if content is requested, will include a listing of the directory

        Raises a 404 ``web.HTTPError`` when the path is not a directory, or is
        hidden while ``allow_hidden`` is off.
        """
        os_path = self._get_os_path(path)

        four_o_four = "directory does not exist: %r" % path

        if not os.path.isdir(os_path):
            raise web.HTTPError(404, four_o_four)
        elif not self.allow_hidden and is_hidden(os_path, self.root_dir):
            self.log.info("Refusing to serve hidden directory %r, via 404 Error", os_path)
            raise web.HTTPError(404, four_o_four)

        model = self._base_model(path)
        model["type"] = "directory"
        model["size"] = None
        if content:
            # `contents` aliases model["content"], so appends below land in the model.
            model["content"] = contents = []
            # Keep the directory path: `os_path` is reused for each entry in the loop.
            os_dir = os_path
            for name in os.listdir(os_dir):
                try:
                    os_path = os.path.join(os_dir, name)
                except UnicodeDecodeError as e:
                    self.log.warning("failed to decode filename '%s': %r", name, e)
                    continue

                try:
                    st = os.lstat(os_path)
                except OSError as e:
                    # skip over broken symlinks in listing
                    if e.errno == errno.ENOENT:
                        self.log.warning("%s doesn't exist", os_path)
                    elif e.errno != errno.EACCES:  # Don't provide clues about protected files
                        self.log.warning("Error stat-ing %s: %r", os_path, e)
                    continue

                # Only symlinks, regular files and directories are listed.
                if (
                    not stat.S_ISLNK(st.st_mode)
                    and not stat.S_ISREG(st.st_mode)
                    and not stat.S_ISDIR(st.st_mode)
                ):
                    self.log.debug("%s not a regular file", os_path)
                    continue

                try:
                    if self.should_list(name) and (
                        self.allow_hidden or not is_file_hidden(os_path, stat_res=st)
                    ):
                        # Each entry is fetched through get() without its content.
                        contents.append(self.get(path=f"{path}/{name}", content=False))
                except OSError as e:
                    # ELOOP: recursive symlink, also don't show failure due to permissions
                    if e.errno not in [errno.ELOOP, errno.EACCES]:
                        self.log.warning(
                            "Unknown error checking if file %r is hidden",
                            os_path,
                            exc_info=True,
                        )

            model["format"] = "json"

        return model
  285. def _file_model(self, path, content=True, format=None, require_hash=False):
  286. """Build a model for a file
  287. if content is requested, include the file contents.
  288. format:
  289. If 'text', the contents will be decoded as UTF-8.
  290. If 'base64', the raw bytes contents will be encoded as base64.
  291. If not specified, try to decode as UTF-8, and fall back to base64
  292. if require_hash is true, the model will include 'hash'
  293. """
  294. model = self._base_model(path)
  295. model["type"] = "file"
  296. os_path = self._get_os_path(path)
  297. model["mimetype"] = mimetypes.guess_type(os_path)[0]
  298. bytes_content = None
  299. if content:
  300. content, format, bytes_content = self._read_file(os_path, format, raw=True) # type: ignore[misc]
  301. if model["mimetype"] is None:
  302. default_mime = {
  303. "text": "text/plain",
  304. "base64": "application/octet-stream",
  305. }[format]
  306. model["mimetype"] = default_mime
  307. model.update(
  308. content=content,
  309. format=format,
  310. )
  311. if require_hash:
  312. if bytes_content is None:
  313. bytes_content, _ = self._read_file(os_path, "byte") # type: ignore[assignment,misc]
  314. model.update(**self._get_hash(bytes_content)) # type: ignore[arg-type]
  315. return model
  316. def _notebook_model(self, path, content=True, require_hash=False):
  317. """Build a notebook model
  318. if content is requested, the notebook content will be populated
  319. as a JSON structure (not double-serialized)
  320. if require_hash is true, the model will include 'hash'
  321. """
  322. model = self._base_model(path)
  323. model["type"] = "notebook"
  324. os_path = self._get_os_path(path)
  325. bytes_content = None
  326. if content:
  327. validation_error: dict[str, t.Any] = {}
  328. nb, bytes_content = self._read_notebook(
  329. os_path, as_version=4, capture_validation_error=validation_error, raw=True
  330. )
  331. self.mark_trusted_cells(nb, path)
  332. model["content"] = nb
  333. model["format"] = "json"
  334. self.validate_notebook_model(model, validation_error)
  335. if require_hash:
  336. if bytes_content is None:
  337. bytes_content, _ = self._read_file(os_path, "byte") # type: ignore[misc]
  338. model.update(**self._get_hash(bytes_content)) # type: ignore[arg-type]
  339. return model
  340. def get(self, path, content=True, type=None, format=None, require_hash=False):
  341. """Takes a path for an entity and returns its model
  342. Parameters
  343. ----------
  344. path : str
  345. the API path that describes the relative path for the target
  346. content : bool
  347. Whether to include the contents in the reply
  348. type : str, optional
  349. The requested type - 'file', 'notebook', or 'directory'.
  350. Will raise HTTPError 400 if the content doesn't match.
  351. format : str, optional
  352. The requested format for file contents. 'text' or 'base64'.
  353. Ignored if this returns a notebook or directory model.
  354. require_hash: bool, optional
  355. Whether to include the hash of the file contents.
  356. Returns
  357. -------
  358. model : dict
  359. the contents model. If content=True, returns the contents
  360. of the file or directory as well.
  361. """
  362. path = path.strip("/")
  363. os_path = self._get_os_path(path)
  364. four_o_four = "file or directory does not exist: %r" % path
  365. if not self.exists(path):
  366. raise web.HTTPError(404, four_o_four)
  367. if not self.allow_hidden and is_hidden(os_path, self.root_dir):
  368. self.log.info("Refusing to serve hidden file or directory %r, via 404 Error", os_path)
  369. raise web.HTTPError(404, four_o_four)
  370. if os.path.isdir(os_path):
  371. if type not in (None, "directory"):
  372. raise web.HTTPError(
  373. 400,
  374. f"{path} is a directory, not a {type}",
  375. reason="bad type",
  376. )
  377. model = self._dir_model(path, content=content)
  378. elif type == "notebook" or (type is None and path.endswith(".ipynb")):
  379. model = self._notebook_model(path, content=content, require_hash=require_hash)
  380. else:
  381. if type == "directory":
  382. raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
  383. model = self._file_model(
  384. path, content=content, format=format, require_hash=require_hash
  385. )
  386. self.emit(data={"action": "get", "path": path})
  387. return model
  388. def _save_directory(self, os_path, model, path=""):
  389. """create a directory"""
  390. if not self.allow_hidden and is_hidden(os_path, self.root_dir):
  391. raise web.HTTPError(400, "Cannot create directory %r" % os_path)
  392. if not os.path.exists(os_path):
  393. with self.perm_to_403():
  394. os.mkdir(os_path)
  395. elif not os.path.isdir(os_path):
  396. raise web.HTTPError(400, "Not a directory: %s" % (os_path))
  397. else:
  398. self.log.debug("Directory %r already exists", os_path)
    def save(self, model, path=""):
        """Save the file model and return the model with no content.

        Raises a 400 ``web.HTTPError`` for a model without a type, a
        non-directory model without content, a hidden target (when
        ``allow_hidden`` is off) or an unhandled type. Any other exception
        raised while writing is logged and re-raised as a 500.
        """
        path = path.strip("/")

        # Pre-save hooks run before the model is checked for 'type' / 'content'.
        self.run_pre_save_hooks(model=model, path=path)

        if "type" not in model:
            raise web.HTTPError(400, "No file type provided")
        if "content" not in model and model["type"] != "directory":
            raise web.HTTPError(400, "No file content provided")

        os_path = self._get_os_path(path)

        if not self.allow_hidden and is_hidden(os_path, self.root_dir):
            raise web.HTTPError(400, f"Cannot create file or directory {os_path!r}")

        self.log.debug("Saving %s", os_path)

        # Passed to _save_notebook as capture_validation_error and read back below.
        validation_error: dict[str, t.Any] = {}

        try:
            if model["type"] == "notebook":
                nb = nbformat.from_dict(model["content"])
                self.check_and_sign(nb, path)
                self._save_notebook(os_path, nb, capture_validation_error=validation_error)
                # One checkpoint should always exist for notebooks.
                if not self.checkpoints.list_checkpoints(path):
                    self.create_checkpoint(path)
            elif model["type"] == "file":
                # Missing format will be handled internally by _save_file.
                self._save_file(os_path, model["content"], model.get("format"))
            elif model["type"] == "directory":
                self._save_directory(os_path, model, path)
            else:
                raise web.HTTPError(400, "Unhandled contents type: %s" % model["type"])
        except web.HTTPError:
            # HTTP errors raised above pass through unchanged.
            raise
        except Exception as e:
            self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
            raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e

        validation_message = None
        if model["type"] == "notebook":
            self.validate_notebook_model(model, validation_error=validation_error)
            # Read the message now: `model` is rebound to a fresh model just below.
            validation_message = model.get("message", None)

        model = self.get(path, content=False)
        if validation_message:
            model["message"] = validation_message

        self.run_post_save_hooks(model=model, os_path=os_path)
        self.emit(data={"action": "save", "path": path})
        return model
  442. def delete_file(self, path):
  443. """Delete file at path."""
  444. path = path.strip("/")
  445. os_path = self._get_os_path(path)
  446. rm = os.unlink
  447. if not self.allow_hidden and is_hidden(os_path, self.root_dir):
  448. raise web.HTTPError(400, f"Cannot delete file or directory {os_path!r}")
  449. four_o_four = "file or directory does not exist: %r" % path
  450. if not self.exists(path):
  451. raise web.HTTPError(404, four_o_four)
  452. def is_non_empty_dir(os_path):
  453. if os.path.isdir(os_path):
  454. # A directory containing only leftover checkpoints is
  455. # considered empty.
  456. cp_dir = getattr(self.checkpoints, "checkpoint_dir", None)
  457. if set(os.listdir(os_path)) - {cp_dir}:
  458. return True
  459. return False
  460. if self.delete_to_trash:
  461. if not self.always_delete_dir and sys.platform == "win32" and is_non_empty_dir(os_path):
  462. # send2trash can really delete files on Windows, so disallow
  463. # deleting non-empty files. See Github issue 3631.
  464. raise web.HTTPError(400, "Directory %s not empty" % os_path)
  465. # send2trash now supports deleting directories. see #1290
  466. if not self.is_writable(path):
  467. raise web.HTTPError(403, "Permission denied: %s" % path) from None
  468. self.log.debug("Sending %s to trash", os_path)
  469. try:
  470. send2trash(os_path)
  471. except OSError as e:
  472. raise web.HTTPError(400, "send2trash failed: %s" % e) from e
  473. return
  474. if os.path.isdir(os_path):
  475. # Don't permanently delete non-empty directories.
  476. if not self.always_delete_dir and is_non_empty_dir(os_path):
  477. raise web.HTTPError(400, "Directory %s not empty" % os_path)
  478. self.log.debug("Removing directory %s", os_path)
  479. with self.perm_to_403():
  480. shutil.rmtree(os_path)
  481. else:
  482. self.log.debug("Unlinking file %s", os_path)
  483. with self.perm_to_403():
  484. rm(os_path)
  485. def rename_file(self, old_path, new_path):
  486. """Rename a file."""
  487. old_path = old_path.strip("/")
  488. new_path = new_path.strip("/")
  489. if new_path == old_path:
  490. return
  491. new_os_path = self._get_os_path(new_path)
  492. old_os_path = self._get_os_path(old_path)
  493. if not self.allow_hidden and (
  494. is_hidden(old_os_path, self.root_dir) or is_hidden(new_os_path, self.root_dir)
  495. ):
  496. raise web.HTTPError(400, f"Cannot rename file or directory {old_os_path!r}")
  497. # Should we proceed with the move?
  498. if os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path):
  499. raise web.HTTPError(409, "File already exists: %s" % new_path)
  500. # Move the file
  501. try:
  502. with self.perm_to_403():
  503. shutil.move(old_os_path, new_os_path)
  504. except web.HTTPError:
  505. raise
  506. except FileNotFoundError:
  507. raise web.HTTPError(404, f"File or directory does not exist: {old_path}") from None
  508. except Exception as e:
  509. raise web.HTTPError(500, f"Unknown error renaming file: {old_path} {e}") from e
  510. def info_string(self):
  511. """Get the information string for the manager."""
  512. return _i18n("Serving notebooks from local directory: %s") % self.root_dir
  513. def get_kernel_path(self, path, model=None):
  514. """Return the initial API path of a kernel associated with a given notebook"""
  515. if self.dir_exists(path):
  516. return path
  517. parent_dir = path.rsplit("/", 1)[0] if "/" in path else ""
  518. return parent_dir
  519. def copy(self, from_path, to_path=None):
  520. """
  521. Copy an existing file or directory and return its new model.
  522. If to_path not specified, it will be the parent directory of from_path.
  523. If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`.
  524. Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
  525. For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.
  526. from_path must be a full path to a file or directory.
  527. """
  528. to_path_original = str(to_path)
  529. path = from_path.strip("/")
  530. if to_path is not None:
  531. to_path = to_path.strip("/")
  532. if "/" in path:
  533. from_dir, from_name = path.rsplit("/", 1)
  534. else:
  535. from_dir = ""
  536. from_name = path
  537. model = self.get(path)
  538. # limit the size of folders being copied to prevent a timeout error
  539. if model["type"] == "directory":
  540. self.check_folder_size(path)
  541. else:
  542. # let the super class handle copying files
  543. return super().copy(from_path=from_path, to_path=to_path)
  544. is_destination_specified = to_path is not None
  545. to_name = copy_pat.sub(".", from_name)
  546. if not is_destination_specified:
  547. to_path = from_dir
  548. if self.dir_exists(to_path):
  549. name = copy_pat.sub(".", from_name)
  550. to_name = super().increment_filename(name, to_path, insert="-Copy")
  551. to_path = f"{to_path}/{to_name}"
  552. return self._copy_dir(
  553. from_path=from_path,
  554. to_path_original=to_path_original,
  555. to_name=to_name,
  556. to_path=to_path,
  557. )
  558. def _copy_dir(self, from_path, to_path_original, to_name, to_path):
  559. """
  560. handles copying directories
  561. returns the model for the copied directory
  562. """
  563. try:
  564. os_from_path = self._get_os_path(from_path.strip("/"))
  565. os_to_path = f'{self._get_os_path(to_path_original.strip("/"))}/{to_name}'
  566. shutil.copytree(os_from_path, os_to_path)
  567. model = self.get(to_path, content=False)
  568. except OSError as err:
  569. self.log.error(f"OSError in _copy_dir: {err}")
  570. raise web.HTTPError(
  571. 400,
  572. f"Can't copy '{from_path}' into Folder '{to_path}'",
  573. ) from err
  574. return model
  575. def check_folder_size(self, path):
  576. """
  577. limit the size of folders being copied to be no more than the
  578. trait max_copy_folder_size_mb to prevent a timeout error
  579. """
  580. limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024
  581. size = int(self._get_dir_size(self._get_os_path(path)))
  582. # convert from KB to Bytes for macOS
  583. size = size * 1024 if platform.system() == "Darwin" else size
  584. if size > limit_bytes:
  585. raise web.HTTPError(
  586. 400,
  587. f"""
  588. Can't copy folders larger than {self.max_copy_folder_size_mb}MB,
  589. "{path}" is {self._human_readable_size(size)}
  590. """,
  591. )
  592. def _get_dir_size(self, path="."):
  593. """
  594. calls the command line program du to get the directory size
  595. """
  596. try:
  597. if platform.system() == "Darwin":
  598. # returns the size of the folder in KB
  599. result = subprocess.run(
  600. ["du", "-sk", path], # noqa: S607
  601. capture_output=True,
  602. check=True,
  603. ).stdout.split()
  604. else:
  605. result = subprocess.run(
  606. ["du", "-s", "--block-size=1", path], # noqa: S607
  607. capture_output=True,
  608. check=True,
  609. ).stdout.split()
  610. self.log.info(f"current status of du command {result}")
  611. size = result[0].decode("utf-8")
  612. except Exception:
  613. self.log.warning(
  614. "Not able to get the size of the %s directory. Copying might be slow if the directory is large!",
  615. path,
  616. )
  617. return "0"
  618. return size
  619. def _human_readable_size(self, size):
  620. """
  621. returns folder size in a human readable format
  622. """
  623. if size == 0:
  624. return "0 Bytes"
  625. units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
  626. order = int(math.log2(size) / 10) if size else 0
  627. return f"{size / (1 << (order * 10)):.4g} {units[order]}"
  628. class AsyncFileContentsManager(FileContentsManager, AsyncFileManagerMixin, AsyncContentsManager):
  629. """An async file contents manager."""
    @default("checkpoints_class")
    def _checkpoints_class_default(self):
        """Default value for the ``checkpoints_class`` trait: ``AsyncFileCheckpoints``."""
        return AsyncFileCheckpoints
    async def _dir_model(self, path, content=True):
        """Build a model for a directory

        if content is requested, will include a listing of the directory

        Raises a 404 ``web.HTTPError`` when the path is not a directory, or is
        hidden while ``allow_hidden`` is off.
        """
        os_path = self._get_os_path(path)

        four_o_four = "directory does not exist: %r" % path

        if not os.path.isdir(os_path):
            raise web.HTTPError(404, four_o_four)
        elif not self.allow_hidden and is_hidden(os_path, self.root_dir):
            self.log.info("Refusing to serve hidden directory %r, via 404 Error", os_path)
            raise web.HTTPError(404, four_o_four)

        model = self._base_model(path)
        model["type"] = "directory"
        model["size"] = None
        if content:
            # `contents` aliases model["content"], so appends below land in the model.
            model["content"] = contents = []
            # Keep the directory path: `os_path` is reused for each entry in the loop.
            os_dir = os_path
            # The listing and the per-entry lstat go through run_sync (anyio.to_thread).
            dir_contents = await run_sync(os.listdir, os_dir)
            for name in dir_contents:
                try:
                    os_path = os.path.join(os_dir, name)
                except UnicodeDecodeError as e:
                    self.log.warning("failed to decode filename '%s': %r", name, e)
                    continue

                try:
                    st = await run_sync(os.lstat, os_path)
                except OSError as e:
                    # skip over broken symlinks in listing
                    if e.errno == errno.ENOENT:
                        self.log.warning("%s doesn't exist", os_path)
                    elif e.errno != errno.EACCES:  # Don't provide clues about protected files
                        self.log.warning("Error stat-ing %s: %r", os_path, e)
                    continue

                # Only symlinks, regular files and directories are listed.
                if (
                    not stat.S_ISLNK(st.st_mode)
                    and not stat.S_ISREG(st.st_mode)
                    and not stat.S_ISDIR(st.st_mode)
                ):
                    self.log.debug("%s not a regular file", os_path)
                    continue

                try:
                    if self.should_list(name) and (
                        self.allow_hidden or not is_file_hidden(os_path, stat_res=st)
                    ):
                        # Each entry is fetched through get() without its content.
                        contents.append(await self.get(path=f"{path}/{name}", content=False))
                except OSError as e:
                    # ELOOP: recursive symlink, also don't show failure due to permissions
                    if e.errno not in [errno.ELOOP, errno.EACCES]:
                        self.log.warning(
                            "Unknown error checking if file %r is hidden",
                            os_path,
                            exc_info=True,
                        )

            model["format"] = "json"

        return model
  688. async def _file_model(self, path, content=True, format=None, require_hash=False):
  689. """Build a model for a file
  690. if content is requested, include the file contents.
  691. format:
  692. If 'text', the contents will be decoded as UTF-8.
  693. If 'base64', the raw bytes contents will be encoded as base64.
  694. If not specified, try to decode as UTF-8, and fall back to base64
  695. if require_hash is true, the model will include 'hash'
  696. """
  697. model = self._base_model(path)
  698. model["type"] = "file"
  699. os_path = self._get_os_path(path)
  700. model["mimetype"] = mimetypes.guess_type(os_path)[0]
  701. bytes_content = None
  702. if content:
  703. content, format, bytes_content = await self._read_file(os_path, format, raw=True) # type: ignore[misc]
  704. if model["mimetype"] is None:
  705. default_mime = {
  706. "text": "text/plain",
  707. "base64": "application/octet-stream",
  708. }[format]
  709. model["mimetype"] = default_mime
  710. model.update(
  711. content=content,
  712. format=format,
  713. )
  714. if require_hash:
  715. if bytes_content is None:
  716. bytes_content, _ = await self._read_file(os_path, "byte") # type: ignore[assignment,misc]
  717. model.update(**self._get_hash(bytes_content)) # type: ignore[arg-type]
  718. return model
  719. async def _notebook_model(self, path, content=True, require_hash=False):
  720. """Build a notebook model
  721. if content is requested, the notebook content will be populated
  722. as a JSON structure (not double-serialized)
  723. """
  724. model = self._base_model(path)
  725. model["type"] = "notebook"
  726. os_path = self._get_os_path(path)
  727. bytes_content = None
  728. if content:
  729. validation_error: dict[str, t.Any] = {}
  730. nb, bytes_content = await self._read_notebook(
  731. os_path, as_version=4, capture_validation_error=validation_error, raw=True
  732. )
  733. self.mark_trusted_cells(nb, path)
  734. model["content"] = nb
  735. model["format"] = "json"
  736. self.validate_notebook_model(model, validation_error)
  737. if require_hash:
  738. if bytes_content is None:
  739. bytes_content, _ = await self._read_file(os_path, "byte") # type: ignore[misc]
  740. model.update(**(self._get_hash(bytes_content))) # type: ignore[arg-type]
  741. return model
  async def get(self, path, content=True, type=None, format=None, require_hash=False):
      """Return the contents model for the entity at ``path``.

      Parameters
      ----------
      path : str
          API path (relative) of the target; surrounding slashes are ignored.
      content : bool
          Whether the model should carry the contents of the entity.
      type : str, optional
          Expected type: 'file', 'notebook', or 'directory'.  A mismatch
          with what is found on disk raises HTTPError 400.
      format : str, optional
          Requested format for file contents, 'text' or 'base64'.  Only
          forwarded when a file model is built.
      require_hash : bool, optional
          Whether the hash of the file contents should be included.

      Returns
      -------
      model : dict
          The contents model; with ``content=True`` it also holds the
          contents of the file or directory.

      Raises
      ------
      web.HTTPError
          404 when nothing exists at ``path``; 400 when ``type`` does not
          match the entity.
      """
      path = path.strip("/")

      if not self.exists(path):
          raise web.HTTPError(404, "No such file or directory: %s" % path)

      target_is_dir = os.path.isdir(self._get_os_path(path))

      # Reject type mismatches up front.
      if target_is_dir and type not in (None, "directory"):
          raise web.HTTPError(
              400,
              f"{path} is a directory, not a {type}",
              reason="bad type",
          )
      if not target_is_dir and type == "directory":
          raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")

      if target_is_dir:
          model = await self._dir_model(path, content=content)
      elif type == "notebook" or (type is None and path.endswith(".ipynb")):
          model = await self._notebook_model(path, content=content, require_hash=require_hash)
      else:
          model = await self._file_model(
              path, content=content, format=format, require_hash=require_hash
          )

      self.emit(data={"action": "get", "path": path})
      return model
  async def _save_directory(self, os_path, model, path=""):
      """Make sure a directory exists at ``os_path``, creating it if needed.

      Raises HTTPError 400 when the directory is hidden (and hidden entries
      are not allowed) or when ``os_path`` exists but is not a directory.
      """
      hidden_refused = not self.allow_hidden and is_hidden(os_path, self.root_dir)
      if hidden_refused:
          raise web.HTTPError(400, "Cannot create hidden directory %r" % os_path)

      if not os.path.exists(os_path):
          with self.perm_to_403():
              await run_sync(os.mkdir, os_path)
          return

      if os.path.isdir(os_path):
          self.log.debug("Directory %r already exists", os_path)
          return

      # Something exists at the path, but it is not a directory.
      raise web.HTTPError(400, "Not a directory: %s" % (os_path))
  async def save(self, model, path=""):
      """Persist ``model`` at ``path`` and return the saved model without content.

      The pre-save hooks run first, then the model is written according to
      its ``type`` ('notebook', 'file' or 'directory'), and finally the
      post-save hooks run and a "save" event is emitted.

      Raises
      ------
      web.HTTPError
          400 when the type or content is missing or the type is unknown;
          500 when an unexpected error occurs while writing.
      """
      path = path.strip("/")
      self.run_pre_save_hooks(model=model, path=path)

      if "type" not in model:
          raise web.HTTPError(400, "No file type provided")
      model_type = model["type"]
      if model_type != "directory" and "content" not in model:
          raise web.HTTPError(400, "No file content provided")

      os_path = self._get_os_path(path)
      self.log.debug("Saving %s", os_path)

      validation_error: dict[str, t.Any] = {}
      try:
          if model_type == "notebook":
              notebook = nbformat.from_dict(model["content"])
              self.check_and_sign(notebook, path)
              await self._save_notebook(
                  os_path, notebook, capture_validation_error=validation_error
              )
              # One checkpoint should always exist for notebooks.
              existing_checkpoints = await self.checkpoints.list_checkpoints(path)
              if not existing_checkpoints:
                  await self.create_checkpoint(path)
          elif model_type == "file":
              # Missing format will be handled internally by _save_file.
              await self._save_file(os_path, model["content"], model.get("format"))
          elif model_type == "directory":
              await self._save_directory(os_path, model, path)
          else:
              raise web.HTTPError(400, "Unhandled contents type: %s" % model_type)
      except web.HTTPError:
          raise
      except Exception as e:
          self.log.error("Error while saving file: %s %s", path, e, exc_info=True)
          raise web.HTTPError(500, f"Unexpected error while saving file: {path} {e}") from e

      validation_message = None
      if model_type == "notebook":
          self.validate_notebook_model(model, validation_error=validation_error)
          validation_message = model.get("message", None)

      # Re-read the entity so the returned model reflects what is on disk.
      saved_model = await self.get(path, content=False)
      if validation_message:
          saved_model["message"] = validation_message

      self.run_post_save_hooks(model=saved_model, os_path=os_path)
      self.emit(data={"action": "save", "path": path})
      return saved_model
  async def delete_file(self, path):
      """Delete the file or directory at the API path ``path``.

      Depending on ``delete_to_trash`` the entity is either sent to the
      trash or removed permanently.  Unless ``always_delete_dir`` is set,
      non-empty directories are refused when removing permanently (and, on
      Windows, when trashing).

      Raises
      ------
      web.HTTPError
          400 for hidden targets (when hidden entries are not allowed),
          non-empty directories, or a failing send2trash; 403 when the
          target is not writable; 404 when nothing exists at the path.
      """
      path = path.strip("/")
      os_path = self._get_os_path(path)

      if not self.allow_hidden and is_hidden(os_path, self.root_dir):
          raise web.HTTPError(400, f"Cannot delete file or directory {os_path!r}")
      if not os.path.exists(os_path):
          raise web.HTTPError(404, "File or directory does not exist: %s" % os_path)

      async def holds_user_entries(target):
          """Tell whether ``target`` is a directory with more than leftover checkpoints."""
          if not os.path.isdir(target):
              return False
          # A directory containing only leftover checkpoints is
          # considered empty.
          checkpoint_dirname = getattr(self.checkpoints, "checkpoint_dir", None)
          entries = set(await run_sync(os.listdir, target))
          return bool(entries - {checkpoint_dirname})

      if self.delete_to_trash:
          windows_guard = not self.always_delete_dir and sys.platform == "win32"
          if windows_guard and await holds_user_entries(os_path):
              # send2trash can really delete files on Windows, so disallow
              # deleting non-empty directories. See Github issue 3631.
              raise web.HTTPError(400, "Directory %s not empty" % os_path)

          # send2trash now supports deleting directories. see #1290
          if not self.is_writable(path):
              raise web.HTTPError(403, "Permission denied: %s" % path) from None

          self.log.debug("Sending %s to trash", os_path)
          try:
              send2trash(os_path)
          except OSError as e:
              raise web.HTTPError(400, "send2trash failed: %s" % e) from e
          return

      if not os.path.isdir(os_path):
          self.log.debug("Unlinking file %s", os_path)
          with self.perm_to_403():
              await run_sync(os.unlink, os_path)
          return

      # Don't permanently delete non-empty directories.
      if not self.always_delete_dir and await holds_user_entries(os_path):
          raise web.HTTPError(400, "Directory %s not empty" % os_path)
      self.log.debug("Removing directory %s", os_path)
      with self.perm_to_403():
          await run_sync(shutil.rmtree, os_path)
  async def rename_file(self, old_path, new_path):
      """Move the file or directory at ``old_path`` to ``new_path``.

      Nothing happens when both paths are equal once surrounding slashes
      are stripped.

      Raises
      ------
      web.HTTPError
          400 when either path is hidden (and hidden entries are not
          allowed); 409 when a different entity already exists at the
          destination; 404 when the source is missing; 500 on any other
          failure while moving.
      """
      old_path = old_path.strip("/")
      new_path = new_path.strip("/")
      if new_path == old_path:
          return

      new_os_path = self._get_os_path(new_path)
      old_os_path = self._get_os_path(old_path)

      if not self.allow_hidden:
          touches_hidden = is_hidden(old_os_path, self.root_dir) or is_hidden(
              new_os_path, self.root_dir
          )
          if touches_hidden:
              raise web.HTTPError(400, f"Cannot rename file or directory {old_os_path!r}")

      # Should we proceed with the move?
      destination_taken = os.path.exists(new_os_path) and not samefile(old_os_path, new_os_path)
      if destination_taken:
          raise web.HTTPError(409, "File already exists: %s" % new_path)

      # Move the file
      try:
          with self.perm_to_403():
              await run_sync(shutil.move, old_os_path, new_os_path)
      except web.HTTPError:
          raise
      except FileNotFoundError:
          raise web.HTTPError(404, f"File or directory does not exist: {old_path}") from None
      except Exception as e:
          raise web.HTTPError(500, f"Unknown error renaming file: {old_path} {e}") from e
  async def dir_exists(self, path):
      """Report whether the API path ``path`` points at an existing directory."""
      api_path = path.strip("/")
      return os.path.isdir(self._get_os_path(path=api_path))
  async def file_exists(self, path):
      """Report whether the API path ``path`` points at an existing regular file."""
      api_path = path.strip("/")
      return os.path.isfile(self._get_os_path(api_path))
  async def is_hidden(self, path):
      """Report whether the API path ``path`` is a hidden file or directory.

      The check is delegated to the module-level ``is_hidden`` helper,
      evaluated relative to ``self.root_dir``.
      """
      api_path = path.strip("/")
      target = self._get_os_path(path=api_path)
      return is_hidden(target, self.root_dir)
  async def get_kernel_path(self, path, model=None):
      """Return the initial API path of a kernel associated with a given notebook.

      An existing directory is returned unchanged; otherwise the result is
      everything before the last "/" of ``path`` (the empty string when
      ``path`` contains no "/").
      """
      if await self.dir_exists(path):
          return path
      parent_dir, _sep, _name = path.rpartition("/")
      return parent_dir
  async def copy(self, from_path, to_path=None):
      """
      Copy an existing file or directory and return its new model.

      If to_path not specified, it will be the parent directory of from_path.
      If copying a file and to_path is a directory, filename/directoryname will increment `from_path-Copy#.ext`.
      Considering multi-part extensions, the Copy# part will be placed before the first dot for all the extensions except `ipynb`.
      For easier manual searching in case of notebooks, the Copy# part will be placed before the last dot.

      from_path must be a full path to a file or directory.

      Files are delegated to ``AsyncContentsManager.copy``; directories are
      size-checked with ``check_folder_size`` and then copied by
      ``_copy_dir``.
      """
      path = from_path.strip("/")
      if to_path is not None:
          to_path = to_path.strip("/")

      # Split into parent directory and name ("" when there is no parent).
      from_dir, _sep, from_name = path.rpartition("/")

      model = await self.get(path)
      if model["type"] != "directory":
          # let the super class handle copying files
          return await AsyncContentsManager.copy(self, from_path=from_path, to_path=to_path)

      # limit the size of folders being copied to prevent a timeout error
      await self.check_folder_size(path)

      if to_path is None:
          # No destination given: copy next to the source directory.
          to_path = from_dir

      # Directory the copy is created in.  This used to be ``str(to_path)``
      # captured before the default was resolved, which turned a missing
      # destination into the literal folder name "None".
      destination_dir = to_path

      to_name = copy_pat.sub(".", from_name)
      if await self.dir_exists(to_path):
          to_name = await super().increment_filename(to_name, to_path, insert="-Copy")
      to_path = f"{to_path}/{to_name}"

      return await self._copy_dir(
          from_path=from_path,
          to_path_original=destination_dir,
          to_name=to_name,
          to_path=to_path,
      )
  async def _copy_dir(
      self, from_path: str, to_path_original: str, to_name: str, to_path: str
  ) -> dict[str, t.Any]:
      """
      handles copying directories

      Parameters
      ----------
      from_path : str
          API path of the directory to copy.
      to_path_original : str
          API path of the directory the copy is created in.
      to_name : str
          Name of the new directory inside ``to_path_original``.
      to_path : str
          API path of the new directory; used to build the returned model
          and in the error message.

      Returns
      -------
      dict
          the model (without content) for the copied directory

      Raises
      ------
      web.HTTPError
          400 when the copy fails with an OSError.
      """
      try:
          os_from_path = self._get_os_path(from_path.strip("/"))
          os_to_path = os.path.join(self._get_os_path(to_path_original.strip("/")), to_name)
          # copytree can take a long time on big trees; run it in a worker
          # thread so the event loop is not blocked meanwhile.
          await run_sync(shutil.copytree, os_from_path, os_to_path)
          model = await self.get(to_path, content=False)
      except OSError as err:
          self.log.error("OSError in _copy_dir: %s", err)
          raise web.HTTPError(
              400,
              f"Can't copy '{from_path}' into read-only Folder '{to_path}'",
          ) from err

      return model  # type:ignore[no-any-return]
  async def check_folder_size(self, path: str) -> None:
      """
      limit the size of folders being copied to be no more than the
      trait max_copy_folder_size_mb to prevent a timeout error

      Raises
      ------
      web.HTTPError
          400 when the folder at ``path`` is larger than the limit.
      """
      limit_bytes = self.max_copy_folder_size_mb * 1024 * 1024
      size = int(await self._get_dir_size(self._get_os_path(path)))
      # convert from KB to Bytes for macOS
      if platform.system() == "Darwin":
          size *= 1024

      if size > limit_bytes:
          readable_size = await self._human_readable_size(size)
          # Single-line message: a triple-quoted literal would carry a
          # leading newline and source indentation into the HTTP response.
          raise web.HTTPError(
              400,
              f"Can't copy folders larger than {self.max_copy_folder_size_mb}MB, "
              f'"{path}" is {readable_size}',
          )
  1006. async def _get_dir_size(self, path: str = ".") -> str:
  1007. """
  1008. calls the command line program du to get the directory size
  1009. """
  1010. try:
  1011. if platform.system() == "Darwin":
  1012. # returns the size of the folder in KB
  1013. args = ["-sk", path]
  1014. else:
  1015. args = ["-s", "--block-size=1", path]
  1016. proc = await asyncio.create_subprocess_exec(
  1017. "du", *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
  1018. )
  1019. stdout, _ = await proc.communicate()
  1020. result = await proc.wait()
  1021. self.log.info(f"current status of du command {result}")
  1022. assert result == 0
  1023. size = stdout.decode("utf-8").split()[0]
  1024. except Exception:
  1025. self.log.warning(
  1026. "Not able to get the size of the %s directory. Copying might be slow if the directory is large!",
  1027. path,
  1028. )
  1029. return "0"
  1030. return size
  1031. async def _human_readable_size(self, size: int) -> str:
  1032. """
  1033. returns folder size in a human readable format
  1034. """
  1035. if size == 0:
  1036. return "0 Bytes"
  1037. units = ["Bytes", "KB", "MB", "GB", "TB", "PB"]
  1038. order = int(math.log2(size) / 10) if size else 0
  1039. return f"{size / (1 << (order * 10)):.4g} {units[order]}"