writer.py 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219
  1. # mypy: allow-untyped-defs
  2. """Provide an API for writing protocol buffers to event files to be consumed by TensorBoard for visualization."""
  3. import os
  4. import time
  5. from typing import TYPE_CHECKING, Union
  6. import torch
  7. if TYPE_CHECKING:
  8. from matplotlib.figure import Figure
  9. from tensorboard.compat import tf
  10. from tensorboard.compat.proto import event_pb2
  11. from tensorboard.compat.proto.event_pb2 import Event, SessionLog
  12. from tensorboard.plugins.projector.projector_config_pb2 import ProjectorConfig
  13. from tensorboard.summary.writer.event_file_writer import EventFileWriter
  14. from ._convert_np import make_np
  15. from ._embedding import get_embedding_info, make_mat, make_sprite, make_tsv, write_pbtxt
  16. from ._onnx_graph import load_onnx_graph
  17. from ._pytorch_graph import graph
  18. from ._utils import figure_to_image
  19. from .summary import (
  20. audio,
  21. custom_scalars,
  22. histogram,
  23. histogram_raw,
  24. hparams,
  25. image,
  26. image_boxes,
  27. mesh,
  28. pr_curve,
  29. pr_curve_raw,
  30. scalar,
  31. tensor_proto,
  32. text,
  33. video,
  34. )
  35. __all__ = ["FileWriter", "SummaryWriter"]
  36. class FileWriter:
  37. """Writes protocol buffers to event files to be consumed by TensorBoard.
  38. The `FileWriter` class provides a mechanism to create an event file in a
  39. given directory and add summaries and events to it. The class updates the
  40. file contents asynchronously. This allows a training program to call methods
  41. to add data to the file directly from the training loop, without slowing down
  42. training.
  43. """
  44. def __init__(self, log_dir, max_queue=10, flush_secs=120, filename_suffix="") -> None:
  45. """Create a `FileWriter` and an event file.
  46. On construction the writer creates a new event file in `log_dir`.
  47. The other arguments to the constructor control the asynchronous writes to
  48. the event file.
  49. Args:
  50. log_dir: A string. Directory where event file will be written.
  51. max_queue: Integer. Size of the queue for pending events and
  52. summaries before one of the 'add' calls forces a flush to disk.
  53. Default is ten items.
  54. flush_secs: Number. How often, in seconds, to flush the
  55. pending events and summaries to disk. Default is every two minutes.
  56. filename_suffix: A string. Suffix added to all event filenames
  57. in the log_dir directory. More details on filename construction in
  58. tensorboard.summary.writer.event_file_writer.EventFileWriter.
  59. """
  60. # Sometimes PosixPath is passed in and we need to coerce it to
  61. # a string in all cases
  62. # TODO: See if we can remove this in the future if we are
  63. # actually the ones passing in a PosixPath
  64. log_dir = str(log_dir)
  65. self.event_writer = EventFileWriter(
  66. log_dir, max_queue, flush_secs, filename_suffix
  67. )
  68. def get_logdir(self):
  69. """Return the directory where event file will be written."""
  70. return self.event_writer.get_logdir()
  71. def add_event(self, event, step=None, walltime=None) -> None:
  72. """Add an event to the event file.
  73. Args:
  74. event: An `Event` protocol buffer.
  75. step: Number. Optional global step value for training process
  76. to record with the event.
  77. walltime: float. Optional walltime to override the default (current)
  78. walltime (from time.time()) seconds after epoch
  79. """
  80. event.wall_time = time.time() if walltime is None else walltime
  81. if step is not None:
  82. # Make sure step is converted from numpy or other formats
  83. # since protobuf might not convert depending on version
  84. event.step = int(step)
  85. self.event_writer.add_event(event)
  86. def add_summary(self, summary, global_step=None, walltime=None) -> None:
  87. """Add a `Summary` protocol buffer to the event file.
  88. This method wraps the provided summary in an `Event` protocol buffer
  89. and adds it to the event file.
  90. Args:
  91. summary: A `Summary` protocol buffer.
  92. global_step: Number. Optional global step value for training process
  93. to record with the summary.
  94. walltime: float. Optional walltime to override the default (current)
  95. walltime (from time.time()) seconds after epoch
  96. """
  97. event = event_pb2.Event(summary=summary)
  98. self.add_event(event, global_step, walltime)
  99. def add_graph(self, graph_profile, walltime=None) -> None:
  100. """Add a `Graph` and step stats protocol buffer to the event file.
  101. Args:
  102. graph_profile: A `Graph` and step stats protocol buffer.
  103. walltime: float. Optional walltime to override the default (current)
  104. walltime (from time.time()) seconds after epoch
  105. """
  106. graph = graph_profile[0]
  107. stepstats = graph_profile[1]
  108. event = event_pb2.Event(graph_def=graph.SerializeToString())
  109. self.add_event(event, None, walltime)
  110. trm = event_pb2.TaggedRunMetadata(
  111. tag="step1", run_metadata=stepstats.SerializeToString()
  112. )
  113. event = event_pb2.Event(tagged_run_metadata=trm)
  114. self.add_event(event, None, walltime)
  115. def add_onnx_graph(self, graph, walltime=None) -> None:
  116. """Add a `Graph` protocol buffer to the event file.
  117. Args:
  118. graph: A `Graph` protocol buffer.
  119. walltime: float. Optional walltime to override the default (current)
  120. _get_file_writerfrom time.time())
  121. """
  122. event = event_pb2.Event(graph_def=graph.SerializeToString())
  123. self.add_event(event, None, walltime)
  124. def flush(self) -> None:
  125. """Flushes the event file to disk.
  126. Call this method to make sure that all pending events have been written to
  127. disk.
  128. """
  129. self.event_writer.flush()
  130. def close(self) -> None:
  131. """Flushes the event file to disk and close the file.
  132. Call this method when you do not need the summary writer anymore.
  133. """
  134. self.event_writer.close()
  135. def reopen(self) -> None:
  136. """Reopens the EventFileWriter.
  137. Can be called after `close()` to add more events in the same directory.
  138. The events will go into a new events file.
  139. Does nothing if the EventFileWriter was not closed.
  140. """
  141. # pyrefly: ignore [missing-attribute]
  142. self.event_writer.reopen()
  143. class SummaryWriter:
  144. """Writes entries directly to event files in the log_dir to be consumed by TensorBoard.
  145. The `SummaryWriter` class provides a high-level API to create an event file
  146. in a given directory and add summaries and events to it. The class updates the
  147. file contents asynchronously. This allows a training program to call methods
  148. to add data to the file directly from the training loop, without slowing down
  149. training.
  150. """
  151. def __init__(
  152. self,
  153. log_dir=None,
  154. comment="",
  155. purge_step=None,
  156. max_queue=10,
  157. flush_secs=120,
  158. filename_suffix="",
  159. ) -> None:
  160. """Create a `SummaryWriter` that will write out events and summaries to the event file.
  161. Args:
  162. log_dir (str): Save directory location. Default is
  163. runs/**CURRENT_DATETIME_HOSTNAME**, which changes after each run.
  164. Use hierarchical folder structure to compare
  165. between runs easily. e.g. pass in 'runs/exp1', 'runs/exp2', etc.
  166. for each new experiment to compare across them.
  167. comment (str): Comment log_dir suffix appended to the default
  168. ``log_dir``. If ``log_dir`` is assigned, this argument has no effect.
  169. purge_step (int):
  170. When logging crashes at step :math:`T+X` and restarts at step :math:`T`,
  171. any events whose global_step larger or equal to :math:`T` will be
  172. purged and hidden from TensorBoard.
  173. Note that crashed and resumed experiments should have the same ``log_dir``.
  174. max_queue (int): Size of the queue for pending events and
  175. summaries before one of the 'add' calls forces a flush to disk.
  176. Default is ten items.
  177. flush_secs (int): How often, in seconds, to flush the
  178. pending events and summaries to disk. Default is every two minutes.
  179. filename_suffix (str): Suffix added to all event filenames in
  180. the log_dir directory. More details on filename construction in
  181. tensorboard.summary.writer.event_file_writer.EventFileWriter.
  182. Examples::
  183. from torch.utils.tensorboard import SummaryWriter
  184. # create a summary writer with automatically generated folder name.
  185. writer = SummaryWriter()
  186. # folder location: runs/May04_22-14-54_s-MacBook-Pro.local/
  187. # create a summary writer using the specified folder name.
  188. writer = SummaryWriter("my_experiment")
  189. # folder location: my_experiment
  190. # create a summary writer with comment appended.
  191. writer = SummaryWriter(comment="LR_0.1_BATCH_16")
  192. # folder location: runs/May04_22-14-54_s-MacBook-Pro.localLR_0.1_BATCH_16/
  193. """
  194. torch._C._log_api_usage_once("tensorboard.create.summarywriter")
  195. if not log_dir:
  196. import socket
  197. from datetime import datetime
  198. current_time = datetime.now().strftime("%b%d_%H-%M-%S")
  199. log_dir = os.path.join(
  200. "runs", current_time + "_" + socket.gethostname() + comment
  201. )
  202. self.log_dir = log_dir
  203. self.purge_step = purge_step
  204. self.max_queue = max_queue
  205. self.flush_secs = flush_secs
  206. self.filename_suffix = filename_suffix
  207. # Initialize the file writers, but they can be cleared out on close
  208. # and recreated later as needed.
  209. self.file_writer = self.all_writers = None
  210. self._get_file_writer()
  211. # Create default bins for histograms, see generate_testdata.py in tensorflow/tensorboard
  212. v = 1e-12
  213. buckets = []
  214. neg_buckets = []
  215. while v < 1e20:
  216. # pyrefly: ignore [bad-argument-type]
  217. buckets.append(v)
  218. # pyrefly: ignore [bad-argument-type]
  219. neg_buckets.append(-v)
  220. v *= 1.1
  221. self.default_bins = neg_buckets[::-1] + [0] + buckets
  222. def _get_file_writer(self):
  223. """Return the default FileWriter instance. Recreates it if closed."""
  224. if self.all_writers is None or self.file_writer is None:
  225. # pyrefly: ignore [bad-assignment]
  226. self.file_writer = FileWriter(
  227. self.log_dir, self.max_queue, self.flush_secs, self.filename_suffix
  228. )
  229. # pyrefly: ignore [bad-assignment, missing-attribute]
  230. self.all_writers = {self.file_writer.get_logdir(): self.file_writer}
  231. if self.purge_step is not None:
  232. most_recent_step = self.purge_step
  233. # pyrefly: ignore [missing-attribute]
  234. self.file_writer.add_event(
  235. Event(step=most_recent_step, file_version="brain.Event:2")
  236. )
  237. # pyrefly: ignore [missing-attribute]
  238. self.file_writer.add_event(
  239. Event(
  240. step=most_recent_step,
  241. # pyrefly: ignore [missing-attribute]
  242. session_log=SessionLog(status=SessionLog.START),
  243. )
  244. )
  245. self.purge_step = None
  246. return self.file_writer
  247. def get_logdir(self):
  248. """Return the directory where event files will be written."""
  249. return self.log_dir
  250. def add_hparams(
  251. self,
  252. hparam_dict,
  253. metric_dict,
  254. hparam_domain_discrete=None,
  255. run_name=None,
  256. global_step=None,
  257. ) -> None:
  258. """Add a set of hyperparameters to be compared in TensorBoard.
  259. Args:
  260. hparam_dict (dict): Each key-value pair in the dictionary is the
  261. name of the hyper parameter and it's corresponding value.
  262. The type of the value can be one of `bool`, `string`, `float`,
  263. `int`, or `None`.
  264. metric_dict (dict): Each key-value pair in the dictionary is the
  265. name of the metric and it's corresponding value. Note that the key used
  266. here should be unique in the tensorboard record. Otherwise the value
  267. you added by ``add_scalar`` will be displayed in hparam plugin. In most
  268. cases, this is unwanted.
  269. hparam_domain_discrete: (Optional[Dict[str, List[Any]]]) A dictionary that
  270. contains names of the hyperparameters and all discrete values they can hold
  271. run_name (str): Name of the run, to be included as part of the logdir.
  272. If unspecified, will use current timestamp.
  273. global_step (int): Global step value to record
  274. Examples::
  275. from torch.utils.tensorboard import SummaryWriter
  276. with SummaryWriter() as w:
  277. for i in range(5):
  278. w.add_hparams({'lr': 0.1*i, 'bsize': i},
  279. {'hparam/accuracy': 10*i, 'hparam/loss': 10*i})
  280. Expected result:
  281. .. image:: _static/img/tensorboard/add_hparam.png
  282. :scale: 50 %
  283. """
  284. torch._C._log_api_usage_once("tensorboard.logging.add_hparams")
  285. if type(hparam_dict) is not dict or type(metric_dict) is not dict:
  286. raise TypeError("hparam_dict and metric_dict should be dictionary.")
  287. exp, ssi, sei = hparams(hparam_dict, metric_dict, hparam_domain_discrete)
  288. if not run_name:
  289. run_name = str(time.time())
  290. logdir = os.path.join(self._get_file_writer().get_logdir(), run_name)
  291. with SummaryWriter(log_dir=logdir) as w_hp:
  292. w_hp.file_writer.add_summary(exp, global_step)
  293. w_hp.file_writer.add_summary(ssi, global_step)
  294. w_hp.file_writer.add_summary(sei, global_step)
  295. for k, v in metric_dict.items():
  296. w_hp.add_scalar(k, v, global_step)
  297. def add_scalar(
  298. self,
  299. tag,
  300. scalar_value,
  301. global_step=None,
  302. walltime=None,
  303. new_style=False,
  304. double_precision=False,
  305. ) -> None:
  306. """Add scalar data to summary.
  307. Args:
  308. tag (str): Data identifier
  309. scalar_value (float or string/blobname): Value to save
  310. global_step (int): Global step value to record
  311. walltime (float): Optional override default walltime (time.time())
  312. with seconds after epoch of event
  313. new_style (boolean): Whether to use new style (tensor field) or old
  314. style (simple_value field). New style could lead to faster data loading.
  315. Examples::
  316. from torch.utils.tensorboard import SummaryWriter
  317. writer = SummaryWriter()
  318. x = range(100)
  319. for i in x:
  320. writer.add_scalar('y=2x', i * 2, i)
  321. writer.close()
  322. Expected result:
  323. .. image:: _static/img/tensorboard/add_scalar.png
  324. :scale: 50 %
  325. """
  326. torch._C._log_api_usage_once("tensorboard.logging.add_scalar")
  327. summary = scalar(
  328. tag, scalar_value, new_style=new_style, double_precision=double_precision
  329. )
  330. self._get_file_writer().add_summary(summary, global_step, walltime)
  331. def add_scalars(self, main_tag, tag_scalar_dict, global_step=None, walltime=None) -> None:
  332. """Add many scalar data to summary.
  333. Args:
  334. main_tag (str): The parent name for the tags
  335. tag_scalar_dict (dict): Key-value pair storing the tag and corresponding values
  336. global_step (int): Global step value to record
  337. walltime (float): Optional override default walltime (time.time())
  338. seconds after epoch of event
  339. Examples::
  340. from torch.utils.tensorboard import SummaryWriter
  341. writer = SummaryWriter()
  342. r = 5
  343. for i in range(100):
  344. writer.add_scalars('run_14h', {'xsinx':i*np.sin(i/r),
  345. 'xcosx':i*np.cos(i/r),
  346. 'tanx': np.tan(i/r)}, i)
  347. writer.close()
  348. # This call adds three values to the same scalar plot with the tag
  349. # 'run_14h' in TensorBoard's scalar section.
  350. Expected result:
  351. .. image:: _static/img/tensorboard/add_scalars.png
  352. :scale: 50 %
  353. """
  354. torch._C._log_api_usage_once("tensorboard.logging.add_scalars")
  355. walltime = time.time() if walltime is None else walltime
  356. fw_logdir = self._get_file_writer().get_logdir()
  357. for tag, scalar_value in tag_scalar_dict.items():
  358. fw_tag = fw_logdir + "/" + main_tag.replace("/", "_") + "_" + tag
  359. if self.all_writers is None:
  360. raise AssertionError("self.all_writers is None")
  361. if fw_tag in self.all_writers:
  362. fw = self.all_writers[fw_tag]
  363. else:
  364. fw = FileWriter(
  365. fw_tag, self.max_queue, self.flush_secs, self.filename_suffix
  366. )
  367. self.all_writers[fw_tag] = fw
  368. fw.add_summary(scalar(main_tag, scalar_value), global_step, walltime)
  369. def add_tensor(
  370. self,
  371. tag,
  372. tensor,
  373. global_step=None,
  374. walltime=None,
  375. ) -> None:
  376. """Add tensor data to summary.
  377. Args:
  378. tag (str): Data identifier
  379. tensor (torch.Tensor): tensor to save
  380. global_step (int): Global step value to record
  381. Examples::
  382. from torch.utils.tensorboard import SummaryWriter
  383. writer = SummaryWriter()
  384. x = torch.tensor([1,2,3])
  385. writer.add_scalar('x', x)
  386. writer.close()
  387. Expected result:
  388. Summary::tensor::float_val [1,2,3]
  389. ::tensor::shape [3]
  390. ::tag 'x'
  391. """
  392. torch._C._log_api_usage_once("tensorboard.logging.add_tensor")
  393. summary = tensor_proto(tag, tensor)
  394. self._get_file_writer().add_summary(summary, global_step, walltime)
  395. def add_histogram(
  396. self,
  397. tag,
  398. values,
  399. global_step=None,
  400. bins="tensorflow",
  401. walltime=None,
  402. max_bins=None,
  403. ) -> None:
  404. """Add histogram to summary.
  405. Args:
  406. tag (str): Data identifier
  407. values (torch.Tensor, numpy.ndarray, or string/blobname): Values to build histogram
  408. global_step (int): Global step value to record
  409. bins (str): One of {'tensorflow','auto', 'fd', ...}. This determines how the bins are made. You can find
  410. other options in: https://numpy.org/doc/stable/reference/generated/numpy.histogram.html
  411. walltime (float): Optional override default walltime (time.time())
  412. seconds after epoch of event
  413. Examples::
  414. from torch.utils.tensorboard import SummaryWriter
  415. import numpy as np
  416. writer = SummaryWriter()
  417. for i in range(10):
  418. x = np.random.random(1000)
  419. writer.add_histogram('distribution centers', x + i, i)
  420. writer.close()
  421. Expected result:
  422. .. image:: _static/img/tensorboard/add_histogram.png
  423. :scale: 50 %
  424. """
  425. torch._C._log_api_usage_once("tensorboard.logging.add_histogram")
  426. if isinstance(bins, str) and bins == "tensorflow":
  427. bins = self.default_bins
  428. self._get_file_writer().add_summary(
  429. histogram(tag, values, bins, max_bins=max_bins), global_step, walltime
  430. )
  431. def add_histogram_raw(
  432. self,
  433. tag,
  434. min,
  435. max,
  436. num,
  437. sum,
  438. sum_squares,
  439. bucket_limits,
  440. bucket_counts,
  441. global_step=None,
  442. walltime=None,
  443. ) -> None:
  444. """Add histogram with raw data.
  445. Args:
  446. tag (str): Data identifier
  447. min (float or int): Min value
  448. max (float or int): Max value
  449. num (int): Number of values
  450. sum (float or int): Sum of all values
  451. sum_squares (float or int): Sum of squares for all values
  452. bucket_limits (torch.Tensor, numpy.ndarray): Upper value per bucket.
  453. The number of elements of it should be the same as `bucket_counts`.
  454. bucket_counts (torch.Tensor, numpy.ndarray): Number of values per bucket
  455. global_step (int): Global step value to record
  456. walltime (float): Optional override default walltime (time.time())
  457. seconds after epoch of event
  458. see: https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/histogram/README.md
  459. Examples::
  460. from torch.utils.tensorboard import SummaryWriter
  461. import numpy as np
  462. writer = SummaryWriter()
  463. dummy_data = []
  464. for idx, value in enumerate(range(50)):
  465. dummy_data += [idx + 0.001] * value
  466. bins = list(range(50+2))
  467. bins = np.array(bins)
  468. values = np.array(dummy_data).astype(float).reshape(-1)
  469. counts, limits = np.histogram(values, bins=bins)
  470. sum_sq = values.dot(values)
  471. writer.add_histogram_raw(
  472. tag='histogram_with_raw_data',
  473. min=values.min(),
  474. max=values.max(),
  475. num=len(values),
  476. sum=values.sum(),
  477. sum_squares=sum_sq,
  478. bucket_limits=limits[1:].tolist(),
  479. bucket_counts=counts.tolist(),
  480. global_step=0)
  481. writer.close()
  482. Expected result:
  483. .. image:: _static/img/tensorboard/add_histogram_raw.png
  484. :scale: 50 %
  485. """
  486. torch._C._log_api_usage_once("tensorboard.logging.add_histogram_raw")
  487. if len(bucket_limits) != len(bucket_counts):
  488. raise ValueError(
  489. "len(bucket_limits) != len(bucket_counts), see the document."
  490. )
  491. self._get_file_writer().add_summary(
  492. histogram_raw(
  493. tag, min, max, num, sum, sum_squares, bucket_limits, bucket_counts
  494. ),
  495. global_step,
  496. walltime,
  497. )
  498. def add_image(
  499. self, tag, img_tensor, global_step=None, walltime=None, dataformats="CHW"
  500. ) -> None:
  501. """Add image data to summary.
  502. Note that this requires the ``pillow`` package.
  503. Args:
  504. tag (str): Data identifier
  505. img_tensor (torch.Tensor, numpy.ndarray, or string/blobname): Image data
  506. global_step (int): Global step value to record
  507. walltime (float): Optional override default walltime (time.time())
  508. seconds after epoch of event
  509. dataformats (str): Image data format specification of the form
  510. CHW, HWC, HW, WH, etc.
  511. Shape:
  512. img_tensor: Default is :math:`(3, H, W)`. You can use ``torchvision.utils.make_grid()`` to
  513. convert a batch of tensor into 3xHxW format or call ``add_images`` and let us do the job.
  514. Tensor with :math:`(1, H, W)`, :math:`(H, W)`, :math:`(H, W, 3)` is also suitable as long as
  515. corresponding ``dataformats`` argument is passed, e.g. ``CHW``, ``HWC``, ``HW``.
  516. Examples::
  517. from torch.utils.tensorboard import SummaryWriter
  518. import numpy as np
  519. img = np.zeros((3, 100, 100))
  520. img[0] = np.arange(0, 10000).reshape(100, 100) / 10000
  521. img[1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000
  522. img_HWC = np.zeros((100, 100, 3))
  523. img_HWC[:, :, 0] = np.arange(0, 10000).reshape(100, 100) / 10000
  524. img_HWC[:, :, 1] = 1 - np.arange(0, 10000).reshape(100, 100) / 10000
  525. writer = SummaryWriter()
  526. writer.add_image('my_image', img, 0)
  527. # If you have non-default dimension setting, set the dataformats argument.
  528. writer.add_image('my_image_HWC', img_HWC, 0, dataformats='HWC')
  529. writer.close()
  530. Expected result:
  531. .. image:: _static/img/tensorboard/add_image.png
  532. :scale: 50 %
  533. """
  534. torch._C._log_api_usage_once("tensorboard.logging.add_image")
  535. self._get_file_writer().add_summary(
  536. image(tag, img_tensor, dataformats=dataformats), global_step, walltime
  537. )
  538. def add_images(
  539. self, tag, img_tensor, global_step=None, walltime=None, dataformats="NCHW"
  540. ) -> None:
  541. """Add batched image data to summary.
  542. Note that this requires the ``pillow`` package.
  543. Args:
  544. tag (str): Data identifier
  545. img_tensor (torch.Tensor, numpy.ndarray, or string/blobname): Image data
  546. global_step (int): Global step value to record
  547. walltime (float): Optional override default walltime (time.time())
  548. seconds after epoch of event
  549. dataformats (str): Image data format specification of the form
  550. NCHW, NHWC, CHW, HWC, HW, WH, etc.
  551. Shape:
  552. img_tensor: Default is :math:`(N, 3, H, W)`. If ``dataformats`` is specified, other shape will be
  553. accepted. e.g. NCHW or NHWC.
  554. Examples::
  555. from torch.utils.tensorboard import SummaryWriter
  556. import numpy as np
  557. img_batch = np.zeros((16, 3, 100, 100))
  558. for i in range(16):
  559. img_batch[i, 0] = np.arange(0, 10000).reshape(100, 100) / 10000 / 16 * i
  560. img_batch[i, 1] = (1 - np.arange(0, 10000).reshape(100, 100) / 10000) / 16 * i
  561. writer = SummaryWriter()
  562. writer.add_images('my_image_batch', img_batch, 0)
  563. writer.close()
  564. Expected result:
  565. .. image:: _static/img/tensorboard/add_images.png
  566. :scale: 30 %
  567. """
  568. torch._C._log_api_usage_once("tensorboard.logging.add_images")
  569. self._get_file_writer().add_summary(
  570. image(tag, img_tensor, dataformats=dataformats), global_step, walltime
  571. )
  572. def add_image_with_boxes(
  573. self,
  574. tag,
  575. img_tensor,
  576. box_tensor,
  577. global_step=None,
  578. walltime=None,
  579. rescale=1,
  580. dataformats="CHW",
  581. labels=None,
  582. ) -> None:
  583. """Add image and draw bounding boxes on the image.
  584. Args:
  585. tag (str): Data identifier
  586. img_tensor (torch.Tensor, numpy.ndarray, or string/blobname): Image data
  587. box_tensor (torch.Tensor, numpy.ndarray, or string/blobname): Box data (for detected objects)
  588. box should be represented as [x1, y1, x2, y2].
  589. global_step (int): Global step value to record
  590. walltime (float): Optional override default walltime (time.time())
  591. seconds after epoch of event
  592. rescale (float): Optional scale override
  593. dataformats (str): Image data format specification of the form
  594. NCHW, NHWC, CHW, HWC, HW, WH, etc.
  595. labels (list of string): The label to be shown for each bounding box.
  596. Shape:
  597. img_tensor: Default is :math:`(3, H, W)`. It can be specified with ``dataformats`` argument.
  598. e.g. CHW or HWC
  599. box_tensor: (torch.Tensor, numpy.ndarray, or string/blobname): NX4, where N is the number of
  600. boxes and each 4 elements in a row represents (xmin, ymin, xmax, ymax).
  601. """
  602. torch._C._log_api_usage_once("tensorboard.logging.add_image_with_boxes")
  603. if labels is not None:
  604. if isinstance(labels, str):
  605. labels = [labels]
  606. if len(labels) != box_tensor.shape[0]:
  607. labels = None
  608. self._get_file_writer().add_summary(
  609. image_boxes(
  610. tag,
  611. img_tensor,
  612. box_tensor,
  613. rescale=rescale,
  614. dataformats=dataformats,
  615. labels=labels,
  616. ),
  617. global_step,
  618. walltime,
  619. )
  620. def add_figure(
  621. self,
  622. tag: str,
  623. figure: Union["Figure", list["Figure"]],
  624. global_step: int | None = None,
  625. close: bool = True,
  626. walltime: float | None = None,
  627. ) -> None:
  628. """Render matplotlib figure into an image and add it to summary.
  629. Note that this requires the ``matplotlib`` package.
  630. Args:
  631. tag: Data identifier
  632. figure: Figure or a list of figures
  633. global_step: Global step value to record
  634. close: Flag to automatically close the figure
  635. walltime: Optional override default walltime (time.time())
  636. seconds after epoch of event
  637. """
  638. torch._C._log_api_usage_once("tensorboard.logging.add_figure")
  639. if isinstance(figure, list):
  640. self.add_image(
  641. tag,
  642. figure_to_image(figure, close),
  643. global_step,
  644. walltime,
  645. dataformats="NCHW",
  646. )
  647. else:
  648. self.add_image(
  649. tag,
  650. figure_to_image(figure, close),
  651. global_step,
  652. walltime,
  653. dataformats="CHW",
  654. )
  def add_video(self, tag, vid_tensor, global_step=None, fps=4, walltime=None) -> None:
      """Add video data to summary.

      Note that this requires the ``moviepy`` package.

      Args:
          tag (str): Data identifier
          vid_tensor (torch.Tensor): Video data
          global_step (int): Global step value to record
          fps (float or int): Frames per second
          walltime (float): Optional override default walltime (time.time())
            seconds after epoch of event

      Shape:
          vid_tensor: :math:`(N, T, C, H, W)`. The values should lie in [0, 255] for type `uint8` or [0, 1] for type `float`.
      """
      torch._C._log_api_usage_once("tensorboard.logging.add_video")
      # Fetch the writer first, then build the summary, then record it.
      file_writer = self._get_file_writer()
      video_summary = video(tag, vid_tensor, fps)
      file_writer.add_summary(video_summary, global_step, walltime)
  def add_audio(
      self, tag, snd_tensor, global_step=None, sample_rate=44100, walltime=None
  ) -> None:
      """Add audio data to summary.

      Args:
          tag (str): Data identifier
          snd_tensor (torch.Tensor): Sound data
          global_step (int): Global step value to record
          sample_rate (int): sample rate in Hz
          walltime (float): Optional override default walltime (time.time())
            seconds after epoch of event

      Shape:
          snd_tensor: :math:`(1, L)`. The values should lie between [-1, 1].
      """
      torch._C._log_api_usage_once("tensorboard.logging.add_audio")
      # Fetch the writer first, then build the summary, then record it.
      file_writer = self._get_file_writer()
      audio_summary = audio(tag, snd_tensor, sample_rate=sample_rate)
      file_writer.add_summary(audio_summary, global_step, walltime)
  def add_text(self, tag, text_string, global_step=None, walltime=None) -> None:
      """Add text data to summary.

      Args:
          tag (str): Data identifier
          text_string (str): String to save
          global_step (int): Global step value to record
          walltime (float): Optional override default walltime (time.time())
            seconds after epoch of event

      Examples::

          writer.add_text('lstm', 'This is an lstm', 0)
          writer.add_text('rnn', 'This is an rnn', 10)
      """
      torch._C._log_api_usage_once("tensorboard.logging.add_text")
      # Fetch the writer first, then build the summary, then record it.
      file_writer = self._get_file_writer()
      text_summary = text(tag, text_string)
      file_writer.add_summary(text_summary, global_step, walltime)
  def add_onnx_graph(self, prototxt) -> None:
      """Load an ONNX graph via ``load_onnx_graph`` and add it to the file writer.

      Args:
          prototxt: Handed unchanged to ``load_onnx_graph``; presumably the
            location of a serialized ONNX model -- TODO confirm against
            ``load_onnx_graph``.
      """
      torch._C._log_api_usage_once("tensorboard.logging.add_onnx_graph")
      file_writer = self._get_file_writer()
      onnx_graph = load_onnx_graph(prototxt)
      file_writer.add_onnx_graph(onnx_graph)
  def add_graph(
      self, model, input_to_model=None, verbose=False, use_strict_trace=True
  ) -> None:
      """Add graph data to summary.

      Args:
          model (torch.nn.Module): Model to draw.
          input_to_model (torch.Tensor or list of torch.Tensor): A variable or a tuple of
              variables to be fed.
          verbose (bool): Whether to print graph structure in console.
          use_strict_trace (bool): Whether to pass keyword argument `strict` to
              `torch.jit.trace`. Pass False when you want the tracer to
              record your mutable container types (list, dict)
      """
      torch._C._log_api_usage_once("tensorboard.logging.add_graph")
      # A valid PyTorch model should have a 'forward' method
      file_writer = self._get_file_writer()
      model_graph = graph(model, input_to_model, verbose, use_strict_trace)
      file_writer.add_graph(model_graph)
  727. @staticmethod
  728. def _encode(rawstr):
  729. # I'd use urllib but, I'm unsure about the differences from python3 to python2, etc.
  730. retval = rawstr
  731. retval = retval.replace("%", f"%{ord('%'):02x}")
  732. retval = retval.replace("/", f"%{ord('/'):02x}")
  733. retval = retval.replace("\\", "%%%02x" % (ord("\\"))) # noqa: UP031
  734. return retval
  def add_embedding(
      self,
      mat,
      metadata=None,
      label_img=None,
      global_step=None,
      tag="default",
      metadata_header=None,
  ) -> None:
      """Add embedding projector data to summary.

      Args:
          mat (torch.Tensor or numpy.ndarray): A matrix in which each row is the feature vector of a data point
          metadata (list): A list of labels, each element will be converted to string
          label_img (torch.Tensor): Images corresponding to each data point
          global_step (int): Global step value to record
          tag (str): Name for the embedding
          metadata_header (list): A list of headers for multi-column metadata. If given, each metadata must be
              a list with values corresponding to headers.

      Shape:
          mat: :math:`(N, D)`, where N is number of data and D is feature dimension

          label_img: :math:`(N, C, H, W)`

      Raises:
          AssertionError: If ``mat`` is not 2D, or if the number of ``metadata``
              entries or ``label_img`` images differs from the number of rows in
              ``mat``. Raised before anything is written to disk.
          NotADirectoryError: If the target embedding path exists but is a file.

      Examples::

          import keyword
          import torch
          meta = []
          while len(meta)<100:
              meta = meta+keyword.kwlist # get some strings
          meta = meta[:100]

          for i, v in enumerate(meta):
              meta[i] = v+str(i)

          label_img = torch.rand(100, 3, 10, 32)
          for i in range(100):
              label_img[i]*=i/100.0

          writer.add_embedding(torch.randn(100, 5), metadata=meta, label_img=label_img)
          writer.add_embedding(torch.randn(100, 5), label_img=label_img)
          writer.add_embedding(torch.randn(100, 5), metadata=meta)

      .. note::
          Categorical (i.e. non-numeric) metadata cannot have more than 50 unique values if they are to be used for
          coloring in the embedding projector.
      """
      torch._C._log_api_usage_once("tensorboard.logging.add_embedding")
      mat = make_np(mat)
      if global_step is None:
          global_step = 0

      # Validate every input up front, before any directory is created or any
      # file is written, so a bad call leaves nothing half-written behind.
      # The rank check comes first because the row-count checks index shape[0].
      if mat.ndim != 2:
          raise AssertionError("mat should be 2D, where mat.size(0) is the number of data points")
      if metadata is not None and mat.shape[0] != len(metadata):
          raise AssertionError("#labels should equal with #data points")
      if label_img is not None and mat.shape[0] != label_img.shape[0]:
          raise AssertionError("#images should equal with #data points")

      # clear pbtxt?

      # The tag is encoded so that slashes in it cannot introduce extra path
      # components below the zero-padded step directory.
      subdir = f"{str(global_step).zfill(5)}/{self._encode(tag)}"
      save_path = os.path.join(self._get_file_writer().get_logdir(), subdir)

      fs = tf.io.gfile
      if fs.exists(save_path):
          if fs.isdir(save_path):
              print(
                  "warning: Embedding dir exists, did you set global_step for add_embedding()?"
              )
          else:
              raise NotADirectoryError(
                  f"Path: `{save_path}` exists, but is a file. Cannot proceed."
              )
      else:
          fs.makedirs(save_path)

      if metadata is not None:
          make_tsv(metadata, save_path, metadata_header=metadata_header)
      if label_img is not None:
          make_sprite(label_img, save_path)
      make_mat(mat, save_path)

      # Filesystem doesn't necessarily have append semantics, so we store an
      # internal buffer to append to and re-write whole file after each
      # embedding is added
      if not hasattr(self, "_projector_config"):
          self._projector_config = ProjectorConfig()
      embedding_info = get_embedding_info(
          metadata, label_img, subdir, global_step, tag
      )
      self._projector_config.embeddings.extend([embedding_info])

      from google.protobuf import text_format

      config_pbtxt = text_format.MessageToString(self._projector_config)
      write_pbtxt(self._get_file_writer().get_logdir(), config_pbtxt)
  def add_pr_curve(
      self,
      tag,
      labels,
      predictions,
      global_step=None,
      num_thresholds=127,
      weights=None,
      walltime=None,
  ) -> None:
      """Add precision recall curve.

      Plotting a precision-recall curve lets you understand your model's
      performance under different threshold settings. With this function,
      you provide the ground truth labeling (T/F) and prediction confidence
      (usually the output of your model) for each target. The TensorBoard UI
      will let you choose the threshold interactively.

      Args:
          tag (str): Data identifier
          labels (torch.Tensor, numpy.ndarray, or string/blobname):
            Ground truth data. Binary label for each element.
          predictions (torch.Tensor, numpy.ndarray, or string/blobname):
            The probability that an element be classified as true.
            Value should be in [0, 1]
          global_step (int): Global step value to record
          num_thresholds (int): Number of thresholds used to draw the curve.
          weights: Forwarded unchanged to ``pr_curve``; presumably optional
            per-element weights -- TODO confirm against ``pr_curve``.
          walltime (float): Optional override default walltime (time.time())
            seconds after epoch of event

      Examples::

          from torch.utils.tensorboard import SummaryWriter
          import numpy as np
          labels = np.random.randint(2, size=100)  # binary label
          predictions = np.random.rand(100)
          writer = SummaryWriter()
          writer.add_pr_curve('pr_curve', labels, predictions, 0)
          writer.close()
      """
      torch._C._log_api_usage_once("tensorboard.logging.add_pr_curve")
      # Both inputs are converted with make_np before the summary is built.
      labels = make_np(labels)
      predictions = make_np(predictions)
      file_writer = self._get_file_writer()
      curve_summary = pr_curve(tag, labels, predictions, num_thresholds, weights)
      file_writer.add_summary(curve_summary, global_step, walltime)
  def add_pr_curve_raw(
      self,
      tag,
      true_positive_counts,
      false_positive_counts,
      true_negative_counts,
      false_negative_counts,
      precision,
      recall,
      global_step=None,
      num_thresholds=127,
      weights=None,
      walltime=None,
  ) -> None:
      """Add precision recall curve with raw data.

      Args:
          tag (str): Data identifier
          true_positive_counts (torch.Tensor, numpy.ndarray, or string/blobname): true positive counts
          false_positive_counts (torch.Tensor, numpy.ndarray, or string/blobname): false positive counts
          true_negative_counts (torch.Tensor, numpy.ndarray, or string/blobname): true negative counts
          false_negative_counts (torch.Tensor, numpy.ndarray, or string/blobname): false negative counts
          precision (torch.Tensor, numpy.ndarray, or string/blobname): precision
          recall (torch.Tensor, numpy.ndarray, or string/blobname): recall
          global_step (int): Global step value to record
          num_thresholds (int): Number of thresholds used to draw the curve.
          weights: Forwarded unchanged to ``pr_curve_raw``.
          walltime (float): Optional override default walltime (time.time())
            seconds after epoch of event
            see: https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/pr_curve/README.md
      """
      torch._C._log_api_usage_once("tensorboard.logging.add_pr_curve_raw")
      # Unlike add_pr_curve, the inputs are handed to pr_curve_raw as given.
      file_writer = self._get_file_writer()
      raw_summary = pr_curve_raw(
          tag,
          true_positive_counts,
          false_positive_counts,
          true_negative_counts,
          false_negative_counts,
          precision,
          recall,
          num_thresholds,
          weights,
      )
      file_writer.add_summary(raw_summary, global_step, walltime)
  def add_custom_scalars_multilinechart(
      self, tags, category="default", title="untitled"
  ) -> None:
      """Shorthand for creating multilinechart. Similar to ``add_custom_scalars()``, but the only necessary argument is *tags*.

      Args:
          tags (list): list of tags that have been used in ``add_scalar()``
          category (str): Key of the outer layout dictionary the chart is placed under
          title (str): Key of the chart inside its category

      Examples::

          writer.add_custom_scalars_multilinechart(['twse/0050', 'twse/2330'])
      """
      torch._C._log_api_usage_once(
          "tensorboard.logging.add_custom_scalars_multilinechart"
      )
      # Same nested shape add_custom_scalars() takes: {category: {title: [type, tags]}}.
      chart = ["Multiline", tags]
      layout = {category: {title: chart}}
      file_writer = self._get_file_writer()
      file_writer.add_summary(custom_scalars(layout))
  def add_custom_scalars_marginchart(
      self, tags, category="default", title="untitled"
  ) -> None:
      """Shorthand for creating marginchart.

      Similar to ``add_custom_scalars()``, but the only necessary argument is *tags*,
      which should have exactly 3 elements.

      Args:
          tags (list): list of tags that have been used in ``add_scalar()``
          category (str): Key of the outer layout dictionary the chart is placed under
          title (str): Key of the chart inside its category

      Raises:
          AssertionError: If ``tags`` does not contain exactly 3 elements.

      Examples::

          writer.add_custom_scalars_marginchart(['twse/0050', 'twse/2330', 'twse/2006'])
      """
      torch._C._log_api_usage_once(
          "tensorboard.logging.add_custom_scalars_marginchart"
      )
      tag_count = len(tags)
      if tag_count != 3:
          raise AssertionError(f"Expected 3 tags, got {tag_count}.")
      # Same nested shape add_custom_scalars() takes: {category: {title: [type, tags]}}.
      chart = ["Margin", tags]
      layout = {category: {title: chart}}
      file_writer = self._get_file_writer()
      file_writer.add_summary(custom_scalars(layout))
  def add_custom_scalars(self, layout) -> None:
      """Create special chart by collecting charts tags in 'scalars'.

      NOTE: This function can only be called once for each SummaryWriter() object.

      Because it only provides metadata to tensorboard, the function can be called before or after the training loop.

      Args:
          layout (dict): {categoryName: *charts*}, where *charts* is also a dictionary
            {chartName: *ListOfProperties*}. The first element in *ListOfProperties* is the chart's type
            (one of **Multiline** or **Margin**) and the second element should be a list containing the tags
            you have used in add_scalar function, which will be collected into the new chart.

      Examples::

          layout = {'Taiwan':{'twse':['Multiline',['twse/0050', 'twse/2330']]},
                       'USA':{ 'dow':['Margin',   ['dow/aaa', 'dow/bbb', 'dow/ccc']],
                            'nasdaq':['Margin',   ['nasdaq/aaa', 'nasdaq/bbb', 'nasdaq/ccc']]}}

          writer.add_custom_scalars(layout)
      """
      torch._C._log_api_usage_once("tensorboard.logging.add_custom_scalars")
      file_writer = self._get_file_writer()
      layout_summary = custom_scalars(layout)
      file_writer.add_summary(layout_summary)
  def add_mesh(
      self,
      tag,
      vertices,
      colors=None,
      faces=None,
      config_dict=None,
      global_step=None,
      walltime=None,
  ) -> None:
      """Add meshes or 3D point clouds to TensorBoard.

      The visualization is based on Three.js,
      so it allows users to interact with the rendered object. Besides the basic definitions
      such as vertices, faces, users can further provide camera parameter, lighting condition, etc.
      Please check https://threejs.org/docs/index.html#manual/en/introduction/Creating-a-scene for
      advanced usage.

      Args:
          tag (str): Data identifier
          vertices (torch.Tensor): List of the 3D coordinates of vertices.
          colors (torch.Tensor): Colors for each vertex
          faces (torch.Tensor): Indices of vertices within each triangle. (Optional)
          config_dict: Dictionary with ThreeJS classes names and configuration.
          global_step (int): Global step value to record
          walltime (float): Optional override default walltime (time.time())
            seconds after epoch of event

      Shape:
          vertices: :math:`(B, N, 3)`. (batch, number_of_vertices, channels)

          colors: :math:`(B, N, 3)`. The values should lie in [0, 255] for type `uint8` or [0, 1] for type `float`.

          faces: :math:`(B, N, 3)`. The values should lie in [0, number_of_vertices] for type `uint8`.

      Examples::

          from torch.utils.tensorboard import SummaryWriter
          vertices_tensor = torch.as_tensor([
              [1, 1, 1],
              [-1, -1, 1],
              [1, -1, -1],
              [-1, 1, -1],
          ], dtype=torch.float).unsqueeze(0)
          colors_tensor = torch.as_tensor([
              [255, 0, 0],
              [0, 255, 0],
              [0, 0, 255],
              [255, 0, 255],
          ], dtype=torch.int).unsqueeze(0)
          faces_tensor = torch.as_tensor([
              [0, 2, 3],
              [0, 3, 1],
              [0, 1, 2],
              [1, 3, 2],
          ], dtype=torch.int).unsqueeze(0)

          writer = SummaryWriter()
          writer.add_mesh('my_mesh', vertices=vertices_tensor, colors=colors_tensor, faces=faces_tensor)

          writer.close()
      """
      torch._C._log_api_usage_once("tensorboard.logging.add_mesh")
      # Fetch the writer first, then build the summary, then record it.
      file_writer = self._get_file_writer()
      mesh_summary = mesh(tag, vertices, colors, faces, config_dict)
      file_writer.add_summary(mesh_summary, global_step, walltime)
  def flush(self) -> None:
      """Flushes the event file to disk.

      Call this method to make sure that all pending events have been written to
      disk. Does nothing when ``all_writers`` is ``None``.
      """
      writers = self.all_writers
      if writers is not None:
          for event_writer in writers.values():
              event_writer.flush()
  def close(self) -> None:
      """Flush and close every writer in ``all_writers``, then drop the references.

      Afterwards both ``file_writer`` and ``all_writers`` are ``None``; calling
      this again on an already closed instance is a no-op.
      """
      writers = self.all_writers
      if writers is None:
          return  # ignore double close
      for event_writer in writers.values():
          event_writer.flush()
          event_writer.close()
      # pyrefly: ignore [bad-assignment]
      self.file_writer = self.all_writers = None
  def __enter__(self):
      """Enter a ``with`` block; the object bound by ``as`` is this same instance."""
      return self
  def __exit__(self, exc_type, exc_val, exc_tb):
      """Leave a ``with`` block by calling ``close()``.

      The exception arguments are ignored and nothing is returned, so an
      exception raised inside the ``with`` body is not suppressed.
      """
      self.close()