# _json.py
#
# Extracted from https://github.com/pfmoore/pkg_metadata
  2. from __future__ import annotations
  3. from email.header import Header, decode_header, make_header
  4. from email.message import Message
  5. from typing import Any, cast
  6. METADATA_FIELDS = [
  7. # Name, Multiple-Use
  8. ("Metadata-Version", False),
  9. ("Name", False),
  10. ("Version", False),
  11. ("Dynamic", True),
  12. ("Platform", True),
  13. ("Supported-Platform", True),
  14. ("Summary", False),
  15. ("Description", False),
  16. ("Description-Content-Type", False),
  17. ("Keywords", False),
  18. ("Home-page", False),
  19. ("Download-URL", False),
  20. ("Author", False),
  21. ("Author-email", False),
  22. ("Maintainer", False),
  23. ("Maintainer-email", False),
  24. ("License", False),
  25. ("License-Expression", False),
  26. ("License-File", True),
  27. ("Classifier", True),
  28. ("Requires-Dist", True),
  29. ("Requires-Python", False),
  30. ("Requires-External", True),
  31. ("Project-URL", True),
  32. ("Provides-Extra", True),
  33. ("Provides-Dist", True),
  34. ("Obsoletes-Dist", True),
  35. ]
  36. def json_name(field: str) -> str:
  37. return field.lower().replace("-", "_")
  38. def msg_to_json(msg: Message) -> dict[str, Any]:
  39. """Convert a Message object into a JSON-compatible dictionary."""
  40. def sanitise_header(h: Header | str) -> str:
  41. if isinstance(h, Header):
  42. chunks = []
  43. for bytes, encoding in decode_header(h):
  44. if encoding == "unknown-8bit":
  45. try:
  46. # See if UTF-8 works
  47. bytes.decode("utf-8")
  48. encoding = "utf-8"
  49. except UnicodeDecodeError:
  50. # If not, latin1 at least won't fail
  51. encoding = "latin1"
  52. chunks.append((bytes, encoding))
  53. return str(make_header(chunks))
  54. return str(h)
  55. result = {}
  56. for field, multi in METADATA_FIELDS:
  57. if field not in msg:
  58. continue
  59. key = json_name(field)
  60. if multi:
  61. value: str | list[str] = [
  62. sanitise_header(v) for v in msg.get_all(field) # type: ignore
  63. ]
  64. else:
  65. value = sanitise_header(msg.get(field)) # type: ignore
  66. if key == "keywords":
  67. # Accept both comma-separated and space-separated
  68. # forms, for better compatibility with old data.
  69. if "," in value:
  70. value = [v.strip() for v in value.split(",")]
  71. else:
  72. value = value.split()
  73. result[key] = value
  74. payload = cast(str, msg.get_payload())
  75. if payload:
  76. result["description"] = payload
  77. return result