parse_array.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. from typing import TYPE_CHECKING, Any
  2. from .utils.constants import STRING_DELIMITERS, JSONReturnType
  3. from .utils.json_context import ContextValues
  4. from .utils.object_comparer import ObjectComparer
  5. if TYPE_CHECKING:
  6. from .json_parser import JSONParser
  7. from .schema_repair import SchemaRepairer
  8. def parse_array(
  9. self: "JSONParser",
  10. schema: dict[str, Any] | bool | None = None,
  11. path: str = "$",
  12. ) -> list[JSONReturnType]:
  13. # <array> ::= '[' [ <json> *(', ' <json>) ] ']' ; A sequence of JSON values separated by commas
  14. # Only activate schema-guided parsing if a repairer is available and schema looks array-like.
  15. schema_repairer: SchemaRepairer | None = None
  16. items_schema: object | None = None
  17. additional_items: object | None = None
  18. if schema is not None and schema is not True:
  19. repairer = self.schema_repairer
  20. if repairer is not None:
  21. schema = repairer.resolve_schema(schema)
  22. if schema is False:
  23. raise ValueError("Schema does not allow any values.")
  24. if schema is not True and repairer.is_array_schema(schema):
  25. schema_repairer = repairer
  26. items_schema = schema.get("items")
  27. additional_items = schema.get("additionalItems", None)
  28. salvage_mode = schema_repairer is not None and schema_repairer.schema_repair_mode == "salvage"
  29. arr: list[JSONReturnType] = []
  30. self.context.set(ContextValues.ARRAY)
  31. self.skip_whitespaces()
  32. char = self.get_char_at()
  33. idx = 0
  34. while char and char not in ["]", "}"]:
  35. # Resolve per-item schema (tuple schemas + additionalItems) when schema guidance is active.
  36. item_schema: dict[str, Any] | bool | None = None
  37. drop_item = False
  38. if schema_repairer is not None:
  39. if isinstance(items_schema, list):
  40. if idx < len(items_schema):
  41. raw_schema = items_schema[idx]
  42. # Tuple schemas must contain dict/bool entries only.
  43. if raw_schema is not None and not isinstance(raw_schema, (dict, bool)):
  44. raise ValueError("Schema must be an object.")
  45. item_schema = raw_schema
  46. else:
  47. if additional_items is False:
  48. drop_item = True
  49. elif isinstance(additional_items, dict):
  50. item_schema = additional_items
  51. else:
  52. item_schema = True
  53. elif isinstance(items_schema, dict):
  54. item_schema = items_schema
  55. else:
  56. item_schema = True
  57. item_path = f"{path}[{idx}]"
  58. active_schema_repairer = (
  59. schema_repairer if schema_repairer is not None and not drop_item and not salvage_mode else None
  60. )
  61. if char in STRING_DELIMITERS:
  62. # A string followed by ':' is often a missing object start; treat it as an object.
  63. i = 1
  64. i = self.skip_to_character(char, i)
  65. i = self.scroll_whitespaces(idx=i + 1)
  66. if self.get_char_at(i) == ":":
  67. if active_schema_repairer is not None:
  68. # Schema-guided object parsing, then enforce schema on the parsed object.
  69. value = self.parse_object(item_schema, item_path)
  70. value = active_schema_repairer.repair_value(value, item_schema, item_path)
  71. else:
  72. # No schema (or dropping): still parse to keep the cursor in sync.
  73. value = self.parse_object()
  74. else:
  75. value = self.parse_string()
  76. if active_schema_repairer is not None:
  77. # Apply schema constraints/coercions to scalar values when configured.
  78. value = active_schema_repairer.repair_value(value, item_schema, item_path)
  79. else:
  80. # Use schema-aware parsing to guide nested repairs when configured.
  81. value = self.parse_json(item_schema, item_path) if active_schema_repairer is not None else self.parse_json()
  82. if ObjectComparer.is_strictly_empty(value) and self.get_char_at() not in ["]", ","]:
  83. self.index += 1
  84. elif value == "..." and self.get_char_at(-1) == ".":
  85. self.log(
  86. "While parsing an array, found a stray '...'; ignoring it",
  87. )
  88. elif not drop_item:
  89. arr.append(value)
  90. elif schema_repairer is not None:
  91. # Record drops for visibility when schema forbids extra tuple items.
  92. schema_repairer._log("Dropped extra array item not covered by schema", item_path)
  93. idx += 1
  94. char = self.get_char_at()
  95. while char and char != "]" and (char.isspace() or char == ","):
  96. self.index += 1
  97. char = self.get_char_at()
  98. if char != "]":
  99. self.log(
  100. "While parsing an array we missed the closing ], ignoring it",
  101. )
  102. self.index += 1
  103. self.context.reset()
  104. return arr