// methods.d.ts (4.0 KB)
import { APIResource } from "../../resource.js";
import * as GraderModelsAPI from "../graders/grader-models.js";
  3. export declare class Methods extends APIResource {
  4. }
  5. /**
  6. * The hyperparameters used for the DPO fine-tuning job.
  7. */
  8. export interface DpoHyperparameters {
  9. /**
  10. * Number of examples in each batch. A larger batch size means that model
  11. * parameters are updated less frequently, but with lower variance.
  12. */
  13. batch_size?: 'auto' | number;
  14. /**
  15. * The beta value for the DPO method. A higher beta value will increase the weight
  16. * of the penalty between the policy and reference model.
  17. */
  18. beta?: 'auto' | number;
  19. /**
  20. * Scaling factor for the learning rate. A smaller learning rate may be useful to
  21. * avoid overfitting.
  22. */
  23. learning_rate_multiplier?: 'auto' | number;
  24. /**
  25. * The number of epochs to train the model for. An epoch refers to one full cycle
  26. * through the training dataset.
  27. */
  28. n_epochs?: 'auto' | number;
  29. }
  30. /**
  31. * Configuration for the DPO fine-tuning method.
  32. */
  33. export interface DpoMethod {
  34. /**
  35. * The hyperparameters used for the DPO fine-tuning job.
  36. */
  37. hyperparameters?: DpoHyperparameters;
  38. }
  39. /**
  40. * The hyperparameters used for the reinforcement fine-tuning job.
  41. */
  42. export interface ReinforcementHyperparameters {
  43. /**
  44. * Number of examples in each batch. A larger batch size means that model
  45. * parameters are updated less frequently, but with lower variance.
  46. */
  47. batch_size?: 'auto' | number;
  48. /**
  49. * Multiplier on amount of compute used for exploring search space during training.
  50. */
  51. compute_multiplier?: 'auto' | number;
  52. /**
  53. * The number of training steps between evaluation runs.
  54. */
  55. eval_interval?: 'auto' | number;
  56. /**
  57. * Number of evaluation samples to generate per training step.
  58. */
  59. eval_samples?: 'auto' | number;
  60. /**
  61. * Scaling factor for the learning rate. A smaller learning rate may be useful to
  62. * avoid overfitting.
  63. */
  64. learning_rate_multiplier?: 'auto' | number;
  65. /**
  66. * The number of epochs to train the model for. An epoch refers to one full cycle
  67. * through the training dataset.
  68. */
  69. n_epochs?: 'auto' | number;
  70. /**
  71. * Level of reasoning effort.
  72. */
  73. reasoning_effort?: 'default' | 'low' | 'medium' | 'high';
  74. }
  75. /**
  76. * Configuration for the reinforcement fine-tuning method.
  77. */
  78. export interface ReinforcementMethod {
  79. /**
  80. * The grader used for the fine-tuning job.
  81. */
  82. grader: GraderModelsAPI.StringCheckGrader | GraderModelsAPI.TextSimilarityGrader | GraderModelsAPI.PythonGrader | GraderModelsAPI.ScoreModelGrader | GraderModelsAPI.MultiGrader;
  83. /**
  84. * The hyperparameters used for the reinforcement fine-tuning job.
  85. */
  86. hyperparameters?: ReinforcementHyperparameters;
  87. }
  88. /**
  89. * The hyperparameters used for the fine-tuning job.
  90. */
  91. export interface SupervisedHyperparameters {
  92. /**
  93. * Number of examples in each batch. A larger batch size means that model
  94. * parameters are updated less frequently, but with lower variance.
  95. */
  96. batch_size?: 'auto' | number;
  97. /**
  98. * Scaling factor for the learning rate. A smaller learning rate may be useful to
  99. * avoid overfitting.
  100. */
  101. learning_rate_multiplier?: 'auto' | number;
  102. /**
  103. * The number of epochs to train the model for. An epoch refers to one full cycle
  104. * through the training dataset.
  105. */
  106. n_epochs?: 'auto' | number;
  107. }
  108. /**
  109. * Configuration for the supervised fine-tuning method.
  110. */
  111. export interface SupervisedMethod {
  112. /**
  113. * The hyperparameters used for the fine-tuning job.
  114. */
  115. hyperparameters?: SupervisedHyperparameters;
  116. }
  117. export declare namespace Methods {
  118. export { type DpoHyperparameters as DpoHyperparameters, type DpoMethod as DpoMethod, type ReinforcementHyperparameters as ReinforcementHyperparameters, type ReinforcementMethod as ReinforcementMethod, type SupervisedHyperparameters as SupervisedHyperparameters, type SupervisedMethod as SupervisedMethod, };
  119. }
//# sourceMappingURL=methods.d.ts.map