| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
- import { APIResource } from "../../resource.js";
- import * as GraderModelsAPI from "../graders/grader-models.js";
- export declare class Methods extends APIResource { // Declares no members of its own beyond what APIResource provides; the related types are exposed through the `Methods` namespace declared later in this file.
- }
- /**
- * The hyperparameters used for the DPO fine-tuning job.
- *
- * Every field is optional, and each one accepts either the literal `'auto'` or
- * an explicit number.
- *
- * NOTE(review): "DPO" presumably stands for Direct Preference Optimization, and
- * `'auto'` presumably leaves the choice of value to the service — confirm both
- * against the API reference.
- */
- export interface DpoHyperparameters {
- /**
- * Number of examples in each batch. A larger batch size means that model
- * parameters are updated less frequently, but with lower variance.
- *
- * Optional; `'auto'` or a number.
- */
- batch_size?: 'auto' | number;
- /**
- * The beta value for the DPO method. A higher beta value will increase the weight
- * of the penalty between the policy and reference model.
- *
- * Optional; `'auto'` or a number. This field is specific to the DPO
- * hyperparameters; the other hyperparameter interfaces in this file do not
- * declare it.
- */
- beta?: 'auto' | number;
- /**
- * Scaling factor for the learning rate. A smaller learning rate may be useful to
- * avoid overfitting.
- *
- * Optional; `'auto'` or a number.
- */
- learning_rate_multiplier?: 'auto' | number;
- /**
- * The number of epochs to train the model for. An epoch refers to one full cycle
- * through the training dataset.
- *
- * Optional; `'auto'` or a number.
- */
- n_epochs?: 'auto' | number;
- }
- /**
- * Configuration for the DPO fine-tuning method.
- *
- * The only declared field, `hyperparameters`, is optional, so an empty object
- * satisfies this type.
- */
- export interface DpoMethod {
- /**
- * The hyperparameters used for the DPO fine-tuning job.
- *
- * Optional; see `DpoHyperparameters` for the individual settings.
- */
- hyperparameters?: DpoHyperparameters;
- }
- /**
- * The hyperparameters used for the reinforcement fine-tuning job.
- *
- * Every field is optional. All fields except `reasoning_effort` accept either
- * the literal `'auto'` or an explicit number; `reasoning_effort` accepts one of
- * four string literals instead.
- *
- * NOTE(review): `'auto'` presumably leaves the choice of value to the service —
- * confirm against the API reference.
- */
- export interface ReinforcementHyperparameters {
- /**
- * Number of examples in each batch. A larger batch size means that model
- * parameters are updated less frequently, but with lower variance.
- *
- * Optional; `'auto'` or a number.
- */
- batch_size?: 'auto' | number;
- /**
- * Multiplier on amount of compute used for exploring search space during training.
- *
- * Optional; `'auto'` or a number.
- */
- compute_multiplier?: 'auto' | number;
- /**
- * The number of training steps between evaluation runs.
- *
- * Optional; `'auto'` or a number.
- */
- eval_interval?: 'auto' | number;
- /**
- * Number of evaluation samples to generate per training step.
- *
- * Optional; `'auto'` or a number.
- */
- eval_samples?: 'auto' | number;
- /**
- * Scaling factor for the learning rate. A smaller learning rate may be useful to
- * avoid overfitting.
- *
- * Optional; `'auto'` or a number.
- */
- learning_rate_multiplier?: 'auto' | number;
- /**
- * The number of epochs to train the model for. An epoch refers to one full cycle
- * through the training dataset.
- *
- * Optional; `'auto'` or a number.
- */
- n_epochs?: 'auto' | number;
- /**
- * Level of reasoning effort.
- *
- * Optional; one of `'default'`, `'low'`, `'medium'` or `'high'`. Unlike the
- * other fields of this interface it takes neither `'auto'` nor a number.
- */
- reasoning_effort?: 'default' | 'low' | 'medium' | 'high';
- }
- /**
- * Configuration for the reinforcement fine-tuning method.
- *
- * Unlike the DPO and supervised method configurations in this file, this one
- * has a required field: `grader` must always be supplied.
- */
- export interface ReinforcementMethod {
- /**
- * The grader used for the fine-tuning job.
- *
- * Required. Must be one of the five grader types imported from the
- * grader-models module: string check, text similarity, Python, score model,
- * or multi grader.
- */
- grader: GraderModelsAPI.StringCheckGrader | GraderModelsAPI.TextSimilarityGrader | GraderModelsAPI.PythonGrader | GraderModelsAPI.ScoreModelGrader | GraderModelsAPI.MultiGrader;
- /**
- * The hyperparameters used for the reinforcement fine-tuning job.
- *
- * Optional; see `ReinforcementHyperparameters` for the individual settings.
- */
- hyperparameters?: ReinforcementHyperparameters;
- }
- /**
- * The hyperparameters used for the supervised fine-tuning job.
- *
- * Every field is optional, and each one accepts either the literal `'auto'` or
- * an explicit number.
- *
- * NOTE(review): `'auto'` presumably leaves the choice of value to the service —
- * confirm against the API reference.
- */
- export interface SupervisedHyperparameters {
- /**
- * Number of examples in each batch. A larger batch size means that model
- * parameters are updated less frequently, but with lower variance.
- *
- * Optional; `'auto'` or a number.
- */
- batch_size?: 'auto' | number;
- /**
- * Scaling factor for the learning rate. A smaller learning rate may be useful to
- * avoid overfitting.
- *
- * Optional; `'auto'` or a number.
- */
- learning_rate_multiplier?: 'auto' | number;
- /**
- * The number of epochs to train the model for. An epoch refers to one full cycle
- * through the training dataset.
- *
- * Optional; `'auto'` or a number.
- */
- n_epochs?: 'auto' | number;
- }
- /**
- * Configuration for the supervised fine-tuning method.
- *
- * The only declared field, `hyperparameters`, is optional, so an empty object
- * satisfies this type.
- */
- export interface SupervisedMethod {
- /**
- * The hyperparameters used for the supervised fine-tuning job.
- *
- * Optional; see `SupervisedHyperparameters` for the individual settings.
- */
- hyperparameters?: SupervisedHyperparameters;
- }
- export declare namespace Methods { // Re-exports this file's six interfaces as type-only members of the `Methods` namespace, each under its original name.
- export { type DpoHyperparameters as DpoHyperparameters, type DpoMethod as DpoMethod, type ReinforcementHyperparameters as ReinforcementHyperparameters, type ReinforcementMethod as ReinforcementMethod, type SupervisedHyperparameters as SupervisedHyperparameters, type SupervisedMethod as SupervisedMethod, };
- }
- //# sourceMappingURL=methods.d.ts.map
|