{
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 1024,
  "model_type": "bert",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "position_embedding_type": "absolute",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.28.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 21128
}