{
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "never_split": null,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}