risk_dto.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. # @description:
  2. # @author: licanglong
  3. # @date: 2025/12/22 16:04
  4. from typing import List, Literal, Optional
  5. from pydantic import BaseModel, Field
  class RiskRule(BaseModel):
      """
      风控 / 规则引擎统一规则模型
      """

      # Unified rule record for the risk-control / rule-engine models.
      # The docstring above keeps its original wording on purpose: pydantic
      # can surface the class docstring as the model description in the
      # generated JSON schema, so rewording it would change runtime output.

      # Identifier of the rule; optional, None when not supplied.
      rule_id: Optional[str] = Field(
          default=None,
          description="规则唯一标识;当前固定为空或由系统生成",
      )
      # Closed set of rule kinds; required.
      rule_type: Literal["PERSONAL", "EDGE", "BUSINESS"] = Field(
          description="规则类型:个人消费 / 边界模糊 / 企业经营",
      )
      # Two-level classification of the rule; both required.
      category: str = Field(description="规则分类(一级分类)")
      subcategory: str = Field(description="规则细分类(二级分类)")
      # Natural-language statement of the rule; required.
      rule_description: str = Field(description="规则的自然语言描述")
      # Outcome the rule leans towards; required.
      decision_tendency: Literal["PERSONAL", "BUSINESS", "UNCERTAIN"] = Field(
          description="规则倾向的判断结果",
      )
      # Required float validated to lie within [0.0, 1.0] (ge / le bounds).
      confidence: float = Field(
          ge=0.0,
          le=1.0,
          description="规则判断的置信度,范围 0.0 ~ 1.0",
      )
      # Both condition lists default to a fresh empty list per instance.
      applicable_conditions: List[str] = Field(
          default_factory=list,
          description="适用于该规则的前置条件列表",
      )
      exception_conditions: List[str] = Field(
          default_factory=list,
          description="排除此规则的例外条件列表",
      )
      # Text intended for embedding, per its description; required.
      embedding_text: str = Field(
          description="用于向量化入库的完整文本描述(用于 embedding)",
      )
  class RiskRuleList(BaseModel):
      # Wrapper model exposing a required list of RiskRule items under the
      # key `risk_rules`. Documented with a comment rather than a docstring
      # so the model's docstring (and anything derived from it) stays unset.
      risk_rules: List[RiskRule]
  class RiskDecisionCase(BaseModel):
      """
      发票风控裁决结果模型
      """

      # One adjudicated invoice case. The docstring above is left in its
      # original wording because pydantic can expose it in the JSON schema.

      # Identifier of the case; optional, None when not supplied.
      case_id: Optional[str] = Field(
          default=None,
          description="裁决案例唯一标识;当前固定为空或由系统生成",
      )
      # Nature of the expense; required, one of two literals.
      expense_type: Literal["PERSONAL", "BUSINESS"] = Field(
          description="费用性质:个人消费 / 企业经营",
      )
      # Two-level invoice classification; both required.
      category: str = Field(description="发票一级分类")
      subcategory: str = Field(description="发票二级分类")
      # Invoice item / goods or service name; required.
      invoice_item: str = Field(description="发票类目或商品/服务名称")
      # Seller profile text; required. The description states it is
      # expected to be desensitised; nothing in this model enforces that.
      seller_profile: str = Field(
          description="卖家画像信息(已脱敏),仅保留品牌特征和行业关键信息,不包含具体地域、地址等敏感信息",
      )
      # Final ruling; required, one of two literals.
      decision: Literal["PERSONAL", "BUSINESS"] = Field(description="最终裁决结果")
      # Risk grade; required, one of three literals.
      risk_level: Literal["LOW", "MEDIUM", "HIGH"] = Field(description="风险等级评估")
      # Reasons behind the ruling; defaults to a fresh empty list.
      decision_reason: List[str] = Field(
          default_factory=list,
          description="裁决依据列表(规则命中、模型判断、人工经验等)",
      )
      # Text intended for embedding, per its description; required.
      embedding_text: str = Field(
          description="用于向量化入库的完整语义文本(用于 embedding 检索与相似案例匹配)",
      )
      # Origin of the ruling; optional, None when not supplied.
      source: Optional[str] = Field(
          default=None,
          description="裁决来源;当前固定为空,可用于标识模型/规则/人工等来源",
      )
  class RiskDecisionCaseList(BaseModel):
      # Wrapper model exposing a required list of RiskDecisionCase items
      # under the key `decision_cases`. A comment is used instead of a
      # docstring so the model's docstring stays unset, as before.
      decision_cases: List[RiskDecisionCase]
  class MerchantIndustryProfile(BaseModel):
      """
      商户行业画像 / 行业知识库模型
      用于辅助判断费用性质、风险倾向及 embedding 召回
      """

      # Industry-level profile record. The docstring above is kept in its
      # original wording because pydantic can expose it in the JSON schema.

      # Identifier of the profile; optional, None when not supplied.
      merchant_industry_id: Optional[str] = Field(
          default=None,
          description="行业画像唯一标识;当前固定为空或由系统生成",
      )
      # Industry name; required.
      industry_name: str = Field(
          description="行业名称(如:教育培训、餐饮、医疗服务等)",
      )
      # Example merchants / brands; defaults to a fresh empty list.
      typical_merchants: List[str] = Field(
          default_factory=list,
          description="该行业下的典型商家或品牌示例",
      )
      # Free-form string: the description lists PERSONAL / BUSINESS / MIXED
      # only as examples, and no Literal restriction is applied here.
      default_expense_nature: str = Field(
          description="该行业的默认费用性质(如:PERSONAL / BUSINESS / MIXED)",
      )
      # Required float validated to lie within [0.0, 1.0] (ge / le bounds).
      personal_consume_probability: float = Field(
          ge=0.0,
          le=1.0,
          description="该行业费用被判定为个人消费的可能性(0.0 ~ 1.0)",
      )
      # Legitimate business scenarios; defaults to a fresh empty list.
      enterprise_legit_scenarios: List[str] = Field(
          default_factory=list,
          description="该行业下企业经营合理、合规的消费场景说明",
      )
      # Common risk points; defaults to a fresh empty list.
      risk_notes: List[str] = Field(
          default_factory=list,
          description="该行业在报销 / 风控中的常见风险点说明",
      )
      # Text intended for embedding, per its description; required.
      embedding_text: str = Field(
          description="用于向量化入库的完整语义描述文本(行业画像 + 风险特征 + 合法场景)",
      )
  class IndustryProfileList(BaseModel):
      # Wrapper model exposing a required list of MerchantIndustryProfile
      # items under the key `industry_profiles`. A comment is used instead
      # of a docstring so the model's docstring stays unset, as before.
      industry_profiles: List[MerchantIndustryProfile]
  class RiskSignal(BaseModel):
      """
      风控风险信号模型
      用于表达一次可解释、可追溯的风险提示信号
      """

      # A single risk signal record. The docstring above is kept in its
      # original wording because pydantic can expose it in the JSON schema.

      # Identifier of the signal; optional, None when not supplied.
      signal_id: Optional[str] = Field(
          default=None,
          description="风险信号唯一标识;当前固定为空或由系统生成",
      )
      # Free-form signal type and short name; both required.
      signal_type: str = Field(
          description="风险信号类型(如:行为异常、内容异常、行业高风险、规则冲突等)",
      )
      signal_name: str = Field(description="风险信号名称(简要概括风险本质)")
      # Conditions that raise the signal; defaults to a fresh empty list.
      trigger_conditions: List[str] = Field(
          default_factory=list,
          description="触发该风险信号的具体条件列表",
      )
      # Risk grade; required, one of three literals.
      risk_level: Literal["LOW", "MEDIUM", "HIGH"] = Field(
          description="该风险信号对应的风险等级",
      )
      # Extra evidence wanted; note this is a list of strings (not a
      # boolean flag) and defaults to a fresh empty list.
      need_additional_evidence: List[str] = Field(
          default_factory=list,
          description="为进一步确认风险所需的补充证据",
      )
      # Handling recommendation; required.
      suggested_handling: str = Field(
          description="针对该风险信号的处理建议(如人工复核、补充材料、直接驳回等)",
      )
      # Text intended for embedding, per its description; required.
      embedding_text: str = Field(
          description="用于向量化入库的完整语义文本(风险信号说明 + 触发逻辑 + 处理建议)",
      )
  class RiskSignalList(BaseModel):
      # Wrapper model exposing a required list of RiskSignal items under
      # the key `signals`. A comment is used instead of a docstring so the
      # model's docstring stays unset, as before.
      signals: List[RiskSignal]
  class EvidenceItem(BaseModel):
      """
      决策证据链中的单条证据
      """

      # One entry of a decision's evidence chain. The docstring above is
      # kept in its original wording because pydantic can expose it in the
      # JSON schema.
      #
      # The field names `type` and `id` coincide with Python builtins, but
      # they are the model's serialized keys, so they are left unchanged.

      # Kind of evidence; required, one of four literals.
      type: Literal["rule", "case", "signal", "industry"] = Field(
          description="证据类型:规则 / 历史案例 / 风险信号 / 行业画像",
      )
      # Identifier of the referenced evidence; required.
      # NOTE(review): the description mentions `industry_id`, while the
      # industry profile model in this file names its field
      # `merchant_industry_id` -- confirm which key is intended.
      id: str = Field(
          description="证据唯一标识(rule_id / case_id / signal_id / industry_id)",
      )
      # Explanation of how this evidence influenced the outcome; required.
      summary: str = Field(description="该证据对最终判断产生关键影响的说明")
  class DecisionCompletion(BaseModel):
      """
      最终裁决补充说明
      """

      # Explanatory part of a final decision. The docstring above is kept
      # in its original wording because pydantic can expose it in the JSON
      # schema.

      # Conclusion text; required.
      summary: str = Field(
          description="最终判断结论说明,需明确判断类型并给出核心依据",
      )
      # EvidenceItem entries backing the decision; defaults to a fresh
      # empty list. The description asks for ordering by influence, which
      # this model does not itself enforce.
      evidence_chain: List[EvidenceItem] = Field(
          default_factory=list,
          description="支撑最终裁决的证据链列表(按影响顺序排列)",
      )
  class FinalDecisionResult(BaseModel):
      """
      风控最终裁决输出模型
      """

      # Final output record of the risk decision. The docstring above is
      # kept in its original wording because pydantic can expose it in the
      # JSON schema.

      # Final ruling; required, restricted to the four literals below.
      decision: Literal[
          "PERSONAL_CONSUMPTION",
          "ENTERPRISE_OPERATION",
          "ENTERPRISE_WELFARE",
          "UNCERTAIN",
      ] = Field(description="最终费用性质裁决结果")
      # Required float validated to lie within [0.0, 1.0] (ge / le bounds).
      confidence: float = Field(
          ge=0.0,
          le=1.0,
          description="最终判断置信度(0.0 ~ 1.0)",
      )
      # Nested explanation (summary + evidence chain); required.
      completion: DecisionCompletion = Field(
          description="最终裁决的解释性补充信息",
      )
      # Risk flags raised during the decision; defaults to a fresh empty list.
      risk_flags: List[str] = Field(
          default_factory=list,
          description="本次判断过程中触发的风险标识集合",
      )
      # Required boolean; no default, so it must always be provided.
      need_manual_review: bool = Field(
          description="是否需要人工复核(基于置信度或风险等级综合判断)",
      )
  class RiskEvidenceResult(BaseModel):
      # Bundle of the four evidence collections modelled in this file.
      # Every field is a required list (no defaults are declared). A
      # comment is used instead of a docstring so the model's docstring
      # stays unset, as before.

      # RiskRule entries.
      rules: List[RiskRule]
      # RiskDecisionCase entries.
      cases: List[RiskDecisionCase]
      # MerchantIndustryProfile entries (the field name is singular, but
      # the value is a list).
      industry: List[MerchantIndustryProfile]
      # RiskSignal entries.
      signals: List[RiskSignal]
  106. """
  107. {{
  108. "info":"<string:商品信息>",
  109. "type":"<string:商品分类>",
  110. "decision": "<BELONG | NOT_BELONG | UNCERTAIN>",
  111. "confidence":<float:置信度(0.0~1.0)>,
  112. "summary":"<string:最终判断结论,需要明确当前判断的数据所属类型,并且给出依据>",
  113. "evidence_chain":<list:[
  114. {{
  115. "summary": "<string:该证据对最终判断产生的关键影响>",
  116. "confidence":<float:置信度(0.0~1.0)>,
  117. "source": "<引用来源>"
  118. }}
  119. ]>
  120. }}
  121. """
  class SimilarIdentificationEvidence(BaseModel):
      # One evidence entry inside a similar-identification result.
      # All three fields are required (`...` marks them as having no default).
      # A comment is used instead of a docstring so the model's docstring
      # stays unset, as before.

      # How this evidence influenced the final judgement.
      summary: str = Field(..., description="该证据对最终判断产生的关键影响")
      # The description promises a value in 0.0 .. 1.0; the ge / le bounds
      # now enforce that, matching the other confidence fields in this file.
      # Out-of-range values are rejected at validation time.
      confidence: float = Field(
          ...,
          ge=0.0,
          le=1.0,
          description="置信度,范围是 0.0 到 1.0",
      )
      # Where the evidence was taken from.
      source: str = Field(..., description="引用来源")
  class SimilarIdentificationResult(BaseModel):
      # Result record of a similar-identification judgement.
      # All fields are required (`...` marks them as having no default).
      # A comment is used instead of a docstring so the model's docstring
      # stays unset, as before.
      #
      # The field name `type` coincides with a Python builtin, but it is the
      # model's serialized key, so it is left unchanged.

      # Item information and its classification.
      info: str = Field(..., description="商品信息")
      type: str = Field(..., description="商品分类")
      # The description enumerates exactly three allowed outcomes, so the
      # annotation is a Literal (as for the other decision fields in this
      # file); any other string is rejected at validation time.
      decision: Literal["BELONG", "NOT_BELONG", "UNCERTAIN"] = Field(
          ...,
          description="判断结果:BELONG | NOT_BELONG | UNCERTAIN",
      )
      # The description promises a value in 0.0 .. 1.0; the ge / le bounds
      # enforce it, matching the other confidence fields in this file.
      confidence: float = Field(
          ...,
          ge=0.0,
          le=1.0,
          description="置信度,范围是 0.0 到 1.0",
      )
      # Final conclusion text.
      summary: str = Field(
          ...,
          description="最终判断结论,需要明确当前判断的数据所属类型,并且给出依据",
      )
      # SimilarIdentificationEvidence entries supporting the conclusion.
      evidence_chain: List[SimilarIdentificationEvidence] = Field(
          ...,
          description="证据链,包含一系列对判断有影响的证据",
      )