| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- # @description:
- # @author: licanglong
- # @date: 2025/12/19 16:32
- from app.App import App
- from app.utils.pathutils import getpath
class EmbeddingStoreApp(App):
    """Embed text records and store them as points in a Qdrant collection."""

    def run(self, *args, **kwargs):
        """Encode each record's ``embedding_text`` and upsert it into Qdrant.

        Keyword Args:
            collection_name (str): Target collection. It is created with
                cosine distance when it does not exist yet. Required.
            vector_data (list[dict]): Records to store. Every record must
                contain an ``embedding_text`` entry; the complete record is
                stored as the point payload. Required and non-empty.
            host (str): Qdrant host. Defaults to ``"117.72.147.109"``.
            port (int): Qdrant port. Defaults to ``16333``.
            vector_size (int): Vector dimension used if the collection has
                to be created. Defaults to ``1792``.

        Raises:
            ValueError: If ``collection_name`` or ``vector_data`` is missing
                or empty.
            KeyError: If a record has no ``embedding_text`` entry.
        """
        # Function-local imports: these packages are only needed when the
        # app actually runs.
        import uuid
        from typing import List

        from qdrant_client import QdrantClient
        from qdrant_client.models import Distance, PointStruct, VectorParams
        from sentence_transformers import SentenceTransformer

        collection_name: str = kwargs.get("collection_name")
        if not collection_name:
            raise ValueError("miss collection_name value")

        # Materialise once so the records can be walked twice (texts, then
        # points) even if the caller handed in a one-shot iterable.
        records: List[dict] = list(kwargs.get("vector_data") or ())
        if not records:
            raise ValueError("miss vector_data value")

        # Extract the texts before touching the network or loading the model
        # so a malformed record fails before a collection gets created.
        texts: List[str] = [record["embedding_text"] for record in records]

        # Connection settings and vector size keep their previous values
        # unless the caller overrides them.
        host: str = kwargs.get("host") or "117.72.147.109"
        port: int = kwargs.get("port") or 16333
        vector_size: int = kwargs.get("vector_size") or 1792

        client = QdrantClient(host=host, port=port)
        # NOTE(review): the relative model path uses backslashes; this only
        # resolves on non-Windows hosts if getpath normalises separators --
        # confirm against getpath.
        model = SentenceTransformer(getpath(r"res\models\acge_text_embedding"))

        existing_names = {c.name for c in client.get_collections().collections}
        if collection_name not in existing_names:
            client.create_collection(
                collection_name=collection_name,
                vectors_config=VectorParams(
                    size=vector_size,
                    distance=Distance.COSINE,
                ),
            )

        # One batched encode call instead of one call per record; the result
        # is iterated row by row, one embedding per input text.
        vectors = model.encode(texts)

        # Every point gets a fresh random UUID, so re-running with the same
        # records adds new points rather than overwriting earlier ones.
        points: List[PointStruct] = [
            PointStruct(
                id=str(uuid.uuid4()),
                vector=vector.tolist(),
                payload=record,
            )
            for record, vector in zip(records, vectors)
        ]

        client.upsert(
            collection_name=collection_name,
            points=points,
        )
|