Xihang Dai (daisy) — Assistant AI Engineer at China Unicom Global Limited

This is a machine-readable resume that AI agents (Claude, Cursor, ChatGPT) can fetch and edit through the cv-pro CLI or MCP. It is also served as raw JSON at /daisy.json.

{
  "username": "daisy",
  "header": {
    "name": "Xihang Dai",
    "tagline": "AI Engineer · RAG Pipelines & LLM-powered Agent Systems"
  },
  "personalInfo": {
    "email": "dxh2723192626@gmail.com",
    "location": "Hong Kong"
  },
  "experience": [
    {
      "company": "China Unicom Global Limited",
      "role": "Assistant AI Engineer",
      "startDate": "2023-09",
      "endDate": "Present",
      "bullets": [
        "Designed and implemented end-to-end Python pipelines for ingesting, preprocessing, and indexing multi-format knowledge sources using Surya OCR to build a unified enterprise knowledge base.",
        "Built an LLM-based RAG backend using FAISS, BGE embeddings, BGE reranker, and Qwen2.5-72B for answer generation, significantly improving answer accuracy.",
        "Deployed Docker-based containerised model-serving stacks across heterogeneous GPU/NPU environments (NVIDIA RTX 4090D, Huawei Ascend 910B), ensuring cross-platform compatibility.",
        "Built a RAG evaluation workflow using synthetic QA datasets, LLM-as-a-judge scoring, and Claude Code-assisted scripts for metric calculation and result summarisation.",
        "Developed a multimodal document QA system integrating InternVL2.5-78B, CLIP, and OCR-based preprocessing, enabling question answering for image-containing documents.",
        "Fine-tuned a BERT-based classification model with PyTorch and Hugging Face to replace an LLM-only pipeline across six marketing use cases, reducing average latency from 4.5s to 300ms.",
        "Designed a hybrid agent memory and state-management mechanism combining short-term conversational state with long-term structured memory stores, improving coherence in multi-turn reasoning."
      ],
      "tags": [
        "RAG",
        "LLM",
        "Agent",
        "Docker",
        "Python",
        "GPU/NPU"
      ]
    }
  ],
  "education": [
    {
      "school": "Beijing University of Posts and Telecommunications",
      "major": "Telecommunications Engineering and Management",
      "degree": "Bachelor of Engineering",
      "startDate": "2019-09",
      "endDate": "2023-06"
    }
  ],
  "projectsRecent": [],
  "projectsDetailed": [
    {
      "title": "Intelligent Question Answering Platform",
      "type": "Enterprise Project",
      "startDate": "2023-09",
      "endDate": "Present",
      "bullets": [
        "Designed and implemented end-to-end Python pipelines for ingesting, preprocessing, and indexing multi-format knowledge sources (documents, structured/semi-structured data) using Surya OCR.",
        "Built RAG backend with FAISS vector store, BGE embeddings, BGE reranker, and Qwen2.5-72B, significantly improving answer accuracy.",
        "Developed source traceability and fault-tolerance mechanisms, enhancing system trustworthiness and stability.",
        "Deployed Docker-based model-serving stacks across heterogeneous GPU/NPU environments (NVIDIA RTX 4090D, Huawei Ascend 910B).",
        "Built RAG evaluation workflow using synthetic QA datasets, LLM-as-a-judge scoring, and Claude Code-assisted scripts."
      ],
      "tags": [
        "RAG",
        "FAISS",
        "BGE",
        "Qwen2.5",
        "Docker",
        "OCR"
      ]
    },
    {
      "title": "Media Center Knowledge Q&A and AI Writing Agent",
      "type": "Enterprise Project",
      "startDate": "2024-01",
      "endDate": "Present",
      "bullets": [
        "Developed a multimodal document QA system supporting scanned documents, image-only files, and mixed-content inputs by integrating InternVL2.5-78B, CLIP, and OCR-based preprocessing.",
        "Fine-tuned and deployed a BERT-based classification model using PyTorch and Hugging Face, improving robustness and reducing average latency from 4.5s to 300ms across six marketing use cases.",
        "Introduced web search capability to solve the issue of stale model knowledge, improving timeliness of generated responses.",
        "Designed a hybrid agent memory and state-management mechanism combining short-term conversational state with long-term structured memory stores."
      ],
      "tags": [
        "Multimodal",
        "BERT",
        "LangGraph",
        "Agent Memory",
        "PyTorch"
      ]
    }
  ],
  "skills": [
    {
      "name": "LLM & Agent Systems",
      "items": [
        "Retrieval Augmented Generation (RAG)",
        "Prompt Engineering & Fine-tuning",
        "Agent Memory & State Management",
        "Intent Classification",
        "Agent Orchestration (LangGraph)"
      ]
    },
    {
      "name": "Agent Platforms & Tools",
      "items": [
        "Claude Code",
        "OpenClaw",
        "Cursor",
        "Dify"
      ]
    },
    {
      "name": "Programming & ML",
      "items": [
        "Python",
        "PyTorch",
        "TensorFlow"
      ]
    },
    {
      "name": "Infrastructure & Deployment",
      "items": [
        "Linux",
        "Docker",
        "Local LLM Deployment",
        "GPU/NPU Accelerated Inference",
        "NVIDIA RTX 4090D",
        "Huawei Ascend 910B"
      ]
    },
    {
      "name": "Languages",
      "items": [
        "Mandarin (Native)",
        "English (Fluent)",
        "Cantonese (Basic)"
      ]
    }
  ],
  "contact": [
    {
      "label": "GitHub",
      "url": "https://github.com/HA7CH"
    }
  ],
  "meta": {
    "updatedAt": "2026-05-17T07:46:38.789Z"
  }
}

{ "username": "daisy", "header": { "name": "Xihang Dai", "tagline": "AI Engineer · RAG Pipelines & LLM-powered Agent Systems" }, "personalInfo": { "email": "dxh2723192626@gmail.com", "location": "Hong Kong" }, "experience": [ { "company": "China Unicom Global Limited", "role": "Assistant AI Engineer", "startDate": "2023-09", "endDate": "Present", "bullets": [ "Designed and implemented end-to-end Python pipelines for ingesting, preprocessing, and indexing multi-format knowledge sources using Surya OCR to build a unified enterprise knowledge base.", "Built an LLM-based RAG backend using FAISS, BGE embeddings, BGE reranker, and Qwen2.5-72B for answer generation, significantly improving answer accuracy.", "Deployed Docker-based containerised model-serving stacks across heterogeneous GPU/NPU environments (NVIDIA RTX 4090D, Huawei Ascend 910B), ensuring cross-platform compatibility.", "Built a RAG evaluation workflow using synthetic QA datasets, LLM-as-a-judge scoring, and Claude Code-assisted scripts for metric calculation and result summarisation.", "Developed a multimodal document QA system integrating InternVL2.5-78B, CLIP, and OCR-based preprocessing, enabling question answering for image-containing documents.", "Fine-tuned a BERT-based classification model with PyTorch and Hugging Face to replace an LLM-only pipeline across six marketing use cases, reducing average latency from 4.5s to 300ms.", "Designed a hybrid agent memory and state-management mechanism combining short-term conversational state with long-term structured memory stores, improving coherence in multi-turn reasoning." 
], "tags": [ "RAG", "LLM", "Agent", "Docker", "Python", "GPU/NPU" ] } ], "education": [ { "school": "Beijing University of Posts and Telecommunications", "major": "Telecommunications Engineering and Management", "degree": "Bachelor of Engineering", "startDate": "2019-09", "endDate": "2023-06" } ], "projectsRecent": [], "projectsDetailed": [ { "title": "Intelligent Question Answering Platform", "type": "Enterprise Project", "startDate": "2023-09", "endDate": "Present", "bullets": [ "Designed and implemented end-to-end Python pipelines for ingesting, preprocessing, and indexing multi-format knowledge sources (documents, structured/semi-structured data) using Surya OCR.", "Built RAG backend with FAISS vector store, BGE embeddings, BGE reranker, and Qwen2.5-72B, significantly improving answer accuracy.", "Developed source traceability and fault-tolerance mechanisms, enhancing system trustworthiness and stability.", "Deployed Docker-based model-serving stacks across heterogeneous GPU/NPU environments (NVIDIA RTX 4090D, Huawei Ascend 910B).", "Built RAG evaluation workflow using synthetic QA datasets, LLM-as-a-judge scoring, and Claude Code-assisted scripts." 
], "tags": [ "RAG", "FAISS", "BGE", "Qwen2.5", "Docker", "OCR" ] }, { "title": "Media Center Knowledge Q&A and AI Writing Agent", "type": "Enterprise Project", "startDate": "2024-01", "endDate": "Present", "bullets": [ "Developed a multimodal document QA system supporting scanned documents, image-only files, and mixed-content inputs by integrating InternVL2.5-78B, CLIP, and OCR-based preprocessing.", "Fine-tuned and deployed a BERT-based classification model using PyTorch and Hugging Face, improving robustness and reducing average latency from 4.5s to 300ms across six marketing use cases.", "Introduced web search capability to solve the issue of stale model knowledge, improving timeliness of generated responses.", "Designed a hybrid agent memory and state-management mechanism combining short-term conversational state with long-term structured memory stores." ], "tags": [ "Multimodal", "BERT", "LangGraph", "Agent Memory", "PyTorch" ] } ], "skills": [ { "name": "LLM & Agent Systems", "items": [ "Retrieval Augmented Generation (RAG)", "Prompt Engineering & Fine-tuning", "Agent Memory & State Management", "Intent Classification", "Agent Orchestration (LangGraph)" ] }, { "name": "Agent Platforms & Tools", "items": [ "Claude Code", "OpenClaw", "Cursor", "Dify" ] }, { "name": "Programming & ML", "items": [ "Python", "PyTorch", "TensorFlow" ] }, { "name": "Infrastructure & Deployment", "items": [ "Linux", "Docker", "Local LLM Deployment", "GPU/NPU Accelerated Inference", "NVIDIA RTX 4090D", "Huawei Ascend 910B" ] }, { "name": "Languages", "items": [ "Mandarin (Native)", "English (Fluent)", "Cantonese (Basic)" ] } ], "contact": [ { "label": "GitHub", "url": "https://github.com/HA7CH" } ], "meta": { "updatedAt": "2026-05-17T07:46:38.789Z" } }