<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/">
    <channel>
        <title>赵康康</title>
        <link>https://tangly1024.com/</link>
        <description>个人站点-主NLP</description>
        <lastBuildDate>Thu, 02 Jan 2025 01:37:09 GMT</lastBuildDate>
        <docs>https://validator.w3.org/feed/docs/rss2.html</docs>
        <generator>https://github.com/jpmonette/feed</generator>
        <language>zh-CN</language>
        <copyright>All rights reserved 2025, NotionNext</copyright>
        <item>
            <title><![CDATA[RLHF]]></title>
            <link>https://tangly1024.com/article/165ffd3a-edd0-805b-b1df-d90d5d34f03a</link>
            <guid>https://tangly1024.com/article/165ffd3a-edd0-805b-b1df-d90d5d34f03a</guid>
            <pubDate>Mon, 23 Dec 2024 00:00:00 GMT</pubDate>
            <content:encoded><![CDATA[<div id="notion-article" class="mx-auto overflow-hidden "><main class="notion light-mode notion-page notion-block-165ffd3aedd0805bb1dfd90d5d34f03a"><div class="notion-viewport"></div><div class="notion-collection-page-properties"></div><div class="notion-blank notion-block-165ffd3aedd0807dafd0c9463e23d8fb"> </div></main></div>]]></content:encoded>
        </item>
        <item>
            <title><![CDATA[强化学习]]></title>
            <link>https://tangly1024.com/article/14dffd3a-edd0-8068-b20f-d5b54839f6ae</link>
            <guid>https://tangly1024.com/article/14dffd3a-edd0-8068-b20f-d5b54839f6ae</guid>
            <pubDate>Fri, 29 Nov 2024 00:00:00 GMT</pubDate>
            <content:encoded><![CDATA[<div id="notion-article" class="mx-auto overflow-hidden "><main class="notion light-mode notion-page notion-block-14dffd3aedd08068b20fd5b54839f6ae"><div class="notion-viewport"></div><div class="notion-collection-page-properties"></div><div class="notion-blank notion-block-14dffd3aedd08067affccae51193c984"> </div></main></div>]]></content:encoded>
        </item>
        <item>
            <title><![CDATA[DL-数据]]></title>
            <link>https://tangly1024.com/article/165ffd3a-edd0-802f-b54c-f925353dc790</link>
            <guid>https://tangly1024.com/article/165ffd3a-edd0-802f-b54c-f925353dc790</guid>
            <pubDate>Mon, 23 Dec 2024 00:00:00 GMT</pubDate>
            <content:encoded><![CDATA[<div id="notion-article" class="mx-auto overflow-hidden "><main class="notion light-mode notion-page notion-block-165ffd3aedd0802fb54cf925353dc790"><div class="notion-viewport"></div><div class="notion-collection-page-properties"></div><div class="notion-blank notion-block-165ffd3aedd080a9a8c4fd45526b041b"> </div></main></div>]]></content:encoded>
        </item>
        <item>
            <title><![CDATA[InfiAgent-DABench: Evaluating Agents on Data Analysis Tasks]]></title>
            <link>https://tangly1024.com/article/13cffd3a-edd0-8023-a8cc-c25cf09cf673</link>
            <guid>https://tangly1024.com/article/13cffd3a-edd0-8023-a8cc-c25cf09cf673</guid>
            <pubDate>Tue, 12 Nov 2024 00:00:00 GMT</pubDate>
            <content:encoded><![CDATA[<div id="notion-article" class="mx-auto overflow-hidden "><main class="notion light-mode notion-page notion-small-text notion-block-13cffd3aedd08023a8ccc25cf09cf673"><div class="notion-viewport"></div><div class="notion-collection-page-properties"></div><div class="notion-text notion-block-13dffd3aedd08014b25af3ac56828450"><a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3">https://arxiv.org/html/2401.05507v3</a></div><div class="notion-blank notion-block-13dffd3aedd08022b715cc05044b0986"> </div><div class="notion-text notion-block-13dffd3aedd080feb076ddc19f46efab">好东西，但也真能水，翻来覆去这么几句车轱辘话</div><h2 class="notion-h notion-h1 notion-h-indent-0 notion-block-13cffd3aedd08055be2ee35ca6befba6" data-id="13cffd3aedd08055be2ee35ca6befba6"><span><div id="13cffd3aedd08055be2ee35ca6befba6" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13cffd3aedd08055be2ee35ca6befba6" title="Abstract"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title"><b>Abstract</b></span></span></h2><ul class="notion-list notion-list-disc notion-block-13cffd3aedd08042a31cc55fabdb1138"><li>first benchmark specifically designed to evaluate LLM-based agents on data analysis tasks.</li></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080189efae21a53dbd11b"><li>This benchmark contains DAEval, </li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080189efae21a53dbd11b"><li>a dataset consisting of 257 data analysis questions derived from 52 CSV 
files,</li><li>an agent framework which incorporates LLMs to serve as data analysis agents for both serving and evaluation. </li></ul></ul><h2 class="notion-h notion-h1 notion-h-indent-0 notion-block-13dffd3aedd080d8a4bbc51008ffa208" data-id="13dffd3aedd080d8a4bbc51008ffa208"><span><div id="13dffd3aedd080d8a4bbc51008ffa208" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd080d8a4bbc51008ffa208" title="1 Introduction"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">1 <b>Introduction</b></span></span></h2><div class="notion-text notion-block-13dffd3aedd080ceae13e35ca654d094">open-source agents for data analysis</div><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080988286f517245388f2"><li>Open Interpreter (Lucas, <a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3#bib.bib25">2023</a>)</li></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd08029afe8d02f86b99158"><li>Open Agents (Xie et al., <a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3#bib.bib54">2023</a>)</li></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080e3b5fdffa9605d046f"><li>Qwen-Agent (Bai et al., <a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3#bib.bib4">2023</a>) </li></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0801286ddf0272cb9aaf9"><li>Taskweaver (Qiao et al., <a target="_blank" rel="noopener 
noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3#bib.bib35">2023</a>).</li></ul><div class="notion-blank notion-block-13dffd3aedd080fe9eefd589abd8eacf"> </div><div class="notion-blank notion-block-13dffd3aedd080e78853cd6fb497fe29"> </div><h2 class="notion-h notion-h1 notion-h-indent-0 notion-block-13dffd3aedd080c5b67ad14d12dd3191" data-id="13dffd3aedd080c5b67ad14d12dd3191"><span><div id="13dffd3aedd080c5b67ad14d12dd3191" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd080c5b67ad14d12dd3191" title="4 Related works"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title"><b>4 Related works</b></span></span></h2><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080d78b93eba2ee8a42b9"><li>web environments:</li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080d78b93eba2ee8a42b9"><li>WebShop (Yao et al., <a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3#bib.bib59">2022a</a>) </li><li> WebArena (Zhou et al., <a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3#bib.bib68">2023</a>)</li></ul></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0805d9f4ad7c3fb7b43e9"><li>tool using</li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0805d9f4ad7c3fb7b43e9"><li>ToolBench (Qin et al., <a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3#bib.bib37">2023b</a>) </li><li>Gentopia (Xu et 
al., <a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3#bib.bib55">2023a</a>)</li></ul></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0802997bbd8d394f5ba0a"><li><span class="notion-orange"><b>Benchmarks for Code</b></span></li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0802997bbd8d394f5ba0a"><li>CodeXGLUE (Lu et al., <a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3#bib.bib24">2021</a>)  for both code understanding and generation</li><li>DS-1000 (Lai et al., <a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://arxiv.org/html/2401.05507v3#bib.bib19">2023</a>).  benchmark for code generation in data science.</li></ul></ul><h2 class="notion-h notion-h1 notion-h-indent-0 notion-block-13dffd3aedd0805da0a7ebbdd736319d" data-id="13dffd3aedd0805da0a7ebbdd736319d"><span><div id="13dffd3aedd0805da0a7ebbdd736319d" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd0805da0a7ebbdd736319d" title="2 InfiAgent-DABench Benchmark"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">2 InfiAgent-DABench Benchmark</span></span></h2><div class="notion-blank notion-block-13dffd3aedd080ba8ea5e4d43cfef291"> </div><h3 class="notion-h notion-h2 notion-h-indent-1 notion-block-13dffd3aedd08025bef4e64200b69544" data-id="13dffd3aedd08025bef4e64200b69544"><span><div id="13dffd3aedd08025bef4e64200b69544" class="notion-header-anchor"></div><a class="notion-hash-link" 
href="#13dffd3aedd08025bef4e64200b69544" title="2.1 Dataset Construction"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">2.1 Dataset Construction</span></span></h3><div class="notion-blank notion-block-13dffd3aedd08000b50af8fa5cac31be"> </div><div class="notion-text notion-block-13dffd3aedd08099af28d193fdee745c">Data construction steps:</div><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080cbb869c768b53210ce"><li>Files Collection:</li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080cbb869c768b53210ce"><li>from Github</li></ul></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080cd98cee84a8e941e34"><li><span class="notion-orange"><span class="notion-default_background">Description Generation </span></span></li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080cd98cee84a8e941e34"><li>utilize <span class="notion-orange">GPT-3.5</span> to summarize the following information for each CSV file:</li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080099e34c9bb6203f766"><li>Description of column names and contents</li><li> Data types</li><li>Identification of missing values </li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080309f5ff6f15f84ee3e"><a class="notion-page-link notion-block-13dffd3aedd08053a422c200e5b556fb" href="/13dffd3aedd08053a422c200e5b556fb"><span class="notion-page-title"><div class="notion-page-icon-inline notion-page-icon-image"><svg class="notion-page-title-icon notion-page-icon" alt="Prompts for Description Generation" 
viewBox="0 0 30 30" width="16"><path d="M16,1H4v28h22V11L16,1z M16,3.828L23.172,11H16V3.828z M24,27H6V3h8v10h10V27z M8,17h14v-2H8V17z M8,21h14v-2H8V21z M8,25h14v-2H8V25z"></path></svg></div><span class="notion-page-title-text">Prompts for Description Generation</span></span></a></ul></ul></ul></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080d69790cd7052847878"><li>Concepts<b> </b>Determination:</li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080d69790cd7052847878"><li>To ensure that our dataset aligns with real-world demands, we conduct expert interviews to identify <span class="notion-orange">key concepts</span> in data analysis. </li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080b088e4f30a2a81e016"><a class="notion-page-link notion-block-13dffd3aedd08095b16ccd257b2ec6bc" href="/13dffd3aedd08095b16ccd257b2ec6bc"><span class="notion-page-title"><div class="notion-page-icon-inline notion-page-icon-image"><svg class="notion-page-title-icon notion-page-icon" alt="Concepts and corresponding explanations" viewBox="0 0 30 30" width="16"><path d="M16,1H4v28h22V11L16,1z M16,3.828L23.172,11H16V3.828z M24,27H6V3h8v10h10V27z M8,17h14v-2H8V17z M8,21h14v-2H8V21z M8,25h14v-2H8V25z"></path></svg></div><span class="notion-page-title-text">Concepts and corresponding explanations</span></span></a></ul></ul></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0801ca9e8d7ffe2954413"><li><span class="notion-orange">Open-ended Question Generation</span></li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0801ca9e8d7ffe2954413"><li>leverage <span class="notion-orange">GPT-4</span> to generate open-ended questions based on both file descriptions and the pre-defined concepts.</li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080e8aa27d23211c74f04"><a class="notion-page-link notion-block-13dffd3aedd08012aef8dee6b9686e2c" href="/13dffd3aedd08012aef8dee6b9686e2c"><span 
class="notion-page-title"><div class="notion-page-icon-inline notion-page-icon-image"><svg class="notion-page-title-icon notion-page-icon" alt="Prompts for question generation" viewBox="0 0 30 30" width="16"><path d="M16,1H4v28h22V11L16,1z M16,3.828L23.172,11H16V3.828z M24,27H6V3h8v10h10V27z M8,17h14v-2H8V17z M8,21h14v-2H8V21z M8,25h14v-2H8V25z"></path></svg></div><span class="notion-page-title-text">Prompts for question generation</span></span></a></ul></ul></ul><div class="notion-blank notion-block-13dffd3aedd080838bf6e981c5d7560b"> </div><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080c0be0ee445a64a28fd"><li>Constraints and Format Requirements Generation</li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080c0be0ee445a64a28fd"><li>Considering closed-form questions can be easily evaluated,we introduce a <span class="notion-orange">format-prompting technique that prompts GPT-4 to generate detailed constraints and format requirements</span> for open-ended questions to make them closed-form. 
</li><li>we conduct an experiment on <span class="notion-orange">GPT-4 auto-evaluating for above open-ended questions</span>, which shows that GPT-4 could <span class="notion-orange">only achieve 67% consistency</span> with human experts</li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080af8beec7285515813a"><a class="notion-page-link notion-block-13dffd3aedd08010ae7ff6a81e28e64e" href="/13dffd3aedd08010ae7ff6a81e28e64e"><span class="notion-page-title"><div class="notion-page-icon-inline notion-page-icon-image"><svg class="notion-page-title-icon notion-page-icon" alt="Constraints and Format Requirements Generation" viewBox="0 0 30 30" width="16"><path d="M16,1H4v28h22V11L16,1z M16,3.828L23.172,11H16V3.828z M24,27H6V3h8v10h10V27z M8,17h14v-2H8V17z M8,21h14v-2H8V21z M8,25h14v-2H8V25z"></path></svg></div><span class="notion-page-title-text">Constraints and Format Requirements Generation</span></span></a></ul></ul></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0809ab97dceb5658f0a44"><li>Response Gathering</li><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0809ab97dceb5658f0a44"><li>leverage <span class="notion-orange">OpenAI ADA</span> to generate answers of closed-form questions. 
Each question is sent to ADA for 3 times.</li></ul></ul><div class="notion-blank notion-block-13dffd3aedd080a59071e7b36b21efc6"> </div><h3 class="notion-h notion-h2 notion-h-indent-1 notion-block-13dffd3aedd0800c9799f1346d5da714" data-id="13dffd3aedd0800c9799f1346d5da714"><span><div id="13dffd3aedd0800c9799f1346d5da714" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd0800c9799f1346d5da714" title="2.2 Agent Framework"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">2.2 Agent Framework</span></span></h3><div class="notion-blank notion-block-13dffd3aedd080068912c72661daa3ca"> </div><h3 class="notion-h notion-h2 notion-h-indent-1 notion-block-13dffd3aedd0801699ebefcbcf2cc5cf" data-id="13dffd3aedd0801699ebefcbcf2cc5cf"><span><div id="13dffd3aedd0801699ebefcbcf2cc5cf" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd0801699ebefcbcf2cc5cf" title="2.3 Human Assessment"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">2.3 Human Assessment</span></span></h3><div class="notion-text notion-block-13dffd3aedd080559a65f841afc9bb5e">We evaluate the quality of files, questions 
and labels. For each part, we define one or more metrics which are crucial for a data analysis dataset：</div><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0801d9e51f48ef705162f"><li>Suitableness</li></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0806a9c95de3877bd5705"><li>Reasonableness</li></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd08061b414e1a29fdc6293"><li>Value</li></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0803fa9f3eae686bb1f05"><li><span class="notion-orange">Restrictiveness</span></li></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd0807aab0bed8159f95547"><li>Alignment</li></ul><ul class="notion-list notion-list-disc notion-block-13dffd3aedd080ae9428ff9a15d9d366"><li>Correctness</li></ul><h4 class="notion-h notion-h3 notion-h-indent-2 notion-block-13dffd3aedd08012af59f1e71515e9fa" data-id="13dffd3aedd08012af59f1e71515e9fa"><span><div id="13dffd3aedd08012af59f1e71515e9fa" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd08012af59f1e71515e9fa" title="2.3.1 Dataset Quality"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">2.3.1 <b>Dataset Quality</b></span></span></h4><div class="notion-blank notion-block-13dffd3aedd080bbb241e3eca647e8af"> </div><h4 class="notion-h notion-h3 notion-h-indent-2 notion-block-13dffd3aedd080d3b248d30ea98113d5" data-id="13dffd3aedd080d3b248d30ea98113d5"><span><div id="13dffd3aedd080d3b248d30ea98113d5" class="notion-header-anchor"></div><a class="notion-hash-link" 
href="#13dffd3aedd080d3b248d30ea98113d5" title="2.3.2 Human vs GPT-4"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">2.3.2 Human vs GPT-4</span></span></h4><div class="notion-text notion-block-13dffd3aedd080aa83d4dc045c633e3e">We found that GPT-4 generated questions take advantage on <span class="notion-orange">Restrictiveness, Alignment and Difficulty</span>, and a slightly better performance on <span class="notion-orange">Reasonableness and Value,</span> which indicates that DAEval are comparable with human-made dataset.</div><div class="notion-blank notion-block-13dffd3aedd0808d89f7e2d4af42c653"> </div><div class="notion-blank notion-block-13dffd3aedd08026a1ccdf7c32ff26b3"> </div><h3 class="notion-h notion-h2 notion-h-indent-1 notion-block-13dffd3aedd08042bfbae6b7cd043ef1" data-id="13dffd3aedd08042bfbae6b7cd043ef1"><span><div id="13dffd3aedd08042bfbae6b7cd043ef1" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd08042bfbae6b7cd043ef1" title="2.4 Benchmark Statistics"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">2.4 <b>Benchmark Statistics</b></span></span></h3><div class="notion-text 
notion-block-13dffd3aedd08016b9a6dd827a4cc845">We split the dataset into validation set and test set. <span class="notion-orange">The validation set is open to public</span>, including 257 questions with 52 csv files and the <span class="notion-orange">rest is for test set which is close</span>d for avoiding data leakage. </div><figure class="notion-asset-wrapper notion-asset-wrapper-image notion-block-13dffd3aedd0805faa2cc0f7faea2f6a"><div style="position:relative;display:flex;justify-content:center;align-self:center;width:336px;max-width:100%;flex-direction:column"><img style="object-fit:cover" src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2Fde1827de-3f10-4f2a-8991-ddbc087af6ae%2Fd6d870ef-3feb-4ae5-9ea2-688f51ef23b7%2Fimage.png?table=block&amp;id=13dffd3a-edd0-805f-aa2c-c0f7faea2f6a&amp;t=13dffd3a-edd0-805f-aa2c-c0f7faea2f6a&amp;width=336&amp;cache=v2" alt="notion image" loading="lazy" decoding="async"/></div></figure><figure class="notion-asset-wrapper notion-asset-wrapper-image notion-block-13dffd3aedd080c7afd9e3847aa1f3fa"><div style="position:relative;display:flex;justify-content:center;align-self:center;width:336px;max-width:100%;flex-direction:column"><img style="object-fit:cover" src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2Fde1827de-3f10-4f2a-8991-ddbc087af6ae%2F91f20c00-9295-442d-b6ba-fb476d70a4f2%2Fimage.png?table=block&amp;id=13dffd3a-edd0-80c7-afd9-e3847aa1f3fa&amp;t=13dffd3a-edd0-80c7-afd9-e3847aa1f3fa&amp;width=336&amp;cache=v2" alt="notion image" loading="lazy" decoding="async"/></div></figure><div class="notion-blank notion-block-13dffd3aedd080ada99fce76f08610e6"> </div><figure class="notion-asset-wrapper notion-asset-wrapper-image notion-block-13dffd3aedd0804288f3e2f86b845385"><div style="position:relative;display:flex;justify-content:center;align-self:center;width:480px;max-width:100%;flex-direction:column"><img style="object-fit:cover" 
src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2Fde1827de-3f10-4f2a-8991-ddbc087af6ae%2F2797cca0-5eb3-4f7e-86d8-b2bd1f5f9717%2Fimage.png?table=block&amp;id=13dffd3a-edd0-8042-88f3-e2f86b845385&amp;t=13dffd3a-edd0-8042-88f3-e2f86b845385&amp;width=480&amp;cache=v2" alt="notion image" loading="lazy" decoding="async"/></div></figure><div class="notion-blank notion-block-13dffd3aedd08063a5b3c987d2bf167f"> </div><h3 class="notion-h notion-h2 notion-h-indent-1 notion-block-13dffd3aedd08081b810ea9a93c9dbea" data-id="13dffd3aedd08081b810ea9a93c9dbea"><span><div id="13dffd3aedd08081b810ea9a93c9dbea" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd08081b810ea9a93c9dbea" title="2.5 Instruction-tuning Dataset"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">2.5 Instruction-tuning Dataset</span></span></h3><div class="notion-text notion-block-13dffd3aedd0806ea047e3c713ee69d1">DAInstruct, an instruction-tuning dataset for data analysis. 
The collection of DAInstruct is carried out in three steps: </div><ol start="1" class="notion-list notion-list-numbered notion-block-13dffd3aedd08001a548f48fb2df1c23"><li>Creating a diverse set of <span class="notion-orange">data analysis questions</span> for CSV files</li><ol class="notion-list notion-list-numbered notion-block-13dffd3aedd08001a548f48fb2df1c23"><figure class="notion-asset-wrapper notion-asset-wrapper-image notion-block-13dffd3aedd0800d923df6ff37d79daa"><div style="position:relative;display:flex;justify-content:center;align-self:center;width:100%;max-width:100%;flex-direction:column;height:100%"><img style="object-fit:cover" src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2Fde1827de-3f10-4f2a-8991-ddbc087af6ae%2F2bc0f996-52ef-4b48-a308-b522dfd855ac%2Fimage.png?table=block&amp;id=13dffd3a-edd0-800d-923d-f6ff37d79daa&amp;t=13dffd3a-edd0-800d-923d-f6ff37d79daa&amp;width=679.9921875&amp;cache=v2" alt="notion image" loading="lazy" decoding="async"/></div></figure></ol></ol><ol start="2" class="notion-list notion-list-numbered notion-block-13dffd3aedd080149607ef0d313dcf48"><li>Collecting <span class="notion-orange">response trajectories </span>to these questions through our agent framework</li><ol class="notion-list notion-list-numbered notion-block-13dffd3aedd080149607ef0d313dcf48"><figure class="notion-asset-wrapper notion-asset-wrapper-image notion-block-13dffd3aedd080cc8ebbc86b8ce8b784"><div style="position:relative;display:flex;justify-content:center;align-self:center;width:100%;max-width:100%;flex-direction:column;height:100%"><img style="object-fit:cover" src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2Fde1827de-3f10-4f2a-8991-ddbc087af6ae%2F8a51bfee-500c-47f2-89d0-ac77a15b4281%2Fimage.png?table=block&amp;id=13dffd3a-edd0-80cc-8ebb-c86b8ce8b784&amp;t=13dffd3a-edd0-80cc-8ebb-c86b8ce8b784&amp;width=679.9921875&amp;cache=v2" alt="notion image" loading="lazy" 
decoding="async"/></div></figure></ol></ol><ol start="3" class="notion-list notion-list-numbered notion-block-13dffd3aedd0809f9417c082ce412199"><li><span class="notion-orange">Filtering</span> low-quality samples with rules and GPT-4</li><ol class="notion-list notion-list-numbered notion-block-13dffd3aedd0809f9417c082ce412199"><figure class="notion-asset-wrapper notion-asset-wrapper-image notion-block-13dffd3aedd080fd95c2eadf30d9d8d5"><div style="position:relative;display:flex;justify-content:center;align-self:center;width:100%;max-width:100%;flex-direction:column;height:100%"><img style="object-fit:cover" src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2Fde1827de-3f10-4f2a-8991-ddbc087af6ae%2F83db2ca6-e084-4dd3-b05c-7f4f8dd5fe85%2Fimage.png?table=block&amp;id=13dffd3a-edd0-80fd-95c2-eadf30d9d8d5&amp;t=13dffd3a-edd0-80fd-95c2-eadf30d9d8d5&amp;width=679.9921875&amp;cache=v2" alt="notion image" loading="lazy" decoding="async"/></div></figure></ol></ol><div class="notion-blank notion-block-13dffd3aedd080d5b6e9e521ed0e8527"> </div><div class="notion-blank notion-block-13dffd3aedd080f88e39e55c851ee2d4"> </div><h2 class="notion-h notion-h1 notion-h-indent-0 notion-block-13dffd3aedd080b8b9afde26b1924271" data-id="13dffd3aedd080b8b9afde26b1924271"><span><div id="13dffd3aedd080b8b9afde26b1924271" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd080b8b9afde26b1924271" title="3. Experiments"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title"><b>3. 
Experiments</b></span></span></h2><div class="notion-text notion-block-13dffd3aedd080c689fac33500b31f3d">steps:</div><ol start="1" class="notion-list notion-list-numbered notion-block-13dffd3aedd0808494d2c9aa49689314"><li>prompt the agent with the file, the question and constraints.</li></ol><ol start="2" class="notion-list notion-list-numbered notion-block-13dffd3aedd080769adbec4723281cef"><li>The agent answers with the calling of external Python sandbox</li></ol><ol start="3" class="notion-list notion-list-numbered notion-block-13dffd3aedd080828ed6feb0d4743bac"><li>we use GPT-3.5 to reformat the response into format requirements</li></ol><ol start="4" class="notion-list notion-list-numbered notion-block-13dffd3aedd08065b69ecc5017fc1fa4"><li>use regular expression matching to draw the answer and compare it with the label.</li></ol><figure class="notion-asset-wrapper notion-asset-wrapper-image notion-block-13dffd3aedd0801495fde5fbf5bc5739"><div style="position:relative;display:flex;justify-content:center;align-self:center;width:100%;max-width:100%;flex-direction:column;height:100%"><img style="object-fit:cover" src="https://www.notion.so/image/https%3A%2F%2Fprod-files-secure.s3.us-west-2.amazonaws.com%2Fde1827de-3f10-4f2a-8991-ddbc087af6ae%2Ffa42279e-d54d-45ae-8948-0f30eaf1dcc8%2Fimage.png?table=block&amp;id=13dffd3a-edd0-8014-95fd-e5fbf5bc5739&amp;t=13dffd3a-edd0-8014-95fd-e5fbf5bc5739&amp;width=707.984375&amp;cache=v2" alt="notion image" loading="lazy" decoding="async"/></div></figure><div class="notion-blank notion-block-13dffd3aedd080a1ae14d473eb7956ab"> </div><h3 class="notion-h notion-h2 notion-h-indent-1 notion-block-13dffd3aedd080f798bafe71628a59ad" data-id="13dffd3aedd080f798bafe71628a59ad"><span><div id="13dffd3aedd080f798bafe71628a59ad" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd080f798bafe71628a59ad" title="3.1 Setup"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 
001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">3.1 <b>Setup</b></span></span></h3><h4 class="notion-h notion-h3 notion-h-indent-2 notion-block-13dffd3aedd080418b79d0c2e81827ff" data-id="13dffd3aedd080418b79d0c2e81827ff"><span><div id="13dffd3aedd080418b79d0c2e81827ff" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd080418b79d0c2e81827ff" title="3.1.1 Models"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">3.1.1 Models</span></span></h4><div class="notion-blank notion-block-13dffd3aedd08004b751e8cf2c7c687b"> </div><div class="notion-blank notion-block-13dffd3aedd080aba9a0eca31625a2c1"> </div><h4 class="notion-h notion-h3 notion-h-indent-2 notion-block-13dffd3aedd0800e9eabfc936160d1a8" data-id="13dffd3aedd0800e9eabfc936160d1a8"><span><div id="13dffd3aedd0800e9eabfc936160d1a8" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd0800e9eabfc936160d1a8" title="3.1.2 Implementation Details"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 
010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">3.1.2 Implementation Details</span></span></h4><div class="notion-text notion-block-13dffd3aedd080f4b82dc8caa32ca45e">We first prompt LLMs with the file, question and constraints. </div><div class="notion-text notion-block-13dffd3aedd080b9a282d08cbb502293"><span class="notion-orange">Considering that most models hardly follow the format requirements exactly, we add a reformat step after the original responses which formats the responses with the format requirements with one-shot prompting. </span></div><div class="notion-blank notion-block-13dffd3aedd080309219c9b0749700f7"> </div><div class="notion-blank notion-block-13dffd3aedd0802ab258f5510c8f10e7"> </div><h3 class="notion-h notion-h2 notion-h-indent-1 notion-block-13dffd3aedd0807c9cead94ef920be6d" data-id="13dffd3aedd0807c9cead94ef920be6d"><span><div id="13dffd3aedd0807c9cead94ef920be6d" class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd0807c9cead94ef920be6d" title="3.2 Results"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title"><b>3.2 Results</b></span></span></h3><div class="notion-blank notion-block-13dffd3aedd08081adf7ed7cf633e7bf"> </div><h3 class="notion-h notion-h2 notion-h-indent-1 notion-block-13dffd3aedd080b5836ac07d6e9b3ffd" data-id="13dffd3aedd080b5836ac07d6e9b3ffd"><span><div id="13dffd3aedd080b5836ac07d6e9b3ffd" 
class="notion-header-anchor"></div><a class="notion-hash-link" href="#13dffd3aedd080b5836ac07d6e9b3ffd" title="3.3 Findings"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title"><b>3.3 Findings</b></span></span></h3><div class="notion-blank notion-block-13dffd3aedd080be8e23e5eca3d6d826"> </div><div class="notion-text notion-block-13cffd3aedd080d08a46e24bf61f6aa8"><b>2. InfiAgent-DABench基准</b></div><div class="notion-text notion-block-13cffd3aedd08024afd3e113c1541a22">InfiAgent-DABench基准旨在评估LLM代理在数据分析任务中的表现，主要包括两个组件：DAEval评估数据集和代理框架。</div><div class="notion-text notion-block-13cffd3aedd080ecba7de0aca769e9cb"><b>2.1 数据集构建</b></div><div class="notion-text notion-block-13cffd3aedd08065aecdfd47eaa6a070">DAEval数据集由真实的CSV文件及相应的问题组成。作者从GitHub上收集了52个CSV文件，确保主题多样性并避免数据重复。然后，使用GPT-4根据文件描述和从专家访谈中获得的数据分析关键概念生成开放式问题。为了便于自动评估，作者采用格式提示技术，将开放式问题转换为封闭式格式，生成详细的约束和格式要求，使问题具有唯一答案，便于解析和匹配。整个数据集经过人类专家的严格评估，确保高质量。</div><div class="notion-text notion-block-13cffd3aedd080048d5ccb1c8486c687"><b>2.2 代理框架</b></div><div class="notion-text notion-block-13cffd3aedd080e9afbee73789dd8ddf">为了支持LLM在数据分析任务中的表现，作者构建了一个代理框架，允许LLM在代理设置中解决数据分析问题。该框架采用ReAct方法，代理根据问题计划、编写代码、调用Python沙箱执行，并总结输出以解决问题。最终答案被重新格式化为特定格式，并与封闭式标签匹配。</div><div class="notion-text notion-block-13cffd3aedd08025aafdc2288835f75c"><b>2.3 人工评估</b></div><div class="notion-text notion-block-13cffd3aedd08039b55ce7b0abb22fad">为了确保数据集的质量，作者进行了全面的人工评估。评估包括数据集质量评估和人类与GPT-4的比较。在数据集质量评估中，专家从多个维度严格检查数据集，过滤掉所有不合格的样本。在人类与GPT-4的比较中，评估了人类和GPT-4在数据分析任务中的表现差异。</div><div class="notion-text notion-block-13cffd3aedd080adac6fd62be172a6b7"><b>2.4 
基准统计</b></div><div class="notion-text notion-block-13cffd3aedd0808ab916d6c2e9b50a40">DAEval数据集包含257个问题，涉及52个CSV文件，涵盖现实的数据分析需求和广泛的领域。这些问题根据从专家访谈中获得的关键概念生成，确保数据集的多样性和代表性。</div><div class="notion-text notion-block-13cffd3aedd08056a3c5f105164f5190"><b>2.5 指令微调数据集</b></div><div class="notion-text notion-block-13cffd3aedd08028a7e1e6fd0beddc28">为了促进开源LLM在数据分析代理能力方面的构建，作者开发了一种自动化方法，构建了一个名为DAInstruct的指令微调数据集。该数据集基于数据分析关键词和真实的CSV文件构建指令，然后使用代理框架和GPT-4生成这些指令的响应。通过在该数据集上训练开源LLM，作者进一步开发了专注于数据分析的代理DAAgent。</div><div class="notion-text notion-block-13cffd3aedd0801baf12db9b2f53f4f9"><b>3. 实验</b></div><div class="notion-text notion-block-13cffd3aedd0809ebc3bdf3faaeb867f"><b>3.1 设置</b></div><div class="notion-text notion-block-13cffd3aedd0804eae47f7006925e328">作者评估了34个最先进的LLM，发现它们在处理数据分析任务时仍面临挑战。此外，作者开发了DAAgent，一个专门的代理，专注于数据分析任务。DAAgent在DAInstruct上进行训练，表现优于GPT-3.5，提升了3.9%。</div><div class="notion-text notion-block-13cffd3aedd08025a07fe2d80f2cc00c"><b>3.2 结果</b></div><div class="notion-text notion-block-13cffd3aedd080d78243d0bc30add674">实验结果表明，当前的LLM在数据分析任务中仍面临挑战。然而，DAAgent在DAInstruct上进行训练后，表现优于GPT-3.5，提升了3.9%。</div><div class="notion-text notion-block-13cffd3aedd0809eb66fd46c0e1beb8a"><b>3.3 发现</b></div><div class="notion-text notion-block-13cffd3aedd08028ace6ed6420341465">作者的研究揭示了当前LLM在数据分析任务中面临的挑战，并强调了开发专门代理以提高性能的重要性。</div><div class="notion-text notion-block-13cffd3aedd08044bc03ebb4cd503f8b"><b>4. 
相关工作</b></div><div class="notion-text notion-block-13cffd3aedd080119ba7e0f45ee625a7">在数据分析任务中评估基于大型语言模型（LLM）的代理性能，涉及多个相关领域的研究。</div><div class="notion-text notion-block-13cffd3aedd080389983d81e03ce452a"><b>4.1 代码生成基准</b></div><div class="notion-text notion-block-13cffd3aedd080b49ab0cfb73fdd160c">现有的代码生成基准，如HumanEval、MBPP和DS-1000，主要评估模型的代码补全能力。然而，这些基准通常不适用于评估需要复杂能力（如任务规划、自我调试）的LLM代理。例如，DS-1000仅要求模型插入或完成小部分代码片段，无法全面评估代理在数据分析任务中的表现。</div><div class="notion-text notion-block-13cffd3aedd080eabe0ee837f1c6aadd"><b>4.2 工具使用基准</b></div><div class="notion-text notion-block-13cffd3aedd0805f92d6d024274f7306">一些研究关注LLM在使用工具时的表现。例如，ToolBench和Gentopia评估LLM在调用真实世界API时的能力。然而，这些基准主要关注工具使用，而非数据分析任务的综合能力。</div><div class="notion-text notion-block-13cffd3aedd08090b120df640370fbd9"><b>4.3 数据分析代理</b></div><div class="notion-text notion-block-13cffd3aedd080c688a1fd418a356cbc">随着LLM代理概念的发展，研究人员开始探索其在数据分析任务中的应用。OpenAI的高级数据分析（ADA）允许用户上传数据文件，并在沙箱中运行LLM生成的代码进行数据分析。此外，开源社区也开发了多个数据分析代理，如Open Interpreter、Open Agents、Qwen-Agent和TaskWeaver。这些代理的快速发展，突显了提出专门基准评估其性能的紧迫性和重要性。</div><div class="notion-text notion-block-13cffd3aedd08016bc1bfd03930cc889"><b>5. 局限性与未来工作</b></div><div class="notion-text notion-block-13cffd3aedd080dbbbdedcac1f613ae4">InfiAgent-DABench目前未包含与数据可视化相关的问题。在专家访谈中，作者认识到可视化在数据分析中的关键作用。然而，设计封闭式的可视化问题具有挑战性，因为结果通常是图形而非文本。未来的研究可以考虑以下两种评估方法：</div><ol start="1" class="notion-list notion-list-numbered notion-block-13cffd3aedd080cc9c40f58d0cfd4cc4"><li><b>代码正确性评估</b>：此方法允许使用仅支持文本的LLM进行评估，但可能导致不准确。在数据分析任务中，主要目标是生成准确的可视化表示，而不仅仅是底层代码。仅评估代码可能难以精确评估某些情况下的可视化效果。</li></ol><ol start="2" class="notion-list notion-list-numbered notion-block-13cffd3aedd0807d9378e03873898722"><li><b>使用多模态模型评估</b>：例如，Qwen-Agent利用视觉-语言模型（如GPT-4 Vision）作为评估器。此方法直接评估视觉输出的准确性。然而，当前的多模态模型性能尚未达到理想状态，且成本较高，可能导致对数据可视化的评估不够理想。</li></ol><div class="notion-text notion-block-13cffd3aedd08052b9e3c229ed170847">未来的研究应致力于开发精确且方便的可视化评估方法，以完善数据分析任务的评估体系。</div><div class="notion-text notion-block-13cffd3aedd080b8b7ebed43ef0f8ed4"><b>6. 
结论</b></div><div class="notion-text notion-block-13cffd3aedd0808ab313cbe62eeec6c1">本文介绍了InfiAgent-DABench，这是首个专门用于评估LLM代理在数据分析任务中表现的基准。通过对34个最先进的LLM的广泛基准测试，作者发现当前模型在有效处理数据分析任务时仍面临挑战。此外，作者开发了DAAgent，一个专门用于数据分析的代理，并在DAInstruct上进行训练。作者的贡献不仅为评估LLM代理在数据分析中的表现提供了有价值的基准，也为理解这些模型在该领域的当前能力和局限性提供了见解。</div></main></div>]]></content:encoded>
        </item>
        <item>
            <title><![CDATA[基础概念再次厘清]]></title>
            <link>https://tangly1024.com/article/153ffd3a-edd0-8060-a6fb-ca5916258479</link>
            <guid>https://tangly1024.com/article/153ffd3a-edd0-8060-a6fb-ca5916258479</guid>
            <pubDate>Thu, 05 Dec 2024 00:00:00 GMT</pubDate>
            <content:encoded><![CDATA[<div id="notion-article" class="mx-auto overflow-hidden "><main class="notion light-mode notion-page notion-block-153ffd3aedd08060a6fbca5916258479"><div class="notion-viewport"></div><div class="notion-collection-page-properties"></div><div class="notion-blank notion-block-154ffd3aedd08023a54ef01e5283bbc9"> </div><h2 class="notion-h notion-h1 notion-h-indent-0 notion-block-153ffd3aedd08058b9acfce9648ce1fe" data-id="153ffd3aedd08058b9acfce9648ce1fe"><span><div id="153ffd3aedd08058b9acfce9648ce1fe" class="notion-header-anchor"></div><a class="notion-hash-link" href="#153ffd3aedd08058b9acfce9648ce1fe" title="Introduction"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">Introduction</span></span></h2><div class="notion-text notion-block-154ffd3aedd08099bc51f0f4b99c82f8">这篇文章用来厘清看论文时经常会混淆的基础概念</div><div class="notion-text notion-block-154ffd3aedd08072b239c771822365c5">很多作者由于背景的不同，在论述公式的时候，用的符号标记不同</div><div class="notion-text notion-block-154ffd3aedd080738d2fdbac304d9f72">很多时候并不是有错误，而是理解的不同，或者习惯性用法的不同，导致看论文时稀里糊涂</div><div class="notion-blank notion-block-154ffd3aedd080c49502df2808237784"> </div><div class="notion-text notion-block-154ffd3aedd0805aa706f9d77f976a9d">记住下面几点</div><ul class="notion-list notion-list-disc notion-block-154ffd3aedd08043a9f4e04bc890aeee"><li>数学一直在发展，很多符号都是沿用了原始的写法，但是定义发生了变化，或者内容变了</li></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd080bf983febcbd9bdffce"><li>同一个概念会有不同写法，<b>很多符号不存在标准写法</b></li><ul class="notion-list notion-list-disc 
notion-block-154ffd3aedd080bf983febcbd9bdffce"><li>比如导数，在牛顿、莱布尼兹之前就有人研究了，牛顿莱布尼兹又各自去做了研究，个人有个人的写法，这可不就混乱了吗</li></ul></ul><div class="notion-blank notion-block-154ffd3aedd080e4bdcbca204fdc3709"> </div><div class="notion-blank notion-block-154ffd3aedd080c98eb8ebc29e99fd78"> </div><h2 class="notion-h notion-h1 notion-h-indent-0 notion-block-154ffd3aedd080c8863cc876799193d4" data-id="154ffd3aedd080c8863cc876799193d4"><span><div id="154ffd3aedd080c8863cc876799193d4" class="notion-header-anchor"></div><a class="notion-hash-link" href="#154ffd3aedd080c8863cc876799193d4" title="导数、微分、梯度、方向导数、偏导数、雅可比矩阵、黑森矩阵一次厘清"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">导数、微分、梯度、方向导数、偏导数、雅可比矩阵、黑森矩阵一次厘清</span></span></h2><div class="notion-blank notion-block-154ffd3aedd080b288d1d6978d805d5b"> </div><h3 class="notion-h notion-h2 notion-h-indent-1 notion-block-154ffd3aedd080b480fdc9d0ffd9dc36" data-id="154ffd3aedd080b480fdc9d0ffd9dc36"><span><div id="154ffd3aedd080b480fdc9d0ffd9dc36" class="notion-header-anchor"></div><a class="notion-hash-link" href="#154ffd3aedd080b480fdc9d0ffd9dc36" title="1. 导数"><svg viewBox="0 0 16 16" width="16" height="16"><path fill-rule="evenodd" d="M7.775 3.275a.75.75 0 001.06 1.06l1.25-1.25a2 2 0 112.83 2.83l-2.5 2.5a2 2 0 01-2.83 0 .75.75 0 00-1.06 1.06 3.5 3.5 0 004.95 0l2.5-2.5a3.5 3.5 0 00-4.95-4.95l-1.25 1.25zm-4.69 9.64a2 2 0 010-2.83l2.5-2.5a2 2 0 012.83 0 .75.75 0 001.06-1.06 3.5 3.5 0 00-4.95 0l-2.5 2.5a3.5 3.5 0 004.95 4.95l1.25-1.25a.75.75 0 00-1.06-1.06l-1.25 1.25a2 2 0 01-2.83 0z"></path></svg></a><span class="notion-h-title">1. 
导数</span></span></h3><div class="notion-text notion-block-154ffd3aedd080fa9354d50f953e84fd">要解决的问题：</div><ul class="notion-list notion-list-disc notion-block-154ffd3aedd080878814d353674bc975"><li>为什么链式法则不能看作除法</li></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd080eb8edff7a2bdbfabb2"><li>是一个整体还是可以拆开</li></ul><div class="notion-blank notion-block-154ffd3aedd0807baf9cd9736fe8c921"> </div><div class="notion-text notion-block-154ffd3aedd08031b7e8d0225dae12e5">古典的导数定义：</div><div class="notion-text notion-block-154ffd3aedd08033b6f8e77882e2188f">此时<b>导数就是切线的斜率</b></div><div class="notion-blank notion-block-154ffd3aedd08032b2b0ecd53e51e314"> </div><div class="notion-text notion-block-154ffd3aedd080f2ae72ce60e24a1921">但是，这玩意儿是有问题的</div><a class="notion-page-link notion-block-154ffd3aedd0805a8a14cbbaa4b60e7a" href="/154ffd3aedd0805a8a14cbbaa4b60e7a"><span class="notion-page-title"><div class="notion-page-icon-inline notion-page-icon-image"><svg class="notion-page-title-icon notion-page-icon" alt="古典导数头上的乌云" viewBox="0 0 30 30" width="16"><path d="M16,1H4v28h22V11L16,1z M16,3.828L23.172,11H16V3.828z M24,27H6V3h8v10h10V27z M8,17h14v-2H8V17z M8,21h14v-2H8V21z M8,25h14v-2H8V25z"></path></svg></div><span class="notion-page-title-text">古典导数头上的乌云</span></span></a><div class="notion-blank notion-block-154ffd3aedd08092b64ef5b4285aa933"> </div><div class="notion-text notion-block-154ffd3aedd080cb856fe9e3093ace70">你去深抠里面细节，就会发现总有对不上的，就跟波粒二象性一样，哪儿哪儿都怪</div><div class="notion-blank notion-block-154ffd3aedd0806b9a66e6875a85402e"> </div><div class="notion-text notion-block-154ffd3aedd080818196eebed04e496a">所以一种解法就是把它看作一个整体</div><div class="notion-text notion-block-154ffd3aedd0808bb327ec832c1aabd3">不再用公式定义导数，而是直接给他一个符号</div><div class="notion-blank notion-block-154ffd3aedd080dd95b4ed6ac1d99ea2"> </div><div class="notion-text notion-block-154ffd3aedd0804a920ac012ad43ca52">比如，对于 函数<!-- -->在<!-- -->点的导数，有如下几种写法</div><ul class="notion-list notion-list-disc 
notion-block-154ffd3aedd080bface2fea9ce5b9e48"><li>莱布尼兹记号<!-- --> </li></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd080fab437e217cad11124"><li>拉格朗日记号</li></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd080259302d6120991c6c3"><li>牛顿记号 </li></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd0807d87b5e4a0d189a11d"><li>欧拉记号</li></ul><div class="notion-blank notion-block-154ffd3aedd080468f55e8e4048d97eb"> </div><div class="notion-text notion-block-154ffd3aedd080f8a121e7f821507061">只要涉及以下要素： </div><ul class="notion-list notion-list-disc notion-block-154ffd3aedd080cb98a7e7875197b853"><li>函数:</li></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd0806b9f20f08d6e18d10c"><li>自变量</li></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd080e8acc8d74dc3d790a7"><li>因变量</li></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd08017878dc930cee97212"><li>某点</li></ul><div class="notion-text notion-block-154ffd3aedd0809cb2e8ca436495eeca">甚至都不需要全部都有，拉格朗日记号也没有因变量，因为他研究的场合不关注这个</div><div class="notion-blank notion-block-154ffd3aedd080878868d477ad1f74f7"> </div><div class="notion-blank notion-block-154ffd3aedd0809cb53bc4b7d8d7df96"> </div><div class="notion-text notion-block-154ffd3aedd080f3af63dbbe14d97dfa">现在我们可以解答这两个问题，当然不能看作除法，也不能拆开看</div><div class="notion-text notion-block-154ffd3aedd080988410e5eeb4a9a302">但是为什么有的时候看作除法也能算得通，也看到别人写<!-- -->呢</div><ul class="notion-list notion-list-disc notion-block-154ffd3aedd08045b6ebf515b3d401ad"><li>先说除法也能算得通的事，简单来说，是在一元下</li></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd0806abc3febd43d572faf"><li>可以拆分开也是因为，研究的场合不同，比如研究微分的时候，微分跟导数一体两面，公用一套符号很正常，但是在微分方面看得通的定义，在导数这里未必行得通，下次可以关注，拆开的情况是不是大部分还是在涉及微分的研究方面</li></ul><div class="notion-blank notion-block-154ffd3aedd080f5af2cc63bdba5d324"> </div><div class="notion-blank notion-block-154ffd3aedd080f7b9c1cc9f90b35a1c"> </div><div class="notion-text 
notion-block-154ffd3aedd08002b58cd3c0ff845b2f">可以参考这里</div><div class="notion-text notion-block-154ffd3aedd0802ea33ddcd32be00b20">dy/dx 这是一个整体，可以确定一个极限，为什么可以将其分开（请再看一下问题补充说明）？ - jack的回答 - 知乎
<a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://www.zhihu.com/question/432588711/answer/1629028795">https://www.zhihu.com/question/432588711/answer/1629028795</a></div><div class="notion-blank notion-block-154ffd3aedd080cc8951d6c7018e0cd4"> </div><div class="notion-blank notion-block-154ffd3aedd0809a8092e95ccbb1fe0e"> </div><div class="notion-text notion-block-154ffd3aedd080189228d097437cbd6b">最后我们做总结，未必准确，但是在一般深度学习方面，这么理解是没问题的</div><ul class="notion-list notion-list-disc notion-block-154ffd3aedd080069949c456995c99ae"><li>根据无穷小量这个概念，我们有了切线，导数就是切线的斜率</li></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd0804babb0dac58573c358"><li>导数的符号<!-- -->，其中 <!-- -->是一个整体，这个算符表示括号里的东西“对x求导”</li><ul class="notion-list notion-list-disc notion-block-154ffd3aedd0804babb0dac58573c358"><li>所以二阶导数应该写作<!-- -->，而不是<!-- -->，因为表示对<!-- -->求导两次</li><li>很多奇怪的写法都是因为把<!-- -->写成了<!-- -->，所以才会被看作为除法</li></ul></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd08015b7b1f03ceae0c225"><li>把<!-- -->和<!-- -->当作独立的对象，也就是<!-- -->可以看作除法的唯一场合：</li><ul class="notion-list notion-list-disc notion-block-154ffd3aedd08015b7b1f03ceae0c225"><li>必须是因<span class="notion-orange"></span><span class="notion-orange">改变了</span><span class="notion-orange"></span>而引起了变化</li><li>例如一元微分中完全没问题，除法是通的</li></ul></ul><ul class="notion-list notion-list-disc notion-block-154ffd3aedd08095b162c85acb7c94cd"><li>区分<!-- -->，这个区分也很重要</li><ul class="notion-list notion-list-disc notion-block-154ffd3aedd08095b162c85acb7c94cd"><li>表示无穷微小量，不可测</li><li>表示的有限量，虽然小但是有</li></ul></ul><div class="notion-blank notion-block-154ffd3aedd080f5aa01c7d1bee3d490"> </div><table class="notion-simple-table notion-block-155ffd3aedd080bf98bdf64788112d79"><tbody><tr class="notion-simple-table-row notion-block-155ffd3aedd08046b72dfc0923858d0f"><td class="" style="width:120px"><div class="notion-simple-table-cell">符号</div></td><td class="" style="width:120px"><div 
class="notion-simple-table-cell">含义</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">特点</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">适用场景</div></td></tr><tr class="notion-simple-table-row notion-block-155ffd3aedd0801c8560ef18e03b4d40"><td class="" style="width:120px"><div class="notion-simple-table-cell">Δx</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">自变量的有限变化量</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">有限、确定值</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">离散变化或实际计算</div></td></tr><tr class="notion-simple-table-row notion-block-155ffd3aedd080caab9ae47b83d5c493"><td class="" style="width:120px"><div class="notion-simple-table-cell">Δy</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">因变量的有限变化量</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">真实变化量，由 Δx 确定</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">离散变化或实际计算</div></td></tr><tr class="notion-simple-table-row notion-block-155ffd3aedd08055b627d150d358387f"><td class="" style="width:120px"><div class="notion-simple-table-cell">dx</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">自变量的微小变化量（无穷小）</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">无限趋近于零，通常作为独立变量</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">连续变化，局部变化描述</div></td></tr><tr class="notion-simple-table-row notion-block-155ffd3aedd080e387cff151feb318f3"><td class="" style="width:120px"><div class="notion-simple-table-cell">dy</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">因变量的微小变化量（无穷小）</div></td><td class="" style="width:120px"><div class="notion-simple-table-cell">由导数和 dx 决定，表示线性化近似变化量</div></td><td class="" style="width:120px"><div 
class="notion-simple-table-cell">连续变化，局部变化描述</div></td></tr></tbody></table><div class="notion-blank notion-block-155ffd3aedd08006b562f3d64ecda825"> </div><div class="notion-blank notion-block-154ffd3aedd0806298c9d2b190b6518f"> </div><div class="notion-blank notion-block-154ffd3aedd0805cab7cf314b09f245a"> </div><div class="notion-blank notion-block-154ffd3aedd080cc8ea7e6c5a04c5321"> </div><div class="notion-blank notion-block-154ffd3aedd080248e0ffe7020ec1189"> </div><div class="notion-blank notion-block-154ffd3aedd0804bae3ffb5a161b81d7"> </div><div class="notion-blank notion-block-154ffd3aedd0801a9127df157e46dcbe"> </div><div class="notion-blank notion-block-154ffd3aedd080138e28d8e9ca27ecb7"> </div><div class="notion-blank notion-block-154ffd3aedd080e1a56cec43d3155fff"> </div><div class="notion-blank notion-block-154ffd3aedd080ee9d23f1273e49954d"> </div><div class="notion-text notion-block-154ffd3aedd080c78227c4194a45d34e"><a target="_blank" rel="noopener noreferrer" class="notion-link" href="https://www.zhihu.com/question/432588711">https://www.zhihu.com/question/432588711</a></div><div class="notion-blank notion-block-154ffd3aedd0807cad7ee217cd0f7cc4"> </div></main></div>]]></content:encoded>
        </item>
        <item>
            <title><![CDATA[DL-公式推导]]></title>
            <link>https://tangly1024.com/article/14fffd3a-edd0-8062-85a6-fb50d7870f30</link>
            <guid>https://tangly1024.com/article/14fffd3a-edd0-8062-85a6-fb50d7870f30</guid>
            <pubDate>Sun, 01 Dec 2024 00:00:00 GMT</pubDate>
            <content:encoded><![CDATA[<div id="notion-article" class="mx-auto overflow-hidden "><main class="notion light-mode notion-page notion-block-14fffd3aedd0806285a6fb50d7870f30"><div class="notion-viewport"></div><div class="notion-collection-page-properties"></div><div class="notion-blank notion-block-14fffd3aedd0802bba54ce4fd4af26d2"> </div></main></div>]]></content:encoded>
        </item>
    </channel>
</rss>