-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
134 lines (102 loc) · 33.2 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=2"><meta name="theme-color" content="#222"><meta http-equiv="X-UA-COMPATIBLE" content="IE=edge,chrome=1"><meta name="renderer" content="webkit"><link rel="icon" type="image/ico" sizes="32x32" href="/assets/favicon.ico"><link rel="apple-touch-icon" sizes="180x180" href="/assets/apple-touch-icon.png"><link rel="alternate" href="/rss.xml" title="Пусть этот камень будет более крепким, чем человек" type="application/rss+xml"><link rel="alternate" href="/atom.xml" title="Пусть этот камень будет более крепким, чем человек" type="application/atom+xml"><link rel="alternate" type="application/json" title="Пусть этот камень будет более крепким, чем человек" href="https://forcheetah.github.io/feed.json"><link rel="preconnect" href="https://lf9-cdn-tos.bytecdntp.com"><link rel="preconnect" href="https://at.alicdn.com"><link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Mulish:400,400italic,700,700italic%7CFredericka%20the%20Great:400,400italic,700,700italic%7CNoto%20Serif%20JP:400,400italic,700,700italic%7CNoto%20Serif%20SC:400,400italic,700,700italic%7CInconsolata:400,400italic,700,700italic&display=swap&subset=latin,latin-ext" media="none" onload="this.media='all'"><link rel="stylesheet" href="/css/app.css?v=0.4.2"><link rel="modulepreload" href="/js/chunk-FJ7AJ5BW.js"><link rel="modulepreload" href="/js/chunk-MQTNP6EI.js"><link rel="modulepreload" href="/js/chunk-QAWHJ5B3.js"><link rel="modulepreload" href="/js/index.esm-SU253EAQ.js"><link rel="modulepreload" href="/js/post-SZ2V6ERD.js"><link rel="modulepreload" href="/js/quicklink-GO25OZIT.js"><link rel="modulepreload" href="/js/siteInit.js"><link rel="preload" href="https://forcheetah.github.io/assets/lunbo5.webp" as="image" fetchpriority="high"><link rel="preload" href="https://forcheetah.github.io/assets/danger.webp" as="image" fetchpriority="high"><link rel="preload" 
href="https://forcheetah.github.io/assets/lunbo1.webp" as="image" fetchpriority="high"><link rel="preload" href="https://forcheetah.github.io/assets/lunbo3.webp" as="image" fetchpriority="high"><link rel="preload" href="https://forcheetah.github.io/assets/lunbo4.webp" as="image" fetchpriority="high"><link rel="preload" href="https://forcheetah.github.io/assets/lunbo2.webp" as="image" fetchpriority="high"><meta name="keywords" content="AI, CNN, Inference"><meta name="description" content="有自己的博客很帅,但是我很懒,要命!!!"><link rel="canonical" href="https://forcheetah.github.io/"><title>暮冬Z羡慕的博客 = Пусть этот камень будет более крепким, чем человек = 神经网络推理、加速、AI编译。 我必须立刻开始挣扎!</title><meta name="generator" content="Hexo 7.0.0"></head><body itemscope="" itemtype="http://schema.org/WebPage"><div id="loading"><div class="cat"><div class="body"></div><div class="head"><div class="face"></div></div><div class="foot"><div class="tummy-end"></div><div class="bottom"></div><div class="legs left"></div><div class="legs right"></div></div><div class="paw"><div class="hands left"></div><div class="hands right"></div></div></div></div><div id="container"><header id="header" itemscope="" itemtype="http://schema.org/WPHeader"><div class="inner"><div id="brand"><div class="pjax"><a class="logo" href="/" rel="start"><p class="artboard">暮冬Z羡慕的博客</p><h1 class="title" itemprop="name headline">Пусть этот камень будет более крепким, чем человек</h1></a><p class="meta" itemprop="description">= 神经网络推理、加速、AI编译。 我必须立刻开始挣扎! 
=</p></div></div><nav id="nav"><div class="inner"><div class="toggle"><div class="lines" aria-label="Toggle navigation bar"><span class="line"></span><span class="line"></span><span class="line"></span></div></div><ul class="menu"><li class="item title"><a href="/" rel="start">暮冬Z羡慕的博客</a></li></ul><ul class="right" id="rightNav"><li class="item theme"><i class="ic i-sun"></i></li><li class="item search"><i class="ic i-search"></i></li></ul></div></nav></div><!-- Header carousel slides. The URL inside url() is single-quoted so it cannot terminate the double-quoted style attribute. --><div class="pjax" id="imgs"><ul><li class="item" style="background-image: url('https://forcheetah.github.io/assets/lunbo5.webp');"></li><li class="item" style="background-image: url('https://forcheetah.github.io/assets/danger.webp');"></li><li class="item" style="background-image: url('https://forcheetah.github.io/assets/lunbo1.webp');"></li><li class="item" style="background-image: url('https://forcheetah.github.io/assets/lunbo3.webp');"></li><li class="item" style="background-image: url('https://forcheetah.github.io/assets/lunbo4.webp');"></li><li class="item" style="background-image: url('https://forcheetah.github.io/assets/lunbo2.webp');"></li></ul></div></header><div id="waves"><svg class="waves" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 24 150 28" preserveAspectRatio="none" shape-rendering="auto"><defs><path id="gentle-wave" d="M-160 44c30 0 58-18 88-18s 58 18 88 18 58-18 88-18 58 18 88 18 v44h-352z"></path></defs><g class="parallax"><use xlink:href="#gentle-wave" x="48" y="0"></use><use xlink:href="#gentle-wave" x="48" y="3"></use><use xlink:href="#gentle-wave" x="48" y="5"></use><use xlink:href="#gentle-wave" x="48" y="7"></use></g></svg></div><main><div class="inner"><div class="pjax" id="main"><div class="index wrap"><h2 class="divider">Sticky Posts</h2><div class="segments sticky"><article class="item"><div class="cover"><a href="/2024/05/24/category/" itemprop="url" title="博客汇总目录"><img loading="eager" decoding="async" 
src="https://forcheetah.github.io/assets/lunbo6.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-05-24 21:25:53"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-05-24T21:25:53+08:00">2024-05-24</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>3.1k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>3 mins.</span></span></div><h3><a href="/2024/05/24/category/" itemprop="url" title="博客汇总目录">博客汇总目录</a></h3><div class="excerpt"># 暮冬 Z 羡慕 的博客 文章汇总
# 卷积加速算法
【Im2Col】卷积加速算法【1】 NCHW
【Im2Col】卷积加速算法【2】NHWC
【im2col】昇腾卷积加速算法
【Winograd】卷积加速算法原理及实现
【gemm】Gemm 计算加速
【gemm】内存对齐
[【conv 加速】NVDLA 卷积加速算法]
# AI 推理引擎
【推理引擎】常见推理引擎
【推理引擎】NCNN 和 Tengine 量化推理逻辑对比
【量化】连续卷积层首尾量化的可行性
【NCNN】学习 ncnn 模型转换
【Tengine】推理流程脑图【1】
</div><div class="meta footer"><span><a href="/categories/%E7%9B%AE%E5%BD%95/" itemprop="url" title="目录"><i class="ic i-flag"></i>目录</a></span></div><a href="/2024/05/24/category/" class="btn" itemprop="url" title="博客汇总目录">more...</a></div></article></div><h2 class="divider">Post List</h2><div class="segments posts"><article class="item"><div class="cover"><a href="/2025/04/27/Tengine02/" itemprop="url" title="【Tengine】推理流程脑图【2】"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo11.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-04-27 21:17:52"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-04-27T21:17:52+08:00">2025-04-27</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>3.7k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>3 mins.</span></span></div><h3><a href="/2025/04/27/Tengine02/" itemprop="url" title="【Tengine】推理流程脑图【2】">【Tengine】推理流程脑图【2】</a></h3><div class="excerpt"># 前言
本篇通过流程脑图和代码介绍 Tengine 推理引擎的推理流程。本篇是第二篇。第一篇地址。Tengine 工程地址。
作为初学者,错误在所难免,还望不吝赐教。
# 介绍
Tengine 由 OPEN AI LAB 主导开发,该项目实现了深度学习神经网络模型在嵌入式设备上的快速、高效部署需求。为实现在众多 AIoT 应用中的跨平台部署,该项目使用 C 语言进行核心模块开发,针对嵌入式设备资源有限的特点进行了深度框架裁剪。同时采用了完全分离的前后端设计,有利于 CPU、GPU、NPU 等异构计算单元的快速移植和部署,降低评估、迁移成本。
# 总流程图
总流程图:
init_tengin</div><div class="meta footer"><span><a href="/categories/Tengine/" itemprop="url" title="Tengine"><i class="ic i-flag"></i>Tengine</a></span></div><a href="/2025/04/27/Tengine02/" class="btn" itemprop="url" title="【Tengine】推理流程脑图【2】">more...</a></div></article><article class="item"><div class="cover"><a href="/2025/04/24/Tengine01/" itemprop="url" title="【Tengine】推理流程脑图【1】"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo13.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-04-24 20:13:25"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-04-24T20:13:25+08:00">2025-04-24</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>4.9k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>4 mins.</span></span></div><h3><a href="/2025/04/24/Tengine01/" itemprop="url" title="【Tengine】推理流程脑图【1】">【Tengine】推理流程脑图【1】</a></h3><div class="excerpt"># 前言
本篇通过流程脑图和代码介绍 Tengine 推理引擎的推理流程。本篇是第一部分。Tengine 工程地址。
作为初学者,错误在所难免,还望不吝赐教。
# 介绍
Tengine 由 OPEN AI LAB 主导开发,该项目实现了深度学习神经网络模型在嵌入式设备上的快速、高效部署需求。为实现在众多 AIoT 应用中的跨平台部署,该项目使用 C 语言进行核心模块开发,针对嵌入式设备资源有限的特点进行了深度框架裁剪。同时采用了完全分离的前后端设计,有利于 CPU、GPU、NPU 等异构计算单元的快速移植和部署,降低评估、迁移成本。
# 推理流程
不了解 AI 推理引擎的人,可能难以理解推理</div><div class="meta footer"><span><a href="/categories/Tengine/" itemprop="url" title="Tengine"><i class="ic i-flag"></i>Tengine</a></span></div><a href="/2025/04/24/Tengine01/" class="btn" itemprop="url" title="【Tengine】推理流程脑图【1】">more...</a></div></article><article class="item"><div class="cover"><a href="/2025/04/16/ncnn01/" itemprop="url" title="【NCNN】学习ncnn模型转换"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo9.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-04-16 21:23:06"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-04-16T21:23:06+08:00">2025-04-16</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>5.7k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>5 mins.</span></span></div><h3><a href="/2025/04/16/ncnn01/" itemprop="url" title="【NCNN】学习ncnn模型转换">【NCNN】学习ncnn模型转换</a></h3><div class="excerpt"># 前言
本篇通过代码介绍 NCNN 的模型转换过程。模型转换过程逻辑简单,所以本篇文章只对关键节点进行介绍。NCNN 工程地址。
作为初学者,错误在所难免,还望不吝赐教。
# 介绍
NCNN 是由 腾讯优图实验室 开发的 开源神经网络推理框架,专注于为 移动端和嵌入式设备 提供高效、轻量的深度学习模型部署解决方案。自 2017 年开源(基于 BSD 3-Clause 协议)以来,因其高性能、低功耗和跨平台特性,成为移动端 AI 推理的主流框架之一。
# 使用
ncnn 编译部署方法,网络上很多,也可以参考工程中的 :docs/how-to-build/how-to-bui</div><div class="meta footer"><span><a href="/categories/NCNN/" itemprop="url" title="NCNN"><i class="ic i-flag"></i>NCNN</a></span></div><a href="/2025/04/16/ncnn01/" class="btn" itemprop="url" title="【NCNN】学习ncnn模型转换">more...</a></div></article><article class="item"><div class="cover"><a href="/2025/03/28/compile03/" itemprop="url" title="【编译器】使用llvm编译自定义语言【3】编译 object"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo12.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-03-28 21:58:41"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-03-28T21:58:41+08:00">2025-03-28</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>7.6k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>7 mins.</span></span></div><h3><a href="/2025/03/28/compile03/" itemprop="url" title="【编译器】使用llvm编译自定义语言【3】编译 object">【编译器】使用llvm编译自定义语言【3】编译 object</a></h3><div class="excerpt"># 前言
本篇是使用 llvm 编译自定义语言的第三篇。第一篇【编译器】使用 llvm 编译自定义语言【1】构建 AST 文章自顶向下介绍了抽象语法树 AST 的构建过程,第二篇【编译器】使用 llvm 编译自定义语言【2】转 llvm IR 文章介绍将抽象语法树 AST 转化为 llvm IR 的过程。本篇将简单介绍,如何将前述得到的 llvm IR 编译成 Object 。所使用代码例子来自 LLVM 官方教程 My First Language Frontend with LLVM Tutorial。
本篇介绍无法代替官方教程,感兴趣请参考 My First Language Front</div><div class="meta footer"><span><a href="/categories/%E7%BC%96%E8%AF%91%E5%99%A8/" itemprop="url" title="编译器"><i class="ic i-flag"></i>编译器</a></span></div><a href="/2025/03/28/compile03/" class="btn" itemprop="url" title="【编译器】使用llvm编译自定义语言【3】编译 object">more...</a></div></article><article class="item"><div class="cover"><a href="/2025/03/20/compile02/" itemprop="url" title="【编译器】使用llvm编译自定义语言【2】转llvm IR"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo9.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-03-20 19:41:57"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-03-20T19:41:57+08:00">2025-03-20</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>7.1k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>6 mins.</span></span></div><h3><a href="/2025/03/20/compile02/" itemprop="url" title="【编译器】使用llvm编译自定义语言【2】转llvm IR">【编译器】使用llvm编译自定义语言【2】转llvm IR</a></h3><div class="excerpt"># 前言
本篇是使用 llvm 编译自定义语言的第二篇。前一篇【编译器】使用 llvm 编译自定义语言【1】文章自顶向下介绍了抽象语法树 AST 的构建过程,本篇接续上一篇,介绍将抽象语法树 AST 转化为 llvm IR 的过程。所使用代码例子来自 LLVM 官方教程 My First Language Frontend with LLVM Tutorial。
代码请于 My First Language Frontend with LLVM Tutorial 下载。本篇介绍无法代替官方教程,感兴趣请参考 My First Language Frontend with LLVM Tutori</div><div class="meta footer"><span><a href="/categories/%E7%BC%96%E8%AF%91%E5%99%A8/" itemprop="url" title="编译器"><i class="ic i-flag"></i>编译器</a></span></div><a href="/2025/03/20/compile02/" class="btn" itemprop="url" title="【编译器】使用llvm编译自定义语言【2】转llvm IR">more...</a></div></article><article class="item"><div class="cover"><a href="/2025/03/13/compile01/" itemprop="url" title="【编译器】使用llvm编译自定义语言【1】构建AST"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo1.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-03-13 19:25:46"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-03-13T19:25:46+08:00">2025-03-13</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>25k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>22 mins.</span></span></div><h3><a href="/2025/03/13/compile01/" itemprop="url" title="【编译器】使用llvm编译自定义语言【1】构建AST">【编译器】使用llvm编译自定义语言【1】构建AST</a></h3><div class="excerpt"># 前言
本篇介绍如何使用 LLVM 编译自定义语言。使用的是 LLVM 官方例子 My First Language Frontend with LLVM Tutorial。官方例子采用自底向上的方式详细讲述了自定义万花筒语言通过 llvm 编译的过程,但是自底向上较难理解,过程较长。本篇将自上而下地、简要地介绍如何将自定义语言转换为 llvm IR,实现编译的。
尽管官方用例非常详细,笔者在阅读的时候仍然感到内容多,较 “散”,难以把握。每当这时候我会选择从上而下,整理归类,最后发现知识就具象化了。本篇介绍无法代替官方教程,感兴趣请参考 My First Language Frontend</div><div class="meta footer"><span><a href="/categories/%E7%BC%96%E8%AF%91%E5%99%A8/" itemprop="url" title="编译器"><i class="ic i-flag"></i>编译器</a></span></div><a href="/2025/03/13/compile01/" class="btn" itemprop="url" title="【编译器】使用llvm编译自定义语言【1】构建AST">more...</a></div></article><article class="item"><div class="cover"><a href="/2025/02/05/aicompile04/" itemprop="url" title="【AI编译】如何进行内存分配"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo10.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-02-05 19:34:55"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-02-05T19:34:55+08:00">2025-02-05</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>2.3k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>2 mins.</span></span></div><h3><a href="/2025/02/05/aicompile04/" itemprop="url" title="【AI编译】如何进行内存分配">【AI编译】如何进行内存分配</a></h3><div class="excerpt"># 前言
本文讲解神经网络推理过程中的内存分配相关内容。
作为初学者,错误在所难免,还望不吝赐教。
# Tensor 类型
不同的 tensor 有着不同的生命周期,神经网络推理过程中主要有三种 Tensor 类型:
1. 输入输出 tensor
输出 tensor 是下一节点的输入 tensor,它们一体两面,这种类型的 tensor 生命周期起于 “生产节点”,终于最后一个 “消费节点”。
2. 权重 tensor
权重 tensor 和算子绑定在一起,生命周期随着算子开始,也随算子结束。但也可能存在共享权重的情况,例如 tiling 操作产生的并行算子共享 Weight,算能 TPU</div><div class="meta footer"><span><a href="/categories/compile/" itemprop="url" title="compile"><i class="ic i-flag"></i>compile</a></span></div><a href="/2025/02/05/aicompile04/" class="btn" itemprop="url" title="【AI编译】如何进行内存分配">more...</a></div></article><article class="item"><div class="cover"><a href="/2025/01/21/zatanNoval2/" itemprop="url" title="【感想】写作进度报告2"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/girl.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-01-21 21:21:26"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-01-21T21:21:26+08:00">2025-01-21</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>1k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>1 mins.</span></span></div><h3><a href="/2025/01/21/zatanNoval2/" itemprop="url" title="【感想】写作进度报告2">【感想】写作进度报告2</a></h3><div class="excerpt"># 进度
小说文字数量已经超过 8 万,如果第一部的目标是 40 万字的话,那么现在已经完成了 1/5,可喜可贺。
当世界观一点点建立完成之后,书里的人物就活了,他该如何行动,产生怎样的情况与冲突,都是自然而然的。这时候写故事发展,不是写他应该怎么去做,而是在那种情景中他会怎么去做。这么想想,作者仿佛变成一个观察这个微缩世界的旁观者。
# 需要改进
1. 反派描写
发现描写反派有些困难,毕竟我们平时接触不到,纯凭想象。如何把反派写活确实不容易。
现实中的人都会相对中庸一点,而书中的人就要极端一点。
在达到这个水平之后,还可以给反派一些其他的特点,比如杀人如麻中也对某些事情怀有温情,不</div><div class="meta footer"><span><a href="/categories/%E6%9D%82%E8%B0%88/" itemprop="url" title="杂谈"><i class="ic i-flag"></i>杂谈</a></span></div><a href="/2025/01/21/zatanNoval2/" class="btn" itemprop="url" title="【感想】写作进度报告2">more...</a></div></article><article class="item"><div class="cover"><a href="/2025/01/15/aicompile03/" itemprop="url" title="【AI编译】layer-group之后如何tiling"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo13.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-01-15 21:12:53"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-01-15T21:12:53+08:00">2025-01-15</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>3k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>3 mins.</span></span></div><h3><a href="/2025/01/15/aicompile03/" itemprop="url" title="【AI编译】layer-group之后如何tiling">【AI编译】layer-group之后如何tiling</a></h3><div class="excerpt"># 前言
本篇讲解笔者实现 tiling 算法的一些经验。
前述文章 《如何进行 layer-group》讲解了 layer group 的内容。《Tiling 操作能优化哪些时间》提到 Tiling 的概念和作用。感兴趣的话可以阅读。
本篇文章参考过 《超强干货!地平线编译器大牛的编译优化实践总结》,《Arm 周易编译器工程》,《算能 TPU layer group 讲解》,《算能 TPU 视频讲解》 等文章和工程,欢迎大家参考。
作为初学者,错误在所难免,还望不吝赐教。
# 回顾
如图所示,AI 编译优化的基本流程是 1. 图优化 (算子融合,常量折叠等) 2. 拆分 (layer gr</div><div class="meta footer"><span><a href="/categories/compile/" itemprop="url" title="compile"><i class="ic i-flag"></i>compile</a></span></div><a href="/2025/01/15/aicompile03/" class="btn" itemprop="url" title="【AI编译】layer-group之后如何tiling">more...</a></div></article><article class="item"><div class="cover"><a href="/2025/01/14/aicompile02/" itemprop="url" title="【AI编译】如何进行layer-group"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo12.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-01-14 20:09:18"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-01-14T20:09:18+08:00">2025-01-14</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>2.3k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>2 mins.</span></span></div><h3><a href="/2025/01/14/aicompile02/" itemprop="url" title="【AI编译】如何进行layer-group">【AI编译】如何进行layer-group</a></h3><div class="excerpt"># 前言
本篇介绍 AI 编译领域 layer-group 算法。
本篇文章参考过 《超强干货!地平线编译器大牛的编译优化实践总结》,《Arm 周易编译器工程》,《算能 TPU layer group 讲解》,《算能 TPU 视频讲解》 等文章和工程,欢迎大家参考。
作为初学者,错误在所难免,还望不吝赐教。
# Layer group
如图所示,AI 编译优化的基本流程是 1. 图优化 (算子融合,常量折叠等) 2. 拆分 (layer group 和 tiling) 3. 并行和调度。最后得到当前编译的时间消耗。
在 AI 编译领域,LayerGroup 指的是将神经网络中的多个层(lay</div><div class="meta footer"><span><a href="/categories/compile/" itemprop="url" title="compile"><i class="ic i-flag"></i>compile</a></span></div><a href="/2025/01/14/aicompile02/" class="btn" itemprop="url" title="【AI编译】如何进行layer-group">more...</a></div></article></div></div><nav class="pagination"><div class="inner"><span class="page-number current">1</span><a class="page-number" href="/page/2/">2</a><span class="space">…</span><a class="page-number" href="/page/4/">4</a><a class="extend next" rel="next" href="/page/2/"><i class="ic i-angle-right" aria-label="Next page"></i></a></div></nav></div><div id="sidebar"><div class="inner"><div class="panels"><div class="inner"><div class="contents panel pjax" data-title="Contents"></div><div class="related panel pjax" data-title="Related"></div><div class="overview panel" data-title="Overview"><div class="author" itemprop="author" itemscope="itemscope" itemtype="http://schema.org/Person"><img class="image" loading="lazy" decoding="async" itemprop="image" alt="XianMu" src="/assets/avatar.webp"><p class="name" itemprop="name">XianMu</p><div class="description" itemprop="description">有自己的博客很帅,但是我很懒,要命!!!</div></div><nav class="state"><div class="item posts"><a href="/archives/"><span class="count">39</span><span class="name">posts</span></a></div><div class="item categories"><a href="/categories/"><span class="count">16</span><span class="name">categories</span></a></div><div class="item tags"><a href="/tags/"><span class="count">21</span><span class="name">tags</span></a></div></nav><!-- Social links. The e-mail entry uses a mailto: URL; a bare "/address" href would resolve as a path on this site. --><div class="social"><a target="_blank" rel="noopener" href="https://github.com/ForCheetah" class="item github" title="https://github.com/ForCheetah"><i class="ic i-github"></i></a><a target="_blank" rel="noopener" href="https://www.zhihu.com/people/guai-dao-ji-de-3-50" class="item zhihu" title="https://www.zhihu.com/people/guai-dao-ji-de-3-50"><i class="ic i-zhihu"></i></a><a href="mailto:huasen.w@foxmail.com" 
class="item email" title="huasen.w@foxmail.com"><i class="ic i-envelope"></i></a></div><!-- Sidebar menu: list items need a list parent, so this is a ul (same element the header menu uses). --><ul class="menu"><li class="item"><a href="/" rel="section"><i class="ic i-home"></i>Home</a></li></ul></div></div></div><ul id="quick"><li class="prev pjax"></li><li class="up"><i class="ic i-arrow-up"></i></li><li class="down"><i class="ic i-arrow-down"></i></li><li class="next pjax"><a href="/page/2/" rel="next" title="Next Post"><i class="ic i-chevron-right"></i></a></li><li class="percent"></li></ul></div></div><div class="dimmer"></div></div></main><footer id="footer"><div class="inner"><div class="widgets"><div class="rpost pjax"><h2>Random Posts</h2><ul><li class="item"><div class="breadcrumb"><a href="/categories/%E6%9D%82%E8%B0%88/" title="In杂谈">杂谈</a></div><span><a href="/2024/05/26/zatan3D/">【3D建模】IS-7攻城锤流纹岩皮肤展示</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/tvm/" title="Intvm">tvm</a></div><span><a href="/2024/10/21/tvm04/">【TVM】通过代码学习编译流程【4】BuildRelay</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/tvm/" title="Intvm">tvm</a></div><span><a href="/2024/10/10/tvm01/">【TVM】通过代码学习编译流程【1】必要知识</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E6%9D%82%E8%B0%88/" title="In杂谈">杂谈</a></div><span><a href="/2025/01/21/zatanNoval2/">【感想】写作进度报告2</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E6%9D%82%E8%B0%88/" title="In杂谈">杂谈</a></div><span><a href="/2024/06/15/zatanE3/">【3D建模】T110E3卡迪夫蓝调皮肤模型</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E6%8E%A8%E7%90%86%E5%BC%95%E6%93%8E/" title="In推理引擎">推理引擎</a></div><span><a href="/2024/06/16/engine2/">【推理引擎】常见AI推理框架</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E9%87%8F%E5%8C%96/" title="In量化">量化</a></div><span><a href="/2025/01/03/quanti01/">【量化】连续卷积层首尾量化的可行性</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E7%9B%AE%E5%BD%95/" 
title="In目录">目录</a></div><span><a href="/2024/05/24/category/">博客汇总目录</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E6%9D%82%E8%B0%88/" title="In杂谈">杂谈</a></div><span><a href="/2024/08/04/zatanNoval/">【感想】写作进度报告1</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/compile/" title="Incompile">compile</a></div><span><a href="/2025/01/14/aicompile02/">【AI编译】如何进行layer-group</a></span></li></ul></div><div class="rpost pjax"><h2>Recent Comments</h2></div></div><div class="status"><div class="copyright">© 2010 -<span itemprop="copyrightYear">2025</span><span class="with-love"><i class="ic i-sakura rotate"></i></span><span class="author" itemprop="copyrightHolder">XianMu @ 暮冬Z羡慕的博客</span></div><div class="count"><span class="post-meta-item-icon"><i class="ic i-chart-area"></i></span><span title="Symbols count total">275k words</span><span class="post-meta-divider"> | </span><span class="post-meta-item-icon"><i class="ic i-coffee"></i></span><span title="Reading time total">4:10</span></div><div class="powered-by">Powered by <a target="_blank" rel="noopener" href="https://hexo.io/">Hexo</a> & Theme.<a target="_blank" rel="noopener" href="https://github.com/theme-shoka-x/hexo-theme-shokaX/">ShokaX</a></div></div></div></footer></div><script data-config="" type="text/javascript">var LOCAL = {
ispost: false,
path: ``,
favicon: {
show: `(●´3`●) Here we go again.`,
hide: `(´Д`) It's a disaster!`
},
search: {
placeholder: "Search for Posts",
empty: "We didn't find any results for the search: ${query}",
stats: "${hits} results found in ${time} ms"
},
copy_tex: false,
katex: false,
mermaid: false,
audio: undefined,
fancybox: true,
nocopy: false,
outime: true,
template: `<div class="note warning"><p><span class="label warning">Article Timeliness Alert</span><br>This is an article published {{publish}} days ago and last updated {{updated}} days ago. Some information may have changed, so please be careful to screen it.</p></div>`,
quiz: {
choice: `Multiple Choice`,
multiple: `Multiple Answer`,
true_false: `True/False`,
essay: `Questions`,
gap_fill: `Gap Filling`,
mistake: `Wrong Answer`
},
// Two URI predicates followed by one non-function entry.
// NOTE(review): presumably passed to the link-prefetch logic as an ignore list — the consumer is not in this file; confirm.
ignores: [
// True for any URI that contains a '#'.
(uri) => uri.includes('#'),
// True when the URI matches LOCAL.path anchored at the end of the string. LOCAL.path is inserted
// into the pattern unescaped; on this page it is an empty string, so the pattern is just `$`,
// which matches every URI.
(uri) => new RegExp(LOCAL.path + '$').test(uri),
// NOTE(review): an empty array, not a function like the two entries above — confirm the consumer tolerates it.
[]
]
};
</script><script src="https://lf9-cdn-tos.bytecdntp.com/cdn/expire-6-M/pace/1.2.4/pace.min.js" async=""></script><script src="/js/siteInit.js?v=0.4.2" type="module" fetchpriority="high" defer=""></script></body></html>