Commit 0e84f69a by TJL233

add files 5.3

parent a65db92a
## What is a Prompt
## From a news story
<img width="793" alt="image" src="https://user-images.githubusercontent.com/14297658/230248003-c3d30c1c-bef1-4744-a0f8-82f2fc5067c5.png">
## What is a Prompt
When people speak, they read the room: they tailor their words to the listener and adjust to the situation at hand.
An AI, by contrast, starts with no assumptions about its environment, so we have to supply that environment as context to help it understand the human's situation.
- Q&A with no context
<img width="782" alt="image" src="https://user-images.githubusercontent.com/14297658/230248219-4c51fd4e-93bf-4e2a-96fc-afd6c6f7aae2.png">
- Q&A that includes some context (a minimal sketch follows the screenshots below)
<img width="716" alt="image" src="https://user-images.githubusercontent.com/14297658/230248384-9eb650ca-2922-4510-ac10-790c4c57e415.png">
## How to Design a Prompt, and How to Get a Better One
<img width="744" alt="image" src="https://user-images.githubusercontent.com/14297658/230248675-59472e62-748b-4914-ba96-f705920bbb27.png">
<img width="720" alt="image" src="https://user-images.githubusercontent.com/14297658/230251842-9df052e8-7fad-44b3-8574-e4829aa3b81d.png">
<img width="726" alt="image" src="https://user-images.githubusercontent.com/14297658/230251928-3bb58e0a-c0c7-401f-98ac-d4b8e9ea688d.png">
- Goal
<img width="725" alt="image" src="https://user-images.githubusercontent.com/14297658/230249190-3ef5b4df-eec2-4fa9-b76e-311dfc2a0d3c.png">
<img width="725" alt="image" src="https://user-images.githubusercontent.com/14297658/230249285-c53cdad1-6fa2-4a8e-91e5-f045d40a9ee9.png">
- Language
<img width="719" alt="image" src="https://user-images.githubusercontent.com/14297658/230249453-ed0d1a0f-fe28-45db-ac1e-1725c11f13ba.png">
- Context
<img width="721" alt="image" src="https://user-images.githubusercontent.com/14297658/230249667-b7a83517-7f5a-4500-98b6-3e84ab23a92d.png">
- Diversity
<img width="732" alt="image" src="https://user-images.githubusercontent.com/14297658/230250290-01e0df33-007e-4264-a462-1661ec52fbe0.png">
- Quality evaluation (a sketch combining several of these dimensions follows this list)
<img width="742" alt="image" src="https://user-images.githubusercontent.com/14297658/230250752-930d2700-38ea-4254-9d26-4fd579cfd967.png">
<img width="679" alt="image" src="https://user-images.githubusercontent.com/14297658/230251295-fb1560fe-5775-424d-ba2e-e613e344d803.png">
## Prompts Before and After ChatGPT
| | Pre-ChatGPT era | Post-GPT era |
|-----------|----------------------|------------------|
| Why was prompting rarely discussed before? | Language models received little attention; most work used small task-specific models, which are insensitive to prompts and cheap to train, so nobody made a selling point of them | The importance of language models has been recognized anew; general-purpose, cross-domain large models are ever more common, and they are both more sensitive to prompts and more expensive to train |
| How do QA and prompting differ? | QA focuses on answering a specific question, while a prompt provides context and guides text generation; in QA the model's understanding of the task is hidden in its parameters and never stated explicitly | A prompt supplies broader context and can steer generation well beyond answering a specific question; large models are trained only on general tasks, so specialized tasks call for better-designed prompts |
| What does a prompt consist of? | A prompt can consist of keywords, phrases, or full sentences: a task description, a task goal, and examples; providing no examples is 0-shot | A prompt can also include further context, such as topic or task information; providing n examples is n-shot (see the sketch after this table) |
| How can a prompt be improved? | Hard/soft improvement: search for and optimize the tokens most relevant to the specific task | Use machine learning to generate high-quality prompts automatically, or simply ask ChatGPT how to generate a high-quality prompt |
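The 0-shot / n-shot distinction above is easy to show with a small sketch; the sentiment task and the labeled examples below are hypothetical:

```python
task = "Classify the sentiment of the sentence as Positive or Negative."

# 0-shot: task description only, no examples.
zero_shot = f"{task}\nSentence: The battery dies within an hour.\nSentiment:"

# 2-shot: the same task plus two labeled examples (n examples => n-shot).
two_shot = (
    f"{task}\n"
    "Sentence: I love this phone, the camera is amazing.\nSentiment: Positive\n"
    "Sentence: The screen cracked on day one.\nSentiment: Negative\n"
    "Sentence: The battery dies within an hour.\nSentiment:"
)
print(two_shot)
```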
The full prompt doc is best read on gist, at the following address:
https://gist.github.com/sawyerbutton/2305cd60e134ffc06d8402fbfc4b7035
import torch
import torch.nn as nn
import torch.nn.functional as F

from utils.CustomLayers import ConvActivation, Inception
class AuxiliaryClassifier(nn.Module):
    """Auxiliary classifier head applied to intermediate feature maps during training."""

    def __init__(self, input_channels, num_classes):
        super().__init__()
self.avgpool = nn.AvgPool2d(kernel_size=5, stride=3)
self.conv = ConvActivation(input_channels, 128, kernel_size=1) # output[batch, 128, 4, 4]
self.fc1 = nn.Linear(2048, 1024)
self.fc2 = nn.Linear(1024, num_classes)
def forward(self, x):
# aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
x = self.avgpool(x)
# aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
x = self.conv(x)
# N x 128 x 4 x 4
        x = torch.flatten(x, 1)  # keep the batch dimension: N x 2048
x = F.dropout(x, p=0.5, training=self.training)
x = self.fc1(x)
x = F.relu(x, inplace=True)
x = F.dropout(x, p=0.5, training=self.training)
x = self.fc2(x)
return x
class GoogLeNet(nn.Module):
    def __init__(self, num_classes=1000, aux_logits=False, init_weights=False):
super().__init__()
self.aux_logits = aux_logits
self.conv1 = ConvActivation(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
self.conv2 = ConvActivation(64, 192, kernel_size=3, padding=1)
self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
# input_channels, out_nc1x1, out_nc3x3_reduce, out_nc3x3, out_nc5x5_reduce, out_nc5x5, out_nc_pool
self.inception3a = Inception(192, 64, 96, 128, 16, 32, 32)
self.inception3b = Inception(256, 128, 128, 192, 32, 96, 64)
self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
self.inception4a = Inception(480, 192, 96, 208, 16, 48, 64)
self.inception4b = Inception(512, 160, 112, 224, 24, 64, 64)
self.inception4c = Inception(512, 128, 128, 256, 24, 64, 64)
self.inception4d = Inception(512, 112, 144, 288, 32, 64, 64)
self.inception4e = Inception(528, 256, 160, 320, 32, 128, 128)
self.maxpool4 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
self.inception5a = Inception(832, 256, 160, 320, 32, 128, 128)
self.inception5b = Inception(832, 384, 192, 384, 48, 128, 128)
if self.aux_logits:
            self.aux1 = AuxiliaryClassifier(input_channels=512, num_classes=num_classes)
            self.aux2 = AuxiliaryClassifier(input_channels=528, num_classes=num_classes)
self.avgpool = nn.AdaptiveAvgPool2d((1,1))
self.dropout = nn.Dropout(0.4)
self.fc = nn.Linear(1024, num_classes)
if init_weights:
self._initial_weights()
def forward(self, x):
x = self.conv1(x) # [b*3*224*224] --> [b*64*112*112]
        x = self.maxpool1(x)    # [b*64*112*112] --> [b*64*56*56]
x = self.conv2(x) # [b*64*56*56] --> [b*192*56*56]
x = self.maxpool2(x) # [b*192*56*56] --> [b*192*28*28]
x = self.inception3a(x) # [b*192*28*28] --> [b*256*28*28]
x = self.inception3b(x) # [b*256*28*28] --> [b*480*28*28]
        x = self.maxpool3(x)    # [b*480*28*28] --> [b*480*14*14]
x = self.inception4a(x) # [b*480*14*14] --> [b*512*14*14]
        if self.training and self.aux_logits:   # the aux head is skipped in eval mode
aux1 = self.aux1(x)
x = self.inception4b(x) # [b*512*14*14] --> [b*512*14*14]
x = self.inception4c(x) # [b*512*14*14] --> [b*512*14*14]
x = self.inception4d(x) # [b*512*14*14] --> [b*528*14*14]
        if self.training and self.aux_logits:   # the aux head is skipped in eval mode
aux2 = self.aux2(x)
x = self.inception4e(x) # [b*528*14*14] --> [b*832*14*14]
        x = self.maxpool4(x)    # [b*832*14*14] --> [b*832*7*7] (was maxpool3, which left maxpool4 unused)
x = self.inception5a(x) # [b*832*7*7] --> [b*832*7*7]
x = self.inception5b(x) # [b*832*7*7] --> [b*1024*7*7]
        x = self.avgpool(x)     # [b*1024*7*7] --> [b*1024*1*1]
        x = torch.flatten(x, 1) # [b*1024*1*1] --> [b*1024]
        x = self.dropout(x)     # apply the dropout defined in __init__ before the classifier
        x = self.fc(x)          # [b*1024] --> [b*num_classes]
        if self.training and self.aux_logits:   # aux outputs are returned only in training mode
return x, aux2, aux1
return x
def _initial_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
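# Minimal smoke test (illustrative addition, not part of the original file);
# it assumes ConvActivation and Inception from utils.CustomLayers accept the
# argument patterns used above.
if __name__ == "__main__":
    model = GoogLeNet(num_classes=10, aux_logits=True, init_weights=True)

    model.train()
    dummy = torch.randn(2, 3, 224, 224)
    main_out, aux2, aux1 = model(dummy)
    print(main_out.shape, aux2.shape, aux1.shape)  # three torch.Size([2, 10])

    model.eval()  # the aux heads are skipped in eval mode
    with torch.no_grad():
        print(model(dummy).shape)  # torch.Size([2, 10])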