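// File: llama.cpp/kompute/src/Algorithm.cpp
// (The web viewer header that preceded this file -- line count, size,
// language tag and "Raw / Normal View / History" links -- is not source code.)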

// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (c) 2023 Nomic, Inc. All rights reserved.
*
* This software is licensed under the terms of the Software for Open Models License (SOM),
* version 1.0, as detailed in the LICENSE_SOM.txt file. A copy of this license should accompany
* this software. Except as expressly granted in the SOM license, all rights are reserved by Nomic, Inc.
*/
#include <fstream>
#include "kompute/Algorithm.hpp"
namespace kp {
/**
 * Destructor. Emits a debug log entry and hands all cleanup work to
 * destroy().
 */
Algorithm::~Algorithm()
{
    KP_LOG_DEBUG("Kompute Algorithm Destructor started");

    destroy();
}
bool
Algorithm::isInit()
{
return this->mPipeline && this->mPipelineCache && this->mPipelineLayout &&
this->mDescriptorPool && this->mDescriptorSet &&
this->mDescriptorSetLayout && this->mShaderModule;
}
void
Algorithm::destroy()
{
// We don't have to free memory on destroy as it's freed by the
// commandBuffer destructor if (this->mPushConstantsData) {
// free(this->mPushConstantsData);
// }
// if (this->mSpecializationConstantsData) {
// free(this->mSpecializationConstantsData);
// }
if (!this->mDevice) {
KP_LOG_WARN("Kompute Algorithm destroy function reached with null "
"Device pointer");
return;
}
if (this->mFreePipeline && this->mPipeline) {
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline");
if (!this->mPipeline) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"pipeline but it is null");
}
this->mDevice->destroy(
*this->mPipeline,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mPipeline = nullptr;
}
if (this->mFreePipelineLayout && this->mPipelineLayout) {
KP_LOG_DEBUG("Kompute Algorithm Destroying pipeline layout");
if (!this->mPipelineLayout) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"pipeline layout but it is null");
}
this->mDevice->destroy(
*this->mPipelineLayout,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mPipelineLayout = nullptr;
}
if (this->mFreeShaderModule && this->mShaderModule) {
KP_LOG_DEBUG("Kompute Algorithm Destroying shader module");
if (!this->mShaderModule) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy shader "
"module but it is null");
}
this->mDevice->destroy(
*this->mShaderModule,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mShaderModule = nullptr;
}
freeParameters();
}
void
Algorithm::freeParameters()
{
if (this->mFreeDescriptorSetLayout && this->mDescriptorSetLayout) {
KP_LOG_DEBUG("Kompute Algorithm Destroying Descriptor Set Layout");
if (!this->mDescriptorSetLayout) {
KP_LOG_WARN("Kompute Algorithm Error requested to destroy "
"descriptor set layout but it is null");
}
this->mDevice->destroy(
*this->mDescriptorSetLayout,
(vk::Optional<const vk::AllocationCallbacks>)nullptr);
this->mDescriptorSetLayout = nullptr;
}
}
void
Algorithm::createParameters()
{
KP_LOG_DEBUG("Kompute Algorithm createParameters started");
if (!*this->mDescriptorPool) {
KP_LOG_ERROR("Kompute Algorithm can not create descriptor pool");
return;
}
std::vector<vk::DescriptorSetLayoutBinding> descriptorSetBindings;
for (size_t i = 0; i < this->mTensors.size(); i++) {
descriptorSetBindings.push_back(
vk::DescriptorSetLayoutBinding(i, // Binding index
vk::DescriptorType::eStorageBuffer,
1, // Descriptor count
vk::ShaderStageFlagBits::eCompute));
}
// This is the component that is fed into the pipeline
vk::DescriptorSetLayoutCreateInfo descriptorSetLayoutInfo(
vk::DescriptorSetLayoutCreateFlags(),
static_cast<uint32_t>(descriptorSetBindings.size()),
descriptorSetBindings.data());
KP_LOG_DEBUG("Kompute Algorithm creating descriptor set layout");
this->mDescriptorSetLayout = std::make_shared<vk::DescriptorSetLayout>();
vk::Result result = this->mDevice->createDescriptorSetLayout(
&descriptorSetLayoutInfo, nullptr, this->mDescriptorSetLayout.get());
if (result != vk::Result::eSuccess) {
KP_LOG_ERROR("Failed to create descriptor set layout. Error code: {}", vk::to_string(result));
} else {
this->mFreeDescriptorSetLayout = true;
KP_LOG_DEBUG("Successfully allocated descriptor set layout.");
}
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
*this->mDescriptorPool,
1, // Descriptor set layout count
this->mDescriptorSetLayout.get());
KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets");
this->mDescriptorSet = std::make_shared<vk::DescriptorSet>();
result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
this->mDescriptorSet.get());
if (result != vk::Result::eSuccess) {
KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result));
} else {
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Successfully allocated descriptor sets.");
}
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
for (size_t i = 0; i < this->mTensors.size(); i++) {
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
vk::DescriptorBufferInfo descriptorBufferInfo =
this->mTensors[i]->constructDescriptorBufferInfo();
computeWriteDescriptorSets.push_back(
vk::WriteDescriptorSet(*this->mDescriptorSet,
i, // Destination binding
0, // Destination array element
1, // Descriptor count
vk::DescriptorType::eStorageBuffer,
nullptr, // Descriptor image info
&descriptorBufferInfo));
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
nullptr);
}
KP_LOG_DEBUG("Kompute Algorithm successfully run init");
}
void
Algorithm::updateParameters()
{
KP_LOG_DEBUG("Kompute Algorithm updateParameters started");
if (!*this->mDescriptorPool) {
KP_LOG_ERROR("Kompute Algorithm can not create descriptor pool");
return;
}
vk::DescriptorSetAllocateInfo descriptorSetAllocateInfo(
*this->mDescriptorPool,
1, // Descriptor set layout count
this->mDescriptorSetLayout.get());
KP_LOG_DEBUG("Kompute Algorithm allocating descriptor sets");
this->mDescriptorSet = std::make_shared<vk::DescriptorSet>();
vk::Result result = this->mDevice->allocateDescriptorSets(&descriptorSetAllocateInfo,
this->mDescriptorSet.get());
if (result != vk::Result::eSuccess) {
KP_LOG_ERROR("Failed to allocate descriptor sets. Error code: {}", vk::to_string(result));
} else {
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Successfully allocated descriptor sets.");
}
this->mFreeDescriptorSet = true;
KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets");
for (size_t i = 0; i < this->mTensors.size(); i++) {
std::vector<vk::WriteDescriptorSet> computeWriteDescriptorSets;
vk::DescriptorBufferInfo descriptorBufferInfo =
this->mTensors[i]->constructDescriptorBufferInfo();
computeWriteDescriptorSets.push_back(
vk::WriteDescriptorSet(*this->mDescriptorSet,
i, // Destination binding
0, // Destination array element
1, // Descriptor count
vk::DescriptorType::eStorageBuffer,
nullptr, // Descriptor image info
&descriptorBufferInfo));
this->mDevice->updateDescriptorSets(computeWriteDescriptorSets,
nullptr);
}
KP_LOG_DEBUG("Kompute Algorithm successfully run init");
}
void
Algorithm::createShaderModule()
{
KP_LOG_DEBUG("Kompute Algorithm createShaderModule started");
vk::ShaderModuleCreateInfo shaderModuleInfo(vk::ShaderModuleCreateFlags(),
sizeof(uint32_t) *
this->mSpirv.size(),
this->mSpirv.data());
KP_LOG_DEBUG("Kompute Algorithm Creating shader module. ShaderFileSize: {}",
this->mSpirv.size());
this->mFreeShaderModule = true;
this->mShaderModule = std::make_shared<vk::ShaderModule>();
this->mDevice->createShaderModule(
&shaderModuleInfo, nullptr, this->mShaderModule.get());
this->mFreeShaderModule = true;
KP_LOG_DEBUG("Kompute Algorithm create shader module success");
}
void
Algorithm::createPipeline()
{
KP_LOG_DEBUG("Kompute Algorithm calling create Pipeline");
vk::PipelineLayoutCreateInfo pipelineLayoutInfo(
vk::PipelineLayoutCreateFlags(),
1, // Set layout count
this->mDescriptorSetLayout.get());
vk::PushConstantRange pushConstantRange;
if (this->mPushConstantsSize) {
pushConstantRange.setStageFlags(vk::ShaderStageFlagBits::eCompute);
pushConstantRange.setOffset(0);
pushConstantRange.setSize(this->mPushConstantsDataTypeMemorySize *
this->mPushConstantsSize);
pipelineLayoutInfo.setPushConstantRangeCount(1);
pipelineLayoutInfo.setPPushConstantRanges(&pushConstantRange);
}
this->mPipelineLayout = std::make_shared<vk::PipelineLayout>();
this->mDevice->createPipelineLayout(
&pipelineLayoutInfo, nullptr, this->mPipelineLayout.get());
this->mFreePipelineLayout = true;
std::vector<vk::SpecializationMapEntry> specializationEntries;
for (uint32_t i = 0; i < this->mSpecializationConstantsSize; i++) {
vk::SpecializationMapEntry specializationEntry(
static_cast<uint32_t>(i),
static_cast<uint32_t>(
this->mSpecializationConstantsDataTypeMemorySize * i),
this->mSpecializationConstantsDataTypeMemorySize);
specializationEntries.push_back(specializationEntry);
}
// This passes ownership of the memory so we remove ownership from
// specialization container by using "transferDataOwnership"
vk::SpecializationInfo specializationInfo(
static_cast<uint32_t>(specializationEntries.size()),
specializationEntries.data(),
this->mSpecializationConstantsDataTypeMemorySize *
this->mSpecializationConstantsSize,
this->mSpecializationConstantsData);
vk::PipelineShaderStageCreateInfo shaderStage(
vk::PipelineShaderStageCreateFlags(),
vk::ShaderStageFlagBits::eCompute,
*this->mShaderModule,
"main",
&specializationInfo);
vk::ComputePipelineCreateInfo pipelineInfo(vk::PipelineCreateFlags(),
shaderStage,
*this->mPipelineLayout,
vk::Pipeline(),
0);
#ifdef KOMPUTE_CREATE_PIPELINE_RESULT_VALUE
vk::ResultValue<vk::Pipeline> pipelineResult =
this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo);
if (pipelineResult.result != vk::Result::eSuccess) {
throw std::runtime_error("Failed to create pipeline result: " +
vk::to_string(pipelineResult.result));
}
vk::Pipeline& pipeline = pipelineResult.value;
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
this->mFreePipeline = true;
#else
vk::Pipeline pipeline =
this->mDevice->createComputePipeline(*mPipelineCache, pipelineInfo)
.value;
this->mPipeline = std::make_shared<vk::Pipeline>(pipeline);
this->mFreePipeline = true;
#endif
// TODO: Update to consistent
// this->mPipeline = std::make_shared<vk::Pipeline>();
// this->mDevice->createComputePipelines(
// *this->mPipelineCache, 1, &pipelineInfo, nullptr,
// this->mPipeline.get());
KP_LOG_DEBUG("Kompute Algorithm Create Pipeline Success");
}
/**
 * Records the commands that bind this algorithm's compute pipeline and its
 * descriptor set into the given command buffer.
 *
 * @param commandBuffer Command buffer the bind commands are recorded into.
 */
void
Algorithm::recordBindCore(const vk::CommandBuffer& commandBuffer)
{
    const vk::PipelineBindPoint bindPoint = vk::PipelineBindPoint::eCompute;

    KP_LOG_DEBUG("Kompute Algorithm binding pipeline");
    commandBuffer.bindPipeline(bindPoint, *mPipeline);

    KP_LOG_DEBUG("Kompute Algorithm binding descriptor sets");
    // Arguments after the layout: first set index 0, the single descriptor
    // set, and nullptr for the trailing array argument.
    commandBuffer.bindDescriptorSets(
      bindPoint, *mPipelineLayout, 0, *mDescriptorSet, nullptr);
}
/**
 * Records a push-constants command for the compute stage when push
 * constants are configured; does nothing when mPushConstantsSize is zero.
 *
 * The byte count pushed is mPushConstantsSize multiplied by
 * mPushConstantsDataTypeMemorySize, written at offset 0 from
 * mPushConstantsData.
 *
 * @param commandBuffer Command buffer the command is recorded into.
 */
void
Algorithm::recordBindPush(const vk::CommandBuffer& commandBuffer)
{
    if (!mPushConstantsSize) {
        return;
    }

    const auto pushBytes =
      mPushConstantsSize * mPushConstantsDataTypeMemorySize;

    KP_LOG_DEBUG("Kompute Algorithm binding push constants memory size: {}",
                 pushBytes);

    commandBuffer.pushConstants(*mPipelineLayout,
                                vk::ShaderStageFlagBits::eCompute,
                                0, // Offset
                                pushBytes,
                                mPushConstantsData);
}
/**
 * Records a dispatch command using the three components of mWorkgroup as
 * the X, Y and Z group counts.
 *
 * @param commandBuffer Command buffer the dispatch is recorded into.
 */
void
Algorithm::recordDispatch(const vk::CommandBuffer& commandBuffer)
{
    KP_LOG_DEBUG("Kompute Algorithm recording dispatch");

    const Workgroup& groups = mWorkgroup;
    commandBuffer.dispatch(groups[0], groups[1], groups[2]);
}
void
Algorithm::setWorkgroup(const Workgroup& workgroup, uint32_t minSize)
{
KP_LOG_INFO("Kompute OpAlgoCreate setting dispatch size");
// The dispatch size is set up based on either explicitly provided template
// parameters or by default it would take the shape and size of the tensors
if (workgroup[0] > 0) {
// If at least the x value is provided we use mainly the parameters
// provided
this->mWorkgroup = { workgroup[0],
workgroup[1] > 0 ? workgroup[1] : 1,
workgroup[2] > 0 ? workgroup[2] : 1 };
} else {
this->mWorkgroup = { minSize, 1, 1 };
}
KP_LOG_INFO("Kompute OpAlgoCreate set dispatch size X: {}, Y: {}, Z: {}",
this->mWorkgroup[0],
this->mWorkgroup[1],
this->mWorkgroup[2]);
}
/**
 * Accessor for the dispatch size.
 *
 * @return Reference to the mWorkgroup member.
 */
const Workgroup&
Algorithm::getWorkgroup()
{
    return mWorkgroup;
}
const std::vector<std::shared_ptr<Tensor>>&
Algorithm::getTensors()
{
return this->mTensors;
}
void Algorithm::setTensors(const std::vector<std::shared_ptr<Tensor>>& tensors)
{
this->mTensors = tensors;
}
}