mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 05:48:47 +01:00
common : better default number of threads (#934)
* commit
* fix
* try-catch
* apply code review
* improve
* improve
* add macos headers
* done
* remove color
* fix windows
* minor
* fix
* Apply suggestions from code review
  Co-authored-by: DannyDaemonic <DannyDaemonic@gmail.com>
* remove
* minor
* minor

---------

Co-authored-by: jon-chuang <jon-chuang@users.noreply.github.com>
Co-authored-by: DannyDaemonic <DannyDaemonic@gmail.com>
This commit is contained in:
parent
76a884920a
commit
a5d30b1f53
@ -1,13 +1,18 @@
|
|||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
#include <iostream>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <iostream>
|
|
||||||
|
#if defined(__APPLE__) && defined(__MACH__)
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/sysctl.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined (_WIN32)
|
#if defined (_WIN32)
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
@ -25,19 +30,43 @@ extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int
|
|||||||
#define CP_UTF8 65001
|
#define CP_UTF8 65001
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
int32_t get_num_physical_cores() {
|
||||||
// determine sensible default number of threads.
|
|
||||||
// std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0.
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
std::ifstream cpuinfo("/proc/cpuinfo");
|
std::ifstream cpuinfo("/proc/cpuinfo");
|
||||||
params.n_threads = std::count(std::istream_iterator<std::string>(cpuinfo),
|
std::string line;
|
||||||
std::istream_iterator<std::string>(),
|
while (std::getline(cpuinfo, line)) {
|
||||||
std::string("processor"));
|
std::size_t pos = line.find("cpu cores");
|
||||||
#endif
|
if (pos != std::string::npos) {
|
||||||
if (params.n_threads == 0) {
|
pos = line.find(": ", pos);
|
||||||
params.n_threads = std::max(1, (int32_t) std::thread::hardware_concurrency());
|
if (pos != std::string::npos) {
|
||||||
|
try {
|
||||||
|
// Extract the number and return it
|
||||||
|
return static_cast<int32_t>(std::stoul(line.substr(pos + 2)));
|
||||||
|
} catch (const std::invalid_argument &) {
|
||||||
|
// Ignore if we could not parse
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
#elif defined(__APPLE__) && defined(__MACH__)
|
||||||
|
int32_t num_physical_cores;
|
||||||
|
size_t len = sizeof(num_physical_cores);
|
||||||
|
int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
|
||||||
|
if (result == 0) {
|
||||||
|
return num_physical_cores;
|
||||||
|
}
|
||||||
|
result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
|
||||||
|
if (result == 0) {
|
||||||
|
return num_physical_cores;
|
||||||
|
}
|
||||||
|
#elif defined(_WIN32)
|
||||||
|
//TODO: Implement
|
||||||
|
#endif
|
||||||
|
unsigned int n_threads = std::thread::hardware_concurrency();
|
||||||
|
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
||||||
bool invalid_param = false;
|
bool invalid_param = false;
|
||||||
std::string arg;
|
std::string arg;
|
||||||
gpt_params default_params;
|
gpt_params default_params;
|
||||||
|
@ -13,11 +13,12 @@
|
|||||||
//
|
//
|
||||||
// CLI argument parsing
|
// CLI argument parsing
|
||||||
//
|
//
|
||||||
|
int32_t get_num_physical_cores();
|
||||||
|
|
||||||
struct gpt_params {
|
struct gpt_params {
|
||||||
int32_t seed = -1; // RNG seed
|
int32_t seed = -1; // RNG seed
|
||||||
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
int32_t n_threads = get_num_physical_cores();
|
||||||
int32_t n_predict = -1; // new tokens to predict
|
int32_t n_predict = -1; // new tokens to predict
|
||||||
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
|
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
|
||||||
int32_t n_ctx = 512; // context size
|
int32_t n_ctx = 512; // context size
|
||||||
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
|
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
|
||||||
|
Loading…
Reference in New Issue
Block a user