common : better default number of threads (#934)

* commit * fix * try-catch * apply code review * improve * improve * add macos headers * done * remove color * fix windows * minor * fix * Apply suggestions from code review Co-authored-by: DannyDaemonic <DannyDaemonic@gmail.com> * remove * minor * minor --------- Co-authored-by: jon-chuang <jon-chuang@users.noreply.github.com> Co-authored-by: DannyDaemonic <DannyDaemonic@gmail.com>
2024-12-25 05:48:47 +01:00 · 2023-04-30 14:41:35 -04:00 · 2023-04-30 14:41:35 -04:00 · a5d30b1f53
commit a5d30b1f53
parent 76a884920a
2 changed files with 42 additions and 12 deletions
--- a/examples/common.cpp
+++ b/examples/common.cpp
@ -1,13 +1,18 @@
 #include "common.h"
 #include <cassert>
 #include <iostream>
 #include <cstring>
 #include <fstream>
 #include <string>
 #include <iterator>
 #include <algorithm>
 #include <sstream>
-#include <iostream>
+
 #if defined(__APPLE__) && defined(__MACH__)
 #include <sys/types.h>
 #include <sys/sysctl.h>
 #endif
 #if defined (_WIN32)
 #include <fcntl.h>
@ -25,19 +30,43 @@ extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int
 #define CP_UTF8 65001
 #endif
-bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
+int32_t get_num_physical_cores() {
    // determine sensible default number of threads.
    // std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0.
    //
    // Returns a platform-specific physical core count when one can be read; otherwise falls
    // through to a heuristic based on std::thread::hardware_concurrency() at the end of the function.
 #ifdef __linux__
    // Linux: read /proc/cpuinfo line by line and return the number that follows ": " on the
    // first line containing "cpu cores" that parses successfully.
    // NOTE(review): "cpu cores" is presumably a per-package count, so multi-socket machines
    // may be under-reported - confirm.
    std::ifstream cpuinfo("/proc/cpuinfo");
-    params.n_threads = std::count(std::istream_iterator<std::string>(cpuinfo),
+    std::string line;
-                                  std::istream_iterator<std::string>(),
+    while (std::getline(cpuinfo, line)) {
-                                  std::string("processor"));
+        std::size_t pos = line.find("cpu cores");
-#endif
+        if (pos != std::string::npos) {
-    if (params.n_threads == 0) {
+            pos = line.find(": ", pos);
-        params.n_threads = std::max(1, (int32_t) std::thread::hardware_concurrency());
+            if (pos != std::string::npos) {
                try {
                    // Extract the number and return it (pos + 2 skips the ": " separator;
                    // the std::stoul result is narrowed to int32_t)
                    return static_cast<int32_t>(std::stoul(line.substr(pos + 2)));
                } catch (const std::invalid_argument &) {
                    // Ignore if we could not parse; the loop continues with the next line.
                    // NOTE(review): std::stoul can also throw std::out_of_range, which is not caught here.
                }
            }
        }
    }
 #elif defined(__APPLE__) && defined(__MACH__)
    int32_t num_physical_cores;
    size_t len = sizeof(num_physical_cores);
    // Query "hw.perflevel0.physicalcpu" first; a sysctlbyname result of 0 is treated as success.
    // NOTE(review): presumably perflevel0 is the performance-core group on Apple Silicon - confirm.
    int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
    // First query failed: fall back to the "hw.physicalcpu" key.
    result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
 #elif defined(_WIN32)
    //TODO: Implement (no Windows-specific detection yet; the heuristic below is used)
 #endif
    // Fallback when nothing was returned above:
    //   hardware_concurrency() == 0 -> 4
    //   1..4                        -> used as-is
    //   more than 4                 -> halved (integer division)
    unsigned int n_threads = std::thread::hardware_concurrency();
    return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
 }
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
    bool invalid_param = false;
    std::string arg;
    gpt_params default_params;
--- a/examples/common.h
+++ b/examples/common.h
@ -13,11 +13,12 @@
 //
 // CLI argument parsing
 //
 // Returns a physical-core count where the platform-specific lookup succeeds, otherwise a
 // heuristic based on std::thread::hardware_concurrency(). Used as the default for gpt_params::n_threads.
 int32_t get_num_physical_cores();
 struct gpt_params {
    int32_t seed          = -1;   // RNG seed
-    int32_t n_threads     = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    int32_t n_threads     = get_num_physical_cores();
-    int32_t n_predict     = -1;   // new tokens to predict
+    int32_t n_predict     = -1;  // new tokens to predict
    int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions)
    int32_t n_ctx         = 512;  // context size
    int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS)