I used llbuild ninja build --profile PATH
to create a JSON file of build summary in the following format:
{ "name": "Building a.c", "ph": "B", "pid": 0, "tid": 0, "ts": 19967021195},
{ "name": "Building b.c", "ph": "B", "pid": 0, "tid": 1, "ts": 19967021196},
{ "name": "Building c.c", "ph": "B", "pid": 0, "tid": 0, "ts": 19967021197},
{ "name": "Building d.c", "ph": "B", "pid": 0, "tid": 1, "ts": 19967021198},
A multi-line editor was used to remove all the "keys" of the JSON file and keep only the "values" and thus it became a CSV.
The code reads this file and outputs top N slowest processes (build or link) separated by thread.
The limitations are:
- Cannot handle
,
in"name"
, so they must be removed beforehand. - Cannot create a fancy charts.
I'm not looking to make it any faster, or memory efficient. Comments about design and code organisation are welcome!
main.cpp
/* This file needs a json file path and prints top 10 time slowest processes for each thread. */
#include <iostream>
#include <fstream>
#include <string>
#include <string_view>
#include <vector>
#include <map>
#include <optional>
#include <cassert>
#include <cmath>
#include <algorithm>
#include "main.hh"
const int N_SPLIT{5};
namespace global::string_utils {
/**
* Split a given string using the delimiter.
* https://codereview.stackexchange.com/a/247292/205955
*/
std::vector<std::string_view> split_a_line(std::string_view in_string, const char delimiter = ',')
{
std::vector<std::string_view> r_out_list;
r_out_list.reserve(N_SPLIT);
in_string.remove_prefix(std::min(in_string.find_first_of(' ') + 1, in_string.size()));
for (;;) {
if (in_string.empty()) {
return r_out_list;
}
const std::string_view::size_type pos = in_string.find(delimiter);
if (pos == std::string_view::npos) {
r_out_list.push_back(in_string);
return r_out_list;
}
r_out_list.push_back(in_string.substr(0, pos));
in_string = in_string.substr(pos + 1, std::string_view::npos);
}
return r_out_list;
}
} // namespace global::string_utils
/**
* Read a line from the file and create an `Item`.
*/
std::unique_ptr<Item> Parser::parse_line()
{
std::unique_ptr<Item> new_item{nullptr};
const bool ok = std::getline(_in_stream, _a_line, '\n').operator bool();
if (!ok || _a_line.empty()) {
return new_item;
}
std::vector<std::string_view> split_line_str = global::string_utils::split_a_line(_a_line);
if (split_line_str.size() == 5) {
new_item.reset(new Item{std::string(split_line_str[0]),
std::stoi(std::string(split_line_str[3])),
std::stoll(std::string(split_line_str[4]))});
}
assert(split_line_str.size() == 5 || split_line_str.empty());
return new_item;
}
/**
* Parse the file and store processes separated by threads.
*/
static std::vector<std::vector<Process>> separate_by_thread(Parser *parser)
{
std::vector<std::vector<Process>> all_threads;
/* Every element in the vector corresponds to a thread. */
std::vector<std::map<std::string, long long int>> deduplicators;
while (parser->good()) {
std::unique_ptr<Item> item{parser->parse_line()};
if (!item) {
continue;
}
/* Threads are also zero-based. */
if (deduplicators.size() <= item->thread) {
deduplicators.resize(item->thread + 1);
}
if (all_threads.size() <= item->thread) {
all_threads.resize(item->thread + 1);
}
std::map<std::string, long long int> &dedup = deduplicators[item->thread];
const std::map<std::string, long long int>::iterator old_val = dedup.find(item->process_name);
if (old_val != dedup.end()) {
const long long int duration{item->time_stamp - old_val->second};
assert(duration >= 0);
std::vector<Process> &all_processes = all_threads[item->thread];
all_processes.emplace_back(item->process_name, item->thread, duration);
dedup.erase(old_val);
}
else {
dedup.insert({item->process_name, item->time_stamp});
}
}
return all_threads;
}
int main()
{
const std::string json_path{"/Users/me/project/project-build/save copy 2.json"};
const int max_print{10};
std::unique_ptr<Parser> parser{nullptr};
try {
parser.reset(new Parser(json_path));
}
catch (const std::string &ex) {
std::cerr << ex << std::endl;
return -1;
}
/* List of threads of list of Processes. */
std::vector<std::vector<Process>> all_threads = separate_by_thread(parser.get());
for (std::vector<Process> &all_processes : all_threads) {
std::sort(all_processes.begin(), all_processes.end());
}
for (const std::vector<Process> &all_processes : all_threads) {
for (int i = 0; i < max_print; i++) {
const Process &process = all_processes[i];
std::cout << process.get_thread() << " " << process.get_name() << " "
<< process.get_duration() << "\n";
}
}
}
main.hh
#pragma once
namespace global::stringutils {
std::vector<std::string_view> split_a_line(std::string_view in_string,
const char delimiter = ',');
}
struct Item {
const std::string process_name;
const int thread;
const long long int time_stamp;
};
class Process {
private:
std::string _process_name;
int _thread;
long long int _duration;
public:
Process(std::string_view process, const int thread, const long long int duration)
: _process_name(process), _thread(thread), _duration(duration)
{
assert(duration >= 0);
}
std::string_view get_name() const
{
return _process_name;
}
int get_thread() const
{
return _thread;
}
const long long int get_duration() const
{
return _duration;
}
friend bool operator<(const Process &one, const Process &other)
{
return one.get_duration() > other.get_duration();
}
};
class Parser {
private:
std::ifstream _in_stream;
std::string _a_line;
public:
Parser(std::string_view filepath)
{
_in_stream.open(std::string(filepath));
if (!_in_stream) {
throw strerror(errno) + std::string(" : '") + std::string(filepath) + "'\n";
}
}
bool good() const
{
return _in_stream.operator bool();
}
std::unique_ptr<Item> parse_line();
};
1 Answer 1
General Observations
As a utility this could be made more usable and flexible if it took command line arguments, such as the file to process and the number of lines to print rather than having them hard coded in the program.
A well written utility would also handle the part done by hand in an editor. There might not be any reason to process a JSON file.
The file extension .hh
is non-standard for C++, prefer .hpp
or .h
.
The code mixes C with C++ too much.
About Assert in C
If the goal for this is to run fast at some point rather than the code always being debuggable that rather than have an assert()
in the code you should change that assert()
to an if statement that throws an exception. When optimized assert
statements disappear, they are a debugging tool.
Using strerror
May not be Portable
The C string function strerror()
is part of the POSIX and may not port to other platforms well. The error messages from each platform may be different
It would also be safer if the code created an exception
for this statement:
if (!_in_stream) {
throw std::strerror(errno) + std::string(" : '") + std::string(filepath) + "'\n";
}
since the string created with std::strerror() is out of scope in main()
.
Separate the Classes
In the current code all of the classes are in a single header file, this is harder to maintain and it makes it much more difficult to reuse the code for either class. By separating them into different header files you gain more flexibility. Namespaces can span multiple header files (std::
does for example).
Complex Initialization
This statement is overly complex and might be very hard to debug if there are issues:
new_item.reset(new Item{ std::string(split_line_str[0]),
std::stoi(std::string(split_line_str[3])),
std::stoll(std::string(split_line_str[4])) });
It would be better if it were at least 2 statements.
Magic Numbers
This constant is globally defined
const int N_SPLIT{ 5 };
but it is only used once when it could be used multiple times:
/**
* Read a line from the file and create an `Item`.
*/
std::unique_ptr<Item> Parser::parse_line()
{
std::unique_ptr<Item> new_item{ nullptr };
const bool ok = std::getline(_in_stream, _a_line, '\n').operator bool();
if (!ok || _a_line.empty()) {
return new_item;
}
std::vector<std::string_view> split_line_str = global::string_utils::split_a_line(_a_line);
if (split_line_str.size() == 5) {
new_item.reset(new Item{ std::string(split_line_str[0]),
std::stoi(std::string(split_line_str[3])),
std::stoll(std::string(split_line_str[4])) });
}
assert(split_line_str.size() == 5 || split_line_str.empty());
return new_item;
}
All of the 5
s in the code above should be using
The code would also be more readable / understandable if 0
, 3
and 4
had symbolic names.
Type Mismatch
Using some compilers these statements generate compiler warnings about type mismatch, perhaps it would be better if thread
were defined as size_t
since size()
always returns size_t
.
if (deduplicators.size() <= item->thread) {
deduplicators.resize(item->thread + 1);
}
if (all_threads.size() <= item->thread) {
all_threads.resize(item->thread + 1);
}
-
1\$\begingroup\$ What's the issue with assert ? If the code cannot handle certain input, I'd like it to crash in debug builds. \$\endgroup\$aki– aki2020年09月15日 07:54:08 +00:00Commented Sep 15, 2020 at 7:54
-
\$\begingroup\$ @anki What about release builds? \$\endgroup\$2020年09月15日 11:29:15 +00:00Commented Sep 15, 2020 at 11:29
-
\$\begingroup\$ the code will not crash at least and hopefully someone will report a bug that there's a problem :) \$\endgroup\$aki– aki2020年09月15日 11:52:47 +00:00Commented Sep 15, 2020 at 11:52