Poor man's "about::tracing" for llbuild build time summary in JSON file

Question 1

I used llbuild ninja build --profile PATH to create a JSON file of build summary in the following format:

{ "name": "Building a.c", "ph": "B", "pid": 0, "tid": 0, "ts": 19967021195},
{ "name": "Building b.c", "ph": "B", "pid": 0, "tid": 1, "ts": 19967021196},
{ "name": "Building c.c", "ph": "B", "pid": 0, "tid": 0, "ts": 19967021197},
{ "name": "Building d.c", "ph": "B", "pid": 0, "tid": 1, "ts": 19967021198},

A multi-line editor was used to remove all the "keys" of the JSON file and keep only the "values" and thus it became a CSV.

The code reads this file and outputs top N slowest processes (build or link) separated by thread.

The limitations are:

Cannot handle , in "name", so they must be removed beforehand.
Cannot create a fancy charts.

I'm not looking to make it any faster, or memory efficient. Comments about design and code organisation are welcome!

main.cpp

/* This file needs a json file path and prints top 10 time slowest processes for each thread. */
#include <iostream>
#include <fstream>
#include <string>
#include <string_view>
#include <vector>
#include <map>
#include <optional>
#include <cassert>
#include <cmath>
#include <algorithm>
#include "main.hh"
const int N_SPLIT{5};
namespace global::string_utils {
/**
 * Split a given string using the delimiter.
 * https://codereview.stackexchange.com/a/247292/205955
 */
std::vector<std::string_view> split_a_line(std::string_view in_string, const char delimiter = ',')
{
 std::vector<std::string_view> r_out_list;
 r_out_list.reserve(N_SPLIT);
 in_string.remove_prefix(std::min(in_string.find_first_of(' ') + 1, in_string.size()));
 for (;;) {
 if (in_string.empty()) {
 return r_out_list;
 }
 const std::string_view::size_type pos = in_string.find(delimiter);
 if (pos == std::string_view::npos) {
 r_out_list.push_back(in_string);
 return r_out_list;
 }
 r_out_list.push_back(in_string.substr(0, pos));
 in_string = in_string.substr(pos + 1, std::string_view::npos);
 }
 return r_out_list;
}
} // namespace global::string_utils
/**
 * Read a line from the file and create an `Item`.
 */
std::unique_ptr<Item> Parser::parse_line()
{
 std::unique_ptr<Item> new_item{nullptr};
 const bool ok = std::getline(_in_stream, _a_line, '\n').operator bool();
 if (!ok || _a_line.empty()) {
 return new_item;
 }
 std::vector<std::string_view> split_line_str = global::string_utils::split_a_line(_a_line);
 if (split_line_str.size() == 5) {
 new_item.reset(new Item{std::string(split_line_str[0]),
 std::stoi(std::string(split_line_str[3])),
 std::stoll(std::string(split_line_str[4]))});
 }
 assert(split_line_str.size() == 5 || split_line_str.empty());
 return new_item;
}
/**
 * Parse the file and store processes separated by threads.
 */
static std::vector<std::vector<Process>> separate_by_thread(Parser *parser)
{
 std::vector<std::vector<Process>> all_threads;
 /* Every element in the vector corresponds to a thread. */
 std::vector<std::map<std::string, long long int>> deduplicators;
 while (parser->good()) {
 std::unique_ptr<Item> item{parser->parse_line()};
 if (!item) {
 continue;
 }
 /* Threads are also zero-based. */
 if (deduplicators.size() <= item->thread) {
 deduplicators.resize(item->thread + 1);
 }
 if (all_threads.size() <= item->thread) {
 all_threads.resize(item->thread + 1);
 }
 std::map<std::string, long long int> &dedup = deduplicators[item->thread];
 const std::map<std::string, long long int>::iterator old_val = dedup.find(item->process_name);
 if (old_val != dedup.end()) {
 const long long int duration{item->time_stamp - old_val->second};
 assert(duration >= 0);
 std::vector<Process> &all_processes = all_threads[item->thread];
 all_processes.emplace_back(item->process_name, item->thread, duration);
 dedup.erase(old_val);
 }
 else {
 dedup.insert({item->process_name, item->time_stamp});
 }
 }
 return all_threads;
}
int main()
{
 const std::string json_path{"/Users/me/project/project-build/save copy 2.json"};
 const int max_print{10};
 std::unique_ptr<Parser> parser{nullptr};
 try {
 parser.reset(new Parser(json_path));
 }
 catch (const std::string &ex) {
 std::cerr << ex << std::endl;
 return -1;
 }
 /* List of threads of list of Processes. */
 std::vector<std::vector<Process>> all_threads = separate_by_thread(parser.get());
 for (std::vector<Process> &all_processes : all_threads) {
 std::sort(all_processes.begin(), all_processes.end());
 }
 for (const std::vector<Process> &all_processes : all_threads) {
 for (int i = 0; i < max_print; i++) {
 const Process &process = all_processes[i];
 std::cout << process.get_thread() << " " << process.get_name() << " "
 << process.get_duration() << "\n";
 }
 }
}

main.hh

#pragma once
namespace global::stringutils {
 std::vector<std::string_view> split_a_line(std::string_view in_string,
 const char delimiter = ',');
}
struct Item {
 const std::string process_name;
 const int thread;
 const long long int time_stamp;
};
class Process {
private:
 std::string _process_name;
 int _thread;
 long long int _duration;
public:
 Process(std::string_view process, const int thread, const long long int duration)
 : _process_name(process), _thread(thread), _duration(duration)
 {
 assert(duration >= 0);
 }
 std::string_view get_name() const
 {
 return _process_name;
 }
 int get_thread() const
 {
 return _thread;
 }
 const long long int get_duration() const
 {
 return _duration;
 }
 friend bool operator<(const Process &one, const Process &other)
 {
 return one.get_duration() > other.get_duration();
 }
};
class Parser {
private:
 std::ifstream _in_stream;
 std::string _a_line;
public:
 Parser(std::string_view filepath)
 {
 _in_stream.open(std::string(filepath));
 if (!_in_stream) {
 throw strerror(errno) + std::string(" : '") + std::string(filepath) + "'\n";
 }
 }
 bool good() const
 {
 return _in_stream.operator bool();
 }
 std::unique_ptr<Item> parse_line();
};

Question 2

General Observations

As a utility this could be made more usable and flexible if it took command line arguments, such as the file to process and the number of lines to print rather than having them hard coded in the program.

A well written utility would also handle the part done by hand in an editor. There might not be any reason to process a JSON file.

The file extension .hh is non-standard for C++, prefer .hpp or .h.

The code mixes C with C++ too much.

About Assert in C

If the goal for this is to run fast at some point rather than the code always being debuggable that rather than have an assert() in the code you should change that assert() to an if statement that throws an exception. When optimized assert statements disappear, they are a debugging tool.

Using `strerror` May not be Portable

The C string function strerror() is part of the POSIX and may not port to other platforms well. The error messages from each platform may be different

It would also be safer if the code created an exception for this statement:

 if (!_in_stream) {
 throw std::strerror(errno) + std::string(" : '") + std::string(filepath) + "'\n";
 }

since the string created with std::strerror() is out of scope in main().

Separate the Classes

In the current code all of the classes are in a single header file, this is harder to maintain and it makes it much more difficult to reuse the code for either class. By separating them into different header files you gain more flexibility. Namespaces can span multiple header files (std:: does for example).

Complex Initialization

This statement is overly complex and might be very hard to debug if there are issues:

 new_item.reset(new Item{ std::string(split_line_str[0]),
 std::stoi(std::string(split_line_str[3])),
 std::stoll(std::string(split_line_str[4])) });

It would be better if it were at least 2 statements.

Magic Numbers

This constant is globally defined

const int N_SPLIT{ 5 };

but it is only used once when it could be used multiple times:

/**
 * Read a line from the file and create an `Item`.
 */
std::unique_ptr<Item> Parser::parse_line()
{
 std::unique_ptr<Item> new_item{ nullptr };
 const bool ok = std::getline(_in_stream, _a_line, '\n').operator bool();
 if (!ok || _a_line.empty()) {
 return new_item;
 }
 std::vector<std::string_view> split_line_str = global::string_utils::split_a_line(_a_line);
 if (split_line_str.size() == 5) {
 new_item.reset(new Item{ std::string(split_line_str[0]),
 std::stoi(std::string(split_line_str[3])),
 std::stoll(std::string(split_line_str[4])) });
 }
 assert(split_line_str.size() == 5 || split_line_str.empty());
 return new_item;
}

All of the 5s in the code above should be using

The code would also be more readable / understandable if 0, 3 and 4 had symbolic names.

Type Mismatch

Using some compilers these statements generate compiler warnings about type mismatch, perhaps it would be better if thread were defined as size_t since size() always returns size_t.

 if (deduplicators.size() <= item->thread) {
 deduplicators.resize(item->thread + 1);
 }
 if (all_threads.size() <= item->thread) {
 all_threads.resize(item->thread + 1);
 }

Question 3

What's the issue with assert ? If the code cannot handle certain input, I'd like it to crash in debug builds.

Question 4

@anki What about release builds?

Question 5

the code will not crash at least and hopefully someone will report a bug that there's a problem :)

score 2 · Accepted Answer · 2020-09-14 22:45:20Z

General Observations

As a utility this could be made more usable and flexible if it took command line arguments, such as the file to process and the number of lines to print rather than having them hard coded in the program.

A well written utility would also handle the part done by hand in an editor. There might not be any reason to process a JSON file.

The file extension .hh is non-standard for C++, prefer .hpp or .h.

The code mixes C with C++ too much.

About Assert in C

If the goal for this is to run fast at some point rather than the code always being debuggable that rather than have an assert() in the code you should change that assert() to an if statement that throws an exception. When optimized assert statements disappear, they are a debugging tool.

Using `strerror` May not be Portable

The C string function strerror() is part of the POSIX and may not port to other platforms well. The error messages from each platform may be different

It would also be safer if the code created an exception for this statement:

 if (!_in_stream) {
 throw std::strerror(errno) + std::string(" : '") + std::string(filepath) + "'\n";
 }

since the string created with std::strerror() is out of scope in main().

Separate the Classes

In the current code all of the classes are in a single header file, this is harder to maintain and it makes it much more difficult to reuse the code for either class. By separating them into different header files you gain more flexibility. Namespaces can span multiple header files (std:: does for example).

Complex Initialization

This statement is overly complex and might be very hard to debug if there are issues:

 new_item.reset(new Item{ std::string(split_line_str[0]),
 std::stoi(std::string(split_line_str[3])),
 std::stoll(std::string(split_line_str[4])) });

It would be better if it were at least 2 statements.

Magic Numbers

This constant is globally defined

const int N_SPLIT{ 5 };

but it is only used once when it could be used multiple times:

/**
 * Read a line from the file and create an `Item`.
 */
std::unique_ptr<Item> Parser::parse_line()
{
 std::unique_ptr<Item> new_item{ nullptr };
 const bool ok = std::getline(_in_stream, _a_line, '\n').operator bool();
 if (!ok || _a_line.empty()) {
 return new_item;
 }
 std::vector<std::string_view> split_line_str = global::string_utils::split_a_line(_a_line);
 if (split_line_str.size() == 5) {
 new_item.reset(new Item{ std::string(split_line_str[0]),
 std::stoi(std::string(split_line_str[3])),
 std::stoll(std::string(split_line_str[4])) });
 }
 assert(split_line_str.size() == 5 || split_line_str.empty());
 return new_item;
}

All of the 5s in the code above should be using

The code would also be more readable / understandable if 0, 3 and 4 had symbolic names.

Type Mismatch

Using some compilers these statements generate compiler warnings about type mismatch, perhaps it would be better if thread were defined as size_t since size() always returns size_t.

 if (deduplicators.size() <= item->thread) {
 deduplicators.resize(item->thread + 1);
 }
 if (all_threads.size() <= item->thread) {
 all_threads.resize(item->thread + 1);
 }

What's the issue with assert ? If the code cannot handle certain input, I'd like it to crash in debug builds.
the code will not crash at least and hopefully someone will report a bug that there's a problem :)

Stack Exchange Network

Poor man's "about::tracing" for llbuild build time summary in JSON file

1 Answer 1

General Observations

About Assert in C

Using `strerror` May not be Portable

Separate the Classes

Complex Initialization

Magic Numbers

Type Mismatch

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Hot Network Questions

Poor man's "about::tracing" for llbuild build time summary in JSON file

1 Answer 1

General Observations

About Assert in C

Using strerror May not be Portable

Separate the Classes

Complex Initialization

Magic Numbers

Type Mismatch

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Related

Hot Network Questions

Using `strerror` May not be Portable