Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit de1b9d9

Browse files
committed
Init
0 parents commit de1b9d9

File tree

3 files changed

+270
-0
lines changed

3 files changed

+270
-0
lines changed

‎README.md‎

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# hq
2+
3+
An HTML processor inspired by jq (https://github.com/stedolan/jq)
4+
5+
## Building & Usage
6+
7+
### Building
8+
9+
#### Dependencies
10+
- meson
11+
- modest (https://github.com/lexborisov/Modest)
12+
13+
#### Build
14+
`meson build && ninja -C build`
15+
16+
The executable will be built to `build/hq`.
17+
18+
### Usage
19+
20+
#### Dependencies
21+
- modest (https://github.com/lexborisov/Modest)
22+
23+
#### Use
24+
25+
Application help text:
26+
```
27+
hq (html query) - commandline HTML processor © Robin Broda, 2018
28+
Usage: build/hq [options] <selector> <mode> [mode argument]
29+
30+
Options:
31+
-h, --help show this text
32+
33+
<selector> selector to match
34+
<mode> processing mode
35+
may be one of { data, text, attr }:
36+
data - return raw html of matching elements
37+
text - return inner text of matching elements
38+
attr - return attribute value X of matching elements
39+
[mode argument] - attribute to return
40+
```
41+
42+
Example usage:
43+
44+
`curl -s https://coderobe.net | hq a data`
45+
```
46+
<a href="https://keybase.io/coderobe">Keybase (coderobe)</a>
47+
<a href="https://github.com/coderobe">Github (coderobe)</a>
48+
<a href="https://twitter.com/coderobe">Twitter (coderobe)</a>
49+
```
50+
51+
52+
`curl -s https://coderobe.net | hq a text`
53+
```
54+
Keybase (coderobe)
55+
Github (coderobe)
56+
Twitter (coderobe)
57+
```
58+
59+
`curl -s https://coderobe.net | hq a attr href`
60+
```
61+
https://keybase.io/coderobe
62+
https://github.com/coderobe
63+
https://twitter.com/coderobe
64+
```
65+
66+
You get the idea.
67+
68+
## License
69+
70+
This work, written by Robin Broda (coderobe) in 2018, is licensed under the terms of the GNU Affero General Public License v3.0.

‎main.c‎

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
#include <stdlib.h>
2+
#include <stdio.h>
3+
#include <string.h>
4+
5+
#include <myhtml/myhtml.h>
6+
#include <myhtml/serialization.h>
7+
#include <mycss/selectors/serialization.h>
8+
#include <modest/finder/finder.h>
9+
10+
/*
 * Read all of stdin into a freshly allocated, NUL-terminated string.
 *
 * Input is consumed with fgets() in fixed-size chunks and appended to a
 * heap buffer that doubles in size whenever it runs out of room, so the
 * total cost stays linear in the input size.
 *
 * Returns: the accumulated text (an empty string when stdin is empty).
 *          The caller owns the buffer and must free() it.
 * Exits with EXIT_FAILURE if memory cannot be allocated or if reading
 * stdin fails.
 */
char* readeof(){
  enum { CHUNK_SIZE = 1024 };
  char chunk[CHUNK_SIZE];
  size_t length = 0;            // bytes stored so far, excluding the '\0'
  size_t capacity = CHUNK_SIZE; // bytes allocated for content

  char* content = malloc(capacity);
  if(content == NULL){
    perror("Failed to allocate");
    exit(EXIT_FAILURE);
  }
  content[0] = '\0';

  while(fgets(chunk, sizeof chunk, stdin)){
    const size_t chunk_len = strlen(chunk);

    // Invariant: length < capacity. A chunk holds fewer than CHUNK_SIZE
    // characters and capacity >= CHUNK_SIZE, so one doubling always makes
    // enough room for the chunk plus its terminator.
    if(chunk_len + 1 > capacity - length){
      if(capacity > (size_t)-1 / 2){
        fprintf(stderr, "Failed to allocate: input too large\n");
        free(content);
        exit(EXIT_FAILURE);
      }
      const size_t new_capacity = capacity * 2;
      char* grown = realloc(content, new_capacity);
      if(grown == NULL){
        perror("Failed to allocate");
        free(content);
        exit(EXIT_FAILURE);
      }
      content = grown;
      capacity = new_capacity;
    }

    // Append at the known end instead of rescanning with strcat();
    // the +1 copies the chunk's terminating '\0' as well.
    memcpy(content + length, chunk, chunk_len + 1);
    length += chunk_len;
  }

  // fgets() returns NULL for both EOF and errors; only EOF is acceptable.
  if(ferror(stdin)){
    perror("Failed to read input");
    free(content);
    exit(EXIT_FAILURE);
  }

  return content;
}
33+
34+
/*
 * Serialization callback: writes the first `len` bytes of `data` to stdout.
 *
 * data - text fragment to print (need not be NUL-terminated within len)
 * len  - number of bytes of `data` to print
 * ctx  - user context pointer; unused here
 *
 * Always returns 0.
 */
unsigned int serializer_log(const char* data, size_t len, void* ctx){
  (void)ctx; // callback signature requires it, but nothing is needed

  // "%.*s" takes its precision as an int argument.
  const int byte_count = (int)len;
  printf("%.*s", byte_count, data);

  return 0;
}
38+
39+
/*
 * Handle a single command-line option.
 *
 * arg      - option name with leading dashes already removed
 *            (e.g. "help" for --help, "h" for -h)
 * progname - program name shown in the usage line
 *
 * For "help" or "h" the usage text is written to stderr and the process
 * exits with EXIT_SUCCESS. Any other option returns without doing anything.
 */
void opthandler(const char* arg, const char* progname){
  const int wants_help = strcmp(arg, "help") == 0 || strcmp(arg, "h") == 0;
  if(!wants_help){
    return;
  }

  fputs("hq (html query) - commandline HTML processor © Robin Broda, 2018\n", stderr);
  fprintf(stderr, "Usage: %s [options] <selector> <mode> [mode argument]\n\n", progname);
  fputs("Options:\n", stderr);
  fputs("-h, --help\tshow this text\n", stderr);
  fputs("\n", stderr);
  fputs("<selector>\tselector to match\n", stderr);
  fputs("<mode>\t\tprocessing mode\n", stderr);
  fputs("\t\tmay be one of { data, text, attr }:\n", stderr);
  fputs("\t\tdata - return raw html of matching elements\n", stderr);
  fputs("\t\ttext - return inner text of matching elements\n", stderr);
  fputs("\t\tattr - return attribute value X of matching elements\n", stderr);
  fputs("\t\t\t[mode argument] - attribute to return\n", stderr);
  exit(EXIT_SUCCESS);
}
56+
57+
int main(int argc, const char* argv[]){
58+
if(argc == 1) opthandler("help", argv[0]);
59+
60+
size_t shifts = 0; // offset of new argv
61+
while(argc > 1){
62+
if(argv[1][0] == '-'){
63+
const char* arg = argv[1];
64+
if(arg[1] == '-'){
65+
const char* longarg = arg+2;
66+
opthandler(longarg, 0[argv-shifts]);
67+
}else{
68+
for(size_t i = 1; i < strlen(arg); i++){
69+
const char shortarg[2] = { arg[i], '0円' };
70+
opthandler(shortarg, 0[argv-shifts]);
71+
}
72+
}
73+
shifts++;
74+
argv++;
75+
argc--;
76+
}else{
77+
argv[0] = 0[argv-shifts]; // restore argv[0]
78+
break;
79+
}
80+
}
81+
82+
const char* selector;
83+
if(argc > 1){
84+
selector = argv[1];
85+
}else{
86+
fprintf(stderr, "No selector given\n");
87+
exit(EXIT_FAILURE);
88+
}
89+
90+
const char* mode;
91+
if(argc > 2){
92+
mode = argv[2];
93+
}else{
94+
fprintf(stderr, "No mode given\n");
95+
exit(EXIT_FAILURE);
96+
}
97+
98+
char* input = readeof();
99+
100+
myhtml_t* myhtml = myhtml_create();
101+
mystatus_t mystatus = myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0);
102+
if(mystatus){
103+
fprintf(stderr, "Failed to init MyHTML\n");
104+
exit(EXIT_FAILURE);
105+
}
106+
107+
myhtml_tree_t* html_tree = myhtml_tree_create();
108+
mystatus = myhtml_tree_init(html_tree, myhtml);
109+
if(mystatus){
110+
fprintf(stderr, "Failed to init MyHTML tree\n");
111+
exit(EXIT_FAILURE);
112+
}
113+
114+
mystatus = myhtml_parse(html_tree, MyENCODING_UTF_8, input, strlen(input));
115+
if(mystatus){
116+
fprintf(stderr, "Failed to parse HTML\n");
117+
exit(EXIT_FAILURE);
118+
}
119+
120+
mycss_t* mycss = mycss_create();
121+
mystatus = mycss_init(mycss);
122+
if(mystatus){
123+
fprintf(stderr, "Failed to init MyCSS\n");
124+
exit(EXIT_FAILURE);
125+
}
126+
127+
mycss_entry_t* css_entry = mycss_entry_create();
128+
mystatus = mycss_entry_init(mycss, css_entry);
129+
if(mystatus){
130+
fprintf(stderr, "Failed to init MyCSS entry\n");
131+
exit(EXIT_FAILURE);
132+
}
133+
134+
modest_finder_t* finder = modest_finder_create_simple();
135+
136+
mycss_selectors_list_t* selectors_list = mycss_selectors_parse(
137+
mycss_entry_selectors(css_entry),
138+
MyENCODING_UTF_8,
139+
selector, strlen(selector), &mystatus
140+
);
141+
142+
if(selectors_list == NULL || (selectors_list->flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD)){
143+
fprintf(stderr, "Bad selector\n");
144+
exit(EXIT_FAILURE);
145+
}
146+
147+
myhtml_collection_t* collection = NULL;
148+
modest_finder_by_selectors_list(finder, html_tree->node_html, selectors_list, &collection);
149+
150+
if(collection){
151+
for(size_t i = 0; i < collection->length; i++){
152+
if(!strcmp(mode, "text")){
153+
myhtml_serialization_tree_callback(collection->list[i]->child, serializer_log, NULL);
154+
printf("\n");
155+
}else if(!strcmp(mode, "data")){
156+
myhtml_serialization_tree_callback(collection->list[i], serializer_log, NULL);
157+
printf("\n");
158+
}else if(!strcmp(mode, "attr")){
159+
const char* attr_name;
160+
if(argc > 3){
161+
attr_name = argv[3];
162+
}else{
163+
fprintf(stderr, "No attr name given");
164+
exit(EXIT_FAILURE);
165+
}
166+
myhtml_tree_node_t* node = collection->list[i];
167+
myhtml_token_node_t* token = node->token;
168+
if(token == NULL) continue;
169+
myhtml_token_attr_t* attr = token->attr_first;
170+
if(attr == NULL) continue;
171+
172+
do{
173+
if(!strcmp(attr_name, mycore_string_data(&attr->key))){
174+
printf("%s\n", mycore_string_data(&attr->value));
175+
}
176+
if(attr != token->attr_last) attr = attr->next;
177+
}while(attr != token->attr_last);
178+
}else{
179+
fprintf(stderr, "invalid mode: '%s'\n", mode);
180+
exit(EXIT_FAILURE);
181+
}
182+
}
183+
}
184+
185+
// cleanup
186+
myhtml_collection_destroy(collection);
187+
mycss_selectors_list_destroy(mycss_entry_selectors(css_entry), selectors_list, true);
188+
modest_finder_destroy(finder, true);
189+
mycss_destroy(css_entry->mycss, true);
190+
mycss_entry_destroy(css_entry, true);
191+
myhtml_destroy(html_tree->myhtml);
192+
myhtml_tree_destroy(html_tree);
193+
free(input);
194+
return 0;
195+
}

‎meson.build‎

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Build definition for the hq command-line tool (a C project).
project('hq', 'c')

# Modest is the only external dependency; main.c includes its headers.
modest = dependency('modest')

hq_sources = ['main.c']
hq_dependencies = [modest]

# Produces the `hq` executable in the build directory.
executable('hq', hq_sources, dependencies: hq_dependencies)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /