mirror of https://github.com/ddevault/scdoc
commit
a4193949ec
@ -0,0 +1 @@ |
||||
build |
@ -0,0 +1,17 @@ |
||||
#ifndef _SCDOC_STRING_H
#define _SCDOC_STRING_H
#include <stddef.h>
#include <stdint.h>

/**
 * Growable, NUL-terminated byte string holding UTF-8 text.
 */
struct str {
	/** Heap buffer; NULL until the first character is appended. */
	char *str;
	/** Number of bytes stored, not counting the terminating NUL. */
	size_t len;
	/** Number of bytes allocated for the buffer. */
	size_t size;
};

typedef struct str str_t;

/**
 * Allocates an empty string. Returns NULL if the allocation fails. The
 * caller owns the result and releases it with str_free().
 */
str_t *str_create(void);

/**
 * Releases the string and its buffer. Passing NULL is allowed.
 */
void str_free(str_t *str);

/**
 * Truncates the string to zero length without releasing its buffer.
 */
void str_reset(str_t *str);

/**
 * Appends the code point ch to the string, encoded as UTF-8. Returns the
 * number of bytes appended, or -1 on failure.
 */
int str_append_ch(str_t *str, uint32_t ch);

#endif
@ -0,0 +1,43 @@ |
||||
#ifndef _SCDOC_UNICODE_H
#define _SCDOC_UNICODE_H
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

// The original UTF-8 design allowed sequences of up to 6 bytes, but no
// Unicode code point needs more than 4.
#define UTF8_MAX_SIZE 4

// Sentinel returned by the decoding and reading functions when no character
// could be produced. Note that the value coincides with code point U+0080.
#define UTF8_INVALID 0x80

/**
 * Decodes the UTF-8 character at *str and moves *str past it. Returns the
 * code point, or UTF8_INVALID when the leading byte is not valid.
 */
uint32_t utf8_decode(const char **str);

/**
 * Writes ch to str as UTF-8 (no NUL terminator is added) and returns the
 * number of bytes written.
 */
size_t utf8_encode(char *str, uint32_t ch);

/**
 * Returns the length in bytes of the UTF-8 character starting at str, judged
 * from its first byte, or -1 if that byte cannot start a character.
 */
int utf8_size(const char *str);

/**
 * Returns how many bytes are needed to encode ch as UTF-8.
 */
size_t utf8_chsize(uint32_t ch);

/**
 * Reads one UTF-8 character from f and returns its code point. Returns
 * UTF8_INVALID at end of file or when the character cannot be read in full.
 */
uint32_t utf8_fgetch(FILE *f);

/**
 * Encodes ch as UTF-8, writes it to f and returns the number of bytes
 * written.
 */
size_t utf8_fputch(FILE *f, uint32_t ch);

#endif
@ -0,0 +1,16 @@ |
||||
#ifndef _SCDOC_PARSER_H
#define _SCDOC_PARSER_H
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>

/**
 * Parser state: the streams being read and written, plus the current
 * position in the input used for error messages.
 */
struct parser {
	FILE *input;
	FILE *output;
	int line;
	int col;
};

/**
 * Prints err to stderr together with the current line and column, closes
 * both streams and terminates the process with exit status 1.
 */
void parser_fatal(struct parser *parser, const char *err);

/**
 * Reads the next character from the parser's input and updates the
 * line/column counters. Returns UTF8_INVALID when no character is available.
 */
uint32_t parser_getch(struct parser *parser);

/**
 * Writes the roff macro ".cmd" followed by each argument in double quotes.
 * The variadic arguments are C strings terminated by a NULL pointer.
 * Returns the number of bytes written.
 */
int roff_macro(struct parser *p, char *cmd, ...);

#endif
@ -0,0 +1,29 @@ |
||||
# TODO: Just use a makefile
project(
	'scdoc',
	'c',
	license: 'MIT',
	meson_version: '>=0.43.0',
	default_options: [
		'c_std=c99',
		'warning_level=2',
		'werror=true',
	],
)

add_project_arguments('-Wno-unused-parameter', language: 'c')

# Translation units that make up the scdoc executable.
scdoc_sources = [
	'src/main.c',
	'src/string.c',
	'src/utf8_chsize.c',
	'src/utf8_decode.c',
	'src/utf8_encode.c',
	'src/utf8_fgetch.c',
	'src/utf8_fputch.c',
	'src/utf8_size.c',
	'src/util.c',
]

# Project headers live in include/.
scdoc_includes = include_directories('include')

executable(
	'scdoc',
	scdoc_sources,
	include_directories: scdoc_includes
)
@ -0,0 +1,80 @@ |
||||
scdoc(5) |
||||
|
||||
# NAME |
||||
|
||||
scdoc - syntax description for scdoc markup language |
||||
|
||||
# DESCRIPTION |
||||
|
||||
scdoc is a tool designed to make the process of writing man pages more |
||||
friendly. It converts scdoc files into roff macros, which can then be converted |
||||
to man pages or a number of other formats. The syntax is inspired by, but not |
||||
directly taken from, markdown. Input files *must* use the UTF-8 encoding. |
||||
|
||||
# PREAMBLE |
||||
|
||||
Each scdoc file must begin with the following preamble: |
||||
|
||||
*name*(_section_) |
||||
|
||||
The *name* is the name of the man page you are writing, and _section_ is the |
||||
section you're writing for (see *man*(1) for information on manual sections). |
||||
|
||||
# SECTION HEADERS |
||||
|
||||
Each section of your man page should begin with something similar to the |
||||
following: |
||||
|
||||
# HEADER NAME |
||||
|
||||
Subsection headers are also understood - use two hashes. Each header must have |
||||
an empty line on either side. |
||||
|
||||
# PARAGRAPHS |
||||
|
||||
Begin a new paragraph with an empty line. |
||||
|
||||
# FORMATTING |
||||
|
||||
Text can be made *bold* or _underlined_ with asterisks and underscores: \*bold\* |
||||
or \_underlined\_. |
||||
|
||||
# INDENTATION |
||||
|
||||
You may indent lines with tab characters ("\t") to indent them by 4 spaces in |
||||
the output. Indented lines may not contain headers. |
||||
|
||||
# LISTS |
||||
|
||||
You may start bulleted lists with dashes, like so: |
||||
|
||||
``` |
||||
- Item 1 |
||||
- Item 2 |
||||
- Item 3 |
||||
``` |
||||
|
||||
You may also use numbered lists like so: |
||||
|
||||
``` |
||||
1. Item 1 |
||||
2. Item 2 |
||||
3. Item 3 |
||||
``` |
||||
|
||||
# LITERAL TEXT |
||||
|
||||
You may turn off scdoc formatting and output literal text with escape codes and |
||||
literal blocks. Inserting a \\ into your source will cause the subsequent symbol |
||||
to be treated as a literal and copied directly to the output. You may also make |
||||
blocks of literal syntax like so: |
||||
|
||||
``` |
||||
\`\`\` |
||||
_This formatting_ will *not* be interpreted by scdoc. |
||||
\`\`\` |
||||
``` |
||||
|
||||
These blocks will be indented one level. Note that literal text is shown |
||||
literally in the man viewer - that is, it's not a means for inserting your own |
||||
roff macros into the output. |
@ -0,0 +1,95 @@ |
||||
#include <assert.h> |
||||
#include <ctype.h> |
||||
#include <stdio.h> |
||||
#include <stdlib.h> |
||||
#include <time.h> |
||||
#include <unistd.h> |
||||
#include "string.h" |
||||
#include "unicode.h" |
||||
#include "util.h" |
||||
|
||||
// Date string passed to the TH macro; main() fills it in with strftime().
char date[256] = { 0 };
||||
|
||||
static int parse_section(struct parser *p) { |
||||
str_t *section = str_create(); |
||||
uint32_t ch; |
||||
while ((ch = parser_getch(p)) != UTF8_INVALID) { |
||||
if (isdigit(ch)) { |
||||
assert(str_append_ch(section, ch) != -1); |
||||
} else if (ch == ')') { |
||||
if (!section->str) { |
||||
break; |
||||
} |
||||
int sec = strtol(section->str, NULL, 10); |
||||
if (sec < 1 || sec > 9) { |
||||
parser_fatal(p, "Expected section between 1 and 9"); |
||||
break; |
||||
} |
||||
str_free(section); |
||||
return sec; |
||||
} else { |
||||
parser_fatal(p, "Expected digit or )"); |
||||
break; |
||||
} |
||||
}; |
||||
parser_fatal(p, "Expected manual section"); |
||||
return -1; |
||||
} |
||||
|
||||
static void parse_preamble(struct parser *p) { |
||||
str_t *name = str_create(); |
||||
int section = -1; |
||||
uint32_t ch; |
||||
do { |
||||
ch = parser_getch(p); |
||||
if (isalnum(ch)) { |
||||
assert(str_append_ch(name, ch) != -1); |
||||
} else if (ch == '(') { |
||||
section = parse_section(p); |
||||
} else if (ch == '\n') { |
||||
if (name->len == 0) { |
||||
parser_fatal(p, "Expected preamble"); |
||||
} |
||||
if (section == -1) { |
||||
parser_fatal(p, "Expected manual section"); |
||||
} |
||||
char sec[2] = { '0' + section, 0 }; |
||||
roff_macro(p, "TH", name->str, sec, date, NULL); |
||||
break; |
||||
} |
||||
} while (ch != UTF8_INVALID); |
||||
str_free(name); |
||||
} |
||||
|
||||
static void output_preamble(struct parser *p) { |
||||
// TODO: Add version here
|
||||
fprintf(p->output, ".\\\" Generated by scdoc\n"); |
||||
fprintf(p->output, ".\\\" Fix weird qutation marks:\n"); |
||||
fprintf(p->output, ".\\\" http://bugs.debian.org/507673\n"); |
||||
fprintf(p->output, ".\\\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html\n"); |
||||
fprintf(p->output, ".ie \\n(.g .ds Aq \\(aq\n"); |
||||
fprintf(p->output, ".el .ds Aq '\n"); |
||||
fprintf(p->output, ".\\\" Disable hyphenation:\n"); |
||||
roff_macro(p, "nh", NULL); |
||||
fprintf(p->output, ".\\\" Generated content:\n"); |
||||
} |
||||
|
||||
int main(int argc, char **argv) { |
||||
if (argc > 1) { |
||||
fprintf(stderr, "Usage: scdoc < input.scd > output.roff"); |
||||
return 1; |
||||
} |
||||
time_t now; |
||||
time(&now); |
||||
struct tm *now_tm = localtime(&now); |
||||
strftime(date, sizeof(date), "%F", now_tm); |
||||
struct parser p = { |
||||
.input = stdin, |
||||
.output = stdout, |
||||
.line = 1, |
||||
.col = 1 |
||||
}; |
||||
output_preamble(&p); |
||||
parse_preamble(&p); |
||||
return 0; |
||||
} |
@ -0,0 +1,55 @@ |
||||
#include <stdlib.h> |
||||
#include <stdint.h> |
||||
#include "string.h" |
||||
#include "unicode.h" |
||||
|
||||
/**
 * Lazily allocates the buffer of a string that has none yet and makes it an
 * empty string. If the allocation fails the string is left without a buffer
 * (str->str stays NULL) instead of dereferencing the failed allocation.
 */
static void sanity_check(str_t *str) {
	if (str->str != NULL) {
		return;
	}
	char *buf = malloc(16);
	if (buf == NULL) {
		return;
	}
	buf[0] = '\0';
	str->str = buf;
	str->size = 16;
	str->len = 0;
}
||||
|
||||
/**
 * Makes sure the buffer can hold len bytes plus the terminating NUL, growing
 * it by doubling as often as needed. Returns 1 on success and 0 if the
 * buffer could not be grown; on failure the string is left unchanged.
 */
static int ensure_capacity(str_t *str, size_t len) {
	if (len == SIZE_MAX) {
		// len + 1 would wrap around to 0
		return 0;
	}
	// Start from 16 when nothing is allocated yet: doubling 0 never grows.
	size_t new_size = str->size ? str->size : 16;
	while (len + 1 >= new_size) {
		if (new_size > SIZE_MAX / 2) {
			return 0;
		}
		new_size *= 2;
	}
	if (new_size == str->size) {
		return 1;
	}
	char *grown = realloc(str->str, new_size);
	if (!grown) {
		return 0;
	}
	str->str = grown;
	str->size = new_size;
	return 1;
}
||||
|
||||
str_t *str_create() { |
||||
return calloc(sizeof(str_t), 1); |
||||
} |
||||
|
||||
/**
 * Frees the character buffer and then the string object itself.
 * A NULL argument is ignored.
 */
void str_free(str_t *str) {
	if (str != NULL) {
		free(str->str);
		free(str);
	}
}
||||
|
||||
/**
 * Truncates the string to zero length. An existing buffer is kept and made
 * to hold the empty string.
 */
void str_reset(str_t *str) {
	str->len = 0;
	// A string that was never appended to has no buffer yet.
	if (str->str != NULL) {
		str->str[0] = '\0';
	}
}
||||
|
||||
/**
 * Appends the code point ch to the string as UTF-8 and keeps the buffer
 * NUL-terminated. Returns the number of bytes appended, or -1 if str is NULL
 * or memory could not be allocated.
 */
int str_append_ch(str_t *str, uint32_t ch) {
	if (str == NULL) {
		return -1;
	}
	size_t size = utf8_chsize(ch);
	if (size == 0) {
		return -1;
	}
	sanity_check(str);
	if (str->str == NULL) {
		// the initial buffer could not be allocated
		return -1;
	}
	if (!ensure_capacity(str, str->len + size)) {
		return -1;
	}
	utf8_encode(&str->str[str->len], ch);
	str->len += size;
	str->str[str->len] = '\0';
	return (int)size;
}
@ -0,0 +1,14 @@ |
||||
#include <stdint.h> |
||||
#include <stddef.h> |
||||
#include "unicode.h" |
||||
|
||||
/**
 * Number of bytes needed to encode ch as UTF-8: 1 below 0x80, 2 below 0x800,
 * 3 below 0x10000 and 4 for everything else.
 */
size_t utf8_chsize(uint32_t ch) {
	size_t octets = 1;
	if (ch >= 0x80) {
		++octets;
	}
	if (ch >= 0x800) {
		++octets;
	}
	if (ch >= 0x10000) {
		++octets;
	}
	return octets;
}
@ -0,0 +1,38 @@ |
||||
#include <stdint.h> |
||||
#include <stddef.h> |
||||
#include "unicode.h" |
||||
|
||||
uint8_t masks[] = { |
||||
0x7F, |
||||
0x1F, |
||||
0x0F, |
||||
0x07, |
||||
0x03, |
||||
0x01 |
||||
}; |
||||
|
||||
uint32_t utf8_decode(const char **char_str) { |
||||
uint8_t **s = (uint8_t **)char_str; |
||||
|
||||
uint32_t cp = 0; |
||||
if (**s < 128) { |
||||
// shortcut
|
||||
cp = **s; |
||||
++*s; |
||||
return cp; |
||||
} |
||||
int size = utf8_size((char *)*s); |
||||
if (size == -1) { |
||||
++*s; |
||||
return UTF8_INVALID; |
||||
} |
||||
uint8_t mask = masks[size - 1]; |
||||
cp = **s & mask; |
||||
++*s; |
||||
while (--size) { |
||||
cp <<= 6; |
||||
cp |= **s & 0x3f; |
||||
++*s; |
||||
} |
||||
return cp; |
||||
} |
@ -0,0 +1,30 @@ |
||||
#include <stdint.h> |
||||
#include <stddef.h> |
||||
#include "unicode.h" |
||||
|
||||
/**
 * Encodes ch as UTF-8 into str and returns the number of bytes written
 * (1 to 4). No NUL terminator is written.
 */
size_t utf8_encode(char *str, uint32_t ch) {
	size_t len;
	uint8_t lead;

	// Pick the sequence length and the marker bits of the leading byte.
	if (ch >= 0x10000) {
		len = 4;
		lead = 0xf0;
	} else if (ch >= 0x800) {
		len = 3;
		lead = 0xe0;
	} else if (ch >= 0x80) {
		len = 2;
		lead = 0xc0;
	} else {
		len = 1;
		lead = 0;
	}

	// Fill the continuation bytes from the end, six bits at a time.
	size_t pos = len;
	while (--pos > 0) {
		str[pos] = (ch & 0x3f) | 0x80;
		ch >>= 6;
	}

	// Whatever bits remain go into the leading byte.
	str[0] = ch | lead;
	return len;
}
@ -0,0 +1,21 @@ |
||||
#include <stdint.h> |
||||
#include <stdio.h> |
||||
#include "unicode.h" |
||||
|
||||
uint32_t utf8_fgetch(FILE *f) { |
||||
char buffer[UTF8_MAX_SIZE]; |
||||
int c = fgetc(f); |
||||
if (c == EOF) { |
||||
return UTF8_INVALID; |
||||
} |
||||
buffer[0] = (char)c; |
||||
int size = utf8_size(buffer); |
||||
if (size > 1) { |
||||
int amt = fread(&buffer[1], 1, size - 1, f); |
||||
if (amt != size - 1) { |
||||
return UTF8_INVALID; |
||||
} |
||||
} |
||||
const char *ptr = buffer; |
||||
return utf8_decode(&ptr); |
||||
} |
@ -0,0 +1,10 @@ |
||||
#include <stdint.h> |
||||
#include <stdio.h> |
||||
#include "unicode.h" |
||||
|
||||
size_t utf8_fputch(FILE *f, uint32_t ch) { |
||||
char buffer[UTF8_MAX_SIZE]; |
||||
char *ptr = buffer; |
||||
size_t size = utf8_encode(ptr, ch); |
||||
return fwrite(&buffer, 1, size, f); |
||||
} |
@ -0,0 +1,27 @@ |
||||
#include <stdint.h> |
||||
#include <stddef.h> |
||||
#include "unicode.h" |
||||
|
||||
// Leading-byte patterns: a byte b starts a sequence of `octets` bytes when
// (b & mask) == result. Only the 1- to 4-byte forms are listed; longer forms
// are not valid UTF-8 (RFC 3629) and would not fit the UTF8_MAX_SIZE buffers
// used by the callers.
static const struct {
	uint8_t mask;
	uint8_t result;
	int octets;
} sizes[] = {
	{ 0x80, 0x00, 1 },
	{ 0xE0, 0xC0, 2 },
	{ 0xF0, 0xE0, 3 },
	{ 0xF8, 0xF0, 4 },
};

/**
 * Returns the length in bytes (1 to 4) of the UTF-8 character that starts
 * with the byte at s, or -1 if that byte cannot start a character (a
 * continuation byte, or a lead byte announcing more than 4 bytes).
 */
int utf8_size(const char *s) {
	uint8_t c = (uint8_t)*s;
	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) {
		if ((c & sizes[i].mask) == sizes[i].result) {
			return sizes[i].octets;
		}
	}
	return -1;
}
@ -0,0 +1,50 @@ |
||||
#include <stdarg.h> |
||||
#include <stdlib.h> |
||||
#include <stdint.h> |
||||
#include <stdio.h> |
||||
#include "unicode.h" |
||||
#include "util.h" |
||||
|
||||
void parser_fatal(struct parser *parser, const char *err) { |
||||
fprintf(stderr, "Error at %d:%d: %s\n", |
||||
parser->line, parser->col, err); |
||||
fclose(parser->input); |
||||
fclose(parser->output); |
||||
exit(1); |
||||
} |
||||
|
||||
uint32_t parser_getch(struct parser *parser) { |
||||
uint32_t ch = utf8_fgetch(parser->input); |
||||
if (ch == '\n') { |
||||
parser->col = 0; |
||||
++parser->line; |
||||
} else { |
||||
++parser->col; |
||||
} |
||||
return ch; |
||||
} |
||||
|
||||
int roff_macro(struct parser *p, char *cmd, ...) { |
||||
FILE *f = p->output; |
||||
int l = fprintf(f, ".%s", cmd); |
||||
va_list ap; |
||||
va_start(ap, cmd); |
||||
const char *arg; |
||||
while ((arg = va_arg(ap, const char *))) { |
||||
fputc(' ', f); |
||||
fputc('"', f); |
||||
while (*arg) { |
||||
uint32_t ch = utf8_decode(&arg); |
||||
if (ch == '"') { |
||||
fputc('\\', f); |
||||
++l; |
||||
} |
||||
l += utf8_fputch(f, ch); |
||||
} |
||||
fputc('"', f); |
||||
l += 3; |
||||
} |
||||
va_end(ap); |
||||
fputc('\n', f); |
||||
return l + 1; |
||||
} |
Loading…
Reference in new issue