Initial commit

master
Drew DeVault 7 years ago
commit a4193949ec
  1. 1
      .gitignore
  2. 17
      include/string.h
  3. 43
      include/unicode.h
  4. 16
      include/util.h
  5. 29
      meson.build
  6. 80
      scdoc.5.scd
  7. 95
      src/main.c
  8. 55
      src/string.c
  9. 14
      src/utf8_chsize.c
  10. 38
      src/utf8_decode.c
  11. 30
      src/utf8_encode.c
  12. 21
      src/utf8_fgetch.c
  13. 10
      src/utf8_fputch.c
  14. 27
      src/utf8_size.c
  15. 50
      src/util.c

1
.gitignore vendored

@ -0,0 +1 @@
build

@ -0,0 +1,17 @@
#ifndef _SCDOC_STRING_H
#define _SCDOC_STRING_H
#include <stddef.h>
#include <stdint.h>

/**
 * Growable, NUL-terminated byte string holding UTF-8 text.
 *
 * <stddef.h> is included explicitly because size_t is not guaranteed to be
 * declared by <stdint.h>.
 */
struct str {
	char *str;   /* heap buffer; NULL until the first append allocates it */
	size_t len;  /* bytes in use, not counting the trailing NUL */
	size_t size; /* bytes allocated for the buffer */
};
typedef struct str str_t;
/**
 * Allocates a new, zero-initialized string object. Returns NULL if the
 * allocation fails. Release it with str_free().
 */
str_t *str_create();
/**
 * Frees the string's buffer and the string object itself. Passing NULL is a
 * no-op.
 */
void str_free(str_t *str);
/**
 * Empties the string: the length becomes zero and the buffer is kept for
 * reuse.
 */
void str_reset(str_t *str);
/**
 * Appends the UTF-8 encoding of the code point ch and keeps the buffer
 * NUL-terminated. Returns the number of bytes appended, or -1 on failure.
 */
int str_append_ch(str_t *str, uint32_t ch);
#endif

@ -0,0 +1,43 @@
#ifndef _SCDOC_UNICODE_H
#define _SCDOC_UNICODE_H
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
// Technically UTF-8 supports up to 6 byte codepoints, but Unicode itself
// doesn't really bother with more than 4.
#define UTF8_MAX_SIZE 4
// Sentinel returned by the decoding and reading functions below when they
// cannot produce a character.
#define UTF8_INVALID 0x80
/**
 * Grabs the next UTF-8 character and advances the string pointer past it.
 * Returns UTF8_INVALID (after advancing by one byte) if the first byte cannot
 * start a character.
 */
uint32_t utf8_decode(const char **str);
/**
 * Encodes a character as UTF-8 into str and returns the length of that
 * character in bytes. Between 1 and 4 bytes are written and no NUL terminator
 * is added.
 */
size_t utf8_encode(char *str, uint32_t ch);
/**
 * Returns the size in bytes of the next UTF-8 character, judged from its first
 * byte only, or -1 if that byte cannot start a character.
 */
int utf8_size(const char *str);
/**
 * Returns the number of bytes (1 to 4) needed to encode ch as UTF-8.
 */
size_t utf8_chsize(uint32_t ch);
/**
 * Reads and returns the next character from the file. Returns UTF8_INVALID at
 * end of file or when the character is cut short.
 */
uint32_t utf8_fgetch(FILE *f);
/**
 * Writes this character to the file and returns the number of bytes written.
 */
size_t utf8_fputch(FILE *f, uint32_t ch);
#endif

@ -0,0 +1,16 @@
#ifndef _SCDOC_PARSER_H
#define _SCDOC_PARSER_H
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
/**
 * State shared by the parsing and output helpers.
 */
struct parser {
// Stream the document is read from and stream the roff output is written to.
FILE *input, *output;
// Position of the reader within the input; reported in error messages.
int line, col;
};
/**
 * Prints err to stderr prefixed with the current line and column, closes both
 * streams and exits with status 1. Does not return.
 */
void parser_fatal(struct parser *parser, const char *err);
/**
 * Reads the next character from the input and updates line/col. Returns
 * whatever utf8_fgetch() returns, including UTF8_INVALID.
 */
uint32_t parser_getch(struct parser *parser);
/**
 * Writes the roff request ".cmd" to the output, followed by each argument in
 * double quotes and a newline. The variadic arguments are C strings and the
 * list must end with a NULL pointer. Returns a count of the bytes written.
 */
int roff_macro(struct parser *p, char *cmd, ...);
#endif

@ -0,0 +1,29 @@
# Build definition for the scdoc executable.
# TODO: Just use a makefile
project(
'scdoc',
'c',
license: 'MIT',
meson_version: '>=0.43.0',
default_options: [
# Build as C99 with extra warnings enabled and treated as errors.
'c_std=c99',
'warning_level=2',
'werror=true',
],
)
# Unused parameters are tolerated despite the warning level above.
add_project_arguments('-Wno-unused-parameter', language: 'c')
# A single binary built from the sources listed below.
executable(
'scdoc', [
'src/main.c',
'src/string.c',
'src/utf8_chsize.c',
'src/utf8_decode.c',
'src/utf8_encode.c',
'src/utf8_fgetch.c',
'src/utf8_fputch.c',
'src/utf8_size.c',
'src/util.c',
],
# Project headers are looked up in include/.
include_directories: include_directories('include')
)

@ -0,0 +1,80 @@
scdoc(5)
# NAME
scdoc - syntax description for scdoc markup language
# DESCRIPTION
scdoc is a tool designed to make the process of writing man pages more
friendly. It converts scdoc files into roff macros, which can then be converted
to man pages or a number of other formats. The syntax is inspired by, but not
directly taken from, Markdown. Input files *must* use the UTF-8 encoding.
# PREAMBLE
Each scdoc file must begin with the following preamble:
*name*(_section_)
The *name* is the name of the man page you are writing, and _section_ is the
section you're writing for (see *man*(1) for information on manual sections).
# SECTION HEADERS
Each section of your man page should begin with something similar to the
following:
# HEADER NAME
Subsection headers are also understood - use two hashes. Each header must have
an empty line on either side.
# PARAGRAPHS
Begin a new paragraph with an empty line.
# FORMATTING
Text can be made *bold* or _underlined_ with asterisks and underscores: \*bold\*
or \_underlined\_.
# INDENTATION
You may indent lines with tab characters ("\t") to indent them by 4 spaces in
the output. Indented lines may not contain headers.
# LISTS
You may start bulleted lists with dashes, like so:
```
- Item 1
- Item 2
- Item 3
```
You may also use numbered lists like so:
```
1. Item 1
2. Item 2
3. Item 3
```
# LITERAL TEXT
You may turn off scdoc formatting and output literal text with escape codes and
literal blocks. Inserting a \\ into your source will cause the subsequent symbol
to be treated as a literal and copied directly to the output. You may also make
blocks of literal syntax like so:
```
\`\`\`
_This formatting_ will *not* be interpreted by scdoc.
\`\`\`
```
These blocks will be indented one level. Note that literal text is shown
literally in the man viewer - that is, it's not a means for inserting your own
roff macros into the output.

@ -0,0 +1,95 @@
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include "string.h"
#include "unicode.h"
#include "util.h"
// Date string passed as the last argument of the .TH macro. main() fills it in
// with strftime() using the "%F" format before any parsing starts.
char date[256];
/**
 * Parses the manual section number that follows the '(' of the preamble.
 *
 * Reads characters up to the closing ')' and returns the section, which must
 * be between 1 and 9. Every other outcome is reported through parser_fatal(),
 * which terminates the program; the trailing "return -1" only exists to keep
 * the compiler satisfied.
 */
static int parse_section(struct parser *p) {
	str_t *section = str_create();
	if (!section) {
		parser_fatal(p, "Out of memory");
	}
	uint32_t ch;
	while ((ch = parser_getch(p)) != UTF8_INVALID) {
		// <ctype.h> functions are only defined for unsigned char values, so
		// non-ASCII code points are filtered out before calling isdigit().
		if (ch < 0x80 && isdigit((int)ch)) {
			// The append must not live inside assert(): the whole call
			// would vanish from builds compiled with NDEBUG.
			if (str_append_ch(section, ch) == -1) {
				parser_fatal(p, "Out of memory");
			}
		} else if (ch == ')') {
			if (!section->str) {
				// "()" - no digit was ever appended.
				break;
			}
			// Keep the full long so an absurdly long digit string cannot
			// wrap around into the valid range when narrowed.
			long sec = strtol(section->str, NULL, 10);
			str_free(section);
			if (sec < 1 || sec > 9) {
				parser_fatal(p, "Expected section between 1 and 9");
			}
			return (int)sec;
		} else {
			parser_fatal(p, "Expected digit or )");
		}
	}
	str_free(section);
	parser_fatal(p, "Expected manual section");
	return -1;
}
/**
 * Parses the "name(section)" preamble line and emits the matching .TH macro.
 *
 * ASCII letters and digits are collected into the page name, '(' hands over to
 * parse_section(), and the newline ends the preamble. Any other character is
 * skipped. A missing name or section is fatal. If the input ends before a
 * newline is seen, nothing is emitted.
 */
static void parse_preamble(struct parser *p) {
	str_t *name = str_create();
	if (!name) {
		parser_fatal(p, "Out of memory");
	}
	int section = -1;
	uint32_t ch;
	do {
		ch = parser_getch(p);
		// <ctype.h> functions are only defined for unsigned char values, so
		// non-ASCII code points are filtered out before calling isalnum().
		if (ch < 0x80 && isalnum((int)ch)) {
			// The append must not live inside assert(): the whole call
			// would vanish from builds compiled with NDEBUG.
			if (str_append_ch(name, ch) == -1) {
				parser_fatal(p, "Out of memory");
			}
		} else if (ch == '(') {
			section = parse_section(p);
		} else if (ch == '\n') {
			if (name->len == 0) {
				parser_fatal(p, "Expected preamble");
			}
			if (section == -1) {
				parser_fatal(p, "Expected manual section");
			}
			// section is a single digit between 1 and 9 at this point.
			char sec[2] = { (char)('0' + section), '\0' };
			// The sentinel is cast so that a pointer-sized null is passed
			// through the variadic call even where NULL is a plain 0.
			roff_macro(p, "TH", name->str, sec, date, (char *)NULL);
			break;
		}
	} while (ch != UTF8_INVALID);
	str_free(name);
}
/**
 * Writes the fixed roff header that precedes the generated content: a few
 * comment lines, the definition of the Aq string, and the .nh request that
 * turns hyphenation off.
 */
static void output_preamble(struct parser *p) {
	// TODO: Add version here
	static const char *const header[] = {
		".\\\" Generated by scdoc\n",
		".\\\" Fix weird qutation marks:\n",
		".\\\" http://bugs.debian.org/507673\n",
		".\\\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html\n",
		".ie \\n(.g .ds Aq \\(aq\n",
		".el .ds Aq '\n",
		".\\\" Disable hyphenation:\n",
	};
	FILE *out = p->output;
	for (size_t i = 0; i < sizeof(header) / sizeof(header[0]); ++i) {
		fputs(header[i], out);
	}
	roff_macro(p, "nh", NULL);
	fputs(".\\\" Generated content:\n", out);
}
/**
 * Entry point: converts the scdoc document on stdin to roff on stdout.
 *
 * The program takes no arguments; any argument prints the usage line and
 * fails. Returns 0 on success and 1 on a usage or date error (parse errors
 * exit from within parser_fatal()).
 */
int main(int argc, char **argv) {
	if (argc > 1) {
		fprintf(stderr, "Usage: scdoc < input.scd > output.roff\n");
		return 1;
	}
	// localtime() may return NULL, and handing that to strftime() is
	// undefined behavior, so both steps are checked.
	time_t now = time(NULL);
	struct tm *now_tm = localtime(&now);
	if (!now_tm || strftime(date, sizeof(date), "%F", now_tm) == 0) {
		fprintf(stderr, "Unable to determine the current date\n");
		return 1;
	}
	struct parser p = {
		.input = stdin,
		.output = stdout,
		.line = 1,
		.col = 1
	};
	output_preamble(&p);
	parse_preamble(&p);
	return 0;
}

@ -0,0 +1,55 @@
#include <stdlib.h>
#include <stdint.h>
#include "string.h"
#include "unicode.h"
/**
 * Lazily allocates the buffer of a string that has never been written to.
 *
 * On success the string owns an empty, NUL-terminated 16-byte buffer. If the
 * allocation fails the string is left untouched (str->str stays NULL) instead
 * of being dereferenced.
 */
static void sanity_check(str_t *str) {
	if (str->str != NULL) {
		return;
	}
	char *buf = malloc(16);
	if (buf == NULL) {
		return;
	}
	buf[0] = '\0';
	str->str = buf;
	str->size = 16;
	str->len = 0;
}
/**
 * Makes sure the buffer can hold len bytes plus the NUL terminator.
 *
 * The capacity is doubled until the request fits, starting from 16 bytes when
 * the string has no capacity yet. Returns 1 on success and 0 when the buffer
 * cannot be grown; in that case the string is left unchanged.
 */
static int ensure_capacity(str_t *str, size_t len) {
	if (len == SIZE_MAX) {
		// len + 1 would wrap around to zero.
		return 0;
	}
	if (len + 1 < str->size) {
		return 1;
	}
	size_t new_size = str->size ? str->size : 16;
	while (len + 1 >= new_size) {
		if (new_size > SIZE_MAX / 2) {
			return 0;
		}
		new_size *= 2;
	}
	// Keep the old pointer until realloc() is known to have succeeded.
	char *grown = realloc(str->str, new_size);
	if (!grown) {
		return 0;
	}
	str->str = grown;
	str->size = new_size;
	return 1;
}
/**
 * Allocates a new string object with every field zeroed, i.e. without a
 * buffer. Returns NULL if the allocation fails.
 */
str_t *str_create(void) {
	// calloc() takes the element count first and the element size second.
	return calloc(1, sizeof(str_t));
}
/**
 * Releases the buffer owned by str and then str itself. A NULL str is ignored.
 */
void str_free(str_t *str) {
	if (str != NULL) {
		free(str->str);
		free(str);
	}
}
/**
 * Empties the string without releasing its buffer.
 */
void str_reset(str_t *str) {
	str->len = 0;
	// The buffer is only allocated by the first append, so a string that was
	// just created has nothing to terminate yet.
	if (str->str != NULL) {
		str->str[0] = '\0';
	}
}
/**
 * Appends the UTF-8 encoding of the code point ch to str, allocating or
 * growing the buffer as needed and keeping it NUL-terminated.
 *
 * Returns the number of bytes appended, or -1 if str is NULL or no memory
 * could be obtained; the string is unchanged on failure.
 */
int str_append_ch(str_t *str, uint32_t ch) {
	if (!str) {
		return -1;
	}
	size_t size = utf8_chsize(ch);
	if (size == 0) {
		return -1;
	}
	sanity_check(str);
	if (!str->str) {
		// Still no buffer to write into.
		return -1;
	}
	if (!ensure_capacity(str, str->len + size)) {
		return -1;
	}
	utf8_encode(&str->str[str->len], ch);
	str->len += size;
	str->str[str->len] = '\0';
	return (int)size;
}

@ -0,0 +1,14 @@
#include <stdint.h>
#include <stddef.h>
#include "unicode.h"
/**
 * Returns how many bytes the UTF-8 encoding of ch occupies (1 to 4).
 */
size_t utf8_chsize(uint32_t ch) {
	// Each threshold the code point reaches costs one more byte.
	size_t bytes = 1;
	if (ch >= 0x80) {
		++bytes;
	}
	if (ch >= 0x800) {
		++bytes;
	}
	if (ch >= 0x10000) {
		++bytes;
	}
	return bytes;
}

@ -0,0 +1,38 @@
#include <stdint.h>
#include <stddef.h>
#include "unicode.h"
// Payload bits carried by the lead byte, indexed by (sequence length - 1).
static const uint8_t masks[] = {
	0x7F,
	0x1F,
	0x0F,
	0x07,
	0x03,
	0x01
};

/**
 * Decodes the UTF-8 character at *char_str and advances *char_str past the
 * bytes that were consumed.
 *
 * Returns the code point, or UTF8_INVALID when the input is malformed:
 * - if the first byte cannot start a character, one byte is consumed;
 * - if a continuation byte is missing, decoding stops in front of the
 *   offending byte. The string's NUL terminator is never a continuation byte,
 *   so the pointer cannot be moved past the end of a truncated string.
 */
uint32_t utf8_decode(const char **char_str) {
	// Work on a local cursor and write it back, rather than aliasing the
	// caller's "const char *" through a differently typed pointer.
	const unsigned char *s = (const unsigned char *)*char_str;
	uint32_t cp;
	if (*s < 0x80) {
		// shortcut for ASCII
		cp = *s++;
		*char_str = (const char *)s;
		return cp;
	}
	int size = utf8_size((const char *)s);
	if (size < 1 || (size_t)size > sizeof(masks) / sizeof(masks[0])) {
		*char_str = (const char *)(s + 1);
		return UTF8_INVALID;
	}
	cp = (uint32_t)(*s++ & masks[size - 1]);
	while (--size) {
		if ((*s & 0xC0) != 0x80) {
			*char_str = (const char *)s;
			return UTF8_INVALID;
		}
		cp = (cp << 6) | (uint32_t)(*s++ & 0x3F);
	}
	*char_str = (const char *)s;
	return cp;
}

@ -0,0 +1,30 @@
#include <stdint.h>
#include <stddef.h>
#include "unicode.h"
/**
 * Writes the UTF-8 encoding of ch to str and returns the number of bytes
 * written (1 to 4). No NUL terminator is added.
 */
size_t utf8_encode(char *str, uint32_t ch) {
	// Marker bits of the lead byte, indexed by (sequence length - 1).
	static const uint8_t lead_bits[] = { 0x00, 0xc0, 0xe0, 0xf0 };
	size_t len = 4;
	if (ch < 0x80) {
		len = 1;
	} else if (ch < 0x800) {
		len = 2;
	} else if (ch < 0x10000) {
		len = 3;
	}
	// Fill in the continuation bytes from the last one back to the second,
	// six payload bits at a time.
	for (char *out = str + len - 1; out != str; --out) {
		*out = (ch & 0x3f) | 0x80;
		ch >>= 6;
	}
	// Whatever is left of the code point goes into the lead byte.
	*str = ch | lead_bits[len - 1];
	return len;
}

@ -0,0 +1,21 @@
#include <stdint.h>
#include <stdio.h>
#include "unicode.h"
/**
 * Reads one UTF-8 character from f and returns its code point.
 *
 * Returns UTF8_INVALID at end of file, when the stream ends in the middle of
 * a character, or when the lead byte announces a sequence longer than
 * UTF8_MAX_SIZE bytes.
 */
uint32_t utf8_fgetch(FILE *f) {
	char buffer[UTF8_MAX_SIZE];
	int c = fgetc(f);
	if (c == EOF) {
		return UTF8_INVALID;
	}
	buffer[0] = (char)c;
	int size = utf8_size(buffer);
	if (size > UTF8_MAX_SIZE) {
		// A longer sequence would be read past the end of the buffer.
		return UTF8_INVALID;
	}
	if (size > 1) {
		size_t want = (size_t)size - 1;
		if (fread(&buffer[1], 1, want, f) != want) {
			return UTF8_INVALID;
		}
	}
	// A size of -1 (stray continuation byte) falls through on purpose:
	// utf8_decode() reports it as UTF8_INVALID.
	const char *ptr = buffer;
	return utf8_decode(&ptr);
}

@ -0,0 +1,10 @@
#include <stdint.h>
#include <stdio.h>
#include "unicode.h"
/**
 * Encodes ch as UTF-8 and writes it to f. Returns the number of bytes that
 * fwrite() reports as written.
 */
size_t utf8_fputch(FILE *f, uint32_t ch) {
	char encoded[UTF8_MAX_SIZE];
	const size_t len = utf8_encode(encoded, ch);
	return fwrite(encoded, 1, len, f);
}

@ -0,0 +1,27 @@
#include <stdint.h>
#include <stddef.h>
#include "unicode.h"
// Lead-byte patterns in match order: a byte belongs to the first row for
// which (byte & mask) == result.
static const struct {
	uint8_t mask;
	uint8_t result;
	int octets;
} sizes[] = {
	{ 0x80, 0x00, 1 },
	{ 0xE0, 0xC0, 2 },
	{ 0xF0, 0xE0, 3 },
	{ 0xF8, 0xF0, 4 },
	{ 0xFC, 0xF8, 5 },
	// NOTE(review): this row can never match - the only bytes it accepts
	// (0xF8, 0xF9) are already claimed by the 5-byte row above, so 0xFC and
	// 0xFD fall through to -1. The result was presumably meant to be 0xFC.
	// It is left as-is so that no caller starts seeing 6-byte sequences.
	{ 0xFE, 0xF8, 6 },
	// Catch-all for every remaining byte with the high bit set.
	{ 0x80, 0x80, -1 },
};

/**
 * Returns the length in bytes of the UTF-8 character starting at s, judged
 * from its first byte only, or -1 if that byte cannot start a character.
 */
int utf8_size(const char *s) {
	uint8_t c = (uint8_t)*s;
	// Iterate over the number of rows, not a fraction of the byte size.
	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); ++i) {
		if ((c & sizes[i].mask) == sizes[i].result) {
			return sizes[i].octets;
		}
	}
	return -1;
}

@ -0,0 +1,50 @@
#include <stdarg.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include "unicode.h"
#include "util.h"
/**
 * Reports err on stderr together with the parser's current line and column,
 * closes the input and output streams, and exits with status 1.
 */
void parser_fatal(struct parser *parser, const char *err) {
	const int line = parser->line;
	const int col = parser->col;
	fprintf(stderr, "Error at %d:%d: %s\n", line, col, err);
	// Close both streams before leaving.
	fclose(parser->input);
	fclose(parser->output);
	exit(1);
}
/**
 * Reads the next character from the parser's input and keeps the position
 * up to date: a newline starts a new line at column 0, anything else moves one
 * column to the right. Returns the character as read by utf8_fgetch().
 */
uint32_t parser_getch(struct parser *parser) {
	const uint32_t ch = utf8_fgetch(parser->input);
	if (ch != '\n') {
		parser->col += 1;
		return ch;
	}
	parser->line += 1;
	parser->col = 0;
	return ch;
}
/**
 * Writes the roff request ".cmd" to the parser's output, followed by each
 * variadic argument wrapped in double quotes, and a trailing newline.
 *
 * The variadic arguments are C strings (UTF-8) and the list must end with a
 * NULL pointer. A double quote inside an argument is written as \(dq: the
 * sequence \" would start a roff comment and cut the line short.
 *
 * Returns a count of the bytes written.
 */
int roff_macro(struct parser *p, char *cmd, ...) {
	FILE *f = p->output;
	int l = fprintf(f, ".%s", cmd);
	va_list ap;
	va_start(ap, cmd);
	const char *arg;
	while ((arg = va_arg(ap, const char *))) {
		fputc(' ', f);
		fputc('"', f);
		while (*arg) {
			uint32_t ch = utf8_decode(&arg);
			if (ch == '"') {
				fputs("\\(dq", f);
				l += 4;
			} else {
				l += (int)utf8_fputch(f, ch);
			}
		}
		fputc('"', f);
		// Accounts for the space and the two surrounding quotes.
		l += 3;
	}
	va_end(ap);
	fputc('\n', f);
	return l + 1;
}
Loading…
Cancel
Save