Súbor: [Platon] / cpdf / parse.c (stiahnutie)
Revízia 1.22, Tue Feb 18 19:29:03 2003 UTC (21 years, 11 months ago) by lynx
Zmeny od 1.21: +17 -14
[lines]
sync...
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <assert.h>
#include <ctype.h>
#include "cpdf.h"
#include "parse.h"
#include "filter.h"
#include "utils.h"
/* Count how many times character c occurs in buffer, looking only at the
 * part of the string that precedes the first occurrence of `to` (or the
 * terminating NUL, whichever comes first).
 */
int strlenc(char *buffer, char c, char to)
{
	char *cur;
	int hits = 0;

	for (cur = buffer; *cur != '\0' && *cur != to; cur++)
		hits += (*cur == c);
	return hits;
}
/* Return the number of characters in buffer that precede the first
 * occurrence of `to`; if `to` never occurs this is the full string length.
 */
int strlento(char *buffer, char to)
{
	int n = 0;

	while (buffer[n] != '\0' && buffer[n] != to)
		n++;
	return n;
}
/* Decode a hexadecimal string of the form "<4E6F76>" into raw bytes.
 *
 * buffer must point at the opening '<'.  Every pair of digits yields one
 * byte; when the number of digits is odd the missing last digit is taken
 * to be '0'.
 *
 * Returns a buffer obtained from xmalloc() holding (digits + 1) / 2 bytes,
 * which the caller must free.  The result is NOT NUL-terminated and its
 * length is not reported.  Returns NULL when buffer does not start with
 * '<', when the closing '>' is missing, or when the string is empty.
 */
char *hex(char *buffer)
{
	char *out, *start;
	char num[5] = { '0', 'x', '\0', '\0', '\0' };
	int digits, bytes;

	if (*buffer != '<')
		return NULL;
	buffer++;
	digits = strlento(buffer, '>');
	/* strlento() also stops at NUL: refuse an unterminated string
	 * instead of decoding past the end of the input */
	if (buffer[digits] != '>')
		return NULL;
	/* round up so that a trailing single digit gets its own byte */
	bytes = (digits + 1) >> 1;
	if (bytes == 0)
		return NULL;
	start = out = (char *) xmalloc(bytes);
	while (*buffer != '>') {
		num[2] = *buffer++;
		if (*buffer == '>')
			num[3] = '0';	/* odd digit count: pad with 0 */
		else
			num[3] = *buffer++;
		*out++ = (char) strtoul(num, NULL, 16);
	}
	return start;
}
/* Expand "#xx" hexadecimal escapes in a name token.
 *
 * buffer must point at the leading '/'.  If no '#' occurs within the first
 * MAXNAMELEN characters of the string, buffer itself is returned unchanged
 * (nothing is allocated).  Otherwise a new buffer of MAXNAMELEN bytes is
 * obtained from xmalloc() and the name, up to the first space or the end
 * of the string, is copied into it with every "#xx" replaced by the byte it
 * encodes.  The copy is NUL-terminated and truncated to MAXNAMELEN - 1
 * characters.  A '#' that is not followed by two hexadecimal digits is
 * copied literally.
 *
 * Returns NULL when buffer does not start with '/'.  The caller has to
 * compare the result with buffer to know whether it must be freed.
 */
char *name(char *buffer)
{
	char *ret, *end;
	char num[5] = { '0', 'x', '\0', '\0', '\0' };
	int i, escaped = 0;

	if (*buffer != '/')
		return NULL;
	/* look for an escape without reading past the terminating NUL */
	for (i = 0; i < MAXNAMELEN && buffer[i] != '\0'; i++) {
		if (buffer[i] == '#') {
			escaped = 1;
			break;
		}
	}
	if (!escaped)
		return buffer;
	end = ret = (char *) xmalloc(MAXNAMELEN);
	while (*buffer && *buffer != ' ' && ret - end < MAXNAMELEN - 1) {
		/* the && chain stops at the first non-hex character, so a
		 * '#' at the very end of the string is never over-read */
		if (*buffer == '#'
		    && isxdigit((unsigned char) buffer[1])
		    && isxdigit((unsigned char) buffer[2])) {
			num[2] = buffer[1];
			num[3] = buffer[2];
			*ret++ = (char) strtoul(num, NULL, 16);
			buffer += 3;
			/* re-test the loop condition: the next character
			 * may be NUL, a space or another escape */
			continue;
		}
		*ret++ = *buffer++;
	}
	*ret = '\0';
	return end;
}
/* Split an array such as "[1 0 R 4 0 R]" into a vector of tokens.
 *
 * buffer must point at the opening '['.  Tokens are separated by a space,
 * a newline or ']'.  Each '[' and ']' becomes a token of its own that
 * points into a static two-character table: those tokens are not
 * NUL-terminated and must never be passed to free().  Every other token is
 * a NUL-terminated copy obtained from xcalloc().
 *
 * Scanning stops after the ']' that closes the outermost '[' (or at the
 * end of the string if the array is unterminated).  The returned vector
 * carries no length and no NULL sentinel; the closing "]" token is the
 * only end marker.  The vector itself comes from realloc() and must be
 * freed by the caller.
 */
char **array(char *buffer)
{
	char **ret = NULL, **grown, *last;
	static char delim[2] = { '[', ']' };
	int index = 0, len, end = 0;

	last = buffer + 1;
	do {
		/* size by the real pointer width, not a hard-coded 4 bytes */
		grown = (char **) realloc(ret, (index + 1) * sizeof *ret);
		if (!grown) {
			perror("realloc");
			exit(-1);
		}
		ret = grown;
		if (*buffer == '[' || *buffer == ']') {
			if (*buffer == '[') {
				ret[index] = &delim[0];
				end++;
			} else {
				ret[index] = &delim[1];
				end--;
			}
			buffer++;
			index++;
			continue;
		}
		for (;;) {
			while (*buffer != '\0' && *buffer != ' '
			       && *buffer != ']' && *buffer != '\n')
				buffer++;
			/* an empty token is skipped by stepping over the
			 * delimiter, but never over the terminating NUL */
			if (buffer != last || *buffer == '\0')
				break;
			buffer++;
		}
		len = buffer - last;
		if (len <= 0)
			break;	/* nothing left to tokenize */
		ret[index] = (char *) xcalloc(len + 1, sizeof(char));
		memcpy(ret[index], last, len);
		index++;
		if (*buffer == '\0')
			break;	/* unterminated array */
		if (*buffer != ']')
			last = ++buffer;
		else
			last = buffer;
	} while (end);
	return ret;
}
#define BUFLEN 255
/* Read the body of the indirect object that starts at file offset `offset`.
 *
 * The first line at that offset (the "N G obj" header) is read and
 * discarded.  Following lines are appended to the result until a line
 * containing "endobj" is seen.  When a line contains "stream", the bytes
 * after it are copied verbatim, one by one, up to and including the
 * keyword "endstream", so that binary stream data is not cut into lines.
 *
 * Returns a buffer grown with resize() that the caller must free.  A NUL
 * byte is appended after the data so that string searches on the
 * dictionary part are bounded; resize() is not assumed to zero-fill.
 * Stream data may itself contain NUL bytes.  Reading also stops when the
 * end of the file or a read error is reached.
 */
char *read_obj(long offset)
{
	static const char endstream[] = "endstream";
	const size_t endlen = sizeof(endstream) - 1;
	char entry[BUFLEN], *obj = NULL;
	size_t size = 0, len;
	int c;

	if (fseek(fp, offset, SEEK_SET) == 0) {
		fget(entry, BUFLEN, fp);	/* skip the object header line */
		for (;;) {
			/* start from an empty line so that a failed read
			 * cannot make us append the previous line again */
			entry[0] = '\0';
			fget(entry, BUFLEN, fp);
			if (strstr(entry, "endobj"))
				break;
			len = strlen(entry);
			obj = resize(obj, size + len);
			memcpy(obj + size, entry, len);
			size += len;
			/* stream need special care in reading */
			if (strstr(entry, "stream")) {
				while ((c = fgetc(fp)) != EOF) {
					obj = resize(obj, size + 1);
					obj[size++] = (char) c;
					/* compare the tail after every byte:
					 * a fixed look-ahead could swallow
					 * the 'e' that starts the keyword */
					if (size >= endlen
					    && memcmp(obj + size - endlen,
						      endstream, endlen) == 0)
						break;
				}
			}
			if (feof(fp) || ferror(fp))
				break;	/* truncated file: do not spin */
		}
	}
	obj = resize(obj, size + 1);
	obj[size] = '\0';
	return obj;
}
/* Classify an object from the text of its dictionary.
 *
 * If the text contains "Type", the name following the next '/' decides:
 * "Catalog" gives CATALOG, "Pages" gives PAGES, any other name starting
 * with "Page" gives PAGE, everything else DATA.  Without "Type", an object
 * containing "stream" is CONTENTS and anything else is DATA.
 *
 * dictionary must be a NUL-terminated string.
 */
int get_typenum(char *dictionary)
{
	char *p;

	p = strstr(dictionary, "Type");
	if (!p)
		return strstr(dictionary, "stream") ? CONTENTS : DATA;
	/* strchr() stops at the terminating NUL, so a "Type" key with no
	 * name after it cannot run the scan off the end of the buffer */
	p = strchr(p + 4, '/');
	if (!p)
		return DATA;
	p++;
	if (!strncmp(p, "Catalog", 7))
		return CATALOG;
	if (!strncmp(p, "Page", 4))
		return p[4] == 's' ? PAGES : PAGE;
	return DATA;
}
/* Return the parsed form of object number num, parsing it on first use.
 *
 * num is 1-based and must lie in 1..obj_count; any other value, or an
 * object number with no entry in the table, aborts the program.  When
 * type is not NULL the object's type code is stored through it.
 *
 * The result is cached in the object's `parsed` field and owned by the
 * object table; callers must not free it.  For CATALOG, PAGES, PAGE,
 * CONTENTS and RESOURCES objects it is the structure built by the matching
 * fill_*() function (NULL if that function failed, in which case nothing
 * is cached and the next call parses again).  For DATA objects it is the
 * raw text returned by read_obj().
 */
void *get_object(int num, int *type)
{
	void *ret = NULL;
	char *dict;
	struct object *obj;

	/* num < 1 also rejects negative numbers, which would otherwise
	 * index in front of the table */
	if (num < 1 || num > obj_count)
		abort();
	obj = (struct object *) *(objects + num - 1);
	if (!obj)
		abort();
	if (obj->parsed) {
		if (type)
			*type = obj->type;
		return obj->parsed;
	}
	dict = read_obj(obj->offset);
	if (!dict) {
		/* nothing could be read: report an empty DATA object
		 * rather than searching a NULL string */
		obj->type = DATA;
		if (type)
			*type = DATA;
		return NULL;
	}
	obj->type = get_typenum(dict);
	if (type)
		*type = obj->type;
	if (obj->type == DATA) {
		/* the raw text itself is the parsed form; keep it */
		obj->parsed = dict;
		return dict;
	}
	if (obj->type == CATALOG)
		ret = fill_catalog(dict);
	else if (obj->type == PAGES)
		ret = fill_pages(dict);
	else if (obj->type == PAGE)
		ret = fill_page(dict);
	else if (obj->type == CONTENTS)
		ret = fill_contents(dict);
	else if (obj->type == RESOURCES)
		ret = fill_resources(dict);
	free(dict);
	obj->parsed = ret;
	return ret;
}
/* Create the table record for object number num.
 *
 * s is one cross-reference entry line; its leading decimal number is taken
 * as the object's byte offset in the file.  The new record is stored in
 * slot num - 1 of the global objects table.
 */
void add_obj(char *s, int num)
{
	struct object *record;

	record = (struct object *) xcalloc(sizeof(struct object), 1);
	record->obj_num = num;
	sscanf(s, "%ld 00000 n", &record->offset);
	*(objects + num - 1) = record;
}
/* Translate a filter name (without the leading '/') into its code.
 *
 * Any name beginning with "Fl" is taken to be FlateDecode; "DCTDecode" is
 * matched in full.  Returns -1 for every other name.
 */
char get_filternum(char *filter)
{
	if (filter[0] == 'F' && filter[1] == 'l')
		return FLATEDECODE;
	if (strncmp(filter, "DCTDecode", 9) == 0)
		return DCTDECODE;
	return -1;
}
/* Return the file offset of the newest cross-reference table.
 *
 * The last XREFLINE bytes of the file are read and the first decimal
 * number found in them is returned.  As a side effect the global fpstat is
 * refreshed with the file's stat information.
 *
 * Returns 0 when the file cannot be examined or the tail holds no digit.
 */
long get_startxref(void)
{
	char startxref[XREFLINE + 1];
	long start;
	size_t got;
	int i;

	/* NOTE(review): _fileno is a C-library internal field; fileno(fp)
	 * is the portable spelling - confirm the build allows switching */
	if (fstat(fp->_fileno, &fpstat) != 0)
		return 0;
	/* a file shorter than the window is read from its beginning */
	start = fpstat.st_size > XREFLINE ? (long) fpstat.st_size - XREFLINE : 0;
	if (fseek(fp, start, SEEK_SET) != 0)
		return 0;
	got = fread(startxref, sizeof(char), XREFLINE, fp);
	/* terminate what was actually read so that neither the digit scan
	 * nor strtoul() can leave the buffer */
	startxref[got] = '\0';
	for (i = 0; startxref[i] != '\0'
	     && !isdigit((unsigned char) startxref[i]); i++);
	if (startxref[i] == '\0')
		return 0;
	return strtoul(&startxref[i], NULL, 10);
}
/* Walk the page tree rooted at object num and fill the global page_tree
 * with the object numbers of the pages, in document order.
 *
 * page_tree is allocated with one slot more than the root's page count and
 * current_page is left at the number of pages recorded.  The kids token
 * vectors of the visited nodes are consumed: their tokens and the vectors
 * themselves are freed as the walk proceeds.
 *
 * Each kid is expected to occupy exactly three tokens ("num gen R").  When
 * the first kid of a node is not itself a page-tree node, all of that
 * node's kids are recorded as pages without being inspected individually.
 *
 * NOTE(review): the loop condition compares the global page counter with
 * the count of whichever node is current, so the walk looks like it can
 * stop early when the two happen to be equal on entering a nested node -
 * verify with a multi-level page tree.
 */
void probe_ptree(int num)
{
	void *ptr;
	struct pages *pages;
	int type;

	pages = (struct pages *) get_object(num, &type);
	if (!pages) {
		fprintf(stderr, "Can't find page tree!\n");
		exit(-1);
	}
	page_tree = (int *) xcalloc(pages->count + 1, sizeof(int));
	current_page = 0;
	pages->kids++;		/* step over the leading "[" token */
	while (current_page != pages->count) {
		num = atoi(*pages->kids);
		/* a token that is not an object number (such as the
		 * closing "]", which lives in static storage) ends the
		 * walk; it must not be freed or stepped over */
		if (!num)
			break;
		ptr = get_object(num, &type);
		if (type == PAGES) {
			if (!ptr)
				break;	/* child node could not be parsed */
			/* descend; the parent keeps pointing at this kid
			 * and is advanced once the child is finished */
			pages = ptr;
			pages->kids++;
			continue;
		}
		for (num = pages->count; num; num--) {
			page_tree[current_page] = atoi(*pages->kids);
			current_page++;
			free(pages->kids[0]);
			free(pages->kids[1]);
			free(pages->kids[2]);
			pages->kids += 3;
		}
		free(pages->old_kids);
		/* forget the vector: when this node is the root the final
		 * free() below would otherwise release it a second time */
		pages->old_kids = NULL;
		if (!pages->parent)
			break;
		pages = (struct pages *) get_object(pages->parent, NULL);
		/* back in the parent: drop the kid that was just walked */
		free(pages->kids[0]);
		free(pages->kids[1]);
		free(pages->kids[2]);
		pages->kids += 3;
	}
	free(pages->old_kids);
	pages->old_kids = NULL;
	return;
}
/* find objects and add it to object structure */
void parse_xref(void)
{
char xref[XREFLINE];
char *xreftable, *p;
int entry[2], xtblsize;
trailer.prev = get_startxref();
objects = NULL;
obj_count = 0;
while (trailer.prev) {
fseek(fp, trailer.prev, SEEK_SET);
fget(xref, XREFLINE, fp);
if (strncmp(xref, "xref", 4)) {
fprintf(stderr, "PDF document is corrupted!\n");
/* reconstruction ? */
return;
}
fget(xref, XREFLINE, fp);
sscanf(xref, "%d %d", &entry[0], &entry[1]);
xtblsize = entry[1] * XREFLINE + 1;
p = xreftable = (char *) xmalloc(xtblsize);
fread(xreftable, xtblsize - 1, sizeof(char), fp);
if (!entry[0])
entry[0]++;
else
entry[1] += entry[0];
if (entry[1] > obj_count) {
objects = (struct object **)
realloc(objects, entry[1] << 2);
obj_count = entry[1];
}
for (p[xtblsize] = '\0'; *p != '\0'; p += XREFLINE) {
if (p[XREFLINE - 3] == 'f')
continue;
p[XREFLINE - 1] = '\0';
add_obj(p, entry[0]);
entry[0]++;
}
free(xreftable);
{
char tline[3 * XREFLINE];
fget(tline, 3 * XREFLINE, fp);
trailer.prev = 0;
while (tline[0] != '>' && tline[1] != '>') {
if ((p = strstr(tline, "Root")) != NULL)
sscanf(p, "Root %d 0 R",
&trailer.root);
if ((p = strstr(tline, "Info")) != NULL)
sscanf(p, "Info %d 0 R",
&trailer.info);
if ((p = strstr(tline, "Prev")) != NULL)
sscanf(p, "Prev %ld",
&trailer.prev);
if ((p = strstr(tline, "Encrypt")) != NULL)
sscanf(p,
"Encrypt %d 0 R",
&trailer.encrypt);
fget(tline, 3 * XREFLINE, fp);
}
}
}
return;
}
/* Load the document catalog named by the trailer's root entry and return
 * the object number of the page tree root stored in it.  Prints a message
 * and exits with status -1 when the catalog cannot be obtained.
 */
int parse_catalog(void)
{
	struct catalog *root;

	root = (struct catalog *) get_object(trailer.root, NULL);
	if (root != NULL)
		return root->pages;
	fprintf(stderr, "Can't find catalog!\n");
	exit(-1);
}
/* Parse the open document: read the cross-reference data, locate the
 * catalog, then walk the page tree it points to.
 */
void parse_pdf(void)
{
	int tree_root;

	parse_xref();
	tree_root = parse_catalog();
	probe_ptree(tree_root);
}
/* Build a catalog record from the text of a catalog dictionary.
 *
 * Only the "Pages" reference is extracted.  Returns a record obtained from
 * xcalloc(), or NULL (nothing allocated) when the dictionary has no
 * "Pages" key followed by an object number.
 */
struct catalog *fill_catalog(char *dictionary)
{
	char *p;
	struct catalog *catalog;

	/* look the key up before allocating so that the error path has
	 * nothing to release */
	p = strstr(dictionary, "Pages");
	if (!p) {
		/* error */
		return NULL;
	}
	catalog = (struct catalog *) xcalloc(sizeof(struct catalog), 1);
	if (sscanf(p, "Pages %d 0 R", &catalog->pages) != 1) {
		free(catalog);
		return NULL;
	}
	return catalog;
}
/* Build a page-tree node record from the text of its dictionary.
 *
 * Extracts "Count", tokenizes the "Kids" array with array() (both kids and
 * old_kids point at the start of the token vector) and, when present, the
 * "Parent" reference.  Returns a record obtained from xcalloc(), or NULL
 * (nothing allocated) when "Count" is missing or "Kids" is not followed by
 * an array.
 */
struct pages *fill_pages(char *dictionary)
{
	char *count, *kids, *parent;
	struct pages *pages;

	/* validate first so that the error paths have nothing to free */
	count = strstr(dictionary, "Count");
	if (!count) {
		/* error */
		return NULL;
	}
	kids = strstr(dictionary, "Kids");
	if (!kids) {
		/* error */
		return NULL;
	}
	/* array() needs to start on '[': accept any amount of white space
	 * between the key and the array */
	for (kids += 4; *kids == ' ' || *kids == '\t'
	     || *kids == '\r' || *kids == '\n'; kids++);
	if (*kids != '[') {
		/* error */
		return NULL;
	}
	pages = (struct pages *) xcalloc(sizeof(struct pages), 1);
	sscanf(count, "Count %d", &pages->count);
	pages->old_kids = pages->kids = array(kids);
	parent = strstr(dictionary, "Parent");
	if (parent)
		sscanf(parent, "Parent %d 0 R", &pages->parent);
	return pages;
}
/* Build a page record from the text of a page dictionary.
 *
 * "Contents", "Resources", "MediaBox" and "Parent" are required; when any
 * of them is missing NULL is returned and nothing is allocated or loaded.
 * Contents and Resources are either parsed in place (when the key is
 * followed by " <") or fetched through get_object() from the reference
 * that follows the key; a value in any other form leaves the field NULL.
 * "Rotate" is optional.
 *
 * Returns a record obtained from xcalloc().
 */
struct page *fill_page(char *dictionary)
{
	struct page *page;
	char *contents, *resources, *mediabox, *parent, *rotate;
	int ref;

	/* check every required key before allocating, so that no error
	 * path has to release a half-built page */
	contents = strstr(dictionary, "Contents");
	resources = strstr(dictionary, "Resources");
	mediabox = strstr(dictionary, "MediaBox");
	parent = strstr(dictionary, "Parent");
	if (!contents || !resources || !mediabox || !parent) {
		/* error */
		return NULL;
	}
	page = (struct page *) xcalloc(sizeof(struct page), 1);

	if (contents[8] != '\0' && contents[9] == '<') {
		/* if /Contents << */
		page->contents = fill_contents(dictionary);
	} else if (sscanf(contents, "Contents %d 0 R", &ref) == 1) {
		page->contents = (struct contents *) get_object(ref, NULL);
	}

	if (resources[9] != '\0' && resources[10] == '<') {
		/* if /Resources << */
		page->resources = fill_resources(dictionary);
	} else if (sscanf(resources, "Resources %d 0 R", &ref) == 1) {
		page->resources =
		    (struct resources *) get_object(ref, NULL);
	}

	sscanf(mediabox, "MediaBox [%hd %hd %hd %hd]", &page->mediabox[0],
	       &page->mediabox[1], &page->mediabox[2], &page->mediabox[3]);
	sscanf(parent, "Parent %d 0 R", &page->parent);

	rotate = strstr(dictionary, "Rotate");
	if (rotate) {
		/* the pointer is on the 'R', not on the '/', so the format
		 * must not start with a slash */
		sscanf(rotate, "Rotate %hd", &page->rotate);
	}
	return page;
}
struct contents *fill_contents(char *dictionary)
{
struct contents *content;
char *p;
content = (struct contents *) xcalloc(sizeof(struct contents), 1);
p = strstr(dictionary, "Length");
if (!p) {
/* error */
free(content);
return NULL;
}
sscanf(p, "Length %d", &content->length);
for (p += 6; *p != '/'; p++)
if (*p == '0' && *(p + 1) == ' ' && *(p + 2) == 'R') {
p = (char *) get_object(content->length, NULL);
sscanf(p, "%d\nendobj", &content->length);
break;
}
p = strstr(dictionary, "Filter");
if (p)
content->filter = get_filternum(&p[8]);
else
content->filter = -1;
p = strstr(dictionary, "stream") + sizeof("stream");
if (!p) {
/* error */
free(content);
return NULL;
}
content->stream = (unsigned char *) xcalloc(content->length, 1);
memcpy(content->stream, p, content->length);
return content;
}
/* Allocate a resources record with xcalloc().  The dictionary text is not
 * examined yet; no field is filled in from it.
 */
struct resources *fill_resources(char *dictionary)
{
	return (struct resources *) xcalloc(sizeof(struct resources), 1);
}
Platon Group <platon@platon.sk> http://platon.sk/
|