verzia 1.19, 2002/12/19 00:16:44 |
verzia 1.20, 2002/12/23 02:40:05 |
|
|
#include "parse.h" |
#include "parse.h" |
#include "filter.h" |
#include "filter.h" |
#include "utils.h" |
#include "utils.h" |
|
|
/* function compute number of locations of char c from buffer to character c |
/* function compute number of locations of char c from buffer to character c |
*/ |
*/ |
int strlenc(char *buffer, char c, char to) |
int strlenc(char *buffer, char c, char to) |
Riadok 87 char *name(char *buffer) |
|
Riadok 88 char *name(char *buffer) |
|
|
|
char **array(char *buffer) |
char **array(char *buffer) |
{ |
{ |
char **ret = NULL,*last; |
char **ret = NULL, *last; |
static char delim[2] = { '[', ']' }; |
static char delim[2] = { '[', ']' }; |
int index = 0, len,end = 0; |
int index = 0, len, end = 0; |
last = buffer + 1; |
last = buffer + 1; |
do { |
do { |
ret = |
ret = |
Riadok 98 char **array(char *buffer) |
|
Riadok 99 char **array(char *buffer) |
|
if (*buffer == '[') { |
if (*buffer == '[') { |
ret[index] = &delim[0]; |
ret[index] = &delim[0]; |
end++; |
end++; |
} |
} else { |
else { |
|
ret[index] = &delim[1]; |
ret[index] = &delim[1]; |
end--; |
end--; |
} |
} |
Riadok 107 char **array(char *buffer) |
|
Riadok 107 char **array(char *buffer) |
|
index++; |
index++; |
continue; |
continue; |
} |
} |
while(1) { |
while (1) { |
while (*buffer != ' ' && *buffer != ']' && *buffer != '\n') |
while (*buffer != ' ' && *buffer != ']' |
buffer++; |
&& *buffer != '\n') |
if(buffer - last == 0) { |
buffer++; |
buffer++; |
if (buffer - last == 0) { |
continue; |
buffer++; |
} |
continue; |
else |
} else |
break; |
break; |
} |
} |
len = buffer - last; |
len = buffer - last; |
ret[index] = (char *) xcalloc(len + 1, sizeof(char)); |
ret[index] = (char *) xcalloc(len + 1, sizeof(char)); |
Riadok 129 char **array(char *buffer) |
|
Riadok 129 char **array(char *buffer) |
|
return ret; |
return ret; |
} |
} |
|
|
|
|
#define BUFLEN 255 |
#define BUFLEN 255 |
char *read_obj(long offset) |
char *read_obj(long offset) |
{ |
{ |
char entry[BUFLEN], *obj = NULL; |
char entry[BUFLEN], *obj = NULL; |
int size = 0; |
int size = 0; |
fseek(fp, offset, SEEK_SET); |
fseek(fp, offset, SEEK_SET); |
|
memset(entry, '\0', BUFLEN); |
fget(entry, BUFLEN, fp); |
fget(entry, BUFLEN, fp); |
do { |
do { |
fget(entry, BUFLEN, fp); |
fget(entry, BUFLEN, fp); |
size += strlen(entry) + 1; |
/* stream need special care in reading */ |
obj = resize(obj, size); |
if (strstr(entry, "stream")) { |
strcat(obj, entry); |
char *p; |
} while (!strstr(entry, "endobj")); |
int len; |
|
long off; |
|
obj = resize(obj, size + strlen(entry)); |
|
strncpy(obj + size, entry, strlen(entry) + 1); |
|
size += strlen(entry); |
|
p = strstr(obj, "Length"); |
|
sscanf(p, "Length %d", &len); |
|
for (p += 6; *p != '/'; p++) |
|
if (!strncmp(p, "0 R", 3)) { |
|
off = ftell(fp); |
|
p = (char *) get_object(len, NULL); |
|
sscanf(p, "%d\nendobj", &len); |
|
fseek(fp, off, SEEK_SET); |
|
break; |
|
} |
|
len += sizeof("endstream"); |
|
size += len; |
|
obj = resize(obj, size); |
|
fread(obj + strlen(obj), sizeof(char), len, fp); |
|
continue; |
|
} |
|
if (strstr(entry, "endobj")) |
|
break; |
|
obj = resize(obj, size + strlen(entry)); |
|
strncpy(obj + size, entry, strlen(entry)); |
|
size += strlen(entry); |
|
} while (1); |
return obj; |
return obj; |
} |
} |
|
|
Riadok 151 int get_typenum(char *dictionary) |
|
Riadok 177 int get_typenum(char *dictionary) |
|
char *p; |
char *p; |
p = strstr(dictionary, "Type"); |
p = strstr(dictionary, "Type"); |
if (!p) { |
if (!p) { |
if((p = strstr(dictionary,"stream")) != NULL) |
if ((p = strstr(dictionary, "stream")) != NULL) |
return CONTENTS; |
return CONTENTS; |
return DATA; |
return DATA; |
} |
} |
for(p += 4;*p != '/';p++); |
for (p += 4; *p != '/'; p++); |
p++; |
p++; |
if (!strncmp(p, "Catalog", 7)) |
if (!strncmp(p, "Catalog", 7)) |
return CATALOG; |
return CATALOG; |
if (!strncmp(p, "Page", 4)) { |
if (!strncmp(p, "Page", 4)) { |
if(p[4] == 's') |
if (p[4] == 's') |
return PAGES; |
return PAGES; |
return PAGE; |
return PAGE; |
} |
} |
return DATA; |
return DATA; |
} |
} |
|
|
void *get_object(int num,int *type) |
void *get_object(int num, int *type) |
{ |
{ |
void *ret = NULL; |
void *ret = NULL; |
char *dict; |
char *dict; |
struct object **obj; |
struct object *obj = (struct object *) *(objects + num - 1); |
for (obj = objects; obj; obj++) { |
if (obj->parsed) { |
if ((*obj)->obj_num == num) { |
if (type) |
if ((*obj)->parsed) { |
*type = obj->type; |
if(type) |
return obj->parsed; |
*type = (*obj)->type; |
} |
return (*obj)->parsed; |
dict = read_obj(obj->offset); |
} |
obj->type = get_typenum(dict); |
dict = read_obj((*obj)->offset); |
if (type) |
(*obj)->type = get_typenum(dict); |
*type = obj->type; |
if(type) |
if (obj->type == CATALOG) { |
*type = (*obj)->type; |
ret = fill_catalog(dict); |
if((*obj)->type == CATALOG) { |
goto out; |
ret = fill_catalog(dict); |
} |
break; |
if (obj->type == PAGES) { |
} |
ret = fill_pages(dict); |
if((*obj)->type == PAGES) { |
goto out; |
ret = fill_pages(dict); |
} |
break; |
if (obj->type == PAGE) { |
} |
ret = fill_page(dict); |
if((*obj)->type == PAGE) { |
goto out; |
ret = fill_page(dict); |
} |
break; |
if (obj->type == CONTENTS) { |
} |
ret = fill_contents(dict); |
if((*obj)->type == CONTENTS) { |
goto out; |
ret = fill_contents(dict); |
} |
break; |
if (obj->type == RESOURCES) { |
} |
ret = fill_resources(dict); |
if((*obj)->type == RESOURCES) { |
goto out; |
ret = fill_resources(dict); |
} |
break; |
if (obj->type == DATA) { |
} |
obj->parsed = dict; |
if((*obj)->type == DATA) { |
return dict; |
(*obj)->parsed = dict; |
|
return dict; |
|
} |
|
} |
|
|
|
} |
} |
|
out: |
free(dict); |
free(dict); |
(*obj)->parsed = ret; |
obj->parsed = ret; |
return ret; |
return ret; |
} |
} |
|
|
/* add object to object structure */ |
/* add object to object structure */ |
void add_obj(char *s) |
void add_obj(char *s, int num) |
{ |
{ |
long old_off; |
|
struct object *o = |
struct object *o = |
(struct object *) xcalloc(sizeof(struct object), 1); |
(struct object *) xcalloc(sizeof(struct object), 1); |
old_off = ftell(fp); |
|
sscanf(s, "%ld 00000 n", &o->offset); |
sscanf(s, "%ld 00000 n", &o->offset); |
fseek(fp, o->offset, SEEK_SET); |
o->obj_num = num; |
fget(s, XREFLINE, fp); |
objects[num - 1] = o; |
sscanf(s, "%d 0 obj", &o->obj_num); |
|
fseek(fp, old_off, SEEK_SET); |
|
objects = |
|
(struct object **) realloc(objects, obj_count * sizeof(char *)); |
|
objects[obj_count - 1] = o; |
|
obj_count++; |
|
return; |
return; |
} |
} |
|
|
/* get filter number */ |
/* get filter number */ |
char get_filternum(char *filter) |
char get_filternum(char *filter) |
{ |
{ |
if (!strncmp(filter, "FlateDecode", 11)) |
if (!strncmp(filter, "Fl", 2)) |
return FLATEDECODE; |
return FLATEDECODE; |
if (!strncmp(filter, "DCTDecode", 9)) |
if (!strncmp(filter, "DCTDecode", 9)) |
return DCTDECODE; |
return DCTDECODE; |
Riadok 267 long get_startxref(void) |
|
Riadok 281 long get_startxref(void) |
|
|
|
void probe_ptree(int num) |
void probe_ptree(int num) |
{ |
{ |
struct pages *pages; |
struct pages *pages; |
int type,a; |
int type, a; |
pages = (struct pages *) get_object(num,NULL); |
pages = (struct pages *) get_object(num, NULL); |
pages->kids++; |
pages->kids++; |
a = atoi(*pages->kids); |
a = atoi(*pages->kids); |
pages = (struct pages *) get_object(a,&type); |
pages = (struct pages *) get_object(a, &type); |
while(type != PAGE) { |
while (type != PAGE) { |
a = atoi(pages->kids[1]); |
a = atoi(pages->kids[1]); |
pages = (struct pages *) get_object(a,&type); |
pages = (struct pages *) get_object(a, &type); |
} |
} |
page_tree[current_page] = a; |
page_tree[current_page] = a; |
return; |
return; |
Riadok 303 void probe_ptree(int num) |
|
Riadok 317 void probe_ptree(int num) |
|
/* find objects and add it to object structure */ |
/* find objects and add it to object structure */ |
void parse_xref(void) |
void parse_xref(void) |
{ |
{ |
char xref[XREFLINE]; |
char xref[XREFLINE], *p; |
char *p; |
int entry[2]; |
fseek(fp, get_startxref(), SEEK_SET); |
fseek(fp, get_startxref(), SEEK_SET); |
fget(xref, XREFLINE, fp); |
fget(xref, XREFLINE, fp); |
if (!strstr(xref, "xref")) { |
if (!strstr(xref, "xref")) { |
Riadok 313 void parse_xref(void) |
|
Riadok 327 void parse_xref(void) |
|
return; |
return; |
} |
} |
objects = NULL; |
objects = NULL; |
obj_count = 1; |
obj_count = 0; |
do { |
do { |
fget(xref, XREFLINE, fp); |
fget(xref, XREFLINE, fp); |
|
sscanf(xref, "%d %d", &entry[0], &entry[1]); |
|
entry[1] += entry[0]; |
|
if (entry[0] == 0) |
|
entry[0]++; |
|
if (entry[1] > obj_count) { |
|
objects = (struct object **) realloc(objects, |
|
entry[1] * |
|
sizeof(char |
|
*)); |
|
obj_count = entry[1]; |
|
} |
do { |
do { |
fget(xref, XREFLINE, fp); |
fget(xref, XREFLINE, fp); |
if (!strncmp(xref, "trailer", 7)) |
if (!strncmp(xref, "trailer", 7)) |
break; |
break; |
if (xref[XREFLINE - 3] == 'f') |
if (xref[XREFLINE - 3] == 'f') |
continue; |
continue; |
add_obj(&xref[0]); |
add_obj(xref, entry[0]); |
|
entry[0]++; |
} while (1); |
} while (1); |
{ |
{ |
char *tline = (char *) malloc(3 * XREFLINE); |
char *tline = (char *) xmalloc(3 * XREFLINE); |
fget(tline, 3 * XREFLINE, fp); |
|
trailer.prev = 0; |
|
while (tline[0] != '>' && tline[1] != '>') { |
|
if ((p = strstr(tline, "Root")) != NULL) |
|
sscanf(p, "Root %d 0 R", &trailer.root); |
|
if ((p = strstr(tline, "Info")) != NULL) |
|
sscanf(p, "Info %d 0 R", &trailer.info); |
|
if ((p = strstr(tline, "Prev")) != NULL) |
|
sscanf(p, "Prev %ld", &trailer.prev); |
|
if ((p = strstr(tline, "Encrypt")) != NULL) |
|
sscanf(p, |
|
"Encrypt %d 0 R", &trailer.encrypt); |
|
fget(tline, 3 * XREFLINE, fp); |
fget(tline, 3 * XREFLINE, fp); |
} |
trailer.prev = 0; |
free(tline); |
while (tline[0] != '>' && tline[1] != '>') { |
|
if ((p = strstr(tline, "Root")) != NULL) |
|
sscanf(p, "Root %d 0 R", |
|
&trailer.root); |
|
if ((p = strstr(tline, "Info")) != NULL) |
|
sscanf(p, "Info %d 0 R", |
|
&trailer.info); |
|
if ((p = strstr(tline, "Prev")) != NULL) |
|
sscanf(p, "Prev %ld", |
|
&trailer.prev); |
|
if ((p = strstr(tline, "Encrypt")) != NULL) |
|
sscanf(p, |
|
"Encrypt %d 0 R", |
|
&trailer.encrypt); |
|
fget(tline, 3 * XREFLINE, fp); |
|
} |
|
free(tline); |
} |
} |
if (!trailer.prev) |
if (!trailer.prev) |
break; |
break; |
fseek(fp, trailer.prev, SEEK_SET); |
fseek(fp, trailer.prev, SEEK_SET); |
|
fget(xref, XREFLINE, fp); |
} while (1); |
} while (1); |
return; |
return; |
} |
} |
Riadok 352 void parse_xref(void) |
|
Riadok 383 void parse_xref(void) |
|
int parse_catalog(void) |
int parse_catalog(void) |
{ |
{ |
struct catalog *c = NULL; |
struct catalog *c = NULL; |
c = (struct catalog *) get_object(trailer.root,NULL); |
c = (struct catalog *) get_object(trailer.root, NULL); |
if (!c) { |
if (!c) { |
/* error */ |
/* error */ |
} |
} |
Riadok 365 void parse_pdf(void) |
|
Riadok 396 void parse_pdf(void) |
|
struct pages *pages; |
struct pages *pages; |
parse_xref(); |
parse_xref(); |
ret = parse_catalog(); |
ret = parse_catalog(); |
pages = (struct pages *) get_object(ret,NULL); |
pages = (struct pages *) get_object(ret, NULL); |
page_tree = (int *) xcalloc(pages->count, 1); |
page_tree = (int *) xcalloc(pages->count, 1); |
current_page = 0; |
current_page = 0; |
ret = parse_catalog(); |
ret = parse_catalog(); |
Riadok 377 void parse_pdf(void) |
|
Riadok 408 void parse_pdf(void) |
|
|
|
struct catalog *fill_catalog(char *dictionary) |
struct catalog *fill_catalog(char *dictionary) |
{ |
{ |
char *p; |
char *p; |
struct catalog *catalog = (struct catalog *) xcalloc(sizeof(struct catalog),1); |
struct catalog *catalog = |
p = strstr(dictionary,"Pages"); |
(struct catalog *) xcalloc(sizeof(struct catalog), 1); |
if(!p) { |
p = strstr(dictionary, "Pages"); |
/* error*/ |
if (!p) { |
return NULL; |
/* error */ |
} |
return NULL; |
sscanf(p,"Pages %d 0 R",&catalog->pages); |
} |
return catalog; |
sscanf(p, "Pages %d 0 R", &catalog->pages); |
|
return catalog; |
} |
} |
|
|
struct pages *fill_pages(char *dictionary) |
struct pages *fill_pages(char *dictionary) |
Riadok 406 struct pages *fill_pages(char *dictionar |
|
Riadok 438 struct pages *fill_pages(char *dictionar |
|
} |
} |
pages->old_kids = pages->kids = array(p + 5); |
pages->old_kids = pages->kids = array(p + 5); |
p = strstr(dictionary, "Parent"); |
p = strstr(dictionary, "Parent"); |
if(p) |
if (p) |
sscanf(p,"Parent %d 0 R",&pages->parent); |
sscanf(p, "Parent %d 0 R", &pages->parent); |
return pages; |
return pages; |
} |
} |
|
|
Riadok 426 struct page *fill_page(char *dictionary) |
|
Riadok 458 struct page *fill_page(char *dictionary) |
|
page->contents = fill_contents(dictionary); |
page->contents = fill_contents(dictionary); |
} else { |
} else { |
sscanf(p, "Contents %d 0 R", &ref); |
sscanf(p, "Contents %d 0 R", &ref); |
page->contents = (struct contents *) get_object(ref,NULL); |
page->contents = (struct contents *) get_object(ref, NULL); |
} |
} |
p = strstr(dictionary, "Resources"); |
p = strstr(dictionary, "Resources"); |
if (!p) { |
if (!p) { |
Riadok 437 struct page *fill_page(char *dictionary) |
|
Riadok 469 struct page *fill_page(char *dictionary) |
|
page->resources = fill_resources(dictionary); |
page->resources = fill_resources(dictionary); |
else { |
else { |
sscanf(p, "Resources %d 0 R", &ref); |
sscanf(p, "Resources %d 0 R", &ref); |
page->resources = (struct resources *) get_object(ref,NULL); |
page->resources = |
|
(struct resources *) get_object(ref, NULL); |
} |
} |
p = strstr(dictionary, "MediaBox"); |
p = strstr(dictionary, "MediaBox"); |
if (!p) { |
if (!p) { |
Riadok 469 struct page *fill_page(char *dictionary) |
|
Riadok 502 struct page *fill_page(char *dictionary) |
|
struct contents *fill_contents(char *dictionary) |
struct contents *fill_contents(char *dictionary) |
{ |
{ |
struct contents *content; |
struct contents *content; |
char *p,*endstream; |
char *p; |
int i = 0; |
|
content = (struct contents *) xcalloc(sizeof(struct contents), 1); |
content = (struct contents *) xcalloc(sizeof(struct contents), 1); |
p = strstr(dictionary, "Length"); |
p = strstr(dictionary, "Length"); |
if (!p) { |
if (!p) { |
Riadok 478 struct contents *fill_contents(char *dic |
|
Riadok 510 struct contents *fill_contents(char *dic |
|
free(content); |
free(content); |
return NULL; |
return NULL; |
} |
} |
|
|
sscanf(p, "Length %d", &content->length); |
sscanf(p, "Length %d", &content->length); |
for(p += 6;*p != '/';p++) |
for (p += 6; *p != '/'; p++) |
if(!strncmp(p,"0 R",3)) { |
if (!strncmp(p, "0 R", 3)) { |
p = (char *) get_object(content->length,NULL); |
p = (char *) get_object(content->length, NULL); |
sscanf(p,"%d\nendobj",&content->length); |
sscanf(p, "%d\nendobj", &content->length); |
break; |
break; |
} |
} |
p = strstr(dictionary, "Filter"); |
p = strstr(dictionary, "Filter"); |
if (!p) { |
if (!p) { |
/* error */ |
/* error */ |
Riadok 499 struct contents *fill_contents(char *dic |
|
Riadok 531 struct contents *fill_contents(char *dic |
|
free(content); |
free(content); |
return NULL; |
return NULL; |
} |
} |
content->stream = (unsigned char *) xcalloc(content->length,1); |
content->stream = (unsigned char *) xcalloc(content->length, 1); |
memcpy(content->stream,p,content->length); |
memcpy(content->stream, p, content->length); |
return content; |
return content; |
} |
} |
|
|