verzia 1.17, 2002/11/10 16:58:30 |
verzia 1.18, 2002/11/30 19:38:09 |
|
|
#include <sys/stat.h> |
#include <sys/stat.h> |
#include <sys/types.h> |
#include <sys/types.h> |
#include <unistd.h> |
#include <unistd.h> |
|
#include <assert.h> |
|
|
#include "cpdf.h" |
#include "cpdf.h" |
#include "parse.h" |
#include "parse.h" |
Riadok 86 char *name(char *buffer) |
|
Riadok 87 char *name(char *buffer) |
|
|
|
char **array(char *buffer) |
char **array(char *buffer) |
{ |
{ |
char **ret = NULL, *last; |
char **ret = NULL,*last; |
static char delim[4] = { '[', '\0', ']', '\0' }; |
static char delim[2] = { '[', ']' }; |
int index = 0, len; |
int index = 0, len,end = 0; |
last = buffer + 1; |
last = buffer + 1; |
do { |
do { |
ret = |
ret = |
(char **) realloc(ret, (index + 1) * sizeof(char **)); |
(char **) realloc(ret, (index + 1) * sizeof(char **)); |
if (*buffer == '[' || *buffer == ']') { |
if (*buffer == '[' || *buffer == ']') { |
if (*buffer == '[') |
if (*buffer == '[') { |
ret[index] = &delim[0]; |
ret[index] = &delim[0]; |
else |
end++; |
ret[index] = &delim[2]; |
} |
|
else { |
|
ret[index] = &delim[1]; |
|
end--; |
|
} |
buffer++; |
buffer++; |
index++; |
index++; |
continue; |
continue; |
} |
} |
while (*buffer != ' ' && *buffer != ']' && *buffer != '\n') |
while(1) { |
|
while (*buffer != ' ' && *buffer != ']' && *buffer != '\n') |
|
buffer++; |
|
if(buffer - last == 0) { |
buffer++; |
buffer++; |
|
continue; |
|
} |
|
else |
|
break; |
|
} |
len = buffer - last; |
len = buffer - last; |
ret[index] = (char *) xcalloc(len + 1, sizeof(char)); |
ret[index] = (char *) xcalloc(len + 1, sizeof(char)); |
strncpy(ret[index], last, len); |
strncpy(ret[index], last, len); |
Riadok 112 char **array(char *buffer) |
|
Riadok 125 char **array(char *buffer) |
|
last = ++buffer; |
last = ++buffer; |
else |
else |
last = buffer; |
last = buffer; |
} while (*buffer != '\n'); |
} while (end); |
return ret; |
return ret; |
} |
} |
|
|
|
|
#define BUFLEN 255 |
#define BUFLEN 255 |
char *read_obj(long offset,int *obj_num) |
char *read_obj(long offset) |
{ |
{ |
char buffer[BUFLEN]; |
char entry[BUFLEN], *obj = NULL; |
int size = 0; |
int size = 0; |
char *obj = NULL; |
|
fseek(fp, offset, SEEK_SET); |
fseek(fp, offset, SEEK_SET); |
fget(buffer, BUFLEN, fp); |
fget(entry, BUFLEN, fp); |
sscanf(buffer, "%d 0 obj ", obj_num); |
do { |
fget(buffer, BUFLEN, fp); |
fget(entry, BUFLEN, fp); |
if (buffer[0] == '<' && buffer[1] == '<') |
size += strlen(entry) + 1; |
fget(buffer, BUFLEN, fp); |
obj = resize(obj, size); |
while (!strstr(buffer, "endobj")) { |
strcat(obj, entry); |
size += strlen(buffer) + 1; |
} while (!strstr(entry, "endobj")); |
obj = (char *) realloc(obj, size); |
|
strcat(obj, buffer); |
|
fget(buffer, BUFLEN, fp); |
|
} |
|
return obj; |
return obj; |
} |
} |
|
|
|
int get_typenum(char *dictionary) |
|
{ |
|
char *p; |
|
p = strstr(dictionary, "Type"); |
|
if (!p) { |
|
if((p = strstr(dictionary,"stream")) != NULL) |
|
return CONTENTS; |
|
return DATA; |
|
} |
|
for(p += 4;*p != '/';p++); |
|
p++; |
|
if (!strncmp(p, "Catalog", 7)) |
|
return CATALOG; |
|
if (!strncmp(p, "Page", 4)) { |
|
if(p[4] == 's') |
|
return PAGES; |
|
return PAGE; |
|
} |
|
return DATA; |
|
} |
|
|
|
void *get_object(int num,int *type) |
|
{ |
|
void *ret = NULL; |
|
char *dict; |
|
struct object **obj; |
|
for (obj = objects; obj; obj++) { |
|
if ((*obj)->obj_num == num) { |
|
if ((*obj)->parsed) { |
|
if(type) |
|
*type = (*obj)->type; |
|
return (*obj)->parsed; |
|
} |
|
dict = read_obj((*obj)->offset); |
|
(*obj)->type = get_typenum(dict); |
|
if(type) |
|
*type = (*obj)->type; |
|
if((*obj)->type == CATALOG) { |
|
ret = fill_catalog(dict); |
|
break; |
|
} |
|
if((*obj)->type == PAGES) { |
|
ret = fill_pages(dict); |
|
break; |
|
} |
|
if((*obj)->type == PAGE) { |
|
ret = fill_page(dict); |
|
break; |
|
} |
|
if((*obj)->type == CONTENTS) { |
|
ret = fill_contents(dict); |
|
break; |
|
} |
|
if((*obj)->type == RESOURCES) { |
|
ret = fill_resources(dict); |
|
break; |
|
} |
|
if((*obj)->type == DATA) { |
|
(*obj)->parsed = dict; |
|
return dict; |
|
} |
|
} |
|
|
|
} |
|
free(dict); |
|
(*obj)->parsed = ret; |
|
return ret; |
|
} |
|
|
/* add object to object structure */ |
/* add object to object structure */ |
void add_obj(char *s) |
void add_obj(char *s) |
{ |
{ |
int obj_num; |
long old_off; |
long offset, old_off; |
struct object *o = |
struct object *o = (struct object *) xcalloc(sizeof(struct object),1); |
(struct object *) xcalloc(sizeof(struct object), 1); |
sscanf(s, "%ld ", &offset); |
|
old_off = ftell(fp); |
old_off = ftell(fp); |
o->unparsed = read_obj(offset,&obj_num); |
sscanf(s, "%ld 00000 n", &o->offset); |
object[obj_num] = o; |
fseek(fp, o->offset, SEEK_SET); |
|
fget(s, XREFLINE, fp); |
|
sscanf(s, "%d 0 obj", &o->obj_num); |
fseek(fp, old_off, SEEK_SET); |
fseek(fp, old_off, SEEK_SET); |
|
objects = |
|
(struct object **) realloc(objects, obj_count * sizeof(char *)); |
|
objects[obj_count - 1] = o; |
|
obj_count++; |
return; |
return; |
} |
} |
|
|
/* get filter number */ |
/* get filter number */ |
char get_filternum(char *filter) |
char get_filternum(char *filter) |
{ |
{ |
if (!strncmp(filter, "/FlateDecode",12)) |
if (!strncmp(filter, "FlateDecode", 11)) |
return FLATEDECODE; |
return FLATEDECODE; |
if (!strncmp(filter, "/DCTDecode",10)) |
if (!strncmp(filter, "DCTDecode", 9)) |
return DCTDECODE; |
return DCTDECODE; |
return -1; |
return -1; |
} |
} |
Riadok 185 long get_startxref(void) |
|
Riadok 267 long get_startxref(void) |
|
|
|
void probe_ptree(int num) |
void probe_ptree(int num) |
{ |
{ |
char *p, **kids, **old; |
struct pages *pages; |
p = strstr(object[num]->unparsed, "/Page"); |
int type,a; |
if (!p) { |
pages = (struct pages *) get_object(num,NULL); |
/* error */ |
pages->kids++; |
return; |
a = atoi(*pages->kids); |
|
pages = (struct pages *) get_object(a,&type); |
|
while(type != PAGE) { |
|
a = atoi(pages->kids[1]); |
|
pages = (struct pages *) get_object(a,&type); |
} |
} |
if (p[5] == 's') |
page_tree[current_page] = a; |
object[num]->type = PAGES; |
|
else { |
|
page_tree[current_page] = num; |
|
current_page++; |
|
return; |
|
} |
|
p = strstr(object[num]->unparsed, "/Kids"); |
|
if (!p) { |
|
/* error */ |
|
return; |
|
} |
|
old = kids = array(&p[6]); |
|
for (kids++; **kids != ']'; kids += 3) { |
|
probe_ptree(atoi(*kids)); |
|
free(kids[0]); |
|
free(kids[1]); |
|
free(kids[2]); |
|
} |
|
free(old); |
|
return; |
return; |
|
/* while(1) { |
|
if(**pages->kids == ']' && !pages->parent) |
|
break; |
|
if(**pages->kids == ']') { |
|
free(pages->old_kids); |
|
pages = (struct pages *) get_object(pages->parent,NULL); |
|
} |
|
num = atoi(*pages->kids); |
|
p = (struct pages *) get_object(num,&type); |
|
if(type == PAGE) { |
|
page_tree[current_page] = num; |
|
current_page++; |
|
pages->kids += 3; |
|
continue; |
|
} |
|
pages->kids += 3; |
|
pages = p; |
|
} |
|
return;*/ |
} |
} |
|
|
/* find objects and add them to the object structure */ |
/* find objects and add them to the object structure */ |
Riadok 223 void parse_xref(void) |
|
Riadok 309 void parse_xref(void) |
|
fget(xref, XREFLINE, fp); |
fget(xref, XREFLINE, fp); |
if (!strstr(xref, "xref")) { |
if (!strstr(xref, "xref")) { |
fprintf(stderr, "PDF document si corrupted!\n"); |
fprintf(stderr, "PDF document si corrupted!\n"); |
/* reconstruction */ |
/* reconstruction ? */ |
return; |
return; |
} |
} |
object = NULL; |
objects = NULL; |
obj_count = 0; |
obj_count = 1; |
{ |
do { |
int entry[2]; |
fget(xref, XREFLINE, fp); |
do { |
do { |
fget(xref, XREFLINE, fp); |
fget(xref, XREFLINE, fp); |
sscanf(xref, "%d %d", &entry[0], &entry[1]); |
if (!strncmp(xref, "trailer", 7)) |
if (entry[0] + entry[1] > obj_count) { |
|
object = (struct object **) |
|
realloc(object, entry[0] + entry[1]); |
|
obj_count = entry[0] + entry[1]; |
|
} |
|
do { |
|
fget(xref, XREFLINE, fp); |
|
if (!strncmp(xref,"trailer",7)) |
|
break; |
|
if (xref[XREFLINE - 3] == 'f') |
|
continue; |
|
add_obj(&xref[0]); |
|
} while (1); |
|
if (memchr(xref, '<', strlen(xref)) == NULL) |
|
fseek(fp, 3, SEEK_CUR); |
|
fget(xref, XREFLINE, fp); |
|
trailer.prev = 0; |
|
while (xref[0] != '>' && xref[1] != '>') { |
|
if ((p = strstr(xref, "/Root")) != NULL) |
|
sscanf(p, |
|
"/Root %d 0 R", |
|
&trailer.root); |
|
if ((p = strstr(xref, "/Info")) != NULL) |
|
sscanf(xref, |
|
"/Info %d 0 R", |
|
&trailer.info); |
|
if ((p = strstr(xref, "/Prev")) != NULL) |
|
sscanf(xref, "/Prev %ld", |
|
&trailer.prev); |
|
if ((p = strstr(xref, "/Encrypt")) != NULL) |
|
sscanf(xref, |
|
"/Encrypt %d 0 R", |
|
&trailer.encrypt); |
|
fget(xref, XREFLINE, fp); |
|
} |
|
if (!trailer.prev) |
|
break; |
break; |
fseek(fp, trailer.prev, SEEK_SET); |
if (xref[XREFLINE - 3] == 'f') |
|
continue; |
|
add_obj(&xref[0]); |
} while (1); |
} while (1); |
} |
{ |
|
char *tline = (char *) malloc(3 * XREFLINE); |
|
fget(tline, 3 * XREFLINE, fp); |
|
trailer.prev = 0; |
|
while (tline[0] != '>' && tline[1] != '>') { |
|
if ((p = strstr(tline, "Root")) != NULL) |
|
sscanf(p, "Root %d 0 R", &trailer.root); |
|
if ((p = strstr(tline, "Info")) != NULL) |
|
sscanf(p, "Info %d 0 R", &trailer.info); |
|
if ((p = strstr(tline, "Prev")) != NULL) |
|
sscanf(p, "Prev %ld", &trailer.prev); |
|
if ((p = strstr(tline, "Encrypt")) != NULL) |
|
sscanf(p, |
|
"Encrypt %d 0 R", &trailer.encrypt); |
|
fget(tline, 3 * XREFLINE, fp); |
|
} |
|
free(tline); |
|
} |
|
if (!trailer.prev) |
|
break; |
|
fseek(fp, trailer.prev, SEEK_SET); |
|
} while (1); |
return; |
return; |
} |
} |
|
|
int parse_catalog(void) |
int parse_catalog(void) |
{ |
{ |
char *p; |
struct catalog *c = NULL; |
struct catalog *c; |
c = (struct catalog *) get_object(trailer.root,NULL); |
object[trailer.root]->type = CATALOG; |
if (!c) { |
p = strstr(object[trailer.root]->unparsed, "/Pages"); |
|
if (!p) { |
|
/* error */ |
/* error */ |
} |
} |
object[trailer.root]->parsed = c = |
|
(struct catalog *) xmalloc(sizeof(struct catalog)); |
|
sscanf(p, "/Pages %d 0 R", &c->pages); |
|
free(object[trailer.root]->unparsed); |
|
object[trailer.root]->unparsed = NULL; |
|
return c->pages; |
return c->pages; |
} |
} |
|
|
void parse_pdf(void) |
void parse_pdf(void) |
{ |
{ |
int ret, count; |
int ret; |
char *p; |
struct pages *pages; |
parse_xref(); |
parse_xref(); |
ret = parse_catalog(); |
ret = parse_catalog(); |
p = strstr(object[ret]->unparsed, "/Count"); |
pages = (struct pages *) get_object(ret,NULL); |
sscanf(p, "/Count %d", &count); |
page_tree = (int *) xcalloc(pages->count, 1); |
page_tree = (int *) malloc(count); |
|
current_page = 0; |
current_page = 0; |
|
ret = parse_catalog(); |
probe_ptree(ret); |
probe_ptree(ret); |
current_page = 0; |
current_page = 0; |
loop(); |
loop(); |
return; |
return; |
} |
} |
|
|
void fill_page(int page_num) |
struct catalog *fill_catalog(char *dictionary) |
|
{ |
|
char *p; |
|
struct catalog *catalog = (struct catalog *) xcalloc(sizeof(struct catalog),1); |
|
p = strstr(dictionary,"Pages"); |
|
if(!p) { |
|
/* error*/ |
|
return NULL; |
|
} |
|
sscanf(p,"Pages %d 0 R",&catalog->pages); |
|
return catalog; |
|
} |
|
|
|
struct pages *fill_pages(char *dictionary) |
|
{ |
|
char *p; |
|
struct pages *pages = |
|
(struct pages *) xcalloc(sizeof(struct pages), 1); |
|
p = strstr(dictionary, "Count"); |
|
if (!p) { |
|
/* error */ |
|
return NULL; |
|
} |
|
sscanf(p, "Count %d", &pages->count); |
|
p = strstr(dictionary, "Kids"); |
|
if (!p) { |
|
/* error */ |
|
return NULL; |
|
} |
|
pages->old_kids = pages->kids = array(p + 5); |
|
p = strstr(dictionary, "Parent"); |
|
if(p) |
|
sscanf(p,"Parent %d 0 R",&pages->parent); |
|
return pages; |
|
} |
|
|
|
struct page *fill_page(char *dictionary) |
{ |
{ |
struct page *page; |
struct page *page; |
struct object *obj; |
char *p, **mediabox; |
char *p, **mediabox,dict = 0; |
int ref; |
int contents, resources; |
page = (struct page *) xcalloc(sizeof(struct page), 1); |
if (object[page_num]->type == PAGE) |
p = strstr(dictionary, "Contents"); |
return; |
|
page = (struct page *) xmalloc(sizeof(struct page)); |
|
obj = object[page_num]; |
|
p = strstr(obj->unparsed, "/Contents"); |
|
if (!p) { |
if (!p) { |
/* error */ |
/* error */ |
return; |
return NULL; |
} |
} |
if (p[11] == '<') { /* if /Contents << */ |
if (p[9] == '<') { /* if /Contents << */ |
contents = page_num; |
page->contents = fill_contents(dictionary); |
dict = 1; |
} else { |
} |
sscanf(p, "Contents %d 0 R", &ref); |
else |
page->contents = (struct contents *) get_object(ref,NULL); |
sscanf(p, "/Contents %d 0 R", &contents); |
} |
page->contents = fill_contents(contents,dict); |
p = strstr(dictionary, "Resources"); |
p = strstr(obj->unparsed, "/Resources"); |
|
if (!p) { |
if (!p) { |
/* error */ |
/* error */ |
return; |
return NULL; |
} |
} |
if (p[11] == '<') { /* if /Resources << */ |
if (p[10] == '<') /* if /Resources << */ |
resources = page_num; |
page->resources = fill_resources(dictionary); |
dict = 1; |
else { |
} |
sscanf(p, "Resources %d 0 R", &ref); |
else |
page->resources = (struct resources *) get_object(ref,NULL); |
sscanf(p, "/Resources %d 0 R", &resources); |
} |
page->resources = fill_resources(resources,dict); |
p = strstr(dictionary, "MediaBox"); |
p = strstr(obj->unparsed, "/MediaBox"); |
|
if (!p) { |
if (!p) { |
/* error */ |
/* error */ |
return; |
return NULL; |
} |
} |
mediabox = array(&p[10]); |
mediabox = array(&p[9]); |
page->mediabox[0] = (short) strtoul(mediabox[1], NULL, 10); |
page->mediabox[0] = (short) strtoul(mediabox[1], NULL, 10); |
free(mediabox[1]); |
free(mediabox[1]); |
page->mediabox[1] = (short) strtoul(mediabox[2], NULL, 10); |
page->mediabox[1] = (short) strtoul(mediabox[2], NULL, 10); |
Riadok 358 void fill_page(int page_num) |
|
Riadok 454 void fill_page(int page_num) |
|
page->mediabox[3] = (short) strtoul(mediabox[4], NULL, 10); |
page->mediabox[3] = (short) strtoul(mediabox[4], NULL, 10); |
free(mediabox[4]); |
free(mediabox[4]); |
free(mediabox); |
free(mediabox); |
p = strstr(obj->unparsed, "/Parent"); |
p = strstr(dictionary, "Parent"); |
if (!p) { |
if (!p) { |
/* error */ |
/* error */ |
return; |
return NULL; |
} |
} |
sscanf(p, "/Parent %d 0 R", &page->parent); |
sscanf(p, "Parent %d 0 R", &page->parent); |
p = strstr(obj->unparsed, "/Rotate"); |
p = strstr(dictionary, "Rotate"); |
if(p) |
if (p) |
sscanf(p, "/Rotate %hd",&page->rotate); |
sscanf(p, "/Rotate %hd", &page->rotate); |
free(obj->unparsed); |
return page; |
obj->unparsed = NULL; |
|
obj->type = PAGE; |
|
obj->parsed = page; |
|
return; |
|
} |
} |
|
|
struct contents *fill_contents(int page_num) |
struct contents *fill_contents(char *dictionary) |
{ |
{ |
struct contents *content; |
struct contents *content; |
struct object *obj; |
|
struct page *page; |
|
char *p; |
char *p; |
obj = object[page_num]; |
content = (struct contents *) xcalloc(sizeof(struct contents), 1); |
content = (struct contents *) xmalloc(sizeof(struct contents)); |
p = strstr(dictionary, "Length"); |
p = strstr(obj->unparsed, "/Length"); |
|
if (!p) { |
if (!p) { |
/* error */ |
/* error */ |
free(content); |
free(content); |
return NULL; |
return NULL; |
} |
} |
sscanf(p, "/Length %d", &content->length); |
|
p = strstr(obj->unparsed, "/Filter"); |
sscanf(p, "Length %d", &content->length); |
|
for(p += 6;*p != '/';p++) |
|
if(!strncmp(p,"0 R",3)) { |
|
p = (char *) get_object(content->length,NULL); |
|
sscanf(p,"%d\nendobj",&content->length); |
|
break; |
|
} |
|
p = strstr(dictionary, "Filter"); |
if (!p) { |
if (!p) { |
/* error */ |
/* error */ |
free(content); |
free(content); |
return NULL; |
return NULL; |
} |
} |
content->filter = get_filternum(&p[8]); |
content->filter = get_filternum(&p[8]); |
p = strstr(obj->unparsed, "stream"); |
p = strstr(dictionary, "stream"); |
if (!p) { |
if (!p) { |
/* error */ |
/* error */ |
free(content); |
free(content); |
Riadok 404 struct contents *fill_contents(int page_ |
|
Riadok 500 struct contents *fill_contents(int page_ |
|
} |
} |
content->stream = (unsigned char *) xmalloc(content->length); |
content->stream = (unsigned char *) xmalloc(content->length); |
strncpy((char *) content->stream, &p[7], content->length); |
strncpy((char *) content->stream, &p[7], content->length); |
if(!dict) { |
|
obj->type = CONTENT; |
|
free(obj->unparsed); |
|
obj->unparsed = NULL; |
|
obj->parsed = content; |
|
} |
|
return content; |
return content; |
} |
} |
|
|
struct resources *fill_resources(int page_num) |
struct resources *fill_resources(char *dictionary) |
{ |
{ |
struct resources *resources; |
struct resources *resources; |
struct object *obj; |
resources = |
struct page *page; |
(struct resources *) xcalloc(sizeof(struct resources), 1); |
obj = object[page_num]; |
|
resources = (struct resources *) xmalloc(sizeof(struct resources)); |
|
return resources; |
return resources; |
} |
} |