#include #include #include #include #include #include #include #include #include "cpdf.h" #include "parse.h" #include "filter.h" #include "utils.h" /* function compute number of locations of char c from buffer to character c */ int strlenc(char *buffer, char c, char to) { int i = 0; while (*buffer && *buffer != to) { if (*buffer == c) i++; buffer++; } return i; } /* function compute lenght of string to char to */ int strlento(char *buffer, char to) { char *old = buffer; while (*buffer && *buffer != to) buffer++; return buffer - old; } /* convert hex to bin */ char *hex(char *buffer) { char *hex_array, *end; char num[5] = { '0', 'x', '\0', '\0', '\0' }; int i; if (*buffer != '<') return NULL; if ((i = strlento(buffer + 1, '>') >> 1) == 0) return NULL; end = hex_array = (char *) xmalloc(i); for (buffer++; *buffer != '>'; hex_array++) { num[2] = *buffer; if (*(buffer + 1) == '>') { num[3] = '0'; buffer++; } else { num[3] = *(buffer + 1); buffer += 2; } *hex_array = (char) strtoul((char *) &num, NULL, 16); } return end; } /* interpreter for special characters in name string */ char *name(char *buffer) { char *ret, *end; char num[5] = { '0', 'x', '\0', '\0', '\0' }; if (*buffer != '/') return NULL; if (memchr(buffer, '#', MAXNAMELEN) == NULL) return buffer; end = ret = (char *) xmalloc(MAXNAMELEN); while (*buffer && *buffer != ' ') { if (*buffer == '#') { num[2] = *(buffer + 1); num[3] = *(buffer + 2); *ret = (char) strtoul(num, NULL, 16); ret++; buffer += 3; } *ret = *buffer; ret++; buffer++; } return end; } char **array(char *buffer) { char **ret = NULL,*last; static char delim[2] = { '[', ']' }; int index = 0, len,end = 0; last = buffer + 1; do { ret = (char **) realloc(ret, (index + 1) * sizeof(char **)); if (*buffer == '[' || *buffer == ']') { if (*buffer == '[') { ret[index] = &delim[0]; end++; } else { ret[index] = &delim[1]; end--; } buffer++; index++; continue; } while(1) { while (*buffer != ' ' && *buffer != ']' && *buffer != '\n') buffer++; if(buffer - last == 0) { buffer++; continue; } else break; } len = buffer - last; ret[index] = (char *) xcalloc(len + 1, sizeof(char)); strncpy(ret[index], last, len); index++; if (*buffer != ']') last = ++buffer; else last = buffer; } while (end); return ret; } #define BUFLEN 255 char *read_obj(long offset) { char entry[BUFLEN], *obj = NULL; int size = 0; fseek(fp, offset, SEEK_SET); fget(entry, BUFLEN, fp); do { fget(entry, BUFLEN, fp); size += strlen(entry) + 1; obj = resize(obj, size); strcat(obj, entry); } while (!strstr(entry, "endobj")); return obj; } int get_typenum(char *dictionary) { char *p; p = strstr(dictionary, "Type"); if (!p) { if((p = strstr(dictionary,"stream")) != NULL) return CONTENTS; return DATA; } for(p += 4;*p != '/';p++); p++; if (!strncmp(p, "Catalog", 7)) return CATALOG; if (!strncmp(p, "Page", 4)) { if(p[4] == 's') return PAGES; return PAGE; } return DATA; } void *get_object(int num,int *type) { void *ret = NULL; char *dict; struct object **obj; for (obj = objects; obj; obj++) { if ((*obj)->obj_num == num) { if ((*obj)->parsed) { if(type) *type = (*obj)->type; return (*obj)->parsed; } dict = read_obj((*obj)->offset); (*obj)->type = get_typenum(dict); if(type) *type = (*obj)->type; if((*obj)->type == CATALOG) { ret = fill_catalog(dict); break; } if((*obj)->type == PAGES) { ret = fill_pages(dict); break; } if((*obj)->type == PAGE) { ret = fill_page(dict); break; } if((*obj)->type == CONTENTS) { ret = fill_contents(dict); break; } if((*obj)->type == RESOURCES) { ret = fill_resources(dict); break; } if((*obj)->type == DATA) { (*obj)->parsed = dict; return dict; } } } free(dict); (*obj)->parsed = ret; return ret; } /* add object to object structure */ void add_obj(char *s) { long old_off; struct object *o = (struct object *) xcalloc(sizeof(struct object), 1); old_off = ftell(fp); sscanf(s, "%ld 00000 n", &o->offset); fseek(fp, o->offset, SEEK_SET); fget(s, XREFLINE, fp); sscanf(s, "%d 0 obj", &o->obj_num); fseek(fp, old_off, SEEK_SET); objects = (struct object **) realloc(objects, obj_count * sizeof(char *)); objects[obj_count - 1] = o; obj_count++; return; } /* get filter number */ char get_filternum(char *filter) { if (!strncmp(filter, "FlateDecode", 11)) return FLATEDECODE; if (!strncmp(filter, "DCTDecode", 9)) return DCTDECODE; return -1; } /* get offset of start xref table */ long get_startxref(void) { char startxref[XREFLINE]; int i = XREFLINE - 2; long offset; startxref[XREFLINE - 1] = '\0'; fstat(fp->_fileno, &fpstat); offset = fpstat.st_size - EOFSIZE; fseek(fp, offset, SEEK_SET); while (1) { startxref[i] = fgetc(fp); if (startxref[i] == '\r' || startxref[i] == '\n') break; i--; offset--; fseek(fp, offset, SEEK_SET); } return strtoul(&startxref[i + 1], NULL, 10); } void probe_ptree(int num) { struct pages *pages; int type,a; pages = (struct pages *) get_object(num,NULL); pages->kids++; a = atoi(*pages->kids); pages = (struct pages *) get_object(a,&type); while(type != PAGE) { a = atoi(pages->kids[1]); pages = (struct pages *) get_object(a,&type); } page_tree[current_page] = a; return; /* while(1) { if(**pages->kids == ']' && !pages->parent) break; if(**pages->kids == ']') { free(pages->old_kids); pages = (struct pages *) get_object(pages->parent,NULL); } num = atoi(*pages->kids); p = (struct pages *) get_object(num,&type); if(type == PAGE) { page_tree[current_page] = num; current_page++; pages->kids += 3; continue; } pages->kids += 3; pages = p; } return;*/ } /* find objects and add it to object structure */ void parse_xref(void) { char xref[XREFLINE]; char *p; fseek(fp, get_startxref(), SEEK_SET); fget(xref, XREFLINE, fp); if (!strstr(xref, "xref")) { fprintf(stderr, "PDF document si corrupted!\n"); /* reconstruction ? */ return; } objects = NULL; obj_count = 1; do { fget(xref, XREFLINE, fp); do { fget(xref, XREFLINE, fp); if (!strncmp(xref, "trailer", 7)) break; if (xref[XREFLINE - 3] == 'f') continue; add_obj(&xref[0]); } while (1); { char *tline = (char *) malloc(3 * XREFLINE); fget(tline, 3 * XREFLINE, fp); trailer.prev = 0; while (tline[0] != '>' && tline[1] != '>') { if ((p = strstr(tline, "Root")) != NULL) sscanf(p, "Root %d 0 R", &trailer.root); if ((p = strstr(tline, "Info")) != NULL) sscanf(p, "Info %d 0 R", &trailer.info); if ((p = strstr(tline, "Prev")) != NULL) sscanf(p, "Prev %ld", &trailer.prev); if ((p = strstr(tline, "Encrypt")) != NULL) sscanf(p, "Encrypt %d 0 R", &trailer.encrypt); fget(tline, 3 * XREFLINE, fp); } free(tline); } if (!trailer.prev) break; fseek(fp, trailer.prev, SEEK_SET); } while (1); return; } int parse_catalog(void) { struct catalog *c = NULL; c = (struct catalog *) get_object(trailer.root,NULL); if (!c) { /* error */ } return c->pages; } void parse_pdf(void) { int ret; struct pages *pages; parse_xref(); ret = parse_catalog(); pages = (struct pages *) get_object(ret,NULL); page_tree = (int *) xcalloc(pages->count, 1); current_page = 0; ret = parse_catalog(); probe_ptree(ret); current_page = 0; loop(); return; } struct catalog *fill_catalog(char *dictionary) { char *p; struct catalog *catalog = (struct catalog *) xcalloc(sizeof(struct catalog),1); p = strstr(dictionary,"Pages"); if(!p) { /* error*/ return NULL; } sscanf(p,"Pages %d 0 R",&catalog->pages); return catalog; } struct pages *fill_pages(char *dictionary) { char *p; struct pages *pages = (struct pages *) xcalloc(sizeof(struct pages), 1); p = strstr(dictionary, "Count"); if (!p) { /* error */ return NULL; } sscanf(p, "Count %d", &pages->count); p = strstr(dictionary, "Kids"); if (!p) { /* error */ return NULL; } pages->old_kids = pages->kids = array(p + 5); p = strstr(dictionary, "Parent"); if(p) sscanf(p,"Parent %d 0 R",&pages->parent); return pages; } struct page *fill_page(char *dictionary) { struct page *page; char *p, **mediabox; int ref; page = (struct page *) xcalloc(sizeof(struct page), 1); p = strstr(dictionary, "Contents"); if (!p) { /* error */ return NULL; } if (p[9] == '<') { /* if /Contents << */ page->contents = fill_contents(dictionary); } else { sscanf(p, "Contents %d 0 R", &ref); page->contents = (struct contents *) get_object(ref,NULL); } p = strstr(dictionary, "Resources"); if (!p) { /* error */ return NULL; } if (p[10] == '<') /* if /Resources << */ page->resources = fill_resources(dictionary); else { sscanf(p, "Resources %d 0 R", &ref); page->resources = (struct resources *) get_object(ref,NULL); } p = strstr(dictionary, "MediaBox"); if (!p) { /* error */ return NULL; } mediabox = array(&p[9]); page->mediabox[0] = (short) strtoul(mediabox[1], NULL, 10); free(mediabox[1]); page->mediabox[1] = (short) strtoul(mediabox[2], NULL, 10); free(mediabox[2]); page->mediabox[2] = (short) strtoul(mediabox[3], NULL, 10); free(mediabox[3]); page->mediabox[3] = (short) strtoul(mediabox[4], NULL, 10); free(mediabox[4]); free(mediabox); p = strstr(dictionary, "Parent"); if (!p) { /* error */ return NULL; } sscanf(p, "Parent %d 0 R", &page->parent); p = strstr(dictionary, "Rotate"); if (p) sscanf(p, "/Rotate %hd", &page->rotate); return page; } struct contents *fill_contents(char *dictionary) { struct contents *content; char *p,*endstream; int i = 0; content = (struct contents *) xcalloc(sizeof(struct contents), 1); p = strstr(dictionary, "Length"); if (!p) { /* error */ free(content); return NULL; } sscanf(p, "Length %d", &content->length); for(p += 6;*p != '/';p++) if(!strncmp(p,"0 R",3)) { p = (char *) get_object(content->length,NULL); sscanf(p,"%d\nendobj",&content->length); break; } p = strstr(dictionary, "Filter"); if (!p) { /* error */ free(content); return NULL; } content->filter = get_filternum(&p[8]); p = strstr(dictionary, "stream") + sizeof("stream"); if (!p) { /* error */ free(content); return NULL; } content->stream = (unsigned char *) xcalloc(content->length,1); memcpy(content->stream,p,content->length); return content; } struct resources *fill_resources(char *dictionary) { struct resources *resources; resources = (struct resources *) xcalloc(sizeof(struct resources), 1); return resources; }