#include #include #include #include #include #include #include #include "cpdf.h" #include "parse.h" #include "filter.h" #include "utils.h" /* function compute number of locations of char c from buffer to character c */ int strlenc(char *buffer, char c, char to) { int i = 0; while (*buffer && *buffer != to) { if (*buffer == c) i++; buffer++; } return i; } /* function compute lenght of string to char to */ int strlento(char *buffer, char to) { char *old = buffer; while (*buffer && *buffer != to) buffer++; return buffer - old; } /* convert hex to bin */ char *hex(char *buffer) { char *hex_array, *end; char num[5] = { '0', 'x', '\0', '\0', '\0' }; int i; if (*buffer != '<') return NULL; if ((i = strlento(buffer + 1, '>') >> 1) == 0) return NULL; end = hex_array = (char *) xmalloc(i); for (buffer++; *buffer != '>'; hex_array++) { num[2] = *buffer; if (*(buffer + 1) == '>') { num[3] = '0'; buffer++; } else { num[3] = *(buffer + 1); buffer += 2; } *hex_array = (char) strtoul((char *) &num, NULL, 16); } return end; } /* interpreter for special characters in name string */ char *name(char *buffer) { char *ret, *end; char num[5] = { '0', 'x', '\0', '\0', '\0' }; if (*buffer != '/') return NULL; if (memchr(buffer, '#', MAXNAMELEN) == NULL) return buffer; end = ret = (char *) xmalloc(MAXNAMELEN); while (*buffer && *buffer != ' ') { if (*buffer == '#') { num[2] = *(buffer + 1); num[3] = *(buffer + 2); *ret = (char) strtoul(num, NULL, 16); ret++; buffer += 3; } *ret = *buffer; ret++; buffer++; } return end; } char **array(char *buffer) { char **ret = NULL, *last; static char delim[4] = { '[', '\0', ']', '\0' }; int index = 0, len; last = buffer + 1; do { ret = (char **) realloc(ret, (index + 1) * sizeof(char **)); if (*buffer == '[' || *buffer == ']') { if (*buffer == '[') ret[index] = &delim[0]; else ret[index] = &delim[2]; buffer++; index++; continue; } while (*buffer != ' ' && *buffer != ']' && *buffer != '\n') buffer++; len = buffer - last; ret[index] = (char *) xcalloc(len + 1, sizeof(char)); strncpy(ret[index], last, len); index++; if (*buffer != ']') last = ++buffer; else last = buffer; } while (*buffer != '\n'); return ret; } #define BUFLEN 255 void read_obj(long offset) { char buffer[BUFLEN]; int obj_num, size = 0; struct object *obj = (struct object *) xcalloc(sizeof(struct object), 1); fseek(fp, offset, SEEK_SET); fget(buffer, BUFLEN, fp); sscanf(buffer, "%d ", &obj_num); fget(buffer, BUFLEN, fp); if (buffer[0] == '<' && buffer[1] == '<') fget(buffer, BUFLEN, fp); while (!strstr(buffer, "endobj")) { size += strlen(buffer) + 1; obj->unparsed = realloc(obj->unparsed, size); strcat(obj->unparsed, buffer); fget(buffer, BUFLEN, fp); } object[obj_num] = obj; return; } /* add object to object structure */ void add_obj(char *s) { long offset, old_off; sscanf(s, "%ld ", &offset); old_off = ftell(fp); read_obj(offset); fseek(fp, old_off, SEEK_SET); } /* get filter number */ char get_filternum(char *filter) { if (!strcmp(filter, "/FlateDecode")) return FLATEDECODE; if (!strcmp(filter, "/DCTDecode")) return DCTDECODE; return -1; } /* get offset of start xref table */ long get_startxref(void) { char startxref[XREFLINE]; long offset; int i = XREFLINE - 2; startxref[XREFLINE - 1] = '\0'; fstat(fp->_fileno, &fpstat); offset = fpstat.st_size - 8; fseek(fp, offset, SEEK_SET); while (1) { startxref[i] = fgetc(fp); if (startxref[i] == '\r' || startxref[i] == '\n') break; i--; offset--; fseek(fp, offset, SEEK_SET); } return strtoul(&startxref[i + 1], NULL, 10); } void probe_ptree(int num) { char *p, **kids, **old; p = strstr(object[num]->unparsed, "/Page"); if (!p) { /* error */ return; } if (p[5] == 's') object[num]->type = PAGES; else { page_tree[current_page] = num; current_page++; return; } p = strstr(object[num]->unparsed, "/Kids "); if (!p) { /* error */ return; } old = kids = array(&p[6]); for (kids++; **kids != ']'; kids += 3) { probe_ptree(atoi(*kids)); free(kids[0]); free(kids[1]); free(kids[2]); } free(old); return; } /* find objects and add it to object structure */ void xref(void) { char xref[XREFLINE]; long offset = 0; offset = get_startxref(); fseek(fp, offset, SEEK_SET); fget(xref, XREFLINE, fp); if (!strstr(xref, "xref")) { /* error */ return; } object = NULL; obj_count = 0; { int entry[2]; do { fget(xref, XREFLINE, fp); sscanf(xref, "%d %d", &entry[0], &entry[1]); if (entry[0] + entry[1] > obj_count) { object = (struct object **) realloc(object, entry[0] + entry[1]); obj_count = entry[0] + entry[1]; } do { fget(xref, XREFLINE, fp); if(xref[XREFLINE - 3] == 'f') continue; add_obj(&xref[0]); } while (strncmp(xref, "trailer", 7)); fseek(fp, 3, SEEK_CUR); fget(xref, XREFLINE, fp); trailer.prev = 0; while (xref[0] != '>' && xref[1] != '>') { if (!strncmp(xref, "/Root", 5)) sscanf(xref, "/Root %d 0 R \n", &trailer.root); if (!strncmp(xref, "/Info", 5)) sscanf(xref, "/Info %d 0 R \n", &trailer.info); if (!strncmp(xref, "/Prev", 5)) sscanf(xref, "/Prev %ld", &trailer.prev); if (!strncmp(xref, "/Encrypt",8)) sscanf(xref, "/Encrypt %d 0 R \n", &trailer.encrypt); fget(xref, XREFLINE, fp); } if (!trailer.prev) break; fseek(fp, trailer.prev, SEEK_SET); } while (1); } return; } int parse_catalog(void) { char *p; struct catalog *c; object[trailer.root]->type = CATALOG; p = strstr(object[trailer.root]->unparsed, "/Pages"); if (!p) { /* error */ } object[trailer.root]->parsed = c = (struct catalog *) xmalloc(sizeof(struct catalog)); sscanf(p, "/Pages %d 0 R \n", &c->pages); free(object[trailer.root]->unparsed); object[trailer.root]->unparsed = NULL; return c->pages; } void parse_pdf(void) { int ret, count; char *p; xref(); ret = parse_catalog(); p = strstr(object[ret]->unparsed, "/Count "); sscanf(p, "/Count %d \n", &count); page_tree = (int *) malloc(count); current_page = 0; probe_ptree(ret); return; } void fill_page(int page_num) { struct page *page; struct object *obj; char *p, **mediabox; int contents, resources; if (object[page_num]->type == PAGE) return; page = (struct page *) xmalloc(sizeof(struct page)); obj = object[page_num]; p = strstr(obj->unparsed, "/Contents "); if (!p) { /* error */ return; } if(p[11] == '<') /* if /Contents << */ contents = page_num; else sscanf(p, "/Contents %d 0 R", &contents); p = strstr(obj->unparsed, "/Resources "); if (!p) { /* error */ return; } if(p[11] == '<') /* if /Resources << */ resources = page_num; else sscanf(p, "/Resources %d 0 R", &resources); page->contents = fill_contents(contents); page->resources = fill_resources(resources); p = strstr(obj->unparsed, "/Mediabox "); if (!p) { /* error */ return; } mediabox = array(&p[11]); page->mediabox[0] = (short) strtoul(mediabox[1], NULL, 10); free(mediabox[1]); page->mediabox[1] = (short) strtoul(mediabox[2], NULL, 10); free(mediabox[2]); page->mediabox[2] = (short) strtoul(mediabox[3], NULL, 10); free(mediabox[3]); page->mediabox[3] = (short) strtoul(mediabox[4], NULL, 10); free(mediabox[4]); free(mediabox); p = strstr(obj->unparsed, "/Parent "); if (!p) { /* error */ return; } sscanf(p, "/Parent %d 0 R", &page->parent); free(obj->unparsed); obj->unparsed = NULL; obj->type = PAGE; obj->parsed = page; return; } struct contents *fill_contents(int page_num) { struct contents *content; struct object *obj; struct page *page; char *p; obj = object[page_num]; page = (struct page *) obj->parsed; if(page->contents) return page->contents; content = (struct contents *) xmalloc(sizeof(struct contents)); p = strstr(obj->unparsed, "/Length "); if (!p) { /* error */ free(content); return NULL; } sscanf(p, "/Length %d", &content->length); p = strstr(obj->unparsed, "/Filter "); if (!p) { /* error */ free(content); return NULL; } content->filter = get_filternum(&p[9]); p = strstr(obj->unparsed, "stream"); if (!p) { /* error */ free(content); return NULL; } content->stream = (char *) xmalloc(content->length); strncpy(content->stream, &p[7], content->length); return content; } struct resources *fill_resources(int page_num) { struct resources *resources; struct object *obj; struct page *page; obj = object[page_num]; page = obj->parsed; if(page->resources) return page->resources; resources = (struct resources *) xmalloc(sizeof(struct resources)); return resources; }