Platon Technologies
neprihlásený Prihlásiť Registrácia
SlovakEnglish
open source software development oslavujeme 10 rokov vývoja otvoreného softvéru! Štvrtok, 28. marec 2024

Súbor: [Platon] / cpdf / parse.c (stiahnutie)

Revízia 1.16, Sat Oct 26 06:27:52 2002 UTC (21 years, 5 months ago) by lynx

Zmeny od 1.15: +50 -28 [lines]

Sync.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "cpdf.h"
#include "parse.h"
#include "filter.h"
#include "utils.h"
/* Count the occurrences of character c in buffer.  Scanning starts at
 * buffer and stops at the first occurrence of character `to` or at the
 * terminating NUL, whichever comes first; the stop character itself is
 * never counted.
 */
int strlenc(char *buffer, char c, char to)
{
    int hits = 0;
    char *cur;

    for (cur = buffer; *cur != '\0' && *cur != to; cur++) {
        if (*cur == c)
            hits += 1;
    }
    return hits;
}

/* Return the number of characters in buffer that precede the first
 * occurrence of character `to`, or the full string length when `to`
 * does not occur before the terminating NUL.
 */
int strlento(char *buffer, char to)
{
    int len = 0;

    for (;;) {
        char ch = buffer[len];

        if (ch == '\0' || ch == to)
            break;
        len++;
    }
    return len;
}

/* Decode a hexadecimal string of the form "<hhhh...>" into raw bytes.
 *
 * buffer must point at the opening '<'.  Every pair of characters up to
 * the closing '>' becomes one output byte; when the number of characters
 * is odd the missing final digit is taken to be '0'.
 *
 * Returns a block obtained from xmalloc() holding (digits + 1) / 2 bytes.
 * The block is not NUL-terminated and its length is not reported, so the
 * caller has to derive it from the input.  Returns NULL when buffer is
 * NULL, does not start with '<', has no closing '>' before the end of the
 * string, or contains no digits at all.
 */
char *hex(char *buffer)
{
    char num[5] = { '0', 'x', '\0', '\0', '\0' };
    char *bytes, *out;
    int ndigits, nbytes;

    if (buffer == NULL || *buffer != '<')
        return NULL;
    buffer++;

    ndigits = strlento(buffer, '>');
    /* strlento() also stops at NUL: refuse unterminated input instead of
       walking past the end of the string while decoding. */
    if (buffer[ndigits] != '>')
        return NULL;

    /* Round up so that an odd trailing digit gets a byte of its own. */
    nbytes = (ndigits + 1) >> 1;
    if (nbytes == 0)
        return NULL;

    out = bytes = (char *) xmalloc(nbytes);
    while (*buffer != '>') {
        num[2] = *buffer++;
        if (*buffer == '>')
            num[3] = '0';       /* odd digit count: pad with zero */
        else
            num[3] = *buffer++;
        *out++ = (char) strtoul(num, NULL, 16);
    }
    return bytes;
}

/* Value of one hexadecimal digit, or -1 when ch is not a hex digit. */
static int name_hexval(char ch)
{
    if (ch >= '0' && ch <= '9')
        return ch - '0';
    if (ch >= 'a' && ch <= 'f')
        return ch - 'a' + 10;
    if (ch >= 'A' && ch <= 'F')
        return ch - 'A' + 10;
    return -1;
}

/* Interpret "#xx" escapes in a name string.
 *
 * buffer must start with '/'.  When no '#' occurs in the first MAXNAMELEN
 * characters of the string, buffer itself is returned unchanged (no copy
 * is made).  Otherwise a new block is obtained from xmalloc() and filled
 * with the characters of buffer up to the first space or the end of the
 * string, each "#xx" (two hex digits) replaced by the byte it encodes.
 * A '#' that is not followed by two hex digits is copied literally.
 * At most MAXNAMELEN bytes are produced and the result is NUL-terminated.
 *
 * Returns NULL when buffer is NULL or does not start with '/'.
 * NOTE(review): the caller cannot tell from the return value alone
 * whether it received buffer or a fresh allocation; compare the pointer
 * with the argument before freeing.
 */
char *name(char *buffer)
{
    char *decoded, *out;
    int i, hi, lo, has_escape = 0;

    if (buffer == NULL || *buffer != '/')
        return NULL;

    /* Look for an escape without reading past the end of the string. */
    for (i = 0; i < MAXNAMELEN && buffer[i] != '\0'; i++) {
        if (buffer[i] == '#') {
            has_escape = 1;
            break;
        }
    }
    if (!has_escape)
        return buffer;

    /* One extra byte for the terminator. */
    out = decoded = (char *) xmalloc(MAXNAMELEN + 1);
    while (*buffer != '\0' && *buffer != ' '
           && out - decoded < MAXNAMELEN) {
        hi = lo = -1;
        if (*buffer == '#') {
            hi = name_hexval(buffer[1]);
            /* Only look at the second digit when the first one exists,
               so the terminator is never stepped over. */
            if (hi >= 0)
                lo = name_hexval(buffer[2]);
        }
        if (hi >= 0 && lo >= 0) {
            *out++ = (char) (hi * 16 + lo);
            buffer += 3;
        } else {
            *out++ = *buffer++;
        }
    }
    *out = '\0';
    return decoded;
}

/* Split one line of text into array tokens.
 *
 * buffer is scanned up to the first newline or the end of the string.
 * Each '[' and ']' becomes an element pointing at a static one-character
 * string ("[" or "]"); these elements must NOT be passed to free().
 * Every other run of characters delimited by spaces or brackets becomes
 * an element allocated with xcalloc(), which the caller frees.  Runs of
 * spaces do not produce empty elements.
 *
 * Returns a vector obtained from realloc(), with a NULL pointer stored
 * after the last element, or NULL when the line holds no tokens.  The
 * caller frees the vector itself with free().  The process exits when
 * the vector cannot be grown.
 */
char **array(char *buffer)
{
    static char delim[4] = { '[', '\0', ']', '\0' };
    char **ret = NULL, **grown, *start;
    int index = 0, len;

    while (*buffer != '\0' && *buffer != '\n') {
        if (*buffer == ' ') {
            buffer++;
            continue;
        }

        /* Room for the new element plus the trailing NULL. */
        grown = (char **) realloc(ret, (index + 2) * sizeof(char *));
        if (grown == NULL) {
            fprintf(stderr, "array: out of memory\n");
            exit(EXIT_FAILURE);
        }
        ret = grown;

        if (*buffer == '[' || *buffer == ']') {
            ret[index] = (*buffer == '[') ? &delim[0] : &delim[2];
            buffer++;
        } else {
            /* The token starts here, whatever preceded it. */
            start = buffer;
            while (*buffer != '\0' && *buffer != ' ' && *buffer != '['
                   && *buffer != ']' && *buffer != '\n')
                buffer++;
            len = buffer - start;
            ret[index] = (char *) xcalloc(len + 1, sizeof(char));
            memcpy(ret[index], start, len);
        }
        index++;
        ret[index] = NULL;
    }
    return ret;
}


#define BUFLEN 255
/* Read the object that starts at byte `offset` of the file fp and store
 * it in the global object table.
 *
 * The first line supplies the object number.  If the next line starts
 * with "<<" it is skipped.  Every following line up to, but excluding,
 * the first line containing "endobj" is appended to the new object's
 * `unparsed` text; `unparsed` stays NULL when there are no such lines.
 * The result is stored in object[obj_num].
 *
 * Nothing is stored when the seek fails, when the first line does not
 * start with a non-negative number, or when memory runs out.
 * NOTE(review): the object number is not checked against the size of
 * the object table here - confirm the caller guarantees it fits.
 */
void read_obj(long offset)
{
    char buffer[BUFLEN];
    char *grown;
    int obj_num;
    size_t used = 0, len;
    struct object *obj;

    if (fseek(fp, offset, SEEK_SET) != 0)
        return;
    fget(buffer, BUFLEN, fp);
    if (sscanf(buffer, "%d ", &obj_num) != 1 || obj_num < 0)
        return;

    obj = (struct object *) xcalloc(1, sizeof(struct object));
    fget(buffer, BUFLEN, fp);
    if (buffer[0] == '<' && buffer[1] == '<')
        fget(buffer, BUFLEN, fp);
    while (!strstr(buffer, "endobj")) {
        len = strlen(buffer);
        grown = realloc(obj->unparsed, used + len + 1);
        if (grown == NULL) {
            free(obj->unparsed);
            free(obj);
            return;
        }
        /* Append at the tracked end: freshly allocated memory holds no
           terminator to search for. */
        memcpy(grown + used, buffer, len + 1);
        obj->unparsed = grown;
        used += len;
        fget(buffer, BUFLEN, fp);
        /* NOTE(review): assumes fget() reads through stdio on fp, so a
           missing "endobj" ends the loop at end of file - confirm. */
        if (feof(fp) || ferror(fp))
            break;
    }
    object[obj_num] = obj;
    return;
}

/* Add the object described by one cross-reference line to the object
 * table.
 *
 * s must start with the decimal byte offset of the object.  The object
 * is loaded with read_obj() and the file position of fp is put back to
 * where it was before the call.  Lines that do not start with a positive
 * number are ignored.
 */
void add_obj(char *s)
{
    long offset, saved;

    if (s == NULL || sscanf(s, "%ld ", &offset) != 1 || offset <= 0)
        return;
    saved = ftell(fp);
    read_obj(offset);
    /* ftell() reports failure as -1; do not seek to that. */
    if (saved >= 0)
        fseek(fp, saved, SEEK_SET);
}

/* Return 1 when text begins with the name `wanted` and the name ends
 * there, i.e. the next character is the end of the string, white space
 * or a delimiter.
 */
static int filter_name_is(const char *text, const char *wanted)
{
    size_t n = strlen(wanted);

    if (strncmp(text, wanted, n) != 0)
        return 0;
    /* strchr() also matches the terminating NUL of its first argument,
       which covers a name at the very end of the string. */
    return strchr(" \t\r\n\f/[]<>()%", text[n]) != NULL;
}

/* Get the filter number for a filter name.
 *
 * filter points at the name, including its leading '/'; text following
 * the name (the rest of a dictionary line, for instance) is ignored.
 * Returns FLATEDECODE or DCTDECODE, or -1 when filter is NULL or names
 * neither of them.
 * NOTE(review): the -1 is returned through a plain char, whose
 * signedness is implementation-defined; compare the result against
 * (char) -1 rather than testing for a negative value.
 */
char get_filternum(char *filter)
{
    if (filter == NULL)
        return -1;
    if (filter_name_is(filter, "/FlateDecode"))
        return FLATEDECODE;
    if (filter_name_is(filter, "/DCTDecode"))
        return DCTDECODE;
    return -1;
}

/* Get the offset of the start of the xref table.
 *
 * The last bytes of the file fp are loaded and searched backwards for
 * the keyword "startxref"; the decimal number that follows it is
 * returned.  Working from the keyword rather than from a fixed distance
 * to the end of the file makes the line-ending style and the presence of
 * a final newline irrelevant.
 *
 * Side effects: fpstat is refreshed with fstat() and the position of fp
 * is left near the end of the file.
 * Returns 0 when the file cannot be examined or holds no "startxref".
 */
long get_startxref(void)
{
    enum { TAIL_MAX = 1024 };
    static const char keyword[] = "startxref";
    const size_t klen = sizeof keyword - 1;
    char tail[TAIL_MAX + 1];
    long size, start;
    size_t got, i;

    /* NOTE(review): _fileno is a C-library internal field; fileno(fp)
       is the portable way to obtain the descriptor. */
    if (fstat(fp->_fileno, &fpstat) != 0)
        return 0;
    size = (long) fpstat.st_size;
    start = size > TAIL_MAX ? size - TAIL_MAX : 0;
    if (fseek(fp, start, SEEK_SET) != 0)
        return 0;
    got = fread(tail, 1, TAIL_MAX, fp);
    tail[got] = '\0';
    if (got < klen)
        return 0;

    /* Last occurrence wins. */
    for (i = got - klen + 1; i-- > 0;) {
        if (memcmp(&tail[i], keyword, klen) == 0)
            /* strtoul() skips the line break after the keyword. */
            return (long) strtoul(&tail[i + klen], NULL, 10);
    }
    return 0;
}

/* Walk the page tree below object `num` and record its pages.
 *
 * An object whose text contains "/Page" not followed by 's' is a leaf:
 * its number is appended to page_tree and current_page is advanced.
 * An object with "/Pages" is marked PAGES and each "N G R" triple in its
 * "/Kids [...]" array is visited recursively.
 *
 * Objects that are missing or carry no unparsed text, and nodes without
 * "/Page" or "/Kids ", are skipped silently.
 * NOTE(review): page_tree is written without a bound check here; the
 * caller must have sized it for every page in the tree.
 */
void probe_ptree(int num)
{
    char *p, **kids, **old;

    if (num < 0 || object[num] == NULL || object[num]->unparsed == NULL)
        return;
    p = strstr(object[num]->unparsed, "/Page");
    if (!p) {
        /* error */
        return;
    }
    if (p[5] == 's')
        object[num]->type = PAGES;
    else {
        page_tree[current_page] = num;
        current_page++;
        return;
    }
    p = strstr(object[num]->unparsed, "/Kids ");
    if (!p) {
        /* error */
        return;
    }
    old = kids = array(&p[6]);
    if (kids == NULL)
        return;
    /* kids[0] is the "[" marker; the references follow it. */
    for (kids++; *kids != NULL && **kids != ']'; kids += 3) {
        /* A reference needs three tokens.  On a truncated one release
           what was allocated and stop, so the static "]" marker is never
           handed to free(). */
        if (kids[1] == NULL || *kids[1] == ']') {
            free(kids[0]);
            break;
        }
        if (kids[2] == NULL || *kids[2] == ']') {
            free(kids[0]);
            free(kids[1]);
            break;
        }
        probe_ptree(atoi(*kids));
        free(kids[0]);
        free(kids[1]);
        free(kids[2]);
    }
    free(old);
    return;
}

/* Find the objects listed in the cross-reference table(s) and add them
 * to the object table.
 *
 * Starting at the offset given by get_startxref(), each section is read
 * as: the "xref" keyword line, one "first count" header, the entry lines
 * up to the "trailer" line, then the trailer dictionary up to a line
 * starting with ">>".  Every entry not marked 'f' is loaded through
 * add_obj().  /Root, /Info, /Encrypt and /Prev are copied from the
 * trailer dictionary into `trailer`; while /Prev is non-zero the section
 * it points at is processed the same way.
 *
 * The object table is reset (object = NULL, obj_count = 0) once the
 * first "xref" keyword has been found, and grows to first + count slots,
 * new slots being set to NULL.  The function returns early on a missing
 * keyword, a malformed header, memory exhaustion or end of file.
 * NOTE(review): only one subsection header per section is recognised,
 * and a /Prev chain that loops back on itself is not detected.
 */
void xref(void)
{
    char line[XREFLINE];
    struct object **grown;
    long offset = 0;
    int first, count, needed, i;

    offset = get_startxref();
    fseek(fp, offset, SEEK_SET);
    fget(line, XREFLINE, fp);
    if (!strstr(line, "xref")) {
        /* error */
        return;
    }
    object = NULL;
    obj_count = 0;
    for (;;) {
        fget(line, XREFLINE, fp);
        if (sscanf(line, "%d %d", &first, &count) != 2
            || first < 0 || count < 0) {
            /* error */
            return;
        }
        needed = first + count;
        if (needed > obj_count) {
            /* The table holds pointers: size it in elements, not bytes. */
            grown = (struct object **)
                realloc(object, needed * sizeof(struct object *));
            if (grown == NULL) {
                /* error */
                return;
            }
            for (i = obj_count; i < needed; i++)
                grown[i] = NULL;
            object = grown;
            obj_count = needed;
        }
        for (;;) {
            fget(line, XREFLINE, fp);
            /* NOTE(review): assumes fget() reads through stdio on fp, so
               a table without "trailer" ends here - confirm. */
            if (feof(fp) || ferror(fp))
                return;
            /* Test for the keyword first so it is never taken for an
               entry. */
            if (!strncmp(line, "trailer", 7))
                break;
            if (line[XREFLINE - 3] == 'f')
                continue;
            add_obj(&line[0]);
        }
        fseek(fp, 3, SEEK_CUR);
        fget(line, XREFLINE, fp);
        trailer.prev = 0;
        /* Stop only on a line that starts with both '>' characters. */
        while (line[0] != '>' || line[1] != '>') {
            if (!strncmp(line, "/Root", 5))
                sscanf(line, "/Root %d 0 R \n", &trailer.root);
            if (!strncmp(line, "/Info", 5))
                sscanf(line, "/Info %d 0 R \n", &trailer.info);
            if (!strncmp(line, "/Prev", 5))
                sscanf(line, "/Prev %ld", &trailer.prev);
            if (!strncmp(line, "/Encrypt", 8))
                sscanf(line, "/Encrypt %d 0 R \n", &trailer.encrypt);
            fget(line, XREFLINE, fp);
            if (feof(fp) || ferror(fp))
                return;
        }
        if (!trailer.prev)
            break;
        fseek(fp, trailer.prev, SEEK_SET);
        /* The earlier section starts with its own "xref" keyword line;
           consume it so the header is read next. */
        fget(line, XREFLINE, fp);
        if (!strstr(line, "xref")) {
            /* error */
            return;
        }
    }
    return;
}

/* Parse the document catalog, i.e. the object named by trailer.root.
 *
 * The object is marked CATALOG.  The object number in its "/Pages N 0 R"
 * entry is stored in a new struct catalog, which becomes the object's
 * `parsed` data, and the object's unparsed text is released.
 *
 * Returns the object number of the page tree root, or -1 when the root
 * object is missing, has no unparsed text, or has no usable "/Pages"
 * entry.
 */
int parse_catalog(void)
{
    char *p;
    struct catalog *c;

    if (trailer.root < 0 || object == NULL
        || object[trailer.root] == NULL
        || object[trailer.root]->unparsed == NULL)
        return -1;
    object[trailer.root]->type = CATALOG;
    p = strstr(object[trailer.root]->unparsed, "/Pages");
    if (!p) {
        /* error */
        return -1;
    }
    c = (struct catalog *) xmalloc(sizeof(struct catalog));
    if (sscanf(p, "/Pages %d 0 R \n", &c->pages) != 1) {
        /* error */
        free(c);
        return -1;
    }
    object[trailer.root]->parsed = c;
    free(object[trailer.root]->unparsed);
    object[trailer.root]->unparsed = NULL;
    return c->pages;
}

/* Parse the overall structure of the open PDF file.
 *
 * Loads the objects through xref(), parses the catalog, reads "/Count"
 * from the page tree root, allocates page_tree with room for that many
 * page numbers and fills it by walking the tree with probe_ptree().
 *
 * Returns early, leaving page_tree untouched, when the catalog or the
 * page tree root cannot be used, "/Count" is missing or negative, or
 * memory runs out.
 */
void parse_pdf(void)
{
    int ret, count;
    char *p;

    xref();
    ret = parse_catalog();
    if (ret < 0 || object == NULL || object[ret] == NULL
        || object[ret]->unparsed == NULL)
        return;
    p = strstr(object[ret]->unparsed, "/Count ");
    if (!p || sscanf(p, "/Count %d \n", &count) != 1 || count < 0)
        return;
    /* One int per page; at least one element so that an empty document
       still gets a valid block. */
    page_tree = (int *) calloc(count > 0 ? count : 1, sizeof(int));
    if (page_tree == NULL)
        return;
    current_page = 0;
    probe_ptree(ret);
    return;
}

/* Turn object `page_num` into a parsed page.
 *
 * Does nothing when the object is already of type PAGE.  Otherwise the
 * unparsed text must contain "/Contents ", "/Resources ", a media box
 * and "/Parent ".  "/Contents" and "/Resources" either refer to another
 * object ("N 0 R") or, when followed by "<<", are taken to be part of
 * this object.  The media box is looked up as "/MediaBox " first and as
 * "/Mediabox " second, and must be an array of four numbers.
 *
 * On success a new struct page is attached as the object's `parsed`
 * data, the unparsed text is released and the type becomes PAGE.
 * On any missing or malformed entry the object is left unchanged and
 * nothing stays allocated.
 */
void fill_page(int page_num)
{
    struct page *page = NULL;
    struct object *obj;
    char *p, *parent_ref, **mediabox;
    int contents, resources, ntok, i;

    if (page_num < 0 || object[page_num] == NULL)
        return;
    if (object[page_num]->type == PAGE)
        return;
    obj = object[page_num];
    if (obj->unparsed == NULL)
        return;

    p = strstr(obj->unparsed, "/Contents ");
    if (!p) {
        /* error */
        return;
    }
    if (p[10] != '\0' && p[11] == '<')  /* if /Contents << */
        contents = page_num;
    else if (sscanf(p, "/Contents %d 0 R", &contents) != 1)
        return;

    p = strstr(obj->unparsed, "/Resources ");
    if (!p) {
        /* error */
        return;
    }
    if (p[11] == '<')                   /* if /Resources << */
        resources = page_num;
    else if (sscanf(p, "/Resources %d 0 R", &resources) != 1)
        return;

    parent_ref = strstr(obj->unparsed, "/Parent ");
    if (!parent_ref) {
        /* error */
        return;
    }

    p = strstr(obj->unparsed, "/MediaBox ");
    if (!p)
        p = strstr(obj->unparsed, "/Mediabox ");
    /* Both spellings are ten characters long, so p[10] is the '['. */
    if (!p || p[10] != '[') {
        /* error */
        return;
    }
    mediabox = array(&p[10]);
    if (mediabox == NULL)
        return;
    /* mediabox[0] is the static "[" marker; count the number tokens that
       follow it, stopping at the first bracket. */
    ntok = 0;
    while (ntok < 4 && mediabox[ntok + 1] != NULL
           && *mediabox[ntok + 1] != '[' && *mediabox[ntok + 1] != ']')
        ntok++;
    if (ntok == 4) {
        page = (struct page *) xmalloc(sizeof(struct page));
        for (i = 0; i < 4; i++)
            page->mediabox[i] =
                (short) strtoul(mediabox[i + 1], NULL, 10);
    }
    for (i = 1; i <= ntok; i++)
        free(mediabox[i]);
    free(mediabox);
    if (page == NULL) {
        /* error */
        return;
    }

    if (sscanf(parent_ref, "/Parent %d 0 R", &page->parent) != 1) {
        /* error */
        free(page);
        return;
    }
    /* Done last so that no earlier error return can leak the results. */
    page->contents = fill_contents(contents);
    page->resources = fill_resources(resources);
    free(obj->unparsed);
    obj->unparsed = NULL;
    obj->type = PAGE;
    obj->parsed = page;
    return;
}

/* Build the contents description of object `page_num`.
 *
 * When the object already carries parsed page data with contents set,
 * those contents are returned.  Otherwise "/Length", "/Filter" and the
 * "stream" keyword are looked up in the object's unparsed text and a new
 * struct contents is filled in: `length` from "/Length", `filter` from
 * get_filternum() applied to the filter name, and `stream` with `length`
 * bytes copied from the text after the keyword's line break.
 *
 * Returns NULL when the object is missing or has no unparsed text, or
 * when any of the three items is absent or malformed.
 * NOTE(review): the data is copied with strncpy(), so it ends at the
 * first NUL byte of the text and the rest of `stream` is zero-filled.
 */
struct contents *fill_contents(int page_num)
{
    struct contents *content;
    struct object *obj;
    struct page *page;
    char *p, *data;

    if (page_num < 0)
        return NULL;
    obj = object[page_num];
    if (obj == NULL)
        return NULL;
    page = (struct page *) obj->parsed;
    /* parsed stays NULL until the object has been processed. */
    if (page != NULL && page->contents)
        return page->contents;
    if (obj->unparsed == NULL)
        return NULL;

    content = (struct contents *) xmalloc(sizeof(struct contents));
    p = strstr(obj->unparsed, "/Length ");
    if (!p || sscanf(p, "/Length %d", &content->length) != 1
        || content->length < 0) {
        /* error */
        free(content);
        return NULL;
    }
    p = strstr(obj->unparsed, "/Filter ");
    if (!p) {
        /* error */
        free(content);
        return NULL;
    }
    /* "/Filter " is eight characters: p[8] is the '/' of the name. */
    content->filter = get_filternum(&p[8]);
    p = strstr(obj->unparsed, "stream");
    if (!p || p[6] == '\0') {
        /* error */
        free(content);
        return NULL;
    }
    /* The keyword is followed by LF or by CR LF. */
    data = (p[6] == '\r' && p[7] == '\n') ? &p[8] : &p[7];
    content->stream = (char *) xmalloc(content->length);
    strncpy(content->stream, data, content->length);
    return content;
}

/* Build the resources description of object `page_num`.
 *
 * When the object already carries parsed page data with resources set,
 * those resources are returned.  Otherwise a new struct resources with
 * all bytes zero is returned; nothing is read from the object's text
 * yet.
 *
 * Returns NULL when page_num is negative or the object is missing.
 */
struct resources *fill_resources(int page_num)
{
    struct resources *resources;
    struct object *obj;
    struct page *page;

    if (page_num < 0)
        return NULL;
    obj = object[page_num];
    if (obj == NULL)
        return NULL;
    page = obj->parsed;
    /* parsed stays NULL until the object has been processed. */
    if (page != NULL && page->resources)
        return page->resources;
    resources = (struct resources *) xmalloc(sizeof(struct resources));
    /* Hand back a defined value rather than whatever xmalloc() left. */
    memset(resources, 0, sizeof(struct resources));
    return resources;
}

Platon Group <platon@platon.sk> http://platon.sk/
Copyright © 2002-2006 Platon Group
Stránka používa redakčný systém Metafox
Na začiatok